PyPI - cool-seq-tool - Versions diffs - 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

cool_seq_tool/__init__.py +7 -11
cool_seq_tool/app.py +44 -24
cool_seq_tool/handlers/__init__.py +1 -0
cool_seq_tool/handlers/seqrepo_access.py +27 -25
cool_seq_tool/mappers/__init__.py +3 -1
cool_seq_tool/mappers/alignment.py +5 -6
cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
cool_seq_tool/mappers/liftover.py +90 -0
cool_seq_tool/mappers/mane_transcript.py +208 -113
cool_seq_tool/resources/__init__.py +1 -0
cool_seq_tool/resources/data_files.py +93 -0
cool_seq_tool/resources/status.py +153 -0
cool_seq_tool/schemas.py +92 -54
cool_seq_tool/sources/__init__.py +1 -0
cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
cool_seq_tool/sources/transcript_mappings.py +41 -32
cool_seq_tool/sources/uta_database.py +96 -249
cool_seq_tool/utils.py +44 -4
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
cool_seq_tool/api.py +0 -42
cool_seq_tool/data/__init__.py +0 -2
cool_seq_tool/data/data_downloads.py +0 -89
cool_seq_tool/paths.py +0 -28
cool_seq_tool/routers/__init__.py +0 -16
cool_seq_tool/routers/default.py +0 -125
cool_seq_tool/routers/mane.py +0 -98
cool_seq_tool/routers/mappings.py +0 -155
cool_seq_tool/version.py +0 -2
cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
/cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0

cool_seq_tool/resources/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Provide tools for acquiring and managing Cool-Seq-Tool data resources."""

cool_seq_tool/resources/data_files.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""Fetch data files regarding transcript mapping and annotation."""
+import logging
+from enum import Enum
+from importlib import resources
+from os import environ
+from pathlib import Path
+from wags_tails import NcbiLrgRefSeqGeneData, NcbiManeSummaryData
+_logger = logging.getLogger(__name__)
+class DataFile(str, Enum):
+    """Enumerate the file resources that may be requested from :py:meth:`get_data_file() <cool_seq_tool.resources.data_files.get_data_file>`."""
+    TRANSCRIPT_MAPPINGS = "transcript_mappings"
+    MANE_SUMMARY = "mane_summary"
+    LRG_REFSEQGENE = "lrg_refseqgene"
+    def lower(self) -> str:
+        """Provide the member's value in lower case.
+        :return: lower-cased member value
+        """
+        return str.lower(self.value)
+def _bundled_transcript_mappings(_: bool):
+    """Locate the ``transcript_mapping.tsv`` resource inside this package. The argument is ignored."""
+    return resources.files(__package__) / "transcript_mapping.tsv"
+def _latest_mane_summary(from_local: bool):
+    """Return the first element of ``NcbiManeSummaryData.get_latest()`` (presumably the file path -- confirm against wags-tails)."""
+    provider = NcbiManeSummaryData(silent=True)
+    return provider.get_latest(from_local=from_local)[0]
+def _latest_lrg_refseqgene(from_local: bool):
+    """Return the first element of ``NcbiLrgRefSeqGeneData.get_latest()`` (presumably the file path -- confirm against wags-tails)."""
+    provider = NcbiLrgRefSeqGeneData(silent=True)
+    return provider.get_latest(from_local=from_local)[0]
+# Maps each resource to a pair: (name of the env var that can override its location,
+# fallback fetcher called with the ``from_local`` flag when that env var is unset)
+_resource_acquisition_params = {
+    DataFile.TRANSCRIPT_MAPPINGS: (
+        "TRANSCRIPT_MAPPINGS_PATH",
+        _bundled_transcript_mappings,
+    ),
+    DataFile.MANE_SUMMARY: ("MANE_SUMMARY_PATH", _latest_mane_summary),
+    DataFile.LRG_REFSEQGENE: ("LRG_REFSEQGENE_PATH", _latest_lrg_refseqgene),
+}
+def get_data_file(resource: DataFile, from_local: bool = False) -> Path:
+    """Acquire Cool-Seq-Tool file dependency.
+    Each resource can be defined using an environment variable:
+    * ``DataFile.TRANSCRIPT_MAPPINGS`` -> ``TRANSCRIPT_MAPPINGS_PATH``
+    * ``DataFile.MANE_SUMMARY`` -> ``MANE_SUMMARY_PATH``
+    * ``DataFile.LRG_REFSEQGENE`` -> ``LRG_REFSEQGENE_PATH``
+    Otherwise, this function falls back on default expected locations:
+    * ``transcript_mapping.tsv`` is bundled with this library.
+    * LRG RefseqGene and MANE summary files are acquired from NCBI using the `wags-tails <https://wags-tails.readthedocs.io/stable/>`_ package if unavailable locally, or out of date.
+    :param resource: resource to fetch
+    :param from_local: if ``True``, don't check for or acquire latest version -- just
+        provide most recent locally available file and raise FileNotFoundError otherwise.
+        Only consulted when no environment variable is set for the resource.
+    :return: path to file. A location configured by env var is verified here to exist
+        and to be a file; a default location is returned as given by its fetcher.
+    :raise FileNotFoundError: if file location configured by env var doesn't exist
+    :raise ValueError: if file location configured by env var isn't a file
+    """
+    env_var_name, fetch_default = _resource_acquisition_params[resource]
+    configured_path = environ.get(env_var_name)
+    if configured_path:
+        _logger.debug(
+            "Acquiring %s via env var %s:%s", resource, env_var_name, configured_path
+        )
+        path = Path(configured_path)
+        # Only build the (long) error message when the configured path is unusable
+        if not path.is_file():
+            resource_name = env_var_name.replace("_", " ").title()
+            loc_descr = (
+                "the default file bundled with Cool-Seq-Tool"
+                if resource == DataFile.TRANSCRIPT_MAPPINGS
+                else "the default file pattern and possibly acquire from source via the `wags-tails` package"
+            )
+            # Distinguish "nothing there" from "something there, but not a file" so the
+            # message matches the exception type that callers catch
+            if path.exists():
+                error_type = ValueError
+                problem = f"Path {configured_path} defined under env var {env_var_name} exists but is not a {resource_name} file."
+            else:
+                error_type = FileNotFoundError
+                problem = f"No {resource_name} file exists at path {configured_path} defined under env var {env_var_name}."
+            msg = f'{problem} Either unset to use {loc_descr}, or ensure that it is available at this location. See the "Environment configuration" section under the Usage page within the documentation for more: https://coolseqtool.readthedocs.io/stable/usage.html#environment-configuration'
+            raise error_type(msg)
+    else:
+        _logger.debug("Acquiring %s from default location/method.", resource)
+        # the second entry is the resource fetcher function -- use `from_local` param
+        # to optionally avoid unnecessary fetches
+        path = fetch_default(from_local)
+    _logger.debug("Acquired %s at %s", resource, path)
+    return path

cool_seq_tool/resources/status.py ADDED Viewed

@@ -0,0 +1,153 @@
+"""Enable quick status check of Cool-Seq-Tool resources."""
+import logging
+from collections import namedtuple
+from pathlib import Path
+from agct._core import ChainfileError
+from asyncpg import InvalidCatalogNameError, UndefinedTableError
+from biocommons.seqrepo import SeqRepo
+from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
+from cool_seq_tool.mappers.liftover import LiftOver
+from cool_seq_tool.resources.data_files import DataFile, get_data_file
+from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
+_logger = logging.getLogger(__name__)
+# One boolean slot per checked resource: the UTA database, SeqRepo, each bundled or
+# downloadable data file (named after its ``DataFile`` value), and liftover.
+ResourceStatus = namedtuple(
+    "ResourceStatus",
+    [
+        "uta",
+        "seqrepo",
+        DataFile.TRANSCRIPT_MAPPINGS.value,
+        DataFile.MANE_SUMMARY.value,
+        DataFile.LRG_REFSEQGENE.value,
+        "liftover",
+    ],
+)
+async def check_status(
+    transcript_file_path: Path | None = None,
+    lrg_refseqgene_path: Path | None = None,
+    mane_data_path: Path | None = None,
+    db_url: str = UTA_DB_URL,
+    sr: SeqRepo | None = None,
+    chain_file_37_to_38: str | None = None,
+    chain_file_38_to_37: str | None = None,
+) -> ResourceStatus:
+    """Perform basic status checks on availability of required data resources.
+    Arguments are intended to mirror arguments to :py:meth:`cool_seq_tool.app.CoolSeqTool.__init__`.
+    Additional arguments are available for testing paths to specific chainfiles (same
+    signature as :py:meth:`cool_seq_tool.mappers.liftover.LiftOver.__init__`).
+    >>> from cool_seq_tool.resources.status import check_status
+    >>> await check_status()
+    ResourceStatus(uta=True, seqrepo=True, transcript_mappings=True, mane_summary=True, lrg_refseqgene=True, liftover=True)
+    :param transcript_file_path: The path to ``transcript_mapping.tsv``
+    :param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
+    :param mane_data_path: Path to RefSeq MANE summary data
+    :param db_url: PostgreSQL connection URL
+        Format: ``driver://user:password@host/database/schema``
+    :param sr: SeqRepo instance to test. If not provided, one is created from
+        ``SEQREPO_ROOT_DIR``
+    :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly. This
+        is used for ``agct``. If this is not provided, will check to see if
+        ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will allow
+        ``agct`` to download a chain file from UCSC
+    :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly. This
+        is used for ``agct``. If this is not provided, will check to see if
+        ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will allow
+        ``agct`` to download a chain file from UCSC
+    :return: boolean description of availability of each resource, given current
+        environment configurations
+    """
+    file_path_params = {
+        DataFile.TRANSCRIPT_MAPPINGS.lower(): transcript_file_path,
+        DataFile.LRG_REFSEQGENE.lower(): lrg_refseqgene_path,
+        DataFile.MANE_SUMMARY.lower(): mane_data_path,
+    }
+    # Every resource starts out unavailable; a check flips it to True only on success
+    status = dict.fromkeys(ResourceStatus._fields, False)
+    for r in DataFile:
+        name_lower = r.lower()
+        declared_path = file_path_params[name_lower]
+        if declared_path and declared_path.is_file():
+            status[name_lower] = True
+            continue
+        # No usable path was passed in: fall back on env var/default acquisition. The
+        # exception text is logged because the failing location may come from an env
+        # var rather than from ``declared_path`` (which is often None here).
+        try:
+            get_data_file(r)
+        except FileNotFoundError as e:
+            _logger.error(
+                "%s does not exist at configured location (declared path: %s): %s",
+                name_lower,
+                declared_path,
+                e,
+            )
+        except ValueError as e:
+            _logger.error(
+                "%s is not configured as a valid file (declared path: %s): %s",
+                name_lower,
+                declared_path,
+                e,
+            )
+        except Exception as e:
+            _logger.critical(
+                "Encountered unexpected error fetching %s: %s", name_lower, e
+            )
+        else:
+            status[name_lower] = True
+    try:
+        LiftOver(
+            chain_file_37_to_38=chain_file_37_to_38,
+            chain_file_38_to_37=chain_file_38_to_37,
+        )
+    except (FileNotFoundError, ChainfileError) as e:
+        _logger.error("agct converter setup failed: %s", e)
+    except Exception as e:
+        _logger.critical("Encountered unexpected error setting up agct: %s", e)
+    else:
+        status["liftover"] = True
+    try:
+        await UtaDatabase.create(db_url)
+    except (OSError, InvalidCatalogNameError, UndefinedTableError) as e:
+        # Report the URL that was actually tested, not the module-level default
+        _logger.error("Encountered error instantiating UTA at URI %s: %s", db_url, e)
+    except Exception as e:
+        _logger.critical(
+            "Encountered unexpected error instantiating UTA from URI %s: %s",
+            db_url,
+            e,
+        )
+    else:
+        status["uta"] = True
+    try:
+        if not sr:
+            sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
+        sra = SeqRepoAccess(sr)
+        # Fetch a single residue as a smoke test that sequence data is retrievable
+        sra.sr["NC_000001.11"][1000:1001]
+    except OSError as e:
+        _logger.error("Encountered error while instantiating SeqRepo: %s", e)
+    except KeyError:
+        _logger.error("SeqRepo data fetch test failed -- is it populated?")
+    except Exception as e:
+        _logger.critical("Encountered unexpected error setting up SeqRepo: %s", e)
+    else:
+        status["seqrepo"] = True
+    structured_status = ResourceStatus(**status)
+    if all(status.values()):
+        _logger.info("Cool-Seq-Tool resource status passed")
+    else:
+        _logger.error(
+            "Cool-Seq-Tool resource check failed. Result: %s", structured_status
+        )
+    return structured_status

cool_seq_tool/schemas.py CHANGED Viewed

@@ -1,19 +1,18 @@
 """Defines attribute constants, useful object structures, and API response schemas."""
 import datetime
-import re
 from enum import Enum, IntEnum
-from typing import List, Literal, Optional, Tuple, Union
+from typing import Literal
 from pydantic import (
     BaseModel,
     ConfigDict,
     StrictInt,
     StrictStr,
-    field_validator,
     model_validator,
 )
-from cool_seq_tool.version import __version__
+from cool_seq_tool import __version__
 _now = str(datetime.datetime.now(tz=datetime.timezone.utc))
@@ -34,11 +33,16 @@ class Strand(IntEnum):
 class Assembly(str, Enum):
-    """Create Enum for supported genomic assemblies"""
+    """Define supported genomic assemblies. Must be defined in ascending order"""
     GRCH37 = "GRCh37"
     GRCH38 = "GRCh38"
+    @classmethod
+    def values(cls) -> list[str]:
+        """Return list of values in enum (ascending assembly order)"""
+        return [item.value for item in cls]
 class TranscriptPriority(str, Enum):
     """Create Enum for Transcript Priority labels"""
@@ -52,10 +56,55 @@ class TranscriptPriority(str, Enum):
 class ResidueMode(str, Enum):
     """Create Enum for residue modes.
+    We typically prefer to operate in inter-residue coordinates, but users should be
+    careful to define the coordinate mode of their data when calling ``cool-seq-tool``
+    functions.
                       |   | C |   | T |   | G |   |
     ZERO              |   | 0 |   | 1 |   | 2 |   |
     RESIDUE           |   | 1 |   | 2 |   | 3 |   |
     INTER_RESIDUE     | 0 |   | 1 |   | 2 |   | 3 |
+    .. tabularcolumns:: |L|C|C|C|C|C|C|C|
+    .. list-table::
+       :header-rows: 1
+       * -
+         -
+         - C
+         -
+         - T
+         -
+         - G
+         -
+       * - ``ZERO``
+         -
+         - 0
+         -
+         - 1
+         -
+         - 2
+         -
+       * - ``RESIDUE``
+         -
+         - 1
+         -
+         - 2
+         -
+         - 3
+         -
+       * - ``INTER_RESIDUE``
+         - 0
+         -
+         - 1
+         -
+         - 2
+         -
+         - 3
+    See "Conventions that promote reliable data sharing" and figure 3 within the
+    `Variation Representation Schema (VRS) paper <https://www.ncbi.nlm.nih.gov/pmc/articles/pmid/35311178/>`_ for further discussion.
     """
     ZERO = "zero"
@@ -70,12 +119,12 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
 class GenomicRequestBody(BaseModelForbidExtra):
     """Define constraints for genomic to transcript exon coordinates request body"""
-    chromosome: Union[StrictStr, StrictInt]
-    start: Optional[StrictInt] = None
-    end: Optional[StrictInt] = None
-    strand: Optional[Strand] = None
-    transcript: Optional[StrictStr] = None
-    gene: Optional[StrictStr] = None
+    chromosome: StrictStr | StrictInt
+    start: StrictInt | None = None
+    end: StrictInt | None = None
+    strand: Strand | None = None
+    transcript: StrictStr | None = None
+    gene: StrictStr | None = None
     residue_mode: ResidueMode = ResidueMode.RESIDUE
     @model_validator(mode="after")
@@ -106,11 +155,11 @@ class TranscriptRequestBody(BaseModelForbidExtra):
     """Define constraints for transcript exon to genomic coordinates request body"""
     transcript: StrictStr
-    gene: Optional[StrictStr] = None
-    exon_start: Optional[StrictInt] = None
-    exon_start_offset: Optional[StrictInt] = 0
-    exon_end: Optional[StrictInt] = None
-    exon_end_offset: Optional[StrictInt] = 0
+    gene: StrictStr | None = None
+    exon_start: StrictInt | None = None
+    exon_start_offset: StrictInt | None = 0
+    exon_end: StrictInt | None = None
+    exon_end_offset: StrictInt | None = 0
     @model_validator(mode="after")
     def check_exon_start_and_exon_end(cls, values):
@@ -166,12 +215,12 @@ class GenomicData(BaseModelForbidExtra):
     gene: StrictStr
     chr: StrictStr
-    start: Optional[StrictInt] = None  # Genomic start position
-    end: Optional[StrictInt] = None  # Genomic end position
-    exon_start: Optional[StrictInt] = None
-    exon_start_offset: Optional[StrictInt] = 0
-    exon_end: Optional[StrictInt] = None
-    exon_end_offset: Optional[StrictInt] = 0
+    start: StrictInt | None = None  # Genomic start position
+    end: StrictInt | None = None  # Genomic end position
+    exon_start: StrictInt | None = None
+    exon_start_offset: StrictInt | None = 0
+    exon_end: StrictInt | None = None
+    exon_end_offset: StrictInt | None = 0
     transcript: StrictStr
     strand: Strand
@@ -226,20 +275,9 @@ class ServiceMeta(BaseModelForbidExtra):
     name: Literal["cool_seq_tool"] = "cool_seq_tool"
     version: StrictStr
     response_datetime: datetime.datetime
-    url: Literal[
+    url: Literal["https://github.com/GenomicMedLab/cool-seq-tool"] = (
         "https://github.com/GenomicMedLab/cool-seq-tool"
-    ] = "https://github.com/GenomicMedLab/cool-seq-tool"
-    @field_validator("version")
-    def validate_version(cls, v):
-        """Check version matches semantic versioning regex pattern.
-        https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
-        """
-        version_regex = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
-        if not re.match(version_regex, v):
-            msg = f"Invalid version {v}"
-            raise ValueError(msg)
-        return v
+    )
     model_config = ConfigDict(
         json_schema_extra={
@@ -256,8 +294,8 @@ class ServiceMeta(BaseModelForbidExtra):
 class TranscriptExonDataResponse(BaseModelForbidExtra):
     """Response model for Transcript Exon Data"""
-    transcript_exon_data: Optional[TranscriptExonData] = None
-    warnings: List[StrictStr] = []
+    transcript_exon_data: TranscriptExonData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -287,8 +325,8 @@ class TranscriptExonDataResponse(BaseModelForbidExtra):
 class GenomicDataResponse(BaseModelForbidExtra):
     """Response model for Genomic Data"""
-    genomic_data: Optional[GenomicData] = None
-    warnings: List[StrictStr] = []
+    genomic_data: GenomicData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -323,7 +361,7 @@ class MappedManeData(BaseModel):
     gene: StrictStr
     refseq: StrictStr
-    ensembl: Optional[StrictStr] = None
+    ensembl: StrictStr | None = None
     strand: Strand
     status: TranscriptPriority
     alt_ac: StrictStr
@@ -338,7 +376,7 @@ class MappedManeData(BaseModel):
                 "strand": Strand.NEGATIVE,
                 "status": TranscriptPriority.MANE_PLUS_CLINICAL,
                 "alt_ac": "NC_000007.13",
-                "assembly": "GRCh37",
+                "assembly": Assembly.GRCH37,
             }
         }
     )
@@ -347,8 +385,8 @@ class MappedManeData(BaseModel):
 class MappedManeDataService(BaseModelForbidExtra):
     """Service model response for mapped mane data"""
-    mapped_mane_data: Optional[MappedManeData] = None
-    warnings: List[StrictStr] = []
+    mapped_mane_data: MappedManeData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -361,7 +399,7 @@ class MappedManeDataService(BaseModelForbidExtra):
                     "strand": Strand.NEGATIVE,
                     "status": TranscriptPriority.MANE_PLUS_CLINICAL,
                     "alt_ac": "NC_000007.13",
-                    "assembly": "GRCh37",
+                    "assembly": Assembly.GRCH37,
                 },
                 "warnings": [],
                 "service_meta": {
@@ -378,10 +416,10 @@ class MappedManeDataService(BaseModelForbidExtra):
 class ManeData(BaseModel):
     """Define mane data fields"""
-    gene: Optional[StrictStr] = None
-    refseq: Optional[StrictStr] = None
-    ensembl: Optional[StrictStr] = None
-    pos: Tuple[int, int]
+    gene: StrictStr | None = None
+    refseq: StrictStr | None = None
+    ensembl: StrictStr | None = None
+    pos: tuple[int, int]
     strand: Strand
     status: TranscriptPriority
@@ -402,8 +440,8 @@ class ManeData(BaseModel):
 class ManeDataService(BaseModelForbidExtra):
     """Service model response for getting mane data"""
-    mane_data: Optional[ManeData] = None
-    warnings: List[StrictStr] = []
+    mane_data: ManeData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -457,8 +495,8 @@ class CdnaRepresentation(BaseModelForbidExtra):
 class ToCdnaService(BaseModelForbidExtra):
     """Service model response for protein -> cDNA"""
-    c_data: Optional[CdnaRepresentation] = None
-    warnings: List[StrictStr] = []
+    c_data: CdnaRepresentation | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -506,8 +544,8 @@ class GenomicRepresentation(BaseModelForbidExtra):
 class ToGenomicService(BaseModelForbidExtra):
     """Service model response for cDNA -> genomic"""
-    g_data: Optional[GenomicRepresentation] = None
-    warnings: List[StrictStr] = []
+    g_data: GenomicRepresentation | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(

cool_seq_tool/sources/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Module for providing basic acquisition/setup for the various resources"""
 from .mane_transcript_mappings import ManeTranscriptMappings
 from .transcript_mappings import TranscriptMappings
 from .uta_database import UtaDatabase

cool_seq_tool/sources/mane_transcript_mappings.py CHANGED Viewed

@@ -1,15 +1,15 @@
 """Provide fast tabular access to MANE summary file. Enables retrieval of associated
 MANE transcripts for gene symbols, genomic positions, or transcript accessions.
 """
 import logging
 from pathlib import Path
-from typing import Dict, List
 import polars as pl
-from cool_seq_tool.paths import MANE_SUMMARY_PATH
+from cool_seq_tool.resources.data_files import DataFile, get_data_file
-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)
 class ManeTranscriptMappings:
@@ -22,11 +22,18 @@ class ManeTranscriptMappings:
     See the `NCBI MANE page <https://www.ncbi.nlm.nih.gov/refseq/MANE/>`_ for more information.
     """
-    def __init__(self, mane_data_path: Path = MANE_SUMMARY_PATH) -> None:
+    def __init__(
+        self, mane_data_path: Path | None = None, from_local: bool = False
+    ) -> None:
         """Initialize the MANE Transcript mappings class.
-        :param Path mane_data_path: Path to RefSeq MANE summary data
+        :param mane_data_path: Path to RefSeq MANE summary data
+        :param from_local: if ``True``, don't check for or acquire latest version --
+            just provide most recent locally available file, if possible, and raise
+            error otherwise
         """
+        if not mane_data_path:
+            mane_data_path = get_data_file(DataFile.MANE_SUMMARY, from_local)
         self.mane_data_path = mane_data_path
         self.df = self._load_mane_transcript_data()
@@ -37,7 +44,7 @@ class ManeTranscriptMappings:
         """
         return pl.read_csv(self.mane_data_path, separator="\t")
-    def get_gene_mane_data(self, gene_symbol: str) -> List[Dict]:
+    def get_gene_mane_data(self, gene_symbol: str) -> list[dict]:
         """Return MANE Transcript data for a gene.
         >>> from cool_seq_tool.sources import ManeTranscriptMappings
@@ -56,7 +63,7 @@ class ManeTranscriptMappings:
         data = self.df.filter(pl.col("symbol") == gene_symbol.upper())
         if len(data) == 0:
-            logger.warning(
+            _logger.warning(
                 "Unable to get MANE Transcript data for gene: %s", gene_symbol
             )
             return []
@@ -64,7 +71,7 @@ class ManeTranscriptMappings:
         data = data.sort(by="MANE_status", descending=True)
         return data.to_dicts()
-    def get_mane_from_transcripts(self, transcripts: List[str]) -> List[Dict]:
+    def get_mane_from_transcripts(self, transcripts: list[str]) -> list[dict]:
         """Get mane transcripts from a list of transcripts
         :param List[str] transcripts: RefSeq transcripts on c. coordinate
@@ -77,7 +84,7 @@ class ManeTranscriptMappings:
     def get_mane_data_from_chr_pos(
         self, alt_ac: str, start: int, end: int
-    ) -> List[Dict]:
+    ) -> list[dict]:
         """Get MANE data given a GRCh38 genomic position.
         :param str alt_ac: NC Accession

cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl

cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl