PyPI - cool-seq-tool - Versions diffs - 0.4.0.dev3__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

cool_seq_tool/__init__.py +1 -3
cool_seq_tool/api.py +1 -2
cool_seq_tool/app.py +38 -23
cool_seq_tool/handlers/__init__.py +1 -0
cool_seq_tool/handlers/seqrepo_access.py +13 -15
cool_seq_tool/mappers/__init__.py +1 -0
cool_seq_tool/mappers/alignment.py +5 -6
cool_seq_tool/mappers/exon_genomic_coords.py +75 -73
cool_seq_tool/mappers/mane_transcript.py +84 -86
cool_seq_tool/resources/__init__.py +1 -0
cool_seq_tool/resources/data_files.py +93 -0
cool_seq_tool/resources/status.py +151 -0
cool_seq_tool/routers/__init__.py +1 -0
cool_seq_tool/routers/default.py +1 -0
cool_seq_tool/routers/mane.py +4 -4
cool_seq_tool/routers/mappings.py +2 -2
cool_seq_tool/schemas.py +83 -37
cool_seq_tool/sources/__init__.py +1 -0
cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
cool_seq_tool/sources/transcript_mappings.py +41 -32
cool_seq_tool/sources/uta_database.py +91 -70
cool_seq_tool/utils.py +2 -2
cool_seq_tool/version.py +2 -1
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/LICENSE +1 -1
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/METADATA +15 -8
cool_seq_tool-0.4.1.dist-info/RECORD +29 -0
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/WHEEL +1 -1
cool_seq_tool/data/__init__.py +0 -2
cool_seq_tool/data/data_downloads.py +0 -89
cool_seq_tool/paths.py +0 -28
cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
/cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/top_level.txt +0 -0

cool_seq_tool/routers/mappings.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Module containing routes related to alignment mapping"""
 import logging
-from typing import Optional
 from fastapi import APIRouter, Query
@@ -66,7 +66,7 @@ async def c_to_g(
     c_ac: str = Query(..., description="cDNA RefSeq accession"),
     c_start_pos: int = Query(..., description="cDNA start position for codon"),
     c_end_pos: int = Query(..., description="cDNA end position for codon"),
-    cds_start: Optional[int] = Query(
+    cds_start: int | None = Query(
         None, description="CDS start site. If not provided, this will be computed."
     ),
     residue_mode: ResidueMode = Query(

cool_seq_tool/schemas.py CHANGED Viewed

@@ -1,8 +1,9 @@
 """Defines attribute constants, useful object structures, and API response schemas."""
 import datetime
 import re
 from enum import Enum, IntEnum
-from typing import List, Literal, Optional, Tuple, Union
+from typing import Literal
 from pydantic import (
     BaseModel,
@@ -52,10 +53,55 @@ class TranscriptPriority(str, Enum):
 class ResidueMode(str, Enum):
     """Create Enum for residue modes.
+    We typically prefer to operate in inter-residue coordinates, but users should be
+    careful to define the coordinate mode of their data when calling ``cool-seq-tool``
+    functions.
                       |   | C |   | T |   | G |   |
     ZERO              |   | 0 |   | 1 |   | 2 |   |
     RESIDUE           |   | 1 |   | 2 |   | 3 |   |
     INTER_RESIDUE     | 0 |   | 1 |   | 2 |   | 3 |
+    .. tabularcolumns:: |L|C|C|C|C|C|C|C|
+    .. list-table::
+       :header-rows: 1
+       * -
+         -
+         - C
+         -
+         - T
+         -
+         - G
+         -
+       * - ``ZERO``
+         -
+         - 0
+         -
+         - 1
+         -
+         - 2
+         -
+       * - ``RESIDUE``
+         -
+         - 1
+         -
+         - 2
+         -
+         - 3
+         -
+       * - ``INTER_RESIDUE``
+         - 0
+         -
+         - 1
+         -
+         - 2
+         -
+         - 3
+    See "Conventions that promote reliable data sharing" and figure 3 within the
+    `Variation Representation Schema (VRS) paper <https://www.ncbi.nlm.nih.gov/pmc/articles/pmid/35311178/>`_ for further discussion.
     """
     ZERO = "zero"
@@ -70,12 +116,12 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
 class GenomicRequestBody(BaseModelForbidExtra):
     """Define constraints for genomic to transcript exon coordinates request body"""
-    chromosome: Union[StrictStr, StrictInt]
-    start: Optional[StrictInt] = None
-    end: Optional[StrictInt] = None
-    strand: Optional[Strand] = None
-    transcript: Optional[StrictStr] = None
-    gene: Optional[StrictStr] = None
+    chromosome: StrictStr | StrictInt
+    start: StrictInt | None = None
+    end: StrictInt | None = None
+    strand: Strand | None = None
+    transcript: StrictStr | None = None
+    gene: StrictStr | None = None
     residue_mode: ResidueMode = ResidueMode.RESIDUE
     @model_validator(mode="after")
@@ -106,11 +152,11 @@ class TranscriptRequestBody(BaseModelForbidExtra):
     """Define constraints for transcript exon to genomic coordinates request body"""
     transcript: StrictStr
-    gene: Optional[StrictStr] = None
-    exon_start: Optional[StrictInt] = None
-    exon_start_offset: Optional[StrictInt] = 0
-    exon_end: Optional[StrictInt] = None
-    exon_end_offset: Optional[StrictInt] = 0
+    gene: StrictStr | None = None
+    exon_start: StrictInt | None = None
+    exon_start_offset: StrictInt | None = 0
+    exon_end: StrictInt | None = None
+    exon_end_offset: StrictInt | None = 0
     @model_validator(mode="after")
     def check_exon_start_and_exon_end(cls, values):
@@ -166,12 +212,12 @@ class GenomicData(BaseModelForbidExtra):
     gene: StrictStr
     chr: StrictStr
-    start: Optional[StrictInt] = None  # Genomic start position
-    end: Optional[StrictInt] = None  # Genomic end position
-    exon_start: Optional[StrictInt] = None
-    exon_start_offset: Optional[StrictInt] = 0
-    exon_end: Optional[StrictInt] = None
-    exon_end_offset: Optional[StrictInt] = 0
+    start: StrictInt | None = None  # Genomic start position
+    end: StrictInt | None = None  # Genomic end position
+    exon_start: StrictInt | None = None
+    exon_start_offset: StrictInt | None = 0
+    exon_end: StrictInt | None = None
+    exon_end_offset: StrictInt | None = 0
     transcript: StrictStr
     strand: Strand
@@ -226,9 +272,9 @@ class ServiceMeta(BaseModelForbidExtra):
     name: Literal["cool_seq_tool"] = "cool_seq_tool"
     version: StrictStr
     response_datetime: datetime.datetime
-    url: Literal[
+    url: Literal["https://github.com/GenomicMedLab/cool-seq-tool"] = (
         "https://github.com/GenomicMedLab/cool-seq-tool"
-    ] = "https://github.com/GenomicMedLab/cool-seq-tool"
+    )
     @field_validator("version")
     def validate_version(cls, v):
@@ -256,8 +302,8 @@ class ServiceMeta(BaseModelForbidExtra):
 class TranscriptExonDataResponse(BaseModelForbidExtra):
     """Response model for Transcript Exon Data"""
-    transcript_exon_data: Optional[TranscriptExonData] = None
-    warnings: List[StrictStr] = []
+    transcript_exon_data: TranscriptExonData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -287,8 +333,8 @@ class TranscriptExonDataResponse(BaseModelForbidExtra):
 class GenomicDataResponse(BaseModelForbidExtra):
     """Response model for Genomic Data"""
-    genomic_data: Optional[GenomicData] = None
-    warnings: List[StrictStr] = []
+    genomic_data: GenomicData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -323,7 +369,7 @@ class MappedManeData(BaseModel):
     gene: StrictStr
     refseq: StrictStr
-    ensembl: Optional[StrictStr] = None
+    ensembl: StrictStr | None = None
     strand: Strand
     status: TranscriptPriority
     alt_ac: StrictStr
@@ -347,8 +393,8 @@ class MappedManeData(BaseModel):
 class MappedManeDataService(BaseModelForbidExtra):
     """Service model response for mapped mane data"""
-    mapped_mane_data: Optional[MappedManeData] = None
-    warnings: List[StrictStr] = []
+    mapped_mane_data: MappedManeData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -378,10 +424,10 @@ class MappedManeDataService(BaseModelForbidExtra):
 class ManeData(BaseModel):
     """Define mane data fields"""
-    gene: Optional[StrictStr] = None
-    refseq: Optional[StrictStr] = None
-    ensembl: Optional[StrictStr] = None
-    pos: Tuple[int, int]
+    gene: StrictStr | None = None
+    refseq: StrictStr | None = None
+    ensembl: StrictStr | None = None
+    pos: tuple[int, int]
     strand: Strand
     status: TranscriptPriority
@@ -402,8 +448,8 @@ class ManeData(BaseModel):
 class ManeDataService(BaseModelForbidExtra):
     """Service model response for getting mane data"""
-    mane_data: Optional[ManeData] = None
-    warnings: List[StrictStr] = []
+    mane_data: ManeData | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -457,8 +503,8 @@ class CdnaRepresentation(BaseModelForbidExtra):
 class ToCdnaService(BaseModelForbidExtra):
     """Service model response for protein -> cDNA"""
-    c_data: Optional[CdnaRepresentation] = None
-    warnings: List[StrictStr] = []
+    c_data: CdnaRepresentation | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(
@@ -506,8 +552,8 @@ class GenomicRepresentation(BaseModelForbidExtra):
 class ToGenomicService(BaseModelForbidExtra):
     """Service model response for cDNA -> genomic"""
-    g_data: Optional[GenomicRepresentation] = None
-    warnings: List[StrictStr] = []
+    g_data: GenomicRepresentation | None = None
+    warnings: list[StrictStr] = []
     service_meta: ServiceMeta
     model_config = ConfigDict(

cool_seq_tool/sources/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Module for providing basic acquisition/setup for the various resources"""
 from .mane_transcript_mappings import ManeTranscriptMappings
 from .transcript_mappings import TranscriptMappings
 from .uta_database import UtaDatabase

cool_seq_tool/sources/mane_transcript_mappings.py CHANGED Viewed

@@ -1,13 +1,13 @@
 """Provide fast tabular access to MANE summary file. Enables retrieval of associated
 MANE transcripts for gene symbols, genomic positions, or transcript accessions.
 """
 import logging
 from pathlib import Path
-from typing import Dict, List
 import polars as pl
-from cool_seq_tool.paths import MANE_SUMMARY_PATH
+from cool_seq_tool.resources.data_files import DataFile, get_data_file
 logger = logging.getLogger(__name__)
@@ -22,11 +22,18 @@ class ManeTranscriptMappings:
     See the `NCBI MANE page <https://www.ncbi.nlm.nih.gov/refseq/MANE/>`_ for more information.
     """
-    def __init__(self, mane_data_path: Path = MANE_SUMMARY_PATH) -> None:
+    def __init__(
+        self, mane_data_path: Path | None = None, from_local: bool = False
+    ) -> None:
         """Initialize the MANE Transcript mappings class.
-        :param Path mane_data_path: Path to RefSeq MANE summary data
+        :param mane_data_path: Path to RefSeq MANE summary data
+        :param from_local: if ``True``, don't check for or acquire latest version --
+            just provide most recent locally available file, if possible, and raise
+            error otherwise
         """
+        if not mane_data_path:
+            mane_data_path = get_data_file(DataFile.MANE_SUMMARY, from_local)
         self.mane_data_path = mane_data_path
         self.df = self._load_mane_transcript_data()
@@ -37,7 +44,7 @@ class ManeTranscriptMappings:
         """
         return pl.read_csv(self.mane_data_path, separator="\t")
-    def get_gene_mane_data(self, gene_symbol: str) -> List[Dict]:
+    def get_gene_mane_data(self, gene_symbol: str) -> list[dict]:
         """Return MANE Transcript data for a gene.
         >>> from cool_seq_tool.sources import ManeTranscriptMappings
@@ -64,7 +71,7 @@ class ManeTranscriptMappings:
         data = data.sort(by="MANE_status", descending=True)
         return data.to_dicts()
-    def get_mane_from_transcripts(self, transcripts: List[str]) -> List[Dict]:
+    def get_mane_from_transcripts(self, transcripts: list[str]) -> list[dict]:
         """Get mane transcripts from a list of transcripts
         :param List[str] transcripts: RefSeq transcripts on c. coordinate
@@ -77,7 +84,7 @@ class ManeTranscriptMappings:
     def get_mane_data_from_chr_pos(
         self, alt_ac: str, start: int, end: int
-    ) -> List[Dict]:
+    ) -> list[dict]:
         """Get MANE data given a GRCh38 genomic position.
         :param str alt_ac: NC Accession

cool_seq_tool/sources/transcript_mappings.py CHANGED Viewed

@@ -1,15 +1,15 @@
 """Provide mappings between gene symbols and RefSeq + Ensembl transcript accessions."""
 import csv
 from pathlib import Path
-from typing import Dict, List, Optional
-from cool_seq_tool.paths import LRG_REFSEQGENE_PATH, TRANSCRIPT_MAPPINGS_PATH
+from cool_seq_tool.resources.data_files import DataFile, get_data_file
 class TranscriptMappings:
     """Provide mappings between gene symbols and RefSeq + Ensembl transcript accessions.
-    Uses ``LRG_RefSeqGene`` and ``transcript_mappings.csv``, which will automatically
+    Uses ``LRG_RefSeqGene`` and ``transcript_mappings.tsv``, which will automatically
     be acquired if they aren't already available. See the
     :ref:`configuration <configuration>` section in the documentation for information
     about manual acquisition of data.
@@ -21,44 +21,53 @@ class TranscriptMappings:
     def __init__(
         self,
-        transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH,
-        lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
+        transcript_file_path: Path | None = None,
+        lrg_refseqgene_path: Path | None = None,
+        from_local: bool = False,
     ) -> None:
         """Initialize the transcript mappings class.
         :param transcript_file_path: Path to transcript mappings file
         :param lrg_refseqgene_path: Path to LRG RefSeqGene file
+        :param from_local: if ``True``, don't check for or acquire latest version --
+            just provide most recent locally available file, if possible, and raise
+            error otherwise
         """
         # ENSP <-> Gene Symbol
-        self.ensembl_protein_version_for_gene_symbol: Dict[str, List[str]] = {}
-        self.ensembl_protein_version_to_gene_symbol: Dict[str, str] = {}
-        self.ensembl_protein_for_gene_symbol: Dict[str, List[str]] = {}
-        self.ensembl_protein_to_gene_symbol: Dict[str, str] = {}
+        self.ensembl_protein_version_for_gene_symbol: dict[str, list[str]] = {}
+        self.ensembl_protein_version_to_gene_symbol: dict[str, str] = {}
+        self.ensembl_protein_for_gene_symbol: dict[str, list[str]] = {}
+        self.ensembl_protein_to_gene_symbol: dict[str, str] = {}
         # Gene Symbol <-> ENST
-        self.ensembl_transcript_version_for_gene_symbol: Dict[str, List[str]] = {}
-        self.ensembl_transcript_version_to_gene_symbol: Dict[str, str] = {}
-        self.ensembl_transcript_for_gene_symbol: Dict[str, List[str]] = {}
-        self.ensembl_transcript_to_gene_symbol: Dict[str, str] = {}
+        self.ensembl_transcript_version_for_gene_symbol: dict[str, list[str]] = {}
+        self.ensembl_transcript_version_to_gene_symbol: dict[str, str] = {}
+        self.ensembl_transcript_for_gene_symbol: dict[str, list[str]] = {}
+        self.ensembl_transcript_to_gene_symbol: dict[str, str] = {}
         # NP_ <-> Gene Symbol
-        self.refseq_protein_for_gene_symbol: Dict[str, List[str]] = {}
-        self.refseq_protein_to_gene_symbol: Dict[str, str] = {}
+        self.refseq_protein_for_gene_symbol: dict[str, list[str]] = {}
+        self.refseq_protein_to_gene_symbol: dict[str, str] = {}
         # NM_ <-> Gene Symbol
-        self.refseq_rna_version_for_gene_symbol: Dict[str, List[str]] = {}
-        self.refseq_rna_version_to_gene_symbol: Dict[str, str] = {}
-        self.refseq_rna_for_gene_symbol: Dict[str, List[str]] = {}
-        self.refseq_rna_to_gene_symbol: Dict[str, str] = {}
+        self.refseq_rna_version_for_gene_symbol: dict[str, list[str]] = {}
+        self.refseq_rna_version_to_gene_symbol: dict[str, str] = {}
+        self.refseq_rna_for_gene_symbol: dict[str, list[str]] = {}
+        self.refseq_rna_to_gene_symbol: dict[str, str] = {}
         # NP -> NM
-        self.np_to_nm: Dict[str, str] = {}
+        self.np_to_nm: dict[str, str] = {}
         # ENSP -> ENST
-        self.ensp_to_enst: Dict[str, str] = {}
+        self.ensp_to_enst: dict[str, str] = {}
-        self._load_transcript_mappings_data(transcript_file_path)
-        self._load_refseq_gene_symbol_data(lrg_refseqgene_path)
+        self._load_transcript_mappings_data(
+            transcript_file_path
+            or get_data_file(DataFile.TRANSCRIPT_MAPPINGS, from_local)
+        )
+        self._load_refseq_gene_symbol_data(
+            lrg_refseqgene_path or get_data_file(DataFile.LRG_REFSEQGENE, from_local)
+        )
     def _load_transcript_mappings_data(self, transcript_file_path: Path) -> None:
         """Load transcript mappings file to dictionaries.
@@ -99,9 +108,9 @@ class TranscriptMappings:
                         ).append(transcript)
                         self.ensembl_transcript_to_gene_symbol[transcript] = gene
                     if versioned_transcript and versioned_protein_transcript:
-                        self.ensp_to_enst[
-                            versioned_protein_transcript
-                        ] = versioned_transcript
+                        self.ensp_to_enst[versioned_protein_transcript] = (
+                            versioned_transcript
+                        )
     def _load_refseq_gene_symbol_data(self, lrg_refseqgene_path: Path) -> None:
         """Load data from RefSeq Gene Symbol file to dictionaries.
@@ -134,7 +143,7 @@ class TranscriptMappings:
                     if refseq_transcript and rna_transcript:
                         self.np_to_nm[refseq_transcript] = rna_transcript
-    def protein_transcripts(self, identifier: str) -> List[str]:
+    def protein_transcripts(self, identifier: str) -> list[str]:
         """Return a list of protein transcripts for a gene symbol.
         >>> from cool_seq_tool.sources import TranscriptMappings
@@ -154,7 +163,7 @@ class TranscriptMappings:
         protein_transcripts += self.refseq_protein_for_gene_symbol.get(identifier, "")
         return list(set(protein_transcripts))
-    def coding_dna_transcripts(self, identifier: str) -> List[str]:
+    def coding_dna_transcripts(self, identifier: str) -> list[str]:
         """Return transcripts from a coding dna refseq for a gene symbol.
         :param identifier: Gene identifier to find transcripts for
@@ -172,7 +181,7 @@ class TranscriptMappings:
         )
         return list(set(genomic_transcripts))
-    def get_gene_symbol_from_ensembl_protein(self, q: str) -> Optional[str]:
+    def get_gene_symbol_from_ensembl_protein(self, q: str) -> str | None:
         """Return the gene symbol for a Ensembl Protein.
         :param q: ensembl protein accession
@@ -184,7 +193,7 @@ class TranscriptMappings:
             gene_symbol = self.ensembl_protein_to_gene_symbol.get(q)
         return gene_symbol
-    def get_gene_symbol_from_refeq_protein(self, q: str) -> Optional[str]:
+    def get_gene_symbol_from_refeq_protein(self, q: str) -> str | None:
         """Return the gene symbol for a Refseq Protein.
         :param q: RefSeq protein accession
@@ -192,7 +201,7 @@ class TranscriptMappings:
         """
         return self.refseq_protein_to_gene_symbol.get(q)
-    def get_gene_symbol_from_refseq_rna(self, q: str) -> Optional[str]:
+    def get_gene_symbol_from_refseq_rna(self, q: str) -> str | None:
         """Return gene symbol for a Refseq RNA Transcript.
         :param q: RefSeq RNA transcript accession
@@ -204,7 +213,7 @@ class TranscriptMappings:
             gene_symbol = self.refseq_rna_to_gene_symbol.get(q)
         return gene_symbol
-    def get_gene_symbol_from_ensembl_transcript(self, q: str) -> Optional[str]:
+    def get_gene_symbol_from_ensembl_transcript(self, q: str) -> str | None:
         """Return gene symbol for an Ensembl Transcript.
         :param q: Ensembl transcript accession

cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.4.1__py3-none-any.whl

cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.4.1__py3-none-any.whl