PyPI - cool-seq-tool - Versions diffs - 0.3.0.dev0__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl - Mend

cool-seq-tool 0.3.0.dev0__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

cool_seq_tool/api.py +3 -3
cool_seq_tool/app.py +32 -11
cool_seq_tool/data/data_downloads.py +8 -5
cool_seq_tool/handlers/seqrepo_access.py +55 -27
cool_seq_tool/mappers/__init__.py +4 -1
cool_seq_tool/mappers/alignment.py +40 -37
cool_seq_tool/mappers/exon_genomic_coords.py +329 -138
cool_seq_tool/mappers/mane_transcript.py +402 -227
cool_seq_tool/routers/mane.py +1 -1
cool_seq_tool/routers/mappings.py +1 -1
cool_seq_tool/schemas.py +31 -24
cool_seq_tool/sources/__init__.py +4 -2
cool_seq_tool/sources/mane_transcript_mappings.py +28 -7
cool_seq_tool/sources/transcript_mappings.py +27 -11
cool_seq_tool/sources/uta_database.py +179 -232
cool_seq_tool/utils.py +22 -24
cool_seq_tool/version.py +1 -1
{cool_seq_tool-0.3.0.dev0.dist-info → cool_seq_tool-0.4.0.dev0.dist-info}/LICENSE +1 -1
cool_seq_tool-0.4.0.dev0.dist-info/METADATA +130 -0
cool_seq_tool-0.4.0.dev0.dist-info/RECORD +28 -0
{cool_seq_tool-0.3.0.dev0.dist-info → cool_seq_tool-0.4.0.dev0.dist-info}/WHEEL +1 -1
cool_seq_tool/data/transcript_mapping.tsv +0 -256226
cool_seq_tool-0.3.0.dev0.dist-info/METADATA +0 -187
cool_seq_tool-0.3.0.dev0.dist-info/RECORD +0 -29
{cool_seq_tool-0.3.0.dev0.dist-info → cool_seq_tool-0.4.0.dev0.dist-info}/top_level.txt +0 -0

cool_seq_tool/routers/mane.py CHANGED Viewed

@@ -79,8 +79,8 @@ async def get_mane_data(
         mane_data = await cool_seq_tool.mane_transcript.get_mane_transcript(
             ac=ac,
             start_pos=start_pos,
-            start_annotation_layer=start_annotation_layer,
             end_pos=end_pos,
+            start_annotation_layer=start_annotation_layer,
             gene=gene,
             ref=ref,
             try_longest_compatible=try_longest_compatible,

cool_seq_tool/routers/mappings.py CHANGED Viewed

@@ -57,7 +57,7 @@ async def p_to_c(
     "/c_to_g",
     summary="Translate cDNA representation to genomic representation",
     response_description=RESP_DESCR,
-    description="Given cDNA accession and positions for codon(s), return associated genomic"  # noqa: E501
+    description="Given cDNA accession and positions for codon(s), return associated genomic"
     " accession and positions for a given target genome assembly",
     response_model=ToGenomicService,
     tags=[Tags.ALIGNMENT_MAPPER],

cool_seq_tool/schemas.py CHANGED Viewed

@@ -1,7 +1,7 @@
-"""Module for data models."""
+"""Defines attribute constants, useful object structures, and API response schemas."""
 import re
 from datetime import datetime
-from enum import Enum
+from enum import Enum, IntEnum
 from typing import List, Literal, Optional, Tuple, Union
 from pydantic import (
@@ -24,11 +24,11 @@ class AnnotationLayer(str, Enum):
     GENOMIC: Literal["g"] = "g"
-class Strand(str, Enum):
+class Strand(IntEnum):
     """Create enum for positive and negative strand"""
-    POSITIVE = "+"
-    NEGATIVE = "-"
+    POSITIVE = 1
+    NEGATIVE = -1
 class Assembly(str, Enum):
@@ -48,8 +48,15 @@ class TranscriptPriority(str, Enum):
 class ResidueMode(str, Enum):
-    """Create Enum for residue modes."""
+    """Create Enum for residue modes.
+                      |   | C |   | T |   | G |   |
+    ZERO              |   | 0 |   | 1 |   | 2 |   |
+    RESIDUE           |   | 1 |   | 2 |   | 3 |   |
+    INTER_RESIDUE     | 0 |   | 1 |   | 2 |   | 3 |
+    """
+    ZERO = "zero"
     RESIDUE = "residue"
     INTER_RESIDUE = "inter-residue"
@@ -64,14 +71,14 @@ class GenomicRequestBody(BaseModelForbidExtra):
     chromosome: Union[StrictStr, StrictInt]
     start: Optional[StrictInt] = None
     end: Optional[StrictInt] = None
-    strand: Optional[StrictInt] = None
+    strand: Optional[Strand] = None
     transcript: Optional[StrictStr] = None
     gene: Optional[StrictStr] = None
     residue_mode: ResidueMode = ResidueMode.RESIDUE
     @model_validator(mode="after")
     def check_start_and_end(cls, values):
-        """Check that at least one of {`start`, `end`} is set"""
+        """Check that at least one of {``start``, ``end``} is set"""
         msg = "Must provide either `start` or `end`"
         start, end = values.start, values.end
         assert start or end, msg
@@ -83,7 +90,7 @@ class GenomicRequestBody(BaseModelForbidExtra):
                 "chromosome": "NC_000001.11",
                 "start": 154192135,
                 "end": None,
-                "strand": -1,
+                "strand": Strand.NEGATIVE,
                 "transcript": "NM_152263.3",
                 "gene": "TPM3",
                 "residue_mode": "residue",
@@ -95,8 +102,8 @@ class GenomicRequestBody(BaseModelForbidExtra):
 class TranscriptRequestBody(BaseModelForbidExtra):
     """Define constraints for transcript exon to genomic coordinates request body"""
+    transcript: StrictStr
     gene: Optional[StrictStr] = None
-    transcript: Optional[StrictStr] = None
     exon_start: Optional[StrictInt] = None
     exon_start_offset: Optional[StrictInt] = 0
     exon_end: Optional[StrictInt] = None
@@ -104,7 +111,7 @@ class TranscriptRequestBody(BaseModelForbidExtra):
     @model_validator(mode="after")
     def check_exon_start_and_exon_end(cls, values):
-        """Check that at least one of {`exon_start`, `exon_end`} is set"""
+        """Check that at least one of {``exon_start``, ``exon_end``} is set"""
         msg = "Must provide either `exon_start` or `exon_end`"
         exon_start, exon_end = values.exon_start, values.exon_end
         assert exon_start or exon_end, msg
@@ -133,7 +140,7 @@ class TranscriptExonData(BaseModelForbidExtra):
     exon_offset: StrictInt = 0
     gene: StrictStr
     chr: StrictStr
-    strand: StrictInt
+    strand: Strand
     model_config = ConfigDict(
         json_schema_extra={
@@ -144,7 +151,7 @@ class TranscriptExonData(BaseModelForbidExtra):
                 "exon": 1,
                 "exon_offset": 0,
                 "transcript": "NM_152263.3",
-                "strand": -1,
+                "strand": Strand.NEGATIVE,
             }
         }
     )
@@ -162,13 +169,13 @@ class GenomicData(BaseModelForbidExtra):
     exon_end: Optional[StrictInt] = None
     exon_end_offset: Optional[StrictInt] = 0
     transcript: StrictStr
-    strand: StrictInt
+    strand: Strand
     @model_validator(mode="after")
     def check_start_end(cls, values):
-        """Check that at least one of {`start`, `end`} is set.
-        Check that at least one of {`exon_start`, `exon_end`} is set.
-        If not set, set corresponding offset to `None`
+        """Check that at least one of {``start``, ``end``} is set.
+        Check that at least one of {``exon_start``, ``exon_end``} is set.
+        If not set, set corresponding offset to ``None``
         """
         msg = "Missing values for `start` or `end`"
         start = values.start
@@ -200,7 +207,7 @@ class GenomicData(BaseModelForbidExtra):
                 "exon_start_offset": 0,
                 "exon_end_offset": None,
                 "transcript": "NM_152263.3",
-                "strand": -1,
+                "strand": Strand.NEGATIVE,
             }
         }
     )
@@ -254,7 +261,7 @@ class TranscriptExonDataResponse(BaseModelForbidExtra):
                     "exon": 1,
                     "exon_offset": 0,
                     "transcript": "NM_152263.3",
-                    "strand": -1,
+                    "strand": Strand.NEGATIVE,
                 },
                 "warnings": [],
                 "service_meta": {
@@ -288,7 +295,7 @@ class GenomicDataResponse(BaseModelForbidExtra):
                     "exon_start_offset": 0,
                     "exon_end_offset": None,
                     "transcript": "NM_152263.3",
-                    "strand": -1,
+                    "strand": Strand.NEGATIVE,
                 },
                 "warnings": [],
                 "service_meta": {
@@ -319,7 +326,7 @@ class MappedManeData(BaseModel):
                 "gene": "BRAF",
                 "refseq": "NM_001374258.1",
                 "ensembl": "ENST00000644969.2",
-                "strand": "-",
+                "strand": Strand.NEGATIVE,
                 "status": TranscriptPriority.MANE_PLUS_CLINICAL,
                 "alt_ac": "NC_000007.13",
                 "assembly": "GRCh37",
@@ -342,7 +349,7 @@ class MappedManeDataService(BaseModelForbidExtra):
                     "gene": "BRAF",
                     "refseq": "NM_001374258.1",
                     "ensembl": "ENST00000644969.2",
-                    "strand": "-",
+                    "strand": Strand.NEGATIVE,
                     "status": TranscriptPriority.MANE_PLUS_CLINICAL,
                     "alt_ac": "NC_000007.13",
                     "assembly": "GRCh37",
@@ -376,7 +383,7 @@ class ManeData(BaseModel):
                 "refseq": "NP_004324.2",
                 "ensembl": "ENSP00000493543.1",
                 "pos": (598, 598),
-                "strand": "-",
+                "strand": Strand.NEGATIVE,
                 "status": TranscriptPriority.MANE_SELECT,
             }
         }
@@ -398,7 +405,7 @@ class ManeDataService(BaseModelForbidExtra):
                     "refseq": "NP_004324.2",
                     "ensembl": "ENSP00000493543.1",
                     "pos": (598, 598),
-                    "strand": "-",
+                    "strand": Strand.NEGATIVE,
                     "status": TranscriptPriority.MANE_SELECT,
                 },
                 "warnings": [],

cool_seq_tool/sources/__init__.py CHANGED Viewed

@@ -1,4 +1,6 @@
 """Module for providing basic acquisition/setup for the various resources"""
-from .mane_transcript_mappings import MANETranscriptMappings
+from .mane_transcript_mappings import ManeTranscriptMappings
 from .transcript_mappings import TranscriptMappings
-from .uta_database import UTADatabase
+from .uta_database import UtaDatabase
+__all__ = ["ManeTranscriptMappings", "TranscriptMappings", "UtaDatabase"]

cool_seq_tool/sources/mane_transcript_mappings.py CHANGED Viewed

@@ -1,4 +1,6 @@
-"""The module for loading MANE Transcript mappings to genes."""
+"""Provide fast tabular access to MANE summary file. Enables retrieval of associated
+MANE transcripts for gene symbols, genomic positions, or transcript accessions.
+"""
 import logging
 from pathlib import Path
 from typing import Dict, List
@@ -10,11 +12,19 @@ from cool_seq_tool.paths import MANE_SUMMARY_PATH
 logger = logging.getLogger(__name__)
-class MANETranscriptMappings:
-    """The MANE Transcript mappings class."""
+class ManeTranscriptMappings:
+    """Provide fast tabular access to MANE summary file.
+    By default, acquires data from `NCBI FTP server <ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/current/>`_
+    if unavailable locally. The local data location can be passed as an argument or
+    given under the environment variable ``MANE_SUMMARY_PATH``.
+    See the `NCBI MANE page <https://www.ncbi.nlm.nih.gov/refseq/MANE/>`_ for more information.
+    """
     def __init__(self, mane_data_path: Path = MANE_SUMMARY_PATH) -> None:
         """Initialize the MANE Transcript mappings class.
         :param Path mane_data_path: Path to RefSeq MANE summary data
         """
         self.mane_data_path = mane_data_path
@@ -22,16 +32,26 @@ class MANETranscriptMappings:
     def _load_mane_transcript_data(self) -> pl.DataFrame:
         """Load RefSeq MANE data file into DataFrame.
         :return: DataFrame containing RefSeq MANE Transcript data
         """
         return pl.read_csv(self.mane_data_path, separator="\t")
     def get_gene_mane_data(self, gene_symbol: str) -> List[Dict]:
         """Return MANE Transcript data for a gene.
+        >>> from cool_seq_tool.sources import ManeTranscriptMappings
+        >>> m = ManeTranscriptMappings()
+        >>> braf_mane = m.get_gene_mane_data("BRAF")
+        >>> braf_mane[0]["RefSeq_nuc"], braf_mane[0]["MANE_status"]
+        ('NM_004333.6', 'MANE Select')
+        >>> braf_mane[1]["RefSeq_nuc"], braf_mane[1]["MANE_status"]
+        ('NM_001374258.1', 'MANE Plus Clinical')
         :param str gene_symbol: HGNC Gene Symbol
-        :return: List of MANE Transcript data (Transcript accessions,
-            gene, and location information). Sorted list: MANE Select and then MANE Plus
-            Clinical
+        :return: List of MANE Transcript data (Transcript accessions, gene, and
+            location information). The list is sorted so that a MANE Select entry comes
+            first, followed by a MANE Plus Clinical entry, if available.
         """
         data = self.df.filter(pl.col("symbol") == gene_symbol.upper())
@@ -58,7 +78,8 @@ class MANETranscriptMappings:
     def get_mane_data_from_chr_pos(
         self, alt_ac: str, start: int, end: int
     ) -> List[Dict]:
-        """Get MANE data given chromosome, start pos, end end pos. Assumes GRCh38.
+        """Get MANE data given a GRCh38 genomic position.
         :param str alt_ac: NC Accession
         :param int start: Start genomic position. Assumes residue coordinates.
         :param int end: End genomic position. Assumes residue coordinates.

cool_seq_tool/sources/transcript_mappings.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""The module for Transcript Mappings."""
+"""Provide mappings between gene symbols and RefSeq + Ensembl transcript accessions."""
 import csv
 from pathlib import Path
 from typing import Dict, List, Optional
@@ -7,7 +7,17 @@ from cool_seq_tool.paths import LRG_REFSEQGENE_PATH, TRANSCRIPT_MAPPINGS_PATH
 class TranscriptMappings:
-    """The transcript mappings class."""
+    """Provide mappings between gene symbols and RefSeq + Ensembl transcript accessions.
+    Uses ``LRG_RefSeqGene`` and ``transcript_mappings.csv``, which will automatically
+    be acquired if they aren't already available. See the
+    :ref:`configuration <configuration>` section in the documentation for information
+    about manual acquisition of data.
+    In general, this class's methods expect to receive NCBI gene symbols, so users
+    should be careful about the sourcing of their input in cases where terms are
+    conflicted or ambiguous (which, to be fair, should be relatively rare).
+    """
     def __init__(
         self,
@@ -16,8 +26,8 @@ class TranscriptMappings:
     ) -> None:
         """Initialize the transcript mappings class.
-        :param Path transcript_file_path: Path to transcript mappings file
-        :param Path lrg_refseqgene_path: Path to LRG RefSeqGene file
+        :param transcript_file_path: Path to transcript mappings file
+        :param lrg_refseqgene_path: Path to LRG RefSeqGene file
         """
         # ENSP <-> Gene Symbol
         self.ensembl_protein_version_for_gene_symbol: Dict[str, List[str]] = {}
@@ -53,7 +63,7 @@ class TranscriptMappings:
     def _load_transcript_mappings_data(self, transcript_file_path: Path) -> None:
         """Load transcript mappings file to dictionaries.
-        :param Path transcript_file_path: Path to transcript mappings file
+        :param transcript_file_path: Path to transcript mappings file
         """
         with open(transcript_file_path) as file:
             reader = csv.DictReader(file, delimiter="\t")
@@ -127,7 +137,13 @@ class TranscriptMappings:
     def protein_transcripts(self, identifier: str) -> List[str]:
         """Return a list of protein transcripts for a gene symbol.
-        :param str identifier: Gene identifier to get protein transcripts for
+        >>> from cool_seq_tool.sources import TranscriptMappings
+        >>> braf_txs = TranscriptMappings().protein_transcripts("BRAF")
+        >>> braf_txs.sort()
+        >>> braf_txs[-1]
+        'NP_004324.2'
+        :param identifier: Gene identifier to get protein transcripts for
         :return: Protein transcripts for a gene symbol
         """
         protein_transcripts = list()
@@ -141,7 +157,7 @@ class TranscriptMappings:
     def coding_dna_transcripts(self, identifier: str) -> List[str]:
         """Return transcripts from a coding dna refseq for a gene symbol.
-        :param str identifier: Gene identifier to find transcripts for
+        :param identifier: Gene identifier to find transcripts for
         :return: cDNA transcripts for a gene symbol
         """
         genomic_transcripts = list()
@@ -159,7 +175,7 @@ class TranscriptMappings:
     def get_gene_symbol_from_ensembl_protein(self, q: str) -> Optional[str]:
         """Return the gene symbol for a Ensembl Protein.
-        :param str q: ensembl protein accession
+        :param q: ensembl protein accession
         :return: Gene symbol
         """
         gene_symbol = self.ensembl_protein_version_to_gene_symbol.get(q)
@@ -172,7 +188,7 @@ class TranscriptMappings:
     def get_gene_symbol_from_refeq_protein(self, q: str) -> Optional[str]:
         """Return the gene symbol for a Refseq Protein.
-        :param str q: RefSeq protein accession
+        :param q: RefSeq protein accession
         :return: Gene symbol
         """
         return self.refseq_protein_to_gene_symbol.get(q)
@@ -180,7 +196,7 @@ class TranscriptMappings:
     def get_gene_symbol_from_refseq_rna(self, q: str) -> Optional[str]:
         """Return gene symbol for a Refseq RNA Transcript.
-        :param str q: RefSeq RNA transcript accession
+        :param q: RefSeq RNA transcript accession
         :return: Gene symbol
         """
         gene_symbol = self.refseq_rna_version_to_gene_symbol.get(q)
@@ -193,7 +209,7 @@ class TranscriptMappings:
     def get_gene_symbol_from_ensembl_transcript(self, q: str) -> Optional[str]:
         """Return gene symbol for an Ensembl Transcript.
-        :param str q: Ensembl transcript accession
+        :param q: Ensembl transcript accession
         :return: Gene symbol
         """
         gene_symbol = self.ensembl_transcript_version_to_gene_symbol.get(q)

cool-seq-tool 0.3.0.dev0__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl

cool-seq-tool 0.3.0.dev0__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl