PyPI - cool-seq-tool - Versions diffs - 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

cool-seq-tool 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

cool_seq_tool/mappers/exon_genomic_coords.py CHANGED Viewed

@@ -865,14 +865,14 @@ class ExonGenomicCoordsMapper:
         if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
             genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
-        # Validate that the breakpoint occurs on a transcript given a gene
-        coordinate_check = await self._validate_gene_coordinates(
-            pos=genomic_pos, genomic_ac=genomic_ac, gene=gene
+        # Validate that the breakpoint between the first and last exon for the selected transcript
+        coordinate_check = await self._validate_genomic_breakpoint(
+            pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
         )
         if not coordinate_check:
             return GenomicTxSeg(
                 errors=[
-                    f"{genomic_pos} on {genomic_ac} does not occur within the exons for {gene}"
+                    f"{genomic_pos} on {genomic_ac} does not occur within the exons for {transcript}"
                 ]
             )
@@ -943,38 +943,32 @@ class ExonGenomicCoordsMapper:
         )
         return liftover_data[1] if liftover_data else None
-    async def _validate_gene_coordinates(
+    async def _validate_genomic_breakpoint(
         self,
         pos: int,
         genomic_ac: str,
-        gene: str,
+        tx_ac: str,
     ) -> bool:
         """Validate that a genomic coordinate falls within the first and last exon
-            given a gene and accession
+            for a transcript on a given accession
         :param pos: Genomic position on ``genomic_ac``
         :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
-        :param gene: A valid, case-sensitive HGNC gene symbol
+        :param transcript: A transcript accession
         :return: ``True`` if the coordinate falls within the first and last exon
-            for the gene, ``False`` if not
+            for the transcript, ``False`` if not
         """
         query = f"""
             WITH tx_boundaries AS (
-                    SELECT
-                    tx_ac,
-                    hgnc,
-                    MIN(alt_start_i) as min_start,
-                    MAX(alt_end_i) as max_end
+                SELECT
+                MIN(alt_start_i) AS min_start,
+                MAX(alt_end_i) AS max_end
                 FROM {self.uta_db.schema}.tx_exon_aln_v
-                WHERE hgnc = '{gene}'
+                WHERE tx_ac = '{tx_ac}'
                 AND alt_ac = '{genomic_ac}'
-                GROUP BY tx_ac, hgnc
             )
-            SELECT DISTINCT hgnc
-            FROM tx_boundaries
+            SELECT * FROM tx_boundaries
             WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
-            ORDER BY hgnc
-            LIMIT 1;
             """  # noqa: S608
         results = await self.uta_db.execute_query(query)
         return bool(results)

cool_seq_tool/schemas.py CHANGED Viewed

@@ -43,11 +43,18 @@ class Assembly(str, Enum):
         return [item.value for item in cls]
+class ManeStatus(str, Enum):
+    """Define constraints for mane status"""
+    SELECT = "mane_select"
+    PLUS_CLINICAL = "mane_plus_clinical"
 class TranscriptPriority(str, Enum):
     """Create Enum for Transcript Priority labels"""
-    MANE_SELECT = "mane_select"
-    MANE_PLUS_CLINICAL = "mane_plus_clinical"
+    MANE_SELECT = ManeStatus.SELECT.value
+    MANE_PLUS_CLINICAL = ManeStatus.PLUS_CLINICAL.value
     LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
     GRCH38 = "grch38"
@@ -137,6 +144,7 @@ class ManeGeneData(BaseModel, extra="forbid"):
     ncbi_gene_id: StrictInt
     hgnc_id: StrictInt | None
     symbol: StrictStr
+    status: list[ManeStatus]
 class ServiceMeta(BaseModelForbidExtra):

cool_seq_tool/sources/mane_transcript_mappings.py CHANGED Viewed

@@ -117,26 +117,58 @@ class ManeTranscriptMappings:
         :param end: Genomic end position. Assumes residue coordinates.
         :return: Unique MANE gene(s) found for a genomic location
         """
+        # Only interested in rows where genomic location lives
         mane_rows = self.df.filter(
             (start >= pl.col("chr_start"))
             & (end <= pl.col("chr_end"))
             & (pl.col("GRCh38_chr") == ac)
-        ).unique(subset=["#NCBI_GeneID"])
+        )
-        if len(mane_rows) == 0:
+        if mane_rows.is_empty():
             return []
-        mane_rows = mane_rows.with_columns(
-            pl.col("#NCBI_GeneID")
-            .str.split_exact(":", 1)
-            .struct.field("field_1")
-            .cast(pl.Int32)
-            .alias("ncbi_gene_id"),
-            pl.col("HGNC_ID")
-            .str.split_exact(":", 1)
-            .struct.field("field_1")
-            .cast(pl.Int32)
-            .alias("hgnc_id"),
+        # Group rows by NCBI ID, transform values to representation we want, MANE status
+        # will be converted to list with DESC order
+        mane_rows = mane_rows.group_by("#NCBI_GeneID").agg(
+            [
+                pl.col("#NCBI_GeneID")
+                .first()
+                .str.split_exact(":", 1)
+                .struct.field("field_1")
+                .cast(pl.Int32)
+                .alias("ncbi_gene_id"),
+                pl.col("HGNC_ID")
+                .first()
+                .str.split_exact(":", 1)
+                .struct.field("field_1")
+                .cast(pl.Int32)
+                .alias("hgnc_id"),
+                pl.col("MANE_status")
+                .unique()
+                .str.to_lowercase()
+                .str.replace_all(" ", "_")
+                .alias("status")
+                .sort(descending=True),
+                pl.col("symbol").first(),
+            ]
+        )
+        # Sort final rows based on MANE status
+        # First by length (which means gene has both select and plus clinical)
+        # Then by DESC order
+        # Then by NCBI ID ASC order
+        mane_rows = (
+            mane_rows.with_columns(
+                [
+                    pl.col("status").list.len().alias("status_count"),
+                    pl.col("status").list.join("_").alias("status_str"),
+                    pl.col("ncbi_gene_id"),
+                ]
+            )
+            .sort(
+                ["status_count", "status_str", "ncbi_gene_id"],
+                descending=[True, True, False],
+            )
+            .drop(["status_count", "status_str", "#NCBI_GeneID"])
         )
-        mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
         return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]

{cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: cool_seq_tool
-Version: 0.10.0
+Version: 0.12.0
 Summary: Common Operation on Lots of Sequences Tool
 Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
 License: MIT License

{cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
 cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
 cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
-cool_seq_tool/schemas.py,sha256=HInmKpsujybVR6pRmkKNOIzPCBqk9Ni5q1ZKNFtip50,3945
+cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
 cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
 cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
 cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
 cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
 cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
-cool_seq_tool/mappers/exon_genomic_coords.py,sha256=uOJGy8HFoe1mjoxnaKP9x60hcgQm03vINJpUzJfxE0A,43948
+cool_seq_tool/mappers/exon_genomic_coords.py,sha256=ORYjBVaX1HO6ln0gRJyRKxUCjZrBDi4JfYQEYebxIAc,43824
 cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
 cool_seq_tool/mappers/mane_transcript.py,sha256=C9eKEj8qhVg878oUhBKPYAZS7gpLM5aaQ0HhSkUg-2g,54365
 cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
@@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI
 cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
 cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
 cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
-cool_seq_tool/sources/mane_transcript_mappings.py,sha256=Q6J57O2lLWXlgKT0zq3BIwkwFawySnORHOX-UxzfyDE,5399
+cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
 cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
 cool_seq_tool/sources/uta_database.py,sha256=s7BkFplD_b2AmvXq8vZSCiBuZLy8RlxAqNyf-6QtR8w,36112
-cool_seq_tool-0.10.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
-cool_seq_tool-0.10.0.dist-info/METADATA,sha256=EOIMJIeXl9Om4LrUrr2ccPED68ou52_4lqTCrkWHzHQ,6557
-cool_seq_tool-0.10.0.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
-cool_seq_tool-0.10.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
-cool_seq_tool-0.10.0.dist-info/RECORD,,
+cool_seq_tool-0.12.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
+cool_seq_tool-0.12.0.dist-info/METADATA,sha256=Nt7O4bD59cQqje3eH_sKPkP8uvPz9ApxjKMvS6so0HE,6557
+cool_seq_tool-0.12.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+cool_seq_tool-0.12.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
+cool_seq_tool-0.12.0.dist-info/RECORD,,

{cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.7.0)
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

cool-seq-tool 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

cool-seq-tool 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl