cool-seq-tool 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/mappers/exon_genomic_coords.py +14 -20
- cool_seq_tool/schemas.py +10 -2
- cool_seq_tool/sources/mane_transcript_mappings.py +46 -14
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/METADATA +2 -2
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/RECORD +8 -8
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/WHEEL +1 -1
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.10.0.dist-info → cool_seq_tool-0.12.0.dist-info}/top_level.txt +0 -0
| @@ -865,14 +865,14 @@ class ExonGenomicCoordsMapper: | |
| 865 865 | 
             
                    if use_alt_start_i and coordinate_type == CoordinateType.RESIDUE:
         | 
| 866 866 | 
             
                        genomic_pos = genomic_pos - 1  # Convert residue coordinate to inter-residue
         | 
| 867 867 |  | 
| 868 | 
            -
                    # Validate that the breakpoint  | 
| 869 | 
            -
                    coordinate_check = await self. | 
| 870 | 
            -
                        pos=genomic_pos, genomic_ac=genomic_ac,  | 
| 868 | 
            +
                    # Validate that the breakpoint falls between the first and last exon of the selected transcript
         | 
| 869 | 
            +
                    coordinate_check = await self._validate_genomic_breakpoint(
         | 
| 870 | 
            +
                        pos=genomic_pos, genomic_ac=genomic_ac, tx_ac=transcript
         | 
| 871 871 | 
             
                    )
         | 
| 872 872 | 
             
                    if not coordinate_check:
         | 
| 873 873 | 
             
                        return GenomicTxSeg(
         | 
| 874 874 | 
             
                            errors=[
         | 
| 875 | 
            -
                                f"{genomic_pos} on {genomic_ac} does not occur within the exons for { | 
| 875 | 
            +
                                f"{genomic_pos} on {genomic_ac} does not occur within the exons for {transcript}"
         | 
| 876 876 | 
             
                            ]
         | 
| 877 877 | 
             
                        )
         | 
| 878 878 |  | 
| @@ -943,38 +943,32 @@ class ExonGenomicCoordsMapper: | |
| 943 943 | 
             
                    )
         | 
| 944 944 | 
             
                    return liftover_data[1] if liftover_data else None
         | 
| 945 945 |  | 
| 946 | 
            -
                async def  | 
| 946 | 
            +
                async def _validate_genomic_breakpoint(
         | 
| 947 947 | 
             
                    self,
         | 
| 948 948 | 
             
                    pos: int,
         | 
| 949 949 | 
             
                    genomic_ac: str,
         | 
| 950 | 
            -
                     | 
| 950 | 
            +
                    tx_ac: str,
         | 
| 951 951 | 
             
                ) -> bool:
         | 
| 952 952 | 
             
                    """Validate that a genomic coordinate falls within the first and last exon
         | 
| 953 | 
            -
                         | 
| 953 | 
            +
                        for a transcript on a given accession
         | 
| 954 954 |  | 
| 955 955 | 
             
                    :param pos: Genomic position on ``genomic_ac``
         | 
| 956 956 | 
             
                    :param genomic_ac: RefSeq genomic accession, e.g. ``"NC_000007.14"``
         | 
| 957 | 
            -
                    :param  | 
| 957 | 
            +
                    :param tx_ac: A transcript accession
         | 
| 958 958 | 
             
                    :return: ``True`` if the coordinate falls within the first and last exon
         | 
| 959 | 
            -
                        for the  | 
| 959 | 
            +
                        for the transcript, ``False`` if not
         | 
| 960 960 | 
             
                    """
         | 
| 961 961 | 
             
                    query = f"""
         | 
| 962 962 | 
             
                        WITH tx_boundaries AS (
         | 
| 963 | 
            -
             | 
| 964 | 
            -
             | 
| 965 | 
            -
             | 
| 966 | 
            -
                                MIN(alt_start_i) as min_start,
         | 
| 967 | 
            -
                                MAX(alt_end_i) as max_end
         | 
| 963 | 
            +
                            SELECT
         | 
| 964 | 
            +
                            MIN(alt_start_i) AS min_start,
         | 
| 965 | 
            +
                            MAX(alt_end_i) AS max_end
         | 
| 968 966 | 
             
                            FROM {self.uta_db.schema}.tx_exon_aln_v
         | 
| 969 | 
            -
                            WHERE  | 
| 967 | 
            +
                            WHERE tx_ac = '{tx_ac}'
         | 
| 970 968 | 
             
                            AND alt_ac = '{genomic_ac}'
         | 
| 971 | 
            -
                            GROUP BY tx_ac, hgnc
         | 
| 972 969 | 
             
                        )
         | 
| 973 | 
            -
                        SELECT  | 
| 974 | 
            -
                        FROM tx_boundaries
         | 
| 970 | 
            +
                        SELECT * FROM tx_boundaries
         | 
| 975 971 | 
             
                        WHERE {pos} between tx_boundaries.min_start and tx_boundaries.max_end
         | 
| 976 | 
            -
                        ORDER BY hgnc
         | 
| 977 | 
            -
                        LIMIT 1;
         | 
| 978 972 | 
             
                        """  # noqa: S608
         | 
| 979 973 | 
             
                    results = await self.uta_db.execute_query(query)
         | 
| 980 974 | 
             
                    return bool(results)
         | 
    
        cool_seq_tool/schemas.py
    CHANGED
    
    | @@ -43,11 +43,18 @@ class Assembly(str, Enum): | |
| 43 43 | 
             
                    return [item.value for item in cls]
         | 
| 44 44 |  | 
| 45 45 |  | 
| 46 | 
            +
            class ManeStatus(str, Enum):
         | 
| 47 | 
            +
                """Define constraints for mane status"""
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                SELECT = "mane_select"
         | 
| 50 | 
            +
                PLUS_CLINICAL = "mane_plus_clinical"
         | 
| 51 | 
            +
             | 
| 52 | 
            +
             | 
| 46 53 | 
             
            class TranscriptPriority(str, Enum):
         | 
| 47 54 | 
             
                """Create Enum for Transcript Priority labels"""
         | 
| 48 55 |  | 
| 49 | 
            -
                MANE_SELECT =  | 
| 50 | 
            -
                MANE_PLUS_CLINICAL =  | 
| 56 | 
            +
                MANE_SELECT = ManeStatus.SELECT.value
         | 
| 57 | 
            +
                MANE_PLUS_CLINICAL = ManeStatus.PLUS_CLINICAL.value
         | 
| 51 58 | 
             
                LONGEST_COMPATIBLE_REMAINING = "longest_compatible_remaining"
         | 
| 52 59 | 
             
                GRCH38 = "grch38"
         | 
| 53 60 |  | 
| @@ -137,6 +144,7 @@ class ManeGeneData(BaseModel, extra="forbid"): | |
| 137 144 | 
             
                ncbi_gene_id: StrictInt
         | 
| 138 145 | 
             
                hgnc_id: StrictInt | None
         | 
| 139 146 | 
             
                symbol: StrictStr
         | 
| 147 | 
            +
                status: list[ManeStatus]
         | 
| 140 148 |  | 
| 141 149 |  | 
| 142 150 | 
             
            class ServiceMeta(BaseModelForbidExtra):
         | 
| @@ -117,26 +117,58 @@ class ManeTranscriptMappings: | |
| 117 117 | 
             
                    :param end: Genomic end position. Assumes residue coordinates.
         | 
| 118 118 | 
             
                    :return: Unique MANE gene(s) found for a genomic location
         | 
| 119 119 | 
             
                    """
         | 
| 120 | 
            +
                    # Only interested in rows where genomic location lives
         | 
| 120 121 | 
             
                    mane_rows = self.df.filter(
         | 
| 121 122 | 
             
                        (start >= pl.col("chr_start"))
         | 
| 122 123 | 
             
                        & (end <= pl.col("chr_end"))
         | 
| 123 124 | 
             
                        & (pl.col("GRCh38_chr") == ac)
         | 
| 124 | 
            -
                    ) | 
| 125 | 
            +
                    )
         | 
| 125 126 |  | 
| 126 | 
            -
                    if  | 
| 127 | 
            +
                    if mane_rows.is_empty():
         | 
| 127 128 | 
             
                        return []
         | 
| 128 129 |  | 
| 129 | 
            -
                     | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
                         | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
             | 
| 138 | 
            -
             | 
| 139 | 
            -
             | 
| 130 | 
            +
                    # Group rows by NCBI ID, transform values to representation we want, MANE status
         | 
| 131 | 
            +
                    # will be converted to list with DESC order
         | 
| 132 | 
            +
                    mane_rows = mane_rows.group_by("#NCBI_GeneID").agg(
         | 
| 133 | 
            +
                        [
         | 
| 134 | 
            +
                            pl.col("#NCBI_GeneID")
         | 
| 135 | 
            +
                            .first()
         | 
| 136 | 
            +
                            .str.split_exact(":", 1)
         | 
| 137 | 
            +
                            .struct.field("field_1")
         | 
| 138 | 
            +
                            .cast(pl.Int32)
         | 
| 139 | 
            +
                            .alias("ncbi_gene_id"),
         | 
| 140 | 
            +
                            pl.col("HGNC_ID")
         | 
| 141 | 
            +
                            .first()
         | 
| 142 | 
            +
                            .str.split_exact(":", 1)
         | 
| 143 | 
            +
                            .struct.field("field_1")
         | 
| 144 | 
            +
                            .cast(pl.Int32)
         | 
| 145 | 
            +
                            .alias("hgnc_id"),
         | 
| 146 | 
            +
                            pl.col("MANE_status")
         | 
| 147 | 
            +
                            .unique()
         | 
| 148 | 
            +
                            .str.to_lowercase()
         | 
| 149 | 
            +
                            .str.replace_all(" ", "_")
         | 
| 150 | 
            +
                            .alias("status")
         | 
| 151 | 
            +
                            .sort(descending=True),
         | 
| 152 | 
            +
                            pl.col("symbol").first(),
         | 
| 153 | 
            +
                        ]
         | 
| 154 | 
            +
                    )
         | 
| 155 | 
            +
             | 
| 156 | 
            +
                    # Sort final rows based on MANE status
         | 
| 157 | 
            +
                    # First by length (which means gene has both select and plus clinical)
         | 
| 158 | 
            +
                    # Then by DESC order
         | 
| 159 | 
            +
                    # Then by NCBI ID ASC order
         | 
| 160 | 
            +
                    mane_rows = (
         | 
| 161 | 
            +
                        mane_rows.with_columns(
         | 
| 162 | 
            +
                            [
         | 
| 163 | 
            +
                                pl.col("status").list.len().alias("status_count"),
         | 
| 164 | 
            +
                                pl.col("status").list.join("_").alias("status_str"),
         | 
| 165 | 
            +
                                pl.col("ncbi_gene_id"),
         | 
| 166 | 
            +
                            ]
         | 
| 167 | 
            +
                        )
         | 
| 168 | 
            +
                        .sort(
         | 
| 169 | 
            +
                            ["status_count", "status_str", "ncbi_gene_id"],
         | 
| 170 | 
            +
                            descending=[True, True, False],
         | 
| 171 | 
            +
                        )
         | 
| 172 | 
            +
                        .drop(["status_count", "status_str", "#NCBI_GeneID"])
         | 
| 140 173 | 
             
                    )
         | 
| 141 | 
            -
                    mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
         | 
| 142 174 | 
             
                    return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
         | 
| @@ -1,12 +1,12 @@ | |
| 1 1 | 
             
            cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
         | 
| 2 2 | 
             
            cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
         | 
| 3 | 
            -
            cool_seq_tool/schemas.py,sha256= | 
| 3 | 
            +
            cool_seq_tool/schemas.py,sha256=D0DsYAR1ZX7RONuc7X4hsPMKcZct7_2LlnE1KKVNre0,4139
         | 
| 4 4 | 
             
            cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
         | 
| 5 5 | 
             
            cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
         | 
| 6 6 | 
             
            cool_seq_tool/handlers/seqrepo_access.py,sha256=Jd19jbdUvPRPn_XWozL67ph-nSIxpb4_UUimapDrsm4,9162
         | 
| 7 7 | 
             
            cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
         | 
| 8 8 | 
             
            cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
         | 
| 9 | 
            -
            cool_seq_tool/mappers/exon_genomic_coords.py,sha256= | 
| 9 | 
            +
            cool_seq_tool/mappers/exon_genomic_coords.py,sha256=ORYjBVaX1HO6ln0gRJyRKxUCjZrBDi4JfYQEYebxIAc,43824
         | 
| 10 10 | 
             
            cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
         | 
| 11 11 | 
             
            cool_seq_tool/mappers/mane_transcript.py,sha256=C9eKEj8qhVg878oUhBKPYAZS7gpLM5aaQ0HhSkUg-2g,54365
         | 
| 12 12 | 
             
            cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
         | 
| @@ -14,11 +14,11 @@ cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oI | |
| 14 14 | 
             
            cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
         | 
| 15 15 | 
             
            cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
         | 
| 16 16 | 
             
            cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
         | 
| 17 | 
            -
            cool_seq_tool/sources/mane_transcript_mappings.py,sha256= | 
| 17 | 
            +
            cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
         | 
| 18 18 | 
             
            cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
         | 
| 19 19 | 
             
            cool_seq_tool/sources/uta_database.py,sha256=s7BkFplD_b2AmvXq8vZSCiBuZLy8RlxAqNyf-6QtR8w,36112
         | 
| 20 | 
            -
            cool_seq_tool-0. | 
| 21 | 
            -
            cool_seq_tool-0. | 
| 22 | 
            -
            cool_seq_tool-0. | 
| 23 | 
            -
            cool_seq_tool-0. | 
| 24 | 
            -
            cool_seq_tool-0. | 
| 20 | 
            +
            cool_seq_tool-0.12.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
         | 
| 21 | 
            +
            cool_seq_tool-0.12.0.dist-info/METADATA,sha256=Nt7O4bD59cQqje3eH_sKPkP8uvPz9ApxjKMvS6so0HE,6557
         | 
| 22 | 
            +
            cool_seq_tool-0.12.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         | 
| 23 | 
            +
            cool_seq_tool-0.12.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
         | 
| 24 | 
            +
            cool_seq_tool-0.12.0.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |