cool-seq-tool 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +6 -0
 - cool_seq_tool/app.py +1 -2
 - cool_seq_tool/handlers/seqrepo_access.py +5 -5
 - cool_seq_tool/mappers/alignment.py +16 -16
 - cool_seq_tool/mappers/exon_genomic_coords.py +845 -628
 - cool_seq_tool/mappers/mane_transcript.py +184 -152
 - cool_seq_tool/schemas.py +30 -438
 - cool_seq_tool/sources/mane_transcript_mappings.py +35 -0
 - cool_seq_tool/sources/uta_database.py +149 -229
 - cool_seq_tool/utils.py +9 -9
 - {cool_seq_tool-0.5.1.dist-info → cool_seq_tool-0.7.0.dist-info}/METADATA +8 -8
 - cool_seq_tool-0.7.0.dist-info/RECORD +24 -0
 - {cool_seq_tool-0.5.1.dist-info → cool_seq_tool-0.7.0.dist-info}/WHEEL +1 -1
 - cool_seq_tool-0.5.1.dist-info/RECORD +0 -24
 - {cool_seq_tool-0.5.1.dist-info → cool_seq_tool-0.7.0.dist-info}/LICENSE +0 -0
 - {cool_seq_tool-0.5.1.dist-info → cool_seq_tool-0.7.0.dist-info}/top_level.txt +0 -0
 
    
        cool_seq_tool/__init__.py
    CHANGED
    
    | 
         @@ -8,3 +8,9 @@ except PackageNotFoundError: 
     | 
|
| 
       8 
8 
     | 
    
         
             
                __version__ = "unknown"
         
     | 
| 
       9 
9 
     | 
    
         
             
            finally:
         
     | 
| 
       10 
10 
     | 
    
         
             
                del version, PackageNotFoundError
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            # must import after __version__ declaration to prevent ImportError
         
     | 
| 
      
 14 
     | 
    
         
            +
            from cool_seq_tool.app import CoolSeqTool
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            __all__ = ["CoolSeqTool", "__version__"]
         
     | 
    
        cool_seq_tool/app.py
    CHANGED
    
    | 
         @@ -48,7 +48,7 @@ class CoolSeqTool: 
     | 
|
| 
       48 
48 
     | 
    
         | 
| 
       49 
49 
     | 
    
         
             
                    Initialization with default resource locations is straightforward:
         
     | 
| 
       50 
50 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
                    >>> from cool_seq_tool 
     | 
| 
      
 51 
     | 
    
         
            +
                    >>> from cool_seq_tool import CoolSeqTool
         
     | 
| 
       52 
52 
     | 
    
         
             
                    >>> cst = CoolSeqTool()
         
     | 
| 
       53 
53 
     | 
    
         | 
| 
       54 
54 
     | 
    
         
             
                    By default, this will attempt to fetch the latest versions of static resources,
         
     | 
| 
         @@ -107,7 +107,6 @@ class CoolSeqTool: 
     | 
|
| 
       107 
107 
     | 
    
         
             
                    self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
         
     | 
| 
       108 
108 
     | 
    
         
             
                        self.seqrepo_access,
         
     | 
| 
       109 
109 
     | 
    
         
             
                        self.uta_db,
         
     | 
| 
       110 
     | 
    
         
            -
                        self.mane_transcript,
         
     | 
| 
       111 
110 
     | 
    
         
             
                        self.mane_transcript_mappings,
         
     | 
| 
       112 
111 
     | 
    
         
             
                        self.liftover,
         
     | 
| 
       113 
112 
     | 
    
         
             
                    )
         
     | 
| 
         @@ -8,7 +8,7 @@ from pathlib import Path 
     | 
|
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
            from ga4gh.vrs.dataproxy import SeqRepoDataProxy
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
            from cool_seq_tool.schemas import Assembly,  
     | 
| 
      
 11 
     | 
    
         
            +
            from cool_seq_tool.schemas import Assembly, CoordinateType
         
     | 
| 
       12 
12 
     | 
    
         
             
            from cool_seq_tool.utils import get_inter_residue_pos, process_chromosome_input
         
     | 
| 
       13 
13 
     | 
    
         | 
| 
       14 
14 
     | 
    
         
             
            _logger = logging.getLogger(__name__)
         
     | 
| 
         @@ -29,7 +29,7 @@ class SeqRepoAccess(SeqRepoDataProxy): 
     | 
|
| 
       29 
29 
     | 
    
         
             
                    ac: str,
         
     | 
| 
       30 
30 
     | 
    
         
             
                    start: int | None = None,
         
     | 
| 
       31 
31 
     | 
    
         
             
                    end: int | None = None,
         
     | 
| 
       32 
     | 
    
         
            -
                     
     | 
| 
      
 32 
     | 
    
         
            +
                    coordinate_type: CoordinateType = CoordinateType.RESIDUE,
         
     | 
| 
       33 
33 
     | 
    
         
             
                ) -> tuple[str, str | None]:
         
     | 
| 
       34 
34 
     | 
    
         
             
                    """Get reference sequence for an accession given a start and end position. If
         
     | 
| 
       35 
35 
     | 
    
         
             
                    ``start`` and ``end`` are not given, returns the entire reference sequence.
         
     | 
| 
         @@ -46,7 +46,7 @@ class SeqRepoAccess(SeqRepoDataProxy): 
     | 
|
| 
       46 
46 
     | 
    
         
             
                    :param start: Start pos change
         
     | 
| 
       47 
47 
     | 
    
         
             
                    :param end: End pos change. If ``None`` assumes both ``start`` and ``end`` have
         
     | 
| 
       48 
48 
     | 
    
         
             
                        same values, if ``start`` exists.
         
     | 
| 
       49 
     | 
    
         
            -
                    :param  
     | 
| 
      
 49 
     | 
    
         
            +
                    :param coordinate_type: Coordinate type for ``start`` and ``end``
         
     | 
| 
       50 
50 
     | 
    
         
             
                    :return: Sequence at position (if accession and positions actually
         
     | 
| 
       51 
51 
     | 
    
         
             
                        exist, else return empty string), warning if any
         
     | 
| 
       52 
52 
     | 
    
         
             
                    """
         
     | 
| 
         @@ -55,11 +55,11 @@ class SeqRepoAccess(SeqRepoDataProxy): 
     | 
|
| 
       55 
55 
     | 
    
         
             
                            msg = f"start ({start}) cannot be greater than end ({end})"
         
     | 
| 
       56 
56 
     | 
    
         
             
                            return "", msg
         
     | 
| 
       57 
57 
     | 
    
         | 
| 
       58 
     | 
    
         
            -
                        start, end = get_inter_residue_pos(start, end,  
     | 
| 
      
 58 
     | 
    
         
            +
                        start, end = get_inter_residue_pos(start, end, coordinate_type)
         
     | 
| 
       59 
59 
     | 
    
         
             
                        if start == end:
         
     | 
| 
       60 
60 
     | 
    
         
             
                            end += 1
         
     | 
| 
       61 
61 
     | 
    
         
             
                    else:
         
     | 
| 
       62 
     | 
    
         
            -
                        if start is not None and  
     | 
| 
      
 62 
     | 
    
         
            +
                        if start is not None and coordinate_type == CoordinateType.RESIDUE:
         
     | 
| 
       63 
63 
     | 
    
         
             
                            start -= 1
         
     | 
| 
       64 
64 
     | 
    
         | 
| 
       65 
65 
     | 
    
         
             
                    try:
         
     | 
| 
         @@ -3,7 +3,7 @@ reference sequences. 
     | 
|
| 
       3 
3 
     | 
    
         
             
            """
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
         
     | 
| 
       6 
     | 
    
         
            -
            from cool_seq_tool.schemas import AnnotationLayer, Assembly,  
     | 
| 
      
 6 
     | 
    
         
            +
            from cool_seq_tool.schemas import AnnotationLayer, Assembly, CoordinateType
         
     | 
| 
       7 
7 
     | 
    
         
             
            from cool_seq_tool.sources import TranscriptMappings, UtaDatabase
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         | 
| 
         @@ -32,14 +32,14 @@ class AlignmentMapper: 
     | 
|
| 
       32 
32 
     | 
    
         
             
                    p_ac: str,
         
     | 
| 
       33 
33 
     | 
    
         
             
                    p_start_pos: int,
         
     | 
| 
       34 
34 
     | 
    
         
             
                    p_end_pos: int,
         
     | 
| 
       35 
     | 
    
         
            -
                     
     | 
| 
      
 35 
     | 
    
         
            +
                    coordinate_type: CoordinateType = CoordinateType.RESIDUE,
         
     | 
| 
       36 
36 
     | 
    
         
             
                ) -> tuple[dict | None, str | None]:
         
     | 
| 
       37 
37 
     | 
    
         
             
                    """Translate protein representation to cDNA representation.
         
     | 
| 
       38 
38 
     | 
    
         | 
| 
       39 
39 
     | 
    
         
             
                    :param p_ac: Protein RefSeq accession
         
     | 
| 
       40 
40 
     | 
    
         
             
                    :param p_start_pos: Protein start position
         
     | 
| 
       41 
41 
     | 
    
         
             
                    :param p_end_pos: Protein end position
         
     | 
| 
       42 
     | 
    
         
            -
                    :param  
     | 
| 
      
 42 
     | 
    
         
            +
                    :param coordinate_type: Coordinate type for ``p_start_pos`` and ``p_end_pos``
         
     | 
| 
       43 
43 
     | 
    
         
             
                    :return: Tuple containing:
         
     | 
| 
       44 
44 
     | 
    
         | 
| 
       45 
45 
     | 
    
         
             
                    * cDNA representation (accession, codon range positions for corresponding
         
     | 
| 
         @@ -66,7 +66,7 @@ class AlignmentMapper: 
     | 
|
| 
       66 
66 
     | 
    
         
             
                    # 1 amino acid maps to 3 nucleotides in the codon
         
     | 
| 
       67 
67 
     | 
    
         
             
                    # Since we have the end of the codon, we will subtract 2 to get the start of the
         
     | 
| 
       68 
68 
     | 
    
         
             
                    # codon. We want to return inter-residue (0-based), so we subtract 1 from this.
         
     | 
| 
       69 
     | 
    
         
            -
                    if  
     | 
| 
      
 69 
     | 
    
         
            +
                    if coordinate_type == CoordinateType.RESIDUE:
         
     | 
| 
       70 
70 
     | 
    
         
             
                        c_pos = (p_start_pos * 3) - 3, p_end_pos * 3
         
     | 
| 
       71 
71 
     | 
    
         
             
                    else:
         
     | 
| 
       72 
72 
     | 
    
         
             
                        if p_start_pos == p_end_pos:
         
     | 
| 
         @@ -79,7 +79,7 @@ class AlignmentMapper: 
     | 
|
| 
       79 
79 
     | 
    
         
             
                        "c_start_pos": c_pos[0],
         
     | 
| 
       80 
80 
     | 
    
         
             
                        "c_end_pos": c_pos[1],
         
     | 
| 
       81 
81 
     | 
    
         
             
                        "cds_start": cds_start,
         
     | 
| 
       82 
     | 
    
         
            -
                        " 
     | 
| 
      
 82 
     | 
    
         
            +
                        "coordinate_type": CoordinateType.INTER_RESIDUE.value,
         
     | 
| 
       83 
83 
     | 
    
         
             
                    }, None
         
     | 
| 
       84 
84 
     | 
    
         | 
| 
       85 
85 
     | 
    
         
             
                async def _get_cds_start(self, c_ac: str) -> tuple[int | None, str | None]:
         
     | 
| 
         @@ -105,7 +105,7 @@ class AlignmentMapper: 
     | 
|
| 
       105 
105 
     | 
    
         
             
                    c_start_pos: int,
         
     | 
| 
       106 
106 
     | 
    
         
             
                    c_end_pos: int,
         
     | 
| 
       107 
107 
     | 
    
         
             
                    cds_start: int | None = None,
         
     | 
| 
       108 
     | 
    
         
            -
                     
     | 
| 
      
 108 
     | 
    
         
            +
                    coordinate_type: CoordinateType = CoordinateType.RESIDUE,
         
     | 
| 
       109 
109 
     | 
    
         
             
                    target_genome_assembly: bool = Assembly.GRCH38,
         
     | 
| 
       110 
110 
     | 
    
         
             
                ) -> tuple[dict | None, str | None]:
         
     | 
| 
       111 
111 
     | 
    
         
             
                    """Translate cDNA representation to genomic representation
         
     | 
| 
         @@ -125,9 +125,9 @@ class AlignmentMapper: 
     | 
|
| 
       125 
125 
     | 
    
         
             
                    if any(
         
     | 
| 
       126 
126 
     | 
    
         
             
                        (
         
     | 
| 
       127 
127 
     | 
    
         
             
                            c_start_pos == c_end_pos,
         
     | 
| 
       128 
     | 
    
         
            -
                            ( 
     | 
| 
      
 128 
     | 
    
         
            +
                            (coordinate_type == CoordinateType.INTER_RESIDUE)
         
     | 
| 
       129 
129 
     | 
    
         
             
                            and ((c_end_pos - c_start_pos) % 3 != 0),
         
     | 
| 
       130 
     | 
    
         
            -
                            ( 
     | 
| 
      
 130 
     | 
    
         
            +
                            (coordinate_type == CoordinateType.RESIDUE)
         
     | 
| 
       131 
131 
     | 
    
         
             
                            and ((c_end_pos - (c_start_pos - 1)) % 3 != 0),
         
     | 
| 
       132 
132 
     | 
    
         
             
                        )
         
     | 
| 
       133 
133 
     | 
    
         
             
                    ):
         
     | 
| 
         @@ -146,7 +146,7 @@ class AlignmentMapper: 
     | 
|
| 
       146 
146 
     | 
    
         
             
                            return None, warning
         
     | 
| 
       147 
147 
     | 
    
         | 
| 
       148 
148 
     | 
    
         
             
                    # Change to inter-residue
         
     | 
| 
       149 
     | 
    
         
            -
                    if  
     | 
| 
      
 149 
     | 
    
         
            +
                    if coordinate_type == CoordinateType.RESIDUE:
         
     | 
| 
       150 
150 
     | 
    
         
             
                        c_start_pos -= 1
         
     | 
| 
       151 
151 
     | 
    
         | 
| 
       152 
152 
     | 
    
         
             
                    # Get aligned genomic and transcript data
         
     | 
| 
         @@ -163,7 +163,7 @@ class AlignmentMapper: 
     | 
|
| 
       163 
163 
     | 
    
         
             
                            f"position ({c_start_pos}, {c_end_pos})"
         
     | 
| 
       164 
164 
     | 
    
         
             
                        )
         
     | 
| 
       165 
165 
     | 
    
         
             
                    else:
         
     | 
| 
       166 
     | 
    
         
            -
                        alt_ac = genomic_tx_data 
     | 
| 
      
 166 
     | 
    
         
            +
                        alt_ac = genomic_tx_data.alt_ac
         
     | 
| 
       167 
167 
     | 
    
         | 
| 
       168 
168 
     | 
    
         
             
                        # Validate that genomic accession assembly == target_genome_assembly
         
     | 
| 
       169 
169 
     | 
    
         
             
                        aliases, _ = self.seqrepo_access.translate_identifier(alt_ac)
         
     | 
| 
         @@ -180,7 +180,7 @@ class AlignmentMapper: 
     | 
|
| 
       180 
180 
     | 
    
         
             
                                        f"{target_genome_assembly}"
         
     | 
| 
       181 
181 
     | 
    
         
             
                                    )
         
     | 
| 
       182 
182 
     | 
    
         
             
                                else:
         
     | 
| 
       183 
     | 
    
         
            -
                                    g_pos = genomic_tx_data 
     | 
| 
      
 183 
     | 
    
         
            +
                                    g_pos = genomic_tx_data.alt_pos_change_range
         
     | 
| 
       184 
184 
     | 
    
         | 
| 
       185 
185 
     | 
    
         
             
                                    # start pos should be less than end pos in response
         
     | 
| 
       186 
186 
     | 
    
         
             
                                    if g_pos[0] > g_pos[1]:
         
     | 
| 
         @@ -194,7 +194,7 @@ class AlignmentMapper: 
     | 
|
| 
       194 
194 
     | 
    
         
             
                                        "g_ac": alt_ac,
         
     | 
| 
       195 
195 
     | 
    
         
             
                                        "g_start_pos": g_start_pos,
         
     | 
| 
       196 
196 
     | 
    
         
             
                                        "g_end_pos": g_end_pos,
         
     | 
| 
       197 
     | 
    
         
            -
                                        " 
     | 
| 
      
 197 
     | 
    
         
            +
                                        "coordinate_type": CoordinateType.INTER_RESIDUE.value,
         
     | 
| 
       198 
198 
     | 
    
         
             
                                    }
         
     | 
| 
       199 
199 
     | 
    
         
             
                        else:
         
     | 
| 
       200 
200 
     | 
    
         
             
                            warning = (
         
     | 
| 
         @@ -209,7 +209,7 @@ class AlignmentMapper: 
     | 
|
| 
       209 
209 
     | 
    
         
             
                    p_ac: str,
         
     | 
| 
       210 
210 
     | 
    
         
             
                    p_start_pos: int,
         
     | 
| 
       211 
211 
     | 
    
         
             
                    p_end_pos: int,
         
     | 
| 
       212 
     | 
    
         
            -
                     
     | 
| 
      
 212 
     | 
    
         
            +
                    coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
         
     | 
| 
       213 
213 
     | 
    
         
             
                    target_genome_assembly: Assembly = Assembly.GRCH38,
         
     | 
| 
       214 
214 
     | 
    
         
             
                ) -> tuple[dict | None, str | None]:
         
     | 
| 
       215 
215 
     | 
    
         
             
                    """Translate protein representation to genomic representation, by way of
         
     | 
| 
         @@ -218,7 +218,7 @@ class AlignmentMapper: 
     | 
|
| 
       218 
218 
     | 
    
         
             
                    :param p_ac: Protein RefSeq accession
         
     | 
| 
       219 
219 
     | 
    
         
             
                    :param p_start_pos: Protein start position
         
     | 
| 
       220 
220 
     | 
    
         
             
                    :param p_end_pos: Protein end position
         
     | 
| 
       221 
     | 
    
         
            -
                    :param  
     | 
| 
      
 221 
     | 
    
         
            +
                    :param coordinate_type: Coordinate type for ``p_start_pos`` and ``p_end_pos``.
         
     | 
| 
       222 
222 
     | 
    
         
             
                    :param target_genome_assembly: Genome assembly to get genomic data for
         
     | 
| 
       223 
223 
     | 
    
         
             
                    :return: Tuple containing:
         
     | 
| 
       224 
224 
     | 
    
         | 
| 
         @@ -227,7 +227,7 @@ class AlignmentMapper: 
     | 
|
| 
       227 
227 
     | 
    
         
             
                    * Warnings, if conversion to cDNA or genomic coordinates fails.
         
     | 
| 
       228 
228 
     | 
    
         
             
                    """
         
     | 
| 
       229 
229 
     | 
    
         
             
                    c_data, warning = await self.p_to_c(
         
     | 
| 
       230 
     | 
    
         
            -
                        p_ac, p_start_pos, p_end_pos,  
     | 
| 
      
 230 
     | 
    
         
            +
                        p_ac, p_start_pos, p_end_pos, coordinate_type=coordinate_type
         
     | 
| 
       231 
231 
     | 
    
         
             
                    )
         
     | 
| 
       232 
232 
     | 
    
         
             
                    if not c_data:
         
     | 
| 
       233 
233 
     | 
    
         
             
                        return None, warning
         
     | 
| 
         @@ -238,7 +238,7 @@ class AlignmentMapper: 
     | 
|
| 
       238 
238 
     | 
    
         
             
                        c_data["c_start_pos"],
         
     | 
| 
       239 
239 
     | 
    
         
             
                        c_data["c_end_pos"],
         
     | 
| 
       240 
240 
     | 
    
         
             
                        c_data["cds_start"],
         
     | 
| 
       241 
     | 
    
         
            -
                         
     | 
| 
      
 241 
     | 
    
         
            +
                        coordinate_type=CoordinateType.INTER_RESIDUE,
         
     | 
| 
       242 
242 
     | 
    
         
             
                        target_genome_assembly=target_genome_assembly,
         
     | 
| 
       243 
243 
     | 
    
         
             
                    )
         
     | 
| 
       244 
244 
     | 
    
         
             
                    return g_data, warning
         
     |