cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +1 -3
- cool_seq_tool/api.py +1 -2
- cool_seq_tool/app.py +38 -23
- cool_seq_tool/handlers/__init__.py +1 -0
- cool_seq_tool/handlers/seqrepo_access.py +13 -15
- cool_seq_tool/mappers/__init__.py +1 -0
- cool_seq_tool/mappers/alignment.py +5 -6
- cool_seq_tool/mappers/exon_genomic_coords.py +75 -73
- cool_seq_tool/mappers/mane_transcript.py +84 -86
- cool_seq_tool/resources/__init__.py +1 -0
- cool_seq_tool/resources/data_files.py +93 -0
- cool_seq_tool/resources/status.py +151 -0
- cool_seq_tool/routers/__init__.py +1 -0
- cool_seq_tool/routers/default.py +1 -0
- cool_seq_tool/routers/mane.py +4 -4
- cool_seq_tool/routers/mappings.py +2 -2
- cool_seq_tool/schemas.py +83 -37
- cool_seq_tool/sources/__init__.py +1 -0
- cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
- cool_seq_tool/sources/transcript_mappings.py +41 -32
- cool_seq_tool/sources/uta_database.py +91 -70
- cool_seq_tool/utils.py +2 -2
- cool_seq_tool/version.py +2 -1
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/LICENSE +1 -1
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/METADATA +15 -8
- cool_seq_tool-0.4.1.dist-info/RECORD +29 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/WHEEL +1 -1
- cool_seq_tool/data/__init__.py +0 -2
- cool_seq_tool/data/data_downloads.py +0 -89
- cool_seq_tool/paths.py +0 -28
- cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
- /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.4.1.dist-info}/top_level.txt +0 -0
cool_seq_tool/__init__.py
CHANGED
cool_seq_tool/api.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
"""Main application for FastAPI"""
|
2
|
-
from typing import Dict
|
3
2
|
|
4
3
|
from fastapi import FastAPI
|
5
4
|
from fastapi.openapi.utils import get_openapi
|
@@ -19,7 +18,7 @@ app.include_router(mane.router)
|
|
19
18
|
app.include_router(mappings.router)
|
20
19
|
|
21
20
|
|
22
|
-
def custom_openapi() ->
|
21
|
+
def custom_openapi() -> dict:
|
23
22
|
"""Generate custom fields for OpenAPI response."""
|
24
23
|
if app.openapi_schema:
|
25
24
|
return app.openapi_schema
|
cool_seq_tool/app.py
CHANGED
@@ -1,24 +1,18 @@
|
|
1
1
|
"""Provides core CoolSeqTool class, which non-redundantly initializes all Cool-Seq-Tool
|
2
2
|
data handler and mapping resources for straightforward access.
|
3
3
|
"""
|
4
|
+
|
4
5
|
import logging
|
5
6
|
from pathlib import Path
|
6
|
-
from typing import Optional
|
7
7
|
|
8
8
|
from biocommons.seqrepo import SeqRepo
|
9
9
|
|
10
|
-
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
10
|
+
from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
|
11
11
|
from cool_seq_tool.mappers import (
|
12
12
|
AlignmentMapper,
|
13
13
|
ExonGenomicCoordsMapper,
|
14
14
|
ManeTranscript,
|
15
15
|
)
|
16
|
-
from cool_seq_tool.paths import (
|
17
|
-
LRG_REFSEQGENE_PATH,
|
18
|
-
MANE_SUMMARY_PATH,
|
19
|
-
SEQREPO_ROOT_DIR,
|
20
|
-
TRANSCRIPT_MAPPINGS_PATH,
|
21
|
-
)
|
22
16
|
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
23
17
|
from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
|
24
18
|
from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
|
@@ -37,26 +31,44 @@ class CoolSeqTool:
|
|
37
31
|
* ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
|
38
32
|
* ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
|
39
33
|
* ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
|
40
|
-
|
41
|
-
Initialization with default resource locations is straightforward:
|
42
|
-
|
43
|
-
.. code-block:: pycon
|
44
|
-
|
45
|
-
>>> from cool_seq_tool.app import CoolSeqTool
|
46
|
-
>>> cst = CoolSeqTool()
|
47
|
-
|
48
|
-
See the :ref:`configuration <configuration>` section for more information.
|
49
34
|
"""
|
50
35
|
|
51
36
|
def __init__(
|
52
37
|
self,
|
53
|
-
transcript_file_path: Path =
|
54
|
-
lrg_refseqgene_path: Path =
|
55
|
-
mane_data_path: Path =
|
38
|
+
transcript_file_path: Path | None = None,
|
39
|
+
lrg_refseqgene_path: Path | None = None,
|
40
|
+
mane_data_path: Path | None = None,
|
56
41
|
db_url: str = UTA_DB_URL,
|
57
|
-
sr:
|
42
|
+
sr: SeqRepo | None = None,
|
43
|
+
force_local_files: bool = False,
|
58
44
|
) -> None:
|
59
|
-
"""Initialize CoolSeqTool class
|
45
|
+
"""Initialize CoolSeqTool class.
|
46
|
+
|
47
|
+
Initialization with default resource locations is straightforward:
|
48
|
+
|
49
|
+
>>> from cool_seq_tool.app import CoolSeqTool
|
50
|
+
>>> cst = CoolSeqTool()
|
51
|
+
|
52
|
+
By default, this will attempt to fetch the latest versions of static resources,
|
53
|
+
which means brief FTP and HTTPS requests to NCBI servers upon initialization.
|
54
|
+
To suppress this check and simply rely on the most recent locally-available
|
55
|
+
data:
|
56
|
+
|
57
|
+
>>> cst = CoolSeqTool(force_local_files=True)
|
58
|
+
|
59
|
+
Note that this will raise a FileNotFoundError if no locally-available data exists.
|
60
|
+
|
61
|
+
Paths to those files can also be passed explicitly to avoid these checks:
|
62
|
+
|
63
|
+
>>> from pathlib import Path
|
64
|
+
>>> cst = CoolSeqTool(
|
65
|
+
... lrg_refseqgene_path=Path("lrg_refseqgene_20240625.tsv"),
|
66
|
+
... mane_data_path=Path("ncbi_mane_summary_1.3.txt"),
|
67
|
+
... )
|
68
|
+
|
69
|
+
If not passed explicit arguments, these locations can also be set via
|
70
|
+
environment variables. See the :ref:`configuration <configuration>` section of
|
71
|
+
the docs for more information.
|
60
72
|
|
61
73
|
:param transcript_file_path: The path to ``transcript_mapping.tsv``
|
62
74
|
:param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
|
@@ -64,6 +76,8 @@ class CoolSeqTool:
|
|
64
76
|
:param db_url: PostgreSQL connection URL
|
65
77
|
Format: ``driver://user:password@host/database/schema``
|
66
78
|
:param sr: SeqRepo instance. If this is not provided, will create a new instance
|
79
|
+
:param force_local_files: if ``True``, don't check for or try to acquire latest
|
80
|
+
versions of static data files -- just use most recently available, if any
|
67
81
|
"""
|
68
82
|
if not sr:
|
69
83
|
sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
|
@@ -71,9 +85,10 @@ class CoolSeqTool:
|
|
71
85
|
self.transcript_mappings = TranscriptMappings(
|
72
86
|
transcript_file_path=transcript_file_path,
|
73
87
|
lrg_refseqgene_path=lrg_refseqgene_path,
|
88
|
+
from_local=force_local_files,
|
74
89
|
)
|
75
90
|
self.mane_transcript_mappings = ManeTranscriptMappings(
|
76
|
-
mane_data_path=mane_data_path
|
91
|
+
mane_data_path=mane_data_path, from_local=force_local_files
|
77
92
|
)
|
78
93
|
self.uta_db = UtaDatabase(db_url=db_url)
|
79
94
|
self.alignment_mapper = AlignmentMapper(
|
@@ -1,10 +1,10 @@
|
|
1
1
|
"""Wrap SeqRepo to provide additional lookup and identification methods on top of basic
|
2
2
|
dereferencing functions.
|
3
3
|
"""
|
4
|
+
|
4
5
|
import logging
|
5
6
|
from os import environ
|
6
7
|
from pathlib import Path
|
7
|
-
from typing import List, Optional, Tuple, Union
|
8
8
|
|
9
9
|
from ga4gh.vrs.dataproxy import SeqRepoDataProxy
|
10
10
|
|
@@ -14,6 +14,9 @@ from cool_seq_tool.utils import get_inter_residue_pos
|
|
14
14
|
logger = logging.getLogger(__name__)
|
15
15
|
|
16
16
|
|
17
|
+
SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
|
18
|
+
|
19
|
+
|
17
20
|
class SeqRepoAccess(SeqRepoDataProxy):
|
18
21
|
"""Provide a wrapper around the base SeqRepoDataProxy class from ``VRS-Python`` to
|
19
22
|
provide additional lookup and identification methods.
|
@@ -24,10 +27,10 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
24
27
|
def get_reference_sequence(
|
25
28
|
self,
|
26
29
|
ac: str,
|
27
|
-
start:
|
28
|
-
end:
|
30
|
+
start: int | None = None,
|
31
|
+
end: int | None = None,
|
29
32
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
30
|
-
) ->
|
33
|
+
) -> tuple[str, str | None]:
|
31
34
|
"""Get reference sequence for an accession given a start and end position. If
|
32
35
|
``start`` and ``end`` are not given, returns the entire reference sequence.
|
33
36
|
|
@@ -93,8 +96,8 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
93
96
|
return sequence, None
|
94
97
|
|
95
98
|
def translate_identifier(
|
96
|
-
self, ac: str, target_namespaces:
|
97
|
-
) ->
|
99
|
+
self, ac: str, target_namespaces: str | list[str] | None = None
|
100
|
+
) -> tuple[list[str], str | None]:
|
98
101
|
"""Return list of identifiers for accession.
|
99
102
|
|
100
103
|
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
@@ -120,9 +123,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
120
123
|
else:
|
121
124
|
return ga4gh_identifiers, None
|
122
125
|
|
123
|
-
def translate_alias(
|
124
|
-
self, input_str: str
|
125
|
-
) -> Tuple[List[Optional[str]], Optional[str]]:
|
126
|
+
def translate_alias(self, input_str: str) -> tuple[list[str | None], str | None]:
|
126
127
|
"""Get aliases for a given input.
|
127
128
|
|
128
129
|
:param str input_str: Input to get aliases for
|
@@ -135,9 +136,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
135
136
|
logger.warning(msg)
|
136
137
|
return [], msg
|
137
138
|
|
138
|
-
def chromosome_to_acs(
|
139
|
-
self, chromosome: str
|
140
|
-
) -> Tuple[Optional[List[str]], Optional[str]]:
|
139
|
+
def chromosome_to_acs(self, chromosome: str) -> tuple[list[str] | None, str | None]:
|
141
140
|
"""Get accessions for a chromosome
|
142
141
|
|
143
142
|
:param chromosome: Chromosome number. Must be either 1-22, X, or Y
|
@@ -148,13 +147,12 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
148
147
|
tmp_acs, _ = self.translate_identifier(
|
149
148
|
f"{assembly}:chr{chromosome}", target_namespaces="refseq"
|
150
149
|
)
|
151
|
-
for ac in tmp_acs
|
152
|
-
acs.append(ac.split("refseq:")[-1])
|
150
|
+
acs += [ac.split("refseq:")[-1] for ac in tmp_acs]
|
153
151
|
if acs:
|
154
152
|
return acs, None
|
155
153
|
return None, f"{chromosome} is not a valid chromosome"
|
156
154
|
|
157
|
-
def ac_to_chromosome(self, ac: str) ->
|
155
|
+
def ac_to_chromosome(self, ac: str) -> tuple[str | None, str | None]:
|
158
156
|
"""Get chromosome for accession.
|
159
157
|
|
160
158
|
:param str ac: Accession
|
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Module containing alignment methods for translating to and from different
|
2
2
|
reference sequences.
|
3
3
|
"""
|
4
|
-
from typing import Dict, Optional, Tuple
|
5
4
|
|
6
5
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
7
6
|
from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode
|
@@ -34,7 +33,7 @@ class AlignmentMapper:
|
|
34
33
|
p_start_pos: int,
|
35
34
|
p_end_pos: int,
|
36
35
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
37
|
-
) ->
|
36
|
+
) -> tuple[dict | None, str | None]:
|
38
37
|
"""Translate protein representation to cDNA representation.
|
39
38
|
|
40
39
|
:param p_ac: Protein RefSeq accession
|
@@ -83,7 +82,7 @@ class AlignmentMapper:
|
|
83
82
|
"residue_mode": ResidueMode.INTER_RESIDUE.value,
|
84
83
|
}, None
|
85
84
|
|
86
|
-
async def _get_cds_start(self, c_ac: str) ->
|
85
|
+
async def _get_cds_start(self, c_ac: str) -> tuple[int | None, str | None]:
|
87
86
|
"""Get CDS start for a given cDNA RefSeq accession
|
88
87
|
|
89
88
|
:param c_ac: cDNA RefSeq accession
|
@@ -105,10 +104,10 @@ class AlignmentMapper:
|
|
105
104
|
c_ac: str,
|
106
105
|
c_start_pos: int,
|
107
106
|
c_end_pos: int,
|
108
|
-
cds_start:
|
107
|
+
cds_start: int | None = None,
|
109
108
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
110
109
|
target_genome_assembly: bool = Assembly.GRCH38,
|
111
|
-
) ->
|
110
|
+
) -> tuple[dict | None, str | None]:
|
112
111
|
"""Translate cDNA representation to genomic representation
|
113
112
|
|
114
113
|
:param c_ac: cDNA RefSeq accession
|
@@ -212,7 +211,7 @@ class AlignmentMapper:
|
|
212
211
|
p_end_pos: int,
|
213
212
|
residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE,
|
214
213
|
target_genome_assembly: Assembly = Assembly.GRCH38,
|
215
|
-
) ->
|
214
|
+
) -> tuple[dict | None, str | None]:
|
216
215
|
"""Translate protein representation to genomic representation, by way of
|
217
216
|
intermediary conversion into cDNA coordinates.
|
218
217
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Provide mapping capabilities between transcript exon and genomic coordinates."""
|
2
|
+
|
2
3
|
import logging
|
3
|
-
from typing import
|
4
|
+
from typing import Literal, TypeVar
|
4
5
|
|
5
6
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
6
7
|
from cool_seq_tool.mappers.mane_transcript import CdnaRepresentation, ManeTranscript
|
@@ -50,15 +51,15 @@ class ExonGenomicCoordsMapper:
|
|
50
51
|
event loop. See the :ref:`Usage section <async_note>` for more information.
|
51
52
|
|
52
53
|
>>> import asyncio
|
53
|
-
>>> result = asyncio.run(
|
54
|
-
...
|
55
|
-
...
|
56
|
-
...
|
57
|
-
... )
|
54
|
+
>>> result = asyncio.run(
|
55
|
+
... egc.transcript_to_genomic_coordinates(
|
56
|
+
... "NM_002529.3", exon_start=2, exon_end=17
|
57
|
+
... )
|
58
|
+
... )
|
58
59
|
>>> result.genomic_data.start, result.genomic_data.end
|
59
60
|
(156864428, 156881456)
|
60
61
|
|
61
|
-
:param
|
62
|
+
:param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
|
62
63
|
:param uta_db: UtaDatabase instance to give access to query UTA database
|
63
64
|
:param mane_transcript: Instance to align to MANE or compatible representation
|
64
65
|
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
|
@@ -86,10 +87,10 @@ class ExonGenomicCoordsMapper:
|
|
86
87
|
async def transcript_to_genomic_coordinates(
|
87
88
|
self,
|
88
89
|
transcript: str,
|
89
|
-
gene:
|
90
|
-
exon_start:
|
90
|
+
gene: str | None = None,
|
91
|
+
exon_start: int | None = None,
|
91
92
|
exon_start_offset: int = 0,
|
92
|
-
exon_end:
|
93
|
+
exon_end: int | None = None,
|
93
94
|
exon_end_offset: int = 0,
|
94
95
|
) -> GenomicDataResponse:
|
95
96
|
"""Get genomic data given transcript data.
|
@@ -99,11 +100,14 @@ class ExonGenomicCoordsMapper:
|
|
99
100
|
>>> import asyncio
|
100
101
|
>>> from cool_seq_tool.app import CoolSeqTool
|
101
102
|
>>> egc = CoolSeqTool().ex_g_coords_mapper
|
102
|
-
>>> tpm3 = asyncio.run(
|
103
|
-
...
|
104
|
-
...
|
105
|
-
...
|
106
|
-
...
|
103
|
+
>>> tpm3 = asyncio.run(
|
104
|
+
... egc.transcript_to_genomic_coordinates(
|
105
|
+
... "NM_152263.3",
|
106
|
+
... gene="TPM3",
|
107
|
+
... exon_start=1,
|
108
|
+
... exon_end=8,
|
109
|
+
... )
|
110
|
+
... )
|
107
111
|
>>> tpm3.genomic_data.chr, tpm3.genomic_data.start, tpm3.genomic_data.end
|
108
112
|
('NC_000001.11', 154192135, 154170399)
|
109
113
|
|
@@ -223,17 +227,16 @@ class ExonGenomicCoordsMapper:
|
|
223
227
|
|
224
228
|
async def genomic_to_transcript_exon_coordinates(
|
225
229
|
self,
|
226
|
-
chromosome:
|
227
|
-
alt_ac:
|
228
|
-
start:
|
229
|
-
end:
|
230
|
-
strand:
|
231
|
-
transcript:
|
230
|
+
chromosome: str | None = None,
|
231
|
+
alt_ac: str | None = None,
|
232
|
+
start: int | None = None,
|
233
|
+
end: int | None = None,
|
234
|
+
strand: Strand | None = None,
|
235
|
+
transcript: str | None = None,
|
232
236
|
get_nearest_transcript_junction: bool = False,
|
233
|
-
gene:
|
234
|
-
residue_mode:
|
235
|
-
|
236
|
-
] = ResidueMode.RESIDUE,
|
237
|
+
gene: str | None = None,
|
238
|
+
residue_mode: Literal[ResidueMode.INTER_RESIDUE]
|
239
|
+
| Literal[ResidueMode.RESIDUE] = ResidueMode.RESIDUE,
|
237
240
|
) -> GenomicDataResponse:
|
238
241
|
"""Get transcript data for genomic data, lifted over to GRCh38.
|
239
242
|
|
@@ -244,13 +247,15 @@ class ExonGenomicCoordsMapper:
|
|
244
247
|
>>> from cool_seq_tool.app import CoolSeqTool
|
245
248
|
>>> from cool_seq_tool.schemas import Strand
|
246
249
|
>>> egc = CoolSeqTool().ex_g_coords_mapper
|
247
|
-
>>> result = asyncio.run(
|
248
|
-
...
|
249
|
-
...
|
250
|
-
...
|
251
|
-
...
|
252
|
-
...
|
253
|
-
...
|
250
|
+
>>> result = asyncio.run(
|
251
|
+
... egc.genomic_to_transcript_exon_coordinates(
|
252
|
+
... alt_ac="NC_000001.11",
|
253
|
+
... start=154192136,
|
254
|
+
... end=154170400,
|
255
|
+
... strand=Strand.NEGATIVE,
|
256
|
+
... transcript="NM_152263.3",
|
257
|
+
... )
|
258
|
+
... )
|
254
259
|
>>> result.genomic_data.exon_start, result.genomic_data.exon_end
|
255
260
|
(1, 8)
|
256
261
|
|
@@ -267,7 +272,7 @@ class ExonGenomicCoordsMapper:
|
|
267
272
|
following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
|
268
273
|
Compatible Transcript. See the :ref:`Transcript Selection policy <transcript_selection_policy>`
|
269
274
|
page.
|
270
|
-
param get_nearest_transcript_junction: If ``True``, this will return the
|
275
|
+
:param get_nearest_transcript_junction: If ``True``, this will return the
|
271
276
|
adjacent exon if the position specified by ``start`` or ``end`` does not
|
272
277
|
occur on an exon. For the positive strand, adjacent is defined as the exon
|
273
278
|
preceding the breakpoint for the 5' end and the exon following the
|
@@ -358,8 +363,8 @@ class ExonGenomicCoordsMapper:
|
|
358
363
|
|
359
364
|
@staticmethod
|
360
365
|
def _validate_exon(
|
361
|
-
transcript: str, tx_exons:
|
362
|
-
) ->
|
366
|
+
transcript: str, tx_exons: list[tuple[int, int]], exon_number: int
|
367
|
+
) -> tuple[tuple[int, int] | None, str | None]:
|
363
368
|
"""Validate that exon number exists on a given transcript
|
364
369
|
|
365
370
|
:param transcript: Transcript accession
|
@@ -379,12 +384,12 @@ class ExonGenomicCoordsMapper:
|
|
379
384
|
def get_tx_exon_coords(
|
380
385
|
self,
|
381
386
|
transcript: str,
|
382
|
-
tx_exons:
|
383
|
-
exon_start:
|
384
|
-
exon_end:
|
385
|
-
) ->
|
386
|
-
|
387
|
-
|
387
|
+
tx_exons: list[tuple[int, int]],
|
388
|
+
exon_start: int | None = None,
|
389
|
+
exon_end: int | None = None,
|
390
|
+
) -> tuple[
|
391
|
+
tuple[tuple[int, int] | None, tuple[int, int] | None] | None,
|
392
|
+
str | None,
|
388
393
|
]:
|
389
394
|
"""Get exon coordinates for ``exon_start`` and ``exon_end``
|
390
395
|
|
@@ -415,10 +420,10 @@ class ExonGenomicCoordsMapper:
|
|
415
420
|
async def _get_alt_ac_start_and_end(
|
416
421
|
self,
|
417
422
|
tx_ac: str,
|
418
|
-
tx_exon_start:
|
419
|
-
tx_exon_end:
|
420
|
-
gene:
|
421
|
-
) ->
|
423
|
+
tx_exon_start: tuple[int, int] | None = None,
|
424
|
+
tx_exon_end: tuple[int, int] | None = None,
|
425
|
+
gene: str | None = None,
|
426
|
+
) -> tuple[tuple[tuple[int, int], tuple[int, int]] | None, str | None]:
|
422
427
|
"""Get aligned genomic coordinates for transcript exon start and end.
|
423
428
|
|
424
429
|
:param tx_ac: Transcript accession
|
@@ -469,11 +474,11 @@ class ExonGenomicCoordsMapper:
|
|
469
474
|
async def _genomic_to_transcript_exon_coordinate(
|
470
475
|
self,
|
471
476
|
pos: int,
|
472
|
-
chromosome:
|
473
|
-
alt_ac:
|
474
|
-
strand:
|
475
|
-
transcript:
|
476
|
-
gene:
|
477
|
+
chromosome: str | None = None,
|
478
|
+
alt_ac: str | None = None,
|
479
|
+
strand: Strand | None = None,
|
480
|
+
transcript: str | None = None,
|
481
|
+
gene: str | None = None,
|
477
482
|
get_nearest_transcript_junction: bool = False,
|
478
483
|
is_start: bool = True,
|
479
484
|
) -> TranscriptExonDataResponse:
|
@@ -592,7 +597,7 @@ class ExonGenomicCoordsMapper:
|
|
592
597
|
)
|
593
598
|
params["strand"] = strand.value
|
594
599
|
resp.transcript_exon_data = TranscriptExonData(**params)
|
595
|
-
|
600
|
+
return resp
|
596
601
|
|
597
602
|
if alt_ac:
|
598
603
|
# Check if valid accession is given
|
@@ -648,8 +653,8 @@ class ExonGenomicCoordsMapper:
|
|
648
653
|
|
649
654
|
@staticmethod
|
650
655
|
def _get_gene_and_alt_ac(
|
651
|
-
genes_alt_acs:
|
652
|
-
) ->
|
656
|
+
genes_alt_acs: dict, gene: str | None
|
657
|
+
) -> tuple[tuple[str, str] | None, str | None]:
|
653
658
|
"""Return gene genomic accession
|
654
659
|
|
655
660
|
:param genes_alt_acs: Dictionary containing genes and genomic accessions
|
@@ -687,13 +692,13 @@ class ExonGenomicCoordsMapper:
|
|
687
692
|
|
688
693
|
async def _set_mane_genomic_data(
|
689
694
|
self,
|
690
|
-
params:
|
695
|
+
params: dict,
|
691
696
|
gene: str,
|
692
697
|
alt_ac: str,
|
693
698
|
pos: int,
|
694
699
|
strand: Strand,
|
695
700
|
is_start: bool,
|
696
|
-
) ->
|
701
|
+
) -> str | None:
|
697
702
|
"""Set genomic data in `params` found from MANE.
|
698
703
|
|
699
704
|
:param params: Parameters for response
|
@@ -706,9 +711,9 @@ class ExonGenomicCoordsMapper:
|
|
706
711
|
:return: Warnings if found
|
707
712
|
"""
|
708
713
|
start, end = get_inter_residue_pos(pos, pos, residue_mode=ResidueMode.ZERO)
|
709
|
-
mane_data:
|
710
|
-
CdnaRepresentation
|
711
|
-
|
714
|
+
mane_data: (
|
715
|
+
CdnaRepresentation | None
|
716
|
+
) = await self.mane_transcript.get_mane_transcript(
|
712
717
|
alt_ac,
|
713
718
|
start,
|
714
719
|
end,
|
@@ -777,8 +782,8 @@ class ExonGenomicCoordsMapper:
|
|
777
782
|
return None
|
778
783
|
|
779
784
|
async def _set_genomic_data(
|
780
|
-
self, params:
|
781
|
-
) ->
|
785
|
+
self, params: dict, strand: Strand, is_start: bool
|
786
|
+
) -> str | None:
|
782
787
|
"""Set genomic data in ``params``
|
783
788
|
|
784
789
|
:param params: Parameters for response
|
@@ -861,7 +866,7 @@ class ExonGenomicCoordsMapper:
|
|
861
866
|
|
862
867
|
@staticmethod
|
863
868
|
def _set_exon_offset(
|
864
|
-
params:
|
869
|
+
params: dict, start: int, end: int, pos: int, is_start: bool, strand: Strand
|
865
870
|
) -> None:
|
866
871
|
"""Set value for ``exon_offset`` in ``params``.
|
867
872
|
|
@@ -885,26 +890,23 @@ class ExonGenomicCoordsMapper:
|
|
885
890
|
params["exon_offset"] = pos - start
|
886
891
|
|
887
892
|
async def _structure_exons(
|
888
|
-
self, transcript: str, alt_ac:
|
889
|
-
) ->
|
893
|
+
self, transcript: str, alt_ac: str | None = None
|
894
|
+
) -> list[tuple[int, int]]:
|
890
895
|
"""Structure exons as list of tuples.
|
891
896
|
|
892
897
|
:param transcript: Transcript accession
|
893
898
|
:param alt_ac: Genomic accession
|
894
899
|
:return: List of tuples containing transcript exon coordinates
|
895
900
|
"""
|
896
|
-
result = []
|
897
901
|
tx_exons, _ = await self.uta_db.get_tx_exons(transcript, alt_ac=alt_ac)
|
898
902
|
|
899
903
|
if not tx_exons:
|
900
|
-
return
|
904
|
+
return []
|
901
905
|
|
902
|
-
for coords in tx_exons
|
903
|
-
result.append((coords[0], coords[1]))
|
904
|
-
return result
|
906
|
+
return [(coords[0], coords[1]) for coords in tx_exons]
|
905
907
|
|
906
908
|
@staticmethod
|
907
|
-
def _get_exon_number(tx_exons:
|
909
|
+
def _get_exon_number(tx_exons: list, tx_pos: int) -> int:
|
908
910
|
"""Find related exon number for a position
|
909
911
|
|
910
912
|
:param tx_exons: List of exon coordinates for a transcript
|
@@ -920,10 +922,10 @@ class ExonGenomicCoordsMapper:
|
|
920
922
|
|
921
923
|
@staticmethod
|
922
924
|
def _get_adjacent_exon(
|
923
|
-
tx_exons_genomic_coords:
|
925
|
+
tx_exons_genomic_coords: list[tuple[int, int, int, int, int]],
|
924
926
|
strand: Strand,
|
925
|
-
start:
|
926
|
-
end:
|
927
|
+
start: int | None = None,
|
928
|
+
end: int | None = None,
|
927
929
|
) -> int:
|
928
930
|
"""Return the adjacent exon given a non-exonic breakpoint. For the positive
|
929
931
|
strand, adjacent is defined as the exon preceding the breakpoint for the 5' end
|
@@ -961,7 +963,7 @@ class ExonGenomicCoordsMapper:
|
|
961
963
|
return exon[0] + 1 if end else exon[0] + 2
|
962
964
|
|
963
965
|
@staticmethod
|
964
|
-
def _is_exonic_breakpoint(pos: int, tx_genomic_coords:
|
966
|
+
def _is_exonic_breakpoint(pos: int, tx_genomic_coords: list) -> bool:
|
965
967
|
"""Check if a breakpoint occurs on an exon
|
966
968
|
|
967
969
|
:param pos: Genomic breakpoint
|