cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +7 -11
- cool_seq_tool/app.py +44 -24
- cool_seq_tool/handlers/__init__.py +1 -0
- cool_seq_tool/handlers/seqrepo_access.py +27 -25
- cool_seq_tool/mappers/__init__.py +3 -1
- cool_seq_tool/mappers/alignment.py +5 -6
- cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
- cool_seq_tool/mappers/liftover.py +90 -0
- cool_seq_tool/mappers/mane_transcript.py +208 -113
- cool_seq_tool/resources/__init__.py +1 -0
- cool_seq_tool/resources/data_files.py +93 -0
- cool_seq_tool/resources/status.py +153 -0
- cool_seq_tool/schemas.py +92 -54
- cool_seq_tool/sources/__init__.py +1 -0
- cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
- cool_seq_tool/sources/transcript_mappings.py +41 -32
- cool_seq_tool/sources/uta_database.py +96 -249
- cool_seq_tool/utils.py +44 -4
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
- cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
- cool_seq_tool/api.py +0 -42
- cool_seq_tool/data/__init__.py +0 -2
- cool_seq_tool/data/data_downloads.py +0 -89
- cool_seq_tool/paths.py +0 -28
- cool_seq_tool/routers/__init__.py +0 -16
- cool_seq_tool/routers/default.py +0 -125
- cool_seq_tool/routers/mane.py +0 -98
- cool_seq_tool/routers/mappings.py +0 -155
- cool_seq_tool/version.py +0 -2
- cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
- /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
|
|
1
1
|
"""Provide mapping capabilities between transcript exon and genomic coordinates."""
|
2
|
+
|
2
3
|
import logging
|
3
|
-
from typing import
|
4
|
+
from typing import Literal, TypeVar
|
4
5
|
|
5
6
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
7
|
+
from cool_seq_tool.mappers.liftover import LiftOver
|
6
8
|
from cool_seq_tool.mappers.mane_transcript import CdnaRepresentation, ManeTranscript
|
7
9
|
from cool_seq_tool.schemas import (
|
8
10
|
AnnotationLayer,
|
@@ -22,7 +24,7 @@ CoordinatesResponseType = TypeVar(
|
|
22
24
|
"CoordinatesResponseType", GenomicDataResponse, TranscriptExonDataResponse
|
23
25
|
)
|
24
26
|
|
25
|
-
|
27
|
+
_logger = logging.getLogger(__name__)
|
26
28
|
|
27
29
|
|
28
30
|
class ExonGenomicCoordsMapper:
|
@@ -36,6 +38,7 @@ class ExonGenomicCoordsMapper:
|
|
36
38
|
uta_db: UtaDatabase,
|
37
39
|
mane_transcript: ManeTranscript,
|
38
40
|
mane_transcript_mappings: ManeTranscriptMappings,
|
41
|
+
liftover: LiftOver,
|
39
42
|
) -> None:
|
40
43
|
"""Initialize ExonGenomicCoordsMapper class.
|
41
44
|
|
@@ -50,46 +53,49 @@ class ExonGenomicCoordsMapper:
|
|
50
53
|
event loop. See the :ref:`Usage section <async_note>` for more information.
|
51
54
|
|
52
55
|
>>> import asyncio
|
53
|
-
>>> result = asyncio.run(
|
54
|
-
...
|
55
|
-
...
|
56
|
-
...
|
57
|
-
... )
|
56
|
+
>>> result = asyncio.run(
|
57
|
+
... egc.transcript_to_genomic_coordinates(
|
58
|
+
... "NM_002529.3", exon_start=2, exon_end=17
|
59
|
+
... )
|
60
|
+
... )
|
58
61
|
>>> result.genomic_data.start, result.genomic_data.end
|
59
62
|
(156864428, 156881456)
|
60
63
|
|
61
|
-
:param
|
64
|
+
:param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
|
62
65
|
:param uta_db: UtaDatabase instance to give access to query UTA database
|
63
66
|
:param mane_transcript: Instance to align to MANE or compatible representation
|
64
67
|
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
|
68
|
+
:param liftover: Instance to provide mapping between human genome assemblies
|
65
69
|
"""
|
66
70
|
self.seqrepo_access = seqrepo_access
|
67
71
|
self.uta_db = uta_db
|
68
72
|
self.mane_transcript = mane_transcript
|
69
73
|
self.mane_transcript_mappings = mane_transcript_mappings
|
74
|
+
self.liftover = liftover
|
70
75
|
|
71
76
|
@staticmethod
|
72
77
|
def _return_warnings(
|
73
|
-
resp: CoordinatesResponseType, warning_msg: str
|
78
|
+
resp: CoordinatesResponseType, warning_msg: list[str]
|
74
79
|
) -> CoordinatesResponseType:
|
75
80
|
"""Add warnings to response object
|
76
81
|
|
77
82
|
:param resp: Response object
|
78
|
-
:param warning_msg: Warning message on why ``transcript_exon_data`` or
|
83
|
+
:param warning_msg: Warning message(s) on why ``transcript_exon_data`` or
|
79
84
|
``genomic_data`` field is ``None``
|
80
85
|
:return: Response object with warning message
|
81
86
|
"""
|
82
|
-
|
83
|
-
|
87
|
+
for msg in warning_msg:
|
88
|
+
_logger.warning(msg)
|
89
|
+
resp.warnings.append(msg)
|
84
90
|
return resp
|
85
91
|
|
86
92
|
async def transcript_to_genomic_coordinates(
|
87
93
|
self,
|
88
94
|
transcript: str,
|
89
|
-
gene:
|
90
|
-
exon_start:
|
95
|
+
gene: str | None = None,
|
96
|
+
exon_start: int | None = None,
|
91
97
|
exon_start_offset: int = 0,
|
92
|
-
exon_end:
|
98
|
+
exon_end: int | None = None,
|
93
99
|
exon_end_offset: int = 0,
|
94
100
|
) -> GenomicDataResponse:
|
95
101
|
"""Get genomic data given transcript data.
|
@@ -99,11 +105,14 @@ class ExonGenomicCoordsMapper:
|
|
99
105
|
>>> import asyncio
|
100
106
|
>>> from cool_seq_tool.app import CoolSeqTool
|
101
107
|
>>> egc = CoolSeqTool().ex_g_coords_mapper
|
102
|
-
>>> tpm3 = asyncio.run(
|
103
|
-
...
|
104
|
-
...
|
105
|
-
...
|
106
|
-
...
|
108
|
+
>>> tpm3 = asyncio.run(
|
109
|
+
... egc.transcript_to_genomic_coordinates(
|
110
|
+
... "NM_152263.3",
|
111
|
+
... gene="TPM3",
|
112
|
+
... exon_start=1,
|
113
|
+
... exon_end=8,
|
114
|
+
... )
|
115
|
+
... )
|
107
116
|
>>> tpm3.genomic_data.chr, tpm3.genomic_data.start, tpm3.genomic_data.end
|
108
117
|
('NC_000001.11', 154192135, 154170399)
|
109
118
|
|
@@ -122,42 +131,44 @@ class ExonGenomicCoordsMapper:
|
|
122
131
|
)
|
123
132
|
|
124
133
|
# Ensure valid inputs
|
134
|
+
warnings = []
|
125
135
|
if not transcript:
|
126
|
-
|
127
|
-
|
136
|
+
warnings.append("Must provide `transcript`")
|
137
|
+
else:
|
138
|
+
transcript = transcript.strip()
|
128
139
|
|
129
140
|
exon_start_exists, exon_end_exists = False, False
|
130
141
|
if exon_start is not None:
|
131
142
|
if exon_start < 1:
|
132
|
-
|
143
|
+
warnings.append("`exon_start` cannot be less than 1")
|
133
144
|
exon_start_exists = True
|
134
145
|
|
135
146
|
if exon_end is not None:
|
136
147
|
if exon_end < 1:
|
137
|
-
|
148
|
+
warnings.append("`exon_end` cannot be less than 1")
|
138
149
|
exon_end_exists = True
|
139
150
|
|
140
151
|
if not exon_start_exists and not exon_end_exists:
|
141
|
-
|
142
|
-
resp, "Must provide either `exon_start` or `exon_end`"
|
143
|
-
)
|
152
|
+
warnings.append("Must provide either `exon_start` or `exon_end`")
|
144
153
|
if exon_start_exists and exon_end_exists and (exon_start > exon_end):
|
145
|
-
|
146
|
-
|
147
|
-
f"Start exon {exon_start} is greater than end exon {exon_end}",
|
154
|
+
warnings.append(
|
155
|
+
f"Start exon {exon_start} is greater than end exon {exon_end}"
|
148
156
|
)
|
149
157
|
|
158
|
+
if warnings:
|
159
|
+
return self._return_warnings(resp, warnings)
|
160
|
+
|
150
161
|
# Get all exons and associated start/end coordinates for transcript
|
151
162
|
tx_exons, warning = await self.uta_db.get_tx_exons(transcript)
|
152
163
|
if not tx_exons:
|
153
|
-
return self._return_warnings(resp, warning
|
164
|
+
return self._return_warnings(resp, [warning] if warning else [])
|
154
165
|
|
155
166
|
# Get exon start and exon end coordinates
|
156
167
|
tx_exon_coords, warning = self.get_tx_exon_coords(
|
157
168
|
transcript, tx_exons, exon_start, exon_end
|
158
169
|
)
|
159
170
|
if not tx_exon_coords:
|
160
|
-
return self._return_warnings(resp, warning
|
171
|
+
return self._return_warnings(resp, [warning] if warning else [])
|
161
172
|
tx_exon_start_coords, tx_exon_end_coords = tx_exon_coords
|
162
173
|
|
163
174
|
if gene:
|
@@ -169,7 +180,7 @@ class ExonGenomicCoordsMapper:
|
|
169
180
|
transcript, tx_exon_start_coords, tx_exon_end_coords, gene=gene
|
170
181
|
)
|
171
182
|
if not alt_ac_start_end:
|
172
|
-
return self._return_warnings(resp, warning
|
183
|
+
return self._return_warnings(resp, [warning] if warning else [])
|
173
184
|
alt_ac_start_data, alt_ac_end_data = alt_ac_start_end
|
174
185
|
|
175
186
|
# Get gene and chromosome data, check that at least one was retrieved
|
@@ -178,8 +189,9 @@ class ExonGenomicCoordsMapper:
|
|
178
189
|
if gene is None or chromosome is None:
|
179
190
|
return self._return_warnings(
|
180
191
|
resp,
|
181
|
-
|
182
|
-
|
192
|
+
[
|
193
|
+
"Unable to retrieve `gene` or `chromosome` from genomic start and genomic end data"
|
194
|
+
],
|
183
195
|
)
|
184
196
|
|
185
197
|
g_start = alt_ac_start_data[3] - 1 if alt_ac_start_data else None
|
@@ -223,17 +235,16 @@ class ExonGenomicCoordsMapper:
|
|
223
235
|
|
224
236
|
async def genomic_to_transcript_exon_coordinates(
|
225
237
|
self,
|
226
|
-
chromosome:
|
227
|
-
alt_ac:
|
228
|
-
start:
|
229
|
-
end:
|
230
|
-
strand:
|
231
|
-
transcript:
|
238
|
+
chromosome: str | None = None,
|
239
|
+
alt_ac: str | None = None,
|
240
|
+
start: int | None = None,
|
241
|
+
end: int | None = None,
|
242
|
+
strand: Strand | None = None,
|
243
|
+
transcript: str | None = None,
|
232
244
|
get_nearest_transcript_junction: bool = False,
|
233
|
-
gene:
|
234
|
-
residue_mode:
|
235
|
-
|
236
|
-
] = ResidueMode.RESIDUE,
|
245
|
+
gene: str | None = None,
|
246
|
+
residue_mode: Literal[ResidueMode.INTER_RESIDUE]
|
247
|
+
| Literal[ResidueMode.RESIDUE] = ResidueMode.RESIDUE,
|
237
248
|
) -> GenomicDataResponse:
|
238
249
|
"""Get transcript data for genomic data, lifted over to GRCh38.
|
239
250
|
|
@@ -244,19 +255,20 @@ class ExonGenomicCoordsMapper:
|
|
244
255
|
>>> from cool_seq_tool.app import CoolSeqTool
|
245
256
|
>>> from cool_seq_tool.schemas import Strand
|
246
257
|
>>> egc = CoolSeqTool().ex_g_coords_mapper
|
247
|
-
>>> result = asyncio.run(
|
248
|
-
...
|
249
|
-
...
|
250
|
-
...
|
251
|
-
...
|
252
|
-
...
|
253
|
-
...
|
258
|
+
>>> result = asyncio.run(
|
259
|
+
... egc.genomic_to_transcript_exon_coordinates(
|
260
|
+
... alt_ac="NC_000001.11",
|
261
|
+
... start=154192136,
|
262
|
+
... end=154170400,
|
263
|
+
... strand=Strand.NEGATIVE,
|
264
|
+
... transcript="NM_152263.3",
|
265
|
+
... )
|
266
|
+
... )
|
254
267
|
>>> result.genomic_data.exon_start, result.genomic_data.exon_end
|
255
268
|
(1, 8)
|
256
269
|
|
257
|
-
:param chromosome:
|
258
|
-
|
259
|
-
If ``alt_ac`` is also provided, ``alt_ac`` will be used.
|
270
|
+
:param chromosome: e.g. ``"1"`` or ``"chr1"``. If not provided, must provide
|
271
|
+
``alt_ac``. If ``alt_ac`` is also provided, ``alt_ac`` will be used.
|
260
272
|
:param alt_ac: Genomic accession (e.g. ``NC_000001.11``). If not provided,
|
261
273
|
must provide ``chromosome``. If ``chromosome`` is also provided, ``alt_ac``
|
262
274
|
will be used.
|
@@ -267,21 +279,30 @@ class ExonGenomicCoordsMapper:
|
|
267
279
|
following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
|
268
280
|
Compatible Transcript. See the :ref:`Transcript Selection policy <transcript_selection_policy>`
|
269
281
|
page.
|
270
|
-
param get_nearest_transcript_junction: If ``True``, this will return the
|
282
|
+
:param get_nearest_transcript_junction: If ``True``, this will return the
|
271
283
|
adjacent exon if the position specified by ``start`` or ``end`` does not
|
272
284
|
occur on an exon. For the positive strand, adjacent is defined as the exon
|
273
285
|
preceding the breakpoint for the 5' end and the exon following the
|
274
286
|
breakpoint for the 3' end. For the negative strand, adjacent is defined as
|
275
287
|
the exon following the breakpoint for the 5' end and the exon preceding the
|
276
288
|
breakpoint for the 3' end.
|
289
|
+
:param gene: gene name. Ideally, HGNC symbol. Must be given if no ``transcript``
|
290
|
+
value is provided.
|
277
291
|
:param residue_mode: Residue mode for ``start`` and ``end``
|
278
292
|
:return: Genomic data (inter-residue coordinates)
|
279
293
|
"""
|
280
294
|
resp = GenomicDataResponse(
|
281
295
|
genomic_data=None, warnings=[], service_meta=service_meta()
|
282
296
|
)
|
297
|
+
warnings = []
|
283
298
|
if start is None and end is None:
|
284
|
-
|
299
|
+
warnings.append("Must provide either `start` or `end`")
|
300
|
+
if chromosome is None and alt_ac is None:
|
301
|
+
warnings.append("Must provide either `chromosome` or `alt_ac`")
|
302
|
+
if transcript is None and gene is None:
|
303
|
+
warnings.append("Must provide either `gene` or `transcript`")
|
304
|
+
if warnings:
|
305
|
+
return self._return_warnings(resp, warnings)
|
285
306
|
|
286
307
|
params = {key: None for key in GenomicData.model_fields}
|
287
308
|
if gene is not None:
|
@@ -305,7 +326,7 @@ class ExonGenomicCoordsMapper:
|
|
305
326
|
if start_data.transcript_exon_data:
|
306
327
|
start_data = start_data.transcript_exon_data.model_dump()
|
307
328
|
else:
|
308
|
-
return self._return_warnings(resp, start_data.warnings[0])
|
329
|
+
return self._return_warnings(resp, [start_data.warnings[0]])
|
309
330
|
else:
|
310
331
|
start_data = None
|
311
332
|
|
@@ -325,7 +346,7 @@ class ExonGenomicCoordsMapper:
|
|
325
346
|
if end_data.transcript_exon_data:
|
326
347
|
end_data = end_data.transcript_exon_data.model_dump()
|
327
348
|
else:
|
328
|
-
return self._return_warnings(resp, end_data.warnings[0])
|
349
|
+
return self._return_warnings(resp, [end_data.warnings[0]])
|
329
350
|
else:
|
330
351
|
end_data = None
|
331
352
|
|
@@ -336,7 +357,7 @@ class ExonGenomicCoordsMapper:
|
|
336
357
|
f"Start `{field}`, {start_data[field]}, does "
|
337
358
|
f"not match End `{field}`, {end_data[field]}"
|
338
359
|
)
|
339
|
-
return self._return_warnings(resp, msg)
|
360
|
+
return self._return_warnings(resp, [msg])
|
340
361
|
params[field] = start_data[field]
|
341
362
|
else:
|
342
363
|
params[field] = end_data[field]
|
@@ -346,7 +367,7 @@ class ExonGenomicCoordsMapper:
|
|
346
367
|
f"Input gene, {gene}, does not match expected output"
|
347
368
|
f"gene, {params['gene']}"
|
348
369
|
)
|
349
|
-
return self._return_warnings(resp, msg)
|
370
|
+
return self._return_warnings(resp, [msg])
|
350
371
|
|
351
372
|
for label, data in [("start", start_data), ("end", end_data)]:
|
352
373
|
if data:
|
@@ -358,8 +379,8 @@ class ExonGenomicCoordsMapper:
|
|
358
379
|
|
359
380
|
@staticmethod
|
360
381
|
def _validate_exon(
|
361
|
-
transcript: str, tx_exons:
|
362
|
-
) ->
|
382
|
+
transcript: str, tx_exons: list[tuple[int, int]], exon_number: int
|
383
|
+
) -> tuple[tuple[int, int] | None, str | None]:
|
363
384
|
"""Validate that exon number exists on a given transcript
|
364
385
|
|
365
386
|
:param transcript: Transcript accession
|
@@ -379,12 +400,12 @@ class ExonGenomicCoordsMapper:
|
|
379
400
|
def get_tx_exon_coords(
|
380
401
|
self,
|
381
402
|
transcript: str,
|
382
|
-
tx_exons:
|
383
|
-
exon_start:
|
384
|
-
exon_end:
|
385
|
-
) ->
|
386
|
-
|
387
|
-
|
403
|
+
tx_exons: list[tuple[int, int]],
|
404
|
+
exon_start: int | None = None,
|
405
|
+
exon_end: int | None = None,
|
406
|
+
) -> tuple[
|
407
|
+
tuple[tuple[int, int] | None, tuple[int, int] | None] | None,
|
408
|
+
str | None,
|
388
409
|
]:
|
389
410
|
"""Get exon coordinates for ``exon_start`` and ``exon_end``
|
390
411
|
|
@@ -415,10 +436,10 @@ class ExonGenomicCoordsMapper:
|
|
415
436
|
async def _get_alt_ac_start_and_end(
|
416
437
|
self,
|
417
438
|
tx_ac: str,
|
418
|
-
tx_exon_start:
|
419
|
-
tx_exon_end:
|
420
|
-
gene:
|
421
|
-
) ->
|
439
|
+
tx_exon_start: tuple[int, int] | None = None,
|
440
|
+
tx_exon_end: tuple[int, int] | None = None,
|
441
|
+
gene: str | None = None,
|
442
|
+
) -> tuple[tuple[tuple[int, int], tuple[int, int]] | None, str | None]:
|
422
443
|
"""Get aligned genomic coordinates for transcript exon start and end.
|
423
444
|
|
424
445
|
:param tx_ac: Transcript accession
|
@@ -431,7 +452,7 @@ class ExonGenomicCoordsMapper:
|
|
431
452
|
"""
|
432
453
|
if tx_exon_start is None and tx_exon_end is None:
|
433
454
|
msg = "Must provide either `tx_exon_start` or `tx_exon_end` or both"
|
434
|
-
|
455
|
+
_logger.warning(msg)
|
435
456
|
return None, msg
|
436
457
|
|
437
458
|
alt_ac_data = {"start": None, "end": None}
|
@@ -457,7 +478,7 @@ class ExonGenomicCoordsMapper:
|
|
457
478
|
error = "Genomic accession does not match"
|
458
479
|
else:
|
459
480
|
error = "Strand does not match"
|
460
|
-
|
481
|
+
_logger.warning(
|
461
482
|
"%s: %s != %s",
|
462
483
|
error,
|
463
484
|
alt_ac_data["start"][i],
|
@@ -469,11 +490,11 @@ class ExonGenomicCoordsMapper:
|
|
469
490
|
async def _genomic_to_transcript_exon_coordinate(
|
470
491
|
self,
|
471
492
|
pos: int,
|
472
|
-
chromosome:
|
473
|
-
alt_ac:
|
474
|
-
strand:
|
475
|
-
transcript:
|
476
|
-
gene:
|
493
|
+
chromosome: str | None = None,
|
494
|
+
alt_ac: str | None = None,
|
495
|
+
strand: Strand | None = None,
|
496
|
+
transcript: str | None = None,
|
497
|
+
gene: str | None = None,
|
477
498
|
get_nearest_transcript_junction: bool = False,
|
478
499
|
is_start: bool = True,
|
479
500
|
) -> TranscriptExonDataResponse:
|
@@ -505,25 +526,22 @@ class ExonGenomicCoordsMapper:
|
|
505
526
|
resp = TranscriptExonDataResponse(
|
506
527
|
transcript_exon_data=None, warnings=[], service_meta=service_meta()
|
507
528
|
)
|
508
|
-
|
509
|
-
if transcript is None and gene is None:
|
510
|
-
return self._return_warnings(
|
511
|
-
resp, "Must provide either `gene` or `transcript`"
|
512
|
-
)
|
513
|
-
|
514
529
|
params = {key: None for key in TranscriptExonData.model_fields}
|
515
530
|
|
516
531
|
if get_nearest_transcript_junction:
|
517
532
|
if not gene or not strand:
|
518
533
|
return self._return_warnings(
|
519
534
|
resp,
|
520
|
-
|
535
|
+
[
|
536
|
+
"Gene or strand must be provided to select the adjacent transcript junction"
|
537
|
+
],
|
521
538
|
)
|
522
|
-
|
539
|
+
if not alt_ac:
|
540
|
+
alt_acs, w = self.seqrepo_access.chromosome_to_acs(chromosome)
|
523
541
|
|
524
|
-
|
525
|
-
|
526
|
-
|
542
|
+
if not alt_acs:
|
543
|
+
return self._return_warnings(resp, [w])
|
544
|
+
alt_ac = alt_acs[0]
|
527
545
|
|
528
546
|
if not transcript:
|
529
547
|
# Select a transcript if not provided
|
@@ -557,14 +575,14 @@ class ExonGenomicCoordsMapper:
|
|
557
575
|
else:
|
558
576
|
return self._return_warnings(
|
559
577
|
resp,
|
560
|
-
f"Could not find a transcript for {gene} on {alt_ac}",
|
578
|
+
[f"Could not find a transcript for {gene} on {alt_ac}"],
|
561
579
|
)
|
562
580
|
|
563
581
|
tx_genomic_coords, w = await self.uta_db.get_tx_exons_genomic_coords(
|
564
582
|
tx_ac=transcript, alt_ac=alt_ac
|
565
583
|
)
|
566
584
|
if not tx_genomic_coords:
|
567
|
-
return self._return_warnings(resp, w)
|
585
|
+
return self._return_warnings(resp, [w])
|
568
586
|
|
569
587
|
# Check if breakpoint occurs on an exon.
|
570
588
|
# If not, determine the adjacent exon given the selected transcript
|
@@ -592,13 +610,13 @@ class ExonGenomicCoordsMapper:
|
|
592
610
|
)
|
593
611
|
params["strand"] = strand.value
|
594
612
|
resp.transcript_exon_data = TranscriptExonData(**params)
|
595
|
-
|
613
|
+
return resp
|
596
614
|
|
597
615
|
if alt_ac:
|
598
616
|
# Check if valid accession is given
|
599
617
|
if not await self.uta_db.validate_genomic_ac(alt_ac):
|
600
618
|
return self._return_warnings(
|
601
|
-
resp, f"Invalid genomic accession: {alt_ac}"
|
619
|
+
resp, [f"Invalid genomic accession: {alt_ac}"]
|
602
620
|
)
|
603
621
|
|
604
622
|
genes_alt_acs, warning = await self.uta_db.get_genes_and_alt_acs(
|
@@ -621,11 +639,11 @@ class ExonGenomicCoordsMapper:
|
|
621
639
|
genes_alt_acs = None
|
622
640
|
|
623
641
|
if not genes_alt_acs:
|
624
|
-
return self._return_warnings(resp, warning)
|
642
|
+
return self._return_warnings(resp, [warning])
|
625
643
|
|
626
644
|
gene_alt_ac, warning = self._get_gene_and_alt_ac(genes_alt_acs, gene)
|
627
645
|
if not gene_alt_ac:
|
628
|
-
return self._return_warnings(resp, warning)
|
646
|
+
return self._return_warnings(resp, [warning])
|
629
647
|
gene, alt_ac = gene_alt_ac
|
630
648
|
|
631
649
|
if transcript is None:
|
@@ -633,7 +651,7 @@ class ExonGenomicCoordsMapper:
|
|
633
651
|
params, gene, alt_ac, pos, strand, is_start
|
634
652
|
)
|
635
653
|
if warnings:
|
636
|
-
return self._return_warnings(resp, warnings)
|
654
|
+
return self._return_warnings(resp, [warnings])
|
637
655
|
else:
|
638
656
|
params["transcript"] = transcript
|
639
657
|
params["gene"] = gene
|
@@ -641,15 +659,15 @@ class ExonGenomicCoordsMapper:
|
|
641
659
|
params["chr"] = alt_ac
|
642
660
|
warning = await self._set_genomic_data(params, strand, is_start)
|
643
661
|
if warning:
|
644
|
-
return self._return_warnings(resp, warning)
|
662
|
+
return self._return_warnings(resp, [warning])
|
645
663
|
|
646
664
|
resp.transcript_exon_data = TranscriptExonData(**params)
|
647
665
|
return resp
|
648
666
|
|
649
667
|
@staticmethod
|
650
668
|
def _get_gene_and_alt_ac(
|
651
|
-
genes_alt_acs:
|
652
|
-
) ->
|
669
|
+
genes_alt_acs: dict, gene: str | None
|
670
|
+
) -> tuple[tuple[str, str] | None, str | None]:
|
653
671
|
"""Return gene genomic accession
|
654
672
|
|
655
673
|
:param genes_alt_acs: Dictionary containing genes and genomic accessions
|
@@ -687,13 +705,13 @@ class ExonGenomicCoordsMapper:
|
|
687
705
|
|
688
706
|
async def _set_mane_genomic_data(
|
689
707
|
self,
|
690
|
-
params:
|
708
|
+
params: dict,
|
691
709
|
gene: str,
|
692
710
|
alt_ac: str,
|
693
711
|
pos: int,
|
694
712
|
strand: Strand,
|
695
713
|
is_start: bool,
|
696
|
-
) ->
|
714
|
+
) -> str | None:
|
697
715
|
"""Set genomic data in `params` found from MANE.
|
698
716
|
|
699
717
|
:param params: Parameters for response
|
@@ -706,9 +724,9 @@ class ExonGenomicCoordsMapper:
|
|
706
724
|
:return: Warnings if found
|
707
725
|
"""
|
708
726
|
start, end = get_inter_residue_pos(pos, pos, residue_mode=ResidueMode.ZERO)
|
709
|
-
mane_data:
|
710
|
-
CdnaRepresentation
|
711
|
-
|
727
|
+
mane_data: (
|
728
|
+
CdnaRepresentation | None
|
729
|
+
) = await self.mane_transcript.get_mane_transcript(
|
712
730
|
alt_ac,
|
713
731
|
start,
|
714
732
|
end,
|
@@ -721,7 +739,7 @@ class ExonGenomicCoordsMapper:
|
|
721
739
|
msg = f"Unable to find mane data for {alt_ac} with position {pos}"
|
722
740
|
if gene:
|
723
741
|
msg += f" on gene {gene}"
|
724
|
-
|
742
|
+
_logger.warning(msg)
|
725
743
|
return msg
|
726
744
|
|
727
745
|
params["gene"] = mane_data.gene
|
@@ -745,7 +763,7 @@ class ExonGenomicCoordsMapper:
|
|
745
763
|
f"{params['transcript']} with position {tx_pos} "
|
746
764
|
f"does not exist on exons: {tx_exons}"
|
747
765
|
)
|
748
|
-
|
766
|
+
_logger.warning(msg)
|
749
767
|
return msg
|
750
768
|
|
751
769
|
strand_to_use = strand if strand is not None else mane_data.strand
|
@@ -777,8 +795,8 @@ class ExonGenomicCoordsMapper:
|
|
777
795
|
return None
|
778
796
|
|
779
797
|
async def _set_genomic_data(
|
780
|
-
self, params:
|
781
|
-
) ->
|
798
|
+
self, params: dict, strand: Strand, is_start: bool
|
799
|
+
) -> str | None:
|
782
800
|
"""Set genomic data in ``params``
|
783
801
|
|
784
802
|
:param params: Parameters for response
|
@@ -800,7 +818,7 @@ class ExonGenomicCoordsMapper:
|
|
800
818
|
return f"Unable to get chromosome and assembly for " f"{params['chr']}"
|
801
819
|
|
802
820
|
chromosome_number, assembly = descr
|
803
|
-
liftover_data = self.
|
821
|
+
liftover_data = self.liftover.get_liftover(
|
804
822
|
chromosome_number, params["pos"], Assembly.GRCH38
|
805
823
|
)
|
806
824
|
if liftover_data is None:
|
@@ -861,7 +879,7 @@ class ExonGenomicCoordsMapper:
|
|
861
879
|
|
862
880
|
@staticmethod
|
863
881
|
def _set_exon_offset(
|
864
|
-
params:
|
882
|
+
params: dict, start: int, end: int, pos: int, is_start: bool, strand: Strand
|
865
883
|
) -> None:
|
866
884
|
"""Set value for ``exon_offset`` in ``params``.
|
867
885
|
|
@@ -885,26 +903,23 @@ class ExonGenomicCoordsMapper:
|
|
885
903
|
params["exon_offset"] = pos - start
|
886
904
|
|
887
905
|
async def _structure_exons(
|
888
|
-
self, transcript: str, alt_ac:
|
889
|
-
) ->
|
906
|
+
self, transcript: str, alt_ac: str | None = None
|
907
|
+
) -> list[tuple[int, int]]:
|
890
908
|
"""Structure exons as list of tuples.
|
891
909
|
|
892
910
|
:param transcript: Transcript accession
|
893
911
|
:param alt_ac: Genomic accession
|
894
912
|
:return: List of tuples containing transcript exon coordinates
|
895
913
|
"""
|
896
|
-
result = []
|
897
914
|
tx_exons, _ = await self.uta_db.get_tx_exons(transcript, alt_ac=alt_ac)
|
898
915
|
|
899
916
|
if not tx_exons:
|
900
|
-
return
|
917
|
+
return []
|
901
918
|
|
902
|
-
for coords in tx_exons
|
903
|
-
result.append((coords[0], coords[1]))
|
904
|
-
return result
|
919
|
+
return [(coords[0], coords[1]) for coords in tx_exons]
|
905
920
|
|
906
921
|
@staticmethod
|
907
|
-
def _get_exon_number(tx_exons:
|
922
|
+
def _get_exon_number(tx_exons: list, tx_pos: int) -> int:
|
908
923
|
"""Find related exon number for a position
|
909
924
|
|
910
925
|
:param tx_exons: List of exon coordinates for a transcript
|
@@ -920,10 +935,10 @@ class ExonGenomicCoordsMapper:
|
|
920
935
|
|
921
936
|
@staticmethod
|
922
937
|
def _get_adjacent_exon(
|
923
|
-
tx_exons_genomic_coords:
|
938
|
+
tx_exons_genomic_coords: list[tuple[int, int, int, int, int]],
|
924
939
|
strand: Strand,
|
925
|
-
start:
|
926
|
-
end:
|
940
|
+
start: int | None = None,
|
941
|
+
end: int | None = None,
|
927
942
|
) -> int:
|
928
943
|
"""Return the adjacent exon given a non-exonic breakpoint. For the positive
|
929
944
|
strand, adjacent is defined as the exon preceding the breakpoint for the 5' end
|
@@ -961,7 +976,7 @@ class ExonGenomicCoordsMapper:
|
|
961
976
|
return exon[0] + 1 if end else exon[0] + 2
|
962
977
|
|
963
978
|
@staticmethod
|
964
|
-
def _is_exonic_breakpoint(pos: int, tx_genomic_coords:
|
979
|
+
def _is_exonic_breakpoint(pos: int, tx_genomic_coords: list) -> bool:
|
965
980
|
"""Check if a breakpoint occurs on an exon
|
966
981
|
|
967
982
|
:param pos: Genomic breakpoint
|