cool-seq-tool 0.4.0.dev2__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +1 -3
- cool_seq_tool/api.py +1 -2
- cool_seq_tool/app.py +42 -24
- cool_seq_tool/handlers/__init__.py +1 -0
- cool_seq_tool/handlers/seqrepo_access.py +13 -15
- cool_seq_tool/mappers/__init__.py +1 -0
- cool_seq_tool/mappers/alignment.py +5 -6
- cool_seq_tool/mappers/exon_genomic_coords.py +232 -68
- cool_seq_tool/mappers/mane_transcript.py +84 -86
- cool_seq_tool/resources/__init__.py +1 -0
- cool_seq_tool/resources/data_files.py +93 -0
- cool_seq_tool/resources/status.py +151 -0
- cool_seq_tool/routers/__init__.py +1 -0
- cool_seq_tool/routers/default.py +1 -0
- cool_seq_tool/routers/mane.py +4 -4
- cool_seq_tool/routers/mappings.py +2 -2
- cool_seq_tool/schemas.py +83 -37
- cool_seq_tool/sources/__init__.py +1 -0
- cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
- cool_seq_tool/sources/transcript_mappings.py +41 -32
- cool_seq_tool/sources/uta_database.py +120 -69
- cool_seq_tool/utils.py +2 -2
- cool_seq_tool/version.py +2 -1
- {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/LICENSE +1 -1
- {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/METADATA +15 -8
- cool_seq_tool-0.4.1.dist-info/RECORD +29 -0
- {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/WHEEL +1 -1
- cool_seq_tool/data/__init__.py +0 -2
- cool_seq_tool/data/data_downloads.py +0 -89
- cool_seq_tool/paths.py +0 -28
- cool_seq_tool-0.4.0.dev2.dist-info/RECORD +0 -29
- /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
- {cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,9 @@
|
|
1
1
|
"""Provide mapping capabilities between transcript exon and genomic coordinates."""
|
2
|
+
|
2
3
|
import logging
|
3
|
-
from typing import
|
4
|
+
from typing import Literal, TypeVar
|
4
5
|
|
6
|
+
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
5
7
|
from cool_seq_tool.mappers.mane_transcript import CdnaRepresentation, ManeTranscript
|
6
8
|
from cool_seq_tool.schemas import (
|
7
9
|
AnnotationLayer,
|
@@ -13,6 +15,7 @@ from cool_seq_tool.schemas import (
|
|
13
15
|
TranscriptExonData,
|
14
16
|
TranscriptExonDataResponse,
|
15
17
|
)
|
18
|
+
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
16
19
|
from cool_seq_tool.sources.uta_database import UtaDatabase
|
17
20
|
from cool_seq_tool.utils import get_inter_residue_pos, service_meta
|
18
21
|
|
@@ -28,7 +31,13 @@ class ExonGenomicCoordsMapper:
|
|
28
31
|
coordinate representation.
|
29
32
|
"""
|
30
33
|
|
31
|
-
def __init__(
|
34
|
+
def __init__(
|
35
|
+
self,
|
36
|
+
seqrepo_access: SeqRepoAccess,
|
37
|
+
uta_db: UtaDatabase,
|
38
|
+
mane_transcript: ManeTranscript,
|
39
|
+
mane_transcript_mappings: ManeTranscriptMappings,
|
40
|
+
) -> None:
|
32
41
|
"""Initialize ExonGenomicCoordsMapper class.
|
33
42
|
|
34
43
|
A lot of resources are required for initialization, so when defaults are enough,
|
@@ -42,19 +51,23 @@ class ExonGenomicCoordsMapper:
|
|
42
51
|
event loop. See the :ref:`Usage section <async_note>` for more information.
|
43
52
|
|
44
53
|
>>> import asyncio
|
45
|
-
>>> result = asyncio.run(
|
46
|
-
...
|
47
|
-
...
|
48
|
-
...
|
49
|
-
... )
|
54
|
+
>>> result = asyncio.run(
|
55
|
+
... egc.transcript_to_genomic_coordinates(
|
56
|
+
... "NM_002529.3", exon_start=2, exon_end=17
|
57
|
+
... )
|
58
|
+
... )
|
50
59
|
>>> result.genomic_data.start, result.genomic_data.end
|
51
60
|
(156864428, 156881456)
|
52
61
|
|
62
|
+
:param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
|
53
63
|
:param uta_db: UtaDatabase instance to give access to query UTA database
|
54
64
|
:param mane_transcript: Instance to align to MANE or compatible representation
|
65
|
+
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
|
55
66
|
"""
|
67
|
+
self.seqrepo_access = seqrepo_access
|
56
68
|
self.uta_db = uta_db
|
57
69
|
self.mane_transcript = mane_transcript
|
70
|
+
self.mane_transcript_mappings = mane_transcript_mappings
|
58
71
|
|
59
72
|
@staticmethod
|
60
73
|
def _return_warnings(
|
@@ -74,10 +87,10 @@ class ExonGenomicCoordsMapper:
|
|
74
87
|
async def transcript_to_genomic_coordinates(
|
75
88
|
self,
|
76
89
|
transcript: str,
|
77
|
-
gene:
|
78
|
-
exon_start:
|
90
|
+
gene: str | None = None,
|
91
|
+
exon_start: int | None = None,
|
79
92
|
exon_start_offset: int = 0,
|
80
|
-
exon_end:
|
93
|
+
exon_end: int | None = None,
|
81
94
|
exon_end_offset: int = 0,
|
82
95
|
) -> GenomicDataResponse:
|
83
96
|
"""Get genomic data given transcript data.
|
@@ -87,11 +100,14 @@ class ExonGenomicCoordsMapper:
|
|
87
100
|
>>> import asyncio
|
88
101
|
>>> from cool_seq_tool.app import CoolSeqTool
|
89
102
|
>>> egc = CoolSeqTool().ex_g_coords_mapper
|
90
|
-
>>> tpm3 = asyncio.run(
|
91
|
-
...
|
92
|
-
...
|
93
|
-
...
|
94
|
-
...
|
103
|
+
>>> tpm3 = asyncio.run(
|
104
|
+
... egc.transcript_to_genomic_coordinates(
|
105
|
+
... "NM_152263.3",
|
106
|
+
... gene="TPM3",
|
107
|
+
... exon_start=1,
|
108
|
+
... exon_end=8,
|
109
|
+
... )
|
110
|
+
... )
|
95
111
|
>>> tpm3.genomic_data.chr, tpm3.genomic_data.start, tpm3.genomic_data.end
|
96
112
|
('NC_000001.11', 154192135, 154170399)
|
97
113
|
|
@@ -211,16 +227,16 @@ class ExonGenomicCoordsMapper:
|
|
211
227
|
|
212
228
|
async def genomic_to_transcript_exon_coordinates(
|
213
229
|
self,
|
214
|
-
chromosome:
|
215
|
-
alt_ac:
|
216
|
-
start:
|
217
|
-
end:
|
218
|
-
strand:
|
219
|
-
transcript:
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
] = ResidueMode.RESIDUE,
|
230
|
+
chromosome: str | None = None,
|
231
|
+
alt_ac: str | None = None,
|
232
|
+
start: int | None = None,
|
233
|
+
end: int | None = None,
|
234
|
+
strand: Strand | None = None,
|
235
|
+
transcript: str | None = None,
|
236
|
+
get_nearest_transcript_junction: bool = False,
|
237
|
+
gene: str | None = None,
|
238
|
+
residue_mode: Literal[ResidueMode.INTER_RESIDUE]
|
239
|
+
| Literal[ResidueMode.RESIDUE] = ResidueMode.RESIDUE,
|
224
240
|
) -> GenomicDataResponse:
|
225
241
|
"""Get transcript data for genomic data, lifted over to GRCh38.
|
226
242
|
|
@@ -231,13 +247,15 @@ class ExonGenomicCoordsMapper:
|
|
231
247
|
>>> from cool_seq_tool.app import CoolSeqTool
|
232
248
|
>>> from cool_seq_tool.schemas import Strand
|
233
249
|
>>> egc = CoolSeqTool().ex_g_coords_mapper
|
234
|
-
>>> result = asyncio.run(
|
235
|
-
...
|
236
|
-
...
|
237
|
-
...
|
238
|
-
...
|
239
|
-
...
|
240
|
-
...
|
250
|
+
>>> result = asyncio.run(
|
251
|
+
... egc.genomic_to_transcript_exon_coordinates(
|
252
|
+
... alt_ac="NC_000001.11",
|
253
|
+
... start=154192136,
|
254
|
+
... end=154170400,
|
255
|
+
... strand=Strand.NEGATIVE,
|
256
|
+
... transcript="NM_152263.3",
|
257
|
+
... )
|
258
|
+
... )
|
241
259
|
>>> result.genomic_data.exon_start, result.genomic_data.exon_end
|
242
260
|
(1, 8)
|
243
261
|
|
@@ -254,7 +272,13 @@ class ExonGenomicCoordsMapper:
|
|
254
272
|
following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
|
255
273
|
Compatible Transcript. See the :ref:`Transcript Selection policy <transcript_selection_policy>`
|
256
274
|
page.
|
257
|
-
:param
|
275
|
+
:param get_nearest_transcript_junction: If ``True``, this will return the
|
276
|
+
adjacent exon if the position specified by ``start`` or ``end`` does not
|
277
|
+
occur on an exon. For the positive strand, adjacent is defined as the exon
|
278
|
+
preceding the breakpoint for the 5' end and the exon following the
|
279
|
+
breakpoint for the 3' end. For the negative strand, adjacent is defined as
|
280
|
+
the exon following the breakpoint for the 5' end and the exon preceding the
|
281
|
+
breakpoint for the 3' end.
|
258
282
|
:param residue_mode: Residue mode for ``start`` and ``end``
|
259
283
|
:return: Genomic data (inter-residue coordinates)
|
260
284
|
"""
|
@@ -280,6 +304,7 @@ class ExonGenomicCoordsMapper:
|
|
280
304
|
strand=strand,
|
281
305
|
transcript=transcript,
|
282
306
|
gene=gene,
|
307
|
+
get_nearest_transcript_junction=get_nearest_transcript_junction,
|
283
308
|
is_start=True,
|
284
309
|
)
|
285
310
|
if start_data.transcript_exon_data:
|
@@ -299,6 +324,7 @@ class ExonGenomicCoordsMapper:
|
|
299
324
|
strand=strand,
|
300
325
|
transcript=transcript,
|
301
326
|
gene=gene,
|
327
|
+
get_nearest_transcript_junction=get_nearest_transcript_junction,
|
302
328
|
is_start=False,
|
303
329
|
)
|
304
330
|
if end_data.transcript_exon_data:
|
@@ -337,8 +363,8 @@ class ExonGenomicCoordsMapper:
|
|
337
363
|
|
338
364
|
@staticmethod
|
339
365
|
def _validate_exon(
|
340
|
-
transcript: str, tx_exons:
|
341
|
-
) ->
|
366
|
+
transcript: str, tx_exons: list[tuple[int, int]], exon_number: int
|
367
|
+
) -> tuple[tuple[int, int] | None, str | None]:
|
342
368
|
"""Validate that exon number exists on a given transcript
|
343
369
|
|
344
370
|
:param transcript: Transcript accession
|
@@ -358,12 +384,12 @@ class ExonGenomicCoordsMapper:
|
|
358
384
|
def get_tx_exon_coords(
|
359
385
|
self,
|
360
386
|
transcript: str,
|
361
|
-
tx_exons:
|
362
|
-
exon_start:
|
363
|
-
exon_end:
|
364
|
-
) ->
|
365
|
-
|
366
|
-
|
387
|
+
tx_exons: list[tuple[int, int]],
|
388
|
+
exon_start: int | None = None,
|
389
|
+
exon_end: int | None = None,
|
390
|
+
) -> tuple[
|
391
|
+
tuple[tuple[int, int] | None, tuple[int, int] | None] | None,
|
392
|
+
str | None,
|
367
393
|
]:
|
368
394
|
"""Get exon coordinates for ``exon_start`` and ``exon_end``
|
369
395
|
|
@@ -394,10 +420,10 @@ class ExonGenomicCoordsMapper:
|
|
394
420
|
async def _get_alt_ac_start_and_end(
|
395
421
|
self,
|
396
422
|
tx_ac: str,
|
397
|
-
tx_exon_start:
|
398
|
-
tx_exon_end:
|
399
|
-
gene:
|
400
|
-
) ->
|
423
|
+
tx_exon_start: tuple[int, int] | None = None,
|
424
|
+
tx_exon_end: tuple[int, int] | None = None,
|
425
|
+
gene: str | None = None,
|
426
|
+
) -> tuple[tuple[tuple[int, int], tuple[int, int]] | None, str | None]:
|
401
427
|
"""Get aligned genomic coordinates for transcript exon start and end.
|
402
428
|
|
403
429
|
:param tx_ac: Transcript accession
|
@@ -448,11 +474,12 @@ class ExonGenomicCoordsMapper:
|
|
448
474
|
async def _genomic_to_transcript_exon_coordinate(
|
449
475
|
self,
|
450
476
|
pos: int,
|
451
|
-
chromosome:
|
452
|
-
alt_ac:
|
453
|
-
strand:
|
454
|
-
transcript:
|
455
|
-
gene:
|
477
|
+
chromosome: str | None = None,
|
478
|
+
alt_ac: str | None = None,
|
479
|
+
strand: Strand | None = None,
|
480
|
+
transcript: str | None = None,
|
481
|
+
gene: str | None = None,
|
482
|
+
get_nearest_transcript_junction: bool = False,
|
456
483
|
is_start: bool = True,
|
457
484
|
) -> TranscriptExonDataResponse:
|
458
485
|
"""Convert individual genomic data to transcript data
|
@@ -469,6 +496,13 @@ class ExonGenomicCoordsMapper:
|
|
469
496
|
following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
|
470
497
|
Compatible Transcript
|
471
498
|
:param gene: HGNC gene symbol
|
499
|
+
:param get_nearest_transcript_junction: If ``True``, this will return the
|
500
|
+
adjacent exon if the position specified by ``start`` or ``end`` does not
|
501
|
+
occur on an exon. For the positive strand, adjacent is defined as the exon
|
502
|
+
preceding the breakpoint for the 5' end and the exon following the
|
503
|
+
breakpoint for the 3' end. For the negative strand, adjacent is defined as
|
504
|
+
the exon following the breakpoint for the 5' end and the exon preceding the
|
505
|
+
breakpoint for the 3' end.
|
472
506
|
:param is_start: ``True`` if ``pos`` is start position. ``False`` if ``pos`` is
|
473
507
|
end position.
|
474
508
|
:return: Transcript data (inter-residue coordinates)
|
@@ -484,6 +518,87 @@ class ExonGenomicCoordsMapper:
|
|
484
518
|
|
485
519
|
params = {key: None for key in TranscriptExonData.model_fields}
|
486
520
|
|
521
|
+
if get_nearest_transcript_junction:
|
522
|
+
if not gene or not strand:
|
523
|
+
return self._return_warnings(
|
524
|
+
resp,
|
525
|
+
"Gene or strand must be provided to select the adjacent transcript junction",
|
526
|
+
)
|
527
|
+
alt_acs, w = self.seqrepo_access.chromosome_to_acs(chromosome)
|
528
|
+
|
529
|
+
if not alt_acs:
|
530
|
+
return self._return_warnings(resp, w)
|
531
|
+
alt_ac = alt_acs[0]
|
532
|
+
|
533
|
+
if not transcript:
|
534
|
+
# Select a transcript if not provided
|
535
|
+
mane_transcripts = self.mane_transcript_mappings.get_gene_mane_data(
|
536
|
+
gene
|
537
|
+
)
|
538
|
+
|
539
|
+
if mane_transcripts:
|
540
|
+
transcript = mane_transcripts[0]["RefSeq_nuc"]
|
541
|
+
else:
|
542
|
+
# Attempt to find a coding transcript if a MANE transcript
|
543
|
+
# cannot be found
|
544
|
+
results = await self.uta_db.get_transcripts(
|
545
|
+
gene=gene, alt_ac=alt_ac
|
546
|
+
)
|
547
|
+
|
548
|
+
if not results.is_empty():
|
549
|
+
transcript = results[0]["tx_ac"][0]
|
550
|
+
else:
|
551
|
+
# Run if gene is for a noncoding transcript
|
552
|
+
query = f"""
|
553
|
+
SELECT DISTINCT tx_ac
|
554
|
+
FROM {self.uta_db.schema}.tx_exon_aln_v
|
555
|
+
WHERE hgnc = '{gene}'
|
556
|
+
AND alt_ac = '{alt_ac}'
|
557
|
+
""" # noqa: S608
|
558
|
+
result = await self.uta_db.execute_query(query)
|
559
|
+
|
560
|
+
if result:
|
561
|
+
transcript = result[0]["tx_ac"]
|
562
|
+
else:
|
563
|
+
return self._return_warnings(
|
564
|
+
resp,
|
565
|
+
f"Could not find a transcript for {gene} on {alt_ac}",
|
566
|
+
)
|
567
|
+
|
568
|
+
tx_genomic_coords, w = await self.uta_db.get_tx_exons_genomic_coords(
|
569
|
+
tx_ac=transcript, alt_ac=alt_ac
|
570
|
+
)
|
571
|
+
if not tx_genomic_coords:
|
572
|
+
return self._return_warnings(resp, w)
|
573
|
+
|
574
|
+
# Check if breakpoint occurs on an exon.
|
575
|
+
# If not, determine the adjacent exon given the selected transcript
|
576
|
+
if not self._is_exonic_breakpoint(pos, tx_genomic_coords):
|
577
|
+
exon = self._get_adjacent_exon(
|
578
|
+
tx_exons_genomic_coords=tx_genomic_coords,
|
579
|
+
strand=strand,
|
580
|
+
start=pos if is_start else None,
|
581
|
+
end=pos if not is_start else None,
|
582
|
+
)
|
583
|
+
|
584
|
+
params["exon"] = exon
|
585
|
+
params["transcript"] = transcript
|
586
|
+
params["gene"] = gene
|
587
|
+
params["pos"] = pos
|
588
|
+
params["chr"] = alt_ac
|
589
|
+
|
590
|
+
self._set_exon_offset(
|
591
|
+
params=params,
|
592
|
+
start=tx_genomic_coords[exon - 1][3], # Start exon coordinate
|
593
|
+
end=tx_genomic_coords[exon - 1][4], # End exon coordinate
|
594
|
+
pos=pos,
|
595
|
+
is_start=is_start,
|
596
|
+
strand=strand,
|
597
|
+
)
|
598
|
+
params["strand"] = strand.value
|
599
|
+
resp.transcript_exon_data = TranscriptExonData(**params)
|
600
|
+
return resp
|
601
|
+
|
487
602
|
if alt_ac:
|
488
603
|
# Check if valid accession is given
|
489
604
|
if not await self.uta_db.validate_genomic_ac(alt_ac):
|
@@ -538,8 +653,8 @@ class ExonGenomicCoordsMapper:
|
|
538
653
|
|
539
654
|
@staticmethod
|
540
655
|
def _get_gene_and_alt_ac(
|
541
|
-
genes_alt_acs:
|
542
|
-
) ->
|
656
|
+
genes_alt_acs: dict, gene: str | None
|
657
|
+
) -> tuple[tuple[str, str] | None, str | None]:
|
543
658
|
"""Return gene genomic accession
|
544
659
|
|
545
660
|
:param genes_alt_acs: Dictionary containing genes and genomic accessions
|
@@ -577,13 +692,13 @@ class ExonGenomicCoordsMapper:
|
|
577
692
|
|
578
693
|
async def _set_mane_genomic_data(
|
579
694
|
self,
|
580
|
-
params:
|
695
|
+
params: dict,
|
581
696
|
gene: str,
|
582
697
|
alt_ac: str,
|
583
698
|
pos: int,
|
584
699
|
strand: Strand,
|
585
700
|
is_start: bool,
|
586
|
-
) ->
|
701
|
+
) -> str | None:
|
587
702
|
"""Set genomic data in `params` found from MANE.
|
588
703
|
|
589
704
|
:param params: Parameters for response
|
@@ -596,9 +711,9 @@ class ExonGenomicCoordsMapper:
|
|
596
711
|
:return: Warnings if found
|
597
712
|
"""
|
598
713
|
start, end = get_inter_residue_pos(pos, pos, residue_mode=ResidueMode.ZERO)
|
599
|
-
mane_data:
|
600
|
-
CdnaRepresentation
|
601
|
-
|
714
|
+
mane_data: (
|
715
|
+
CdnaRepresentation | None
|
716
|
+
) = await self.mane_transcript.get_mane_transcript(
|
602
717
|
alt_ac,
|
603
718
|
start,
|
604
719
|
end,
|
@@ -667,8 +782,8 @@ class ExonGenomicCoordsMapper:
|
|
667
782
|
return None
|
668
783
|
|
669
784
|
async def _set_genomic_data(
|
670
|
-
self, params:
|
671
|
-
) ->
|
785
|
+
self, params: dict, strand: Strand, is_start: bool
|
786
|
+
) -> str | None:
|
672
787
|
"""Set genomic data in ``params``
|
673
788
|
|
674
789
|
:param params: Parameters for response
|
@@ -751,7 +866,7 @@ class ExonGenomicCoordsMapper:
|
|
751
866
|
|
752
867
|
@staticmethod
|
753
868
|
def _set_exon_offset(
|
754
|
-
params:
|
869
|
+
params: dict, start: int, end: int, pos: int, is_start: bool, strand: Strand
|
755
870
|
) -> None:
|
756
871
|
"""Set value for ``exon_offset`` in ``params``.
|
757
872
|
|
@@ -775,26 +890,23 @@ class ExonGenomicCoordsMapper:
|
|
775
890
|
params["exon_offset"] = pos - start
|
776
891
|
|
777
892
|
async def _structure_exons(
|
778
|
-
self, transcript: str, alt_ac:
|
779
|
-
) ->
|
893
|
+
self, transcript: str, alt_ac: str | None = None
|
894
|
+
) -> list[tuple[int, int]]:
|
780
895
|
"""Structure exons as list of tuples.
|
781
896
|
|
782
897
|
:param transcript: Transcript accession
|
783
898
|
:param alt_ac: Genomic accession
|
784
899
|
:return: List of tuples containing transcript exon coordinates
|
785
900
|
"""
|
786
|
-
result = []
|
787
901
|
tx_exons, _ = await self.uta_db.get_tx_exons(transcript, alt_ac=alt_ac)
|
788
902
|
|
789
903
|
if not tx_exons:
|
790
|
-
return
|
904
|
+
return []
|
791
905
|
|
792
|
-
for coords in tx_exons
|
793
|
-
result.append((coords[0], coords[1]))
|
794
|
-
return result
|
906
|
+
return [(coords[0], coords[1]) for coords in tx_exons]
|
795
907
|
|
796
908
|
@staticmethod
|
797
|
-
def _get_exon_number(tx_exons:
|
909
|
+
def _get_exon_number(tx_exons: list, tx_pos: int) -> int:
|
798
910
|
"""Find related exon number for a position
|
799
911
|
|
800
912
|
:param tx_exons: List of exon coordinates for a transcript
|
@@ -807,3 +919,55 @@ class ExonGenomicCoordsMapper:
|
|
807
919
|
break
|
808
920
|
i += 1
|
809
921
|
return i
|
922
|
+
|
923
|
+
@staticmethod
|
924
|
+
def _get_adjacent_exon(
|
925
|
+
tx_exons_genomic_coords: list[tuple[int, int, int, int, int]],
|
926
|
+
strand: Strand,
|
927
|
+
start: int | None = None,
|
928
|
+
end: int | None = None,
|
929
|
+
) -> int:
|
930
|
+
"""Return the adjacent exon given a non-exonic breakpoint. For the positive
|
931
|
+
strand, adjacent is defined as the exon preceding the breakpoint for the 5' end
|
932
|
+
and the exon following the breakpoint for the 3' end. For the negative strand,
|
933
|
+
adjacent is defined as the exon following the breakpoint for the 5' end and the
|
934
|
+
exon preceding the breakpoint for the 3' end.
|
935
|
+
|
936
|
+
:param: tx_exons_genomic_coords: List of tuples describing exons and genomic
|
937
|
+
coordinates for a transcript. Each tuple contains the transcript number
|
938
|
+
(0-indexed), the transcript coordinates for the exon, and the genomic
|
939
|
+
coordinates for the exon. Pos 0 in the tuple corresponds to the exon
|
940
|
+
number, pos 1 and pos 2 refer to the start and end transcript coordinates,
|
941
|
+
respectively, and pos 3 and 4 refer to the start and end genomic
|
942
|
+
coordinates, respectively.
|
943
|
+
:param strand: Strand
|
944
|
+
:param: start: Genomic coordinate of breakpoint
|
945
|
+
:param: end: Genomic coordinate of breakpoint
|
946
|
+
:return: Exon number corresponding to adjacent exon. Will be 1-based
|
947
|
+
"""
|
948
|
+
for i in range(len(tx_exons_genomic_coords) - 1):
|
949
|
+
exon = tx_exons_genomic_coords[i]
|
950
|
+
next_exon = tx_exons_genomic_coords[i + 1]
|
951
|
+
bp = start if start else end
|
952
|
+
if strand == strand.POSITIVE:
|
953
|
+
lte_exon = exon
|
954
|
+
gte_exon = next_exon
|
955
|
+
else:
|
956
|
+
lte_exon = next_exon
|
957
|
+
gte_exon = exon
|
958
|
+
if bp >= lte_exon[4] and bp <= gte_exon[3]:
|
959
|
+
break
|
960
|
+
# Return current exon if end position is provided, next exon if start position
|
961
|
+
# is provided. exon[0] needs to be incremented by 1 in both cases as exons are
|
962
|
+
# 0-based in UTA
|
963
|
+
return exon[0] + 1 if end else exon[0] + 2
|
964
|
+
|
965
|
+
@staticmethod
|
966
|
+
def _is_exonic_breakpoint(pos: int, tx_genomic_coords: list) -> bool:
|
967
|
+
"""Check if a breakpoint occurs on an exon
|
968
|
+
|
969
|
+
:param pos: Genomic breakpoint
|
970
|
+
:param tx_genomic_coords: A list of genomic coordinates for a transcript
|
971
|
+
:return: True if the breakpoint occurs on an exon
|
972
|
+
"""
|
973
|
+
return any(pos >= exon[3] and pos <= exon[4] for exon in tx_genomic_coords)
|