cool-seq-tool 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +6 -0
- cool_seq_tool/app.py +1 -2
- cool_seq_tool/handlers/seqrepo_access.py +5 -5
- cool_seq_tool/mappers/alignment.py +16 -16
- cool_seq_tool/mappers/exon_genomic_coords.py +845 -628
- cool_seq_tool/mappers/mane_transcript.py +109 -104
- cool_seq_tool/schemas.py +30 -165
- cool_seq_tool/sources/uta_database.py +149 -229
- cool_seq_tool/utils.py +9 -9
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/METADATA +8 -8
- cool_seq_tool-0.7.0.dist-info/RECORD +24 -0
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/WHEEL +1 -1
- cool_seq_tool-0.6.0.dist-info/RECORD +0 -24
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.0.dist-info}/top_level.txt +0 -0
@@ -25,8 +25,9 @@ from cool_seq_tool.mappers.liftover import LiftOver
|
|
25
25
|
from cool_seq_tool.schemas import (
|
26
26
|
AnnotationLayer,
|
27
27
|
Assembly,
|
28
|
+
CoordinateType,
|
29
|
+
GenomicTxMetadata,
|
28
30
|
ManeGeneData,
|
29
|
-
ResidueMode,
|
30
31
|
Strand,
|
31
32
|
TranscriptPriority,
|
32
33
|
)
|
@@ -101,7 +102,7 @@ class ManeTranscript:
|
|
101
102
|
A handful of resources are required for initialization, so when defaults are
|
102
103
|
enough, it's easiest to let the core CoolSeqTool class handle it for you:
|
103
104
|
|
104
|
-
>>> from cool_seq_tool
|
105
|
+
>>> from cool_seq_tool import CoolSeqTool
|
105
106
|
>>> mane_mapper = CoolSeqTool().mane_transcript
|
106
107
|
|
107
108
|
Note that most methods are defined as Python coroutines, so they must be called
|
@@ -182,13 +183,12 @@ class ManeTranscript:
|
|
182
183
|
pos = self._p_to_c_pos(start_pos, end_pos)
|
183
184
|
return ac, pos
|
184
185
|
|
185
|
-
async def _c_to_g(self, ac: str, pos: tuple[int, int]) ->
|
186
|
+
async def _c_to_g(self, ac: str, pos: tuple[int, int]) -> GenomicTxMetadata | None:
|
186
187
|
"""Get g. annotation from c. annotation.
|
187
188
|
|
188
189
|
:param ac: cDNA accession
|
189
190
|
:param pos: [cDNA pos start, cDNA pos end]
|
190
|
-
:return:
|
191
|
-
Altered transcript accession and position change, Strand
|
191
|
+
:return: Metadata for genomic and transcript accessions
|
192
192
|
"""
|
193
193
|
# UTA does not store ENST versions
|
194
194
|
# So we want to make sure version is valid
|
@@ -220,13 +220,13 @@ class ManeTranscript:
|
|
220
220
|
ac, pos, AnnotationLayer.CDNA, coding_start_site=coding_start_site
|
221
221
|
)
|
222
222
|
|
223
|
-
async def _liftover_to_38(self, genomic_tx_data:
|
223
|
+
async def _liftover_to_38(self, genomic_tx_data: GenomicTxMetadata) -> None:
|
224
224
|
"""Liftover genomic_tx_data to hg38 assembly.
|
225
225
|
|
226
|
-
:param genomic_tx_data:
|
227
|
-
|
226
|
+
:param genomic_tx_data: Metadata for genomic and transcript accessions. This
|
227
|
+
will be mutated in-place if not GRCh38 assembly.
|
228
228
|
"""
|
229
|
-
descr = await self.uta_db.get_chr_assembly(genomic_tx_data
|
229
|
+
descr = await self.uta_db.get_chr_assembly(genomic_tx_data.alt_ac)
|
230
230
|
if descr is None:
|
231
231
|
# already grch38
|
232
232
|
return
|
@@ -235,14 +235,14 @@ class ManeTranscript:
|
|
235
235
|
query = f"""
|
236
236
|
SELECT DISTINCT alt_ac
|
237
237
|
FROM {self.uta_db.schema}.tx_exon_aln_v
|
238
|
-
WHERE tx_ac = '{genomic_tx_data
|
238
|
+
WHERE tx_ac = '{genomic_tx_data.tx_ac}';
|
239
239
|
""" # noqa: S608
|
240
240
|
nc_acs = await self.uta_db.execute_query(query)
|
241
241
|
nc_acs = [nc_ac[0] for nc_ac in nc_acs]
|
242
|
-
if nc_acs == [genomic_tx_data
|
242
|
+
if nc_acs == [genomic_tx_data.alt_ac]:
|
243
243
|
_logger.warning(
|
244
244
|
"UTA does not have GRCh38 assembly for %s",
|
245
|
-
genomic_tx_data
|
245
|
+
genomic_tx_data.alt_ac.split(".")[0],
|
246
246
|
)
|
247
247
|
return
|
248
248
|
|
@@ -258,7 +258,7 @@ class ManeTranscript:
|
|
258
258
|
)
|
259
259
|
|
260
260
|
# Change alt_ac to most recent
|
261
|
-
if genomic_tx_data
|
261
|
+
if genomic_tx_data.alt_ac.startswith("EN"):
|
262
262
|
order_by_cond = "ORDER BY alt_ac DESC;"
|
263
263
|
else:
|
264
264
|
order_by_cond = """
|
@@ -268,50 +268,49 @@ class ManeTranscript:
|
|
268
268
|
query = f"""
|
269
269
|
SELECT alt_ac
|
270
270
|
FROM {self.uta_db.schema}.genomic
|
271
|
-
WHERE alt_ac LIKE '{genomic_tx_data
|
271
|
+
WHERE alt_ac LIKE '{genomic_tx_data.alt_ac.split('.')[0]}%'
|
272
272
|
{order_by_cond}
|
273
273
|
""" # noqa: S608
|
274
274
|
nc_acs = await self.uta_db.execute_query(query)
|
275
|
-
genomic_tx_data
|
275
|
+
genomic_tx_data.alt_ac = nc_acs[0][0]
|
276
276
|
|
277
277
|
def _set_liftover(
|
278
278
|
self,
|
279
|
-
genomic_tx_data:
|
279
|
+
genomic_tx_data: GenomicTxMetadata,
|
280
280
|
key: str,
|
281
281
|
chromosome: str,
|
282
282
|
liftover_to_assembly: Assembly,
|
283
283
|
) -> None:
|
284
284
|
"""Update genomic_tx_data to have coordinates for given assembly.
|
285
285
|
|
286
|
-
:param genomic_tx_data:
|
287
|
-
strand
|
286
|
+
:param genomic_tx_data: Metadata for genomic and transcript accessions
|
288
287
|
:param key: Key to access coordinate positions
|
289
288
|
:param chromosome: Chromosome, must be prefixed with ``chr``
|
290
289
|
:param liftover_to_assembly: Assembly to liftover to
|
291
290
|
"""
|
291
|
+
coords = getattr(genomic_tx_data, key)
|
292
292
|
liftover_start_i = self.liftover.get_liftover(
|
293
|
-
chromosome,
|
293
|
+
chromosome, coords[0], liftover_to_assembly
|
294
294
|
)
|
295
295
|
if liftover_start_i is None:
|
296
296
|
_logger.warning(
|
297
297
|
"Unable to liftover position %s on %s",
|
298
|
-
|
298
|
+
coords[0],
|
299
299
|
chromosome,
|
300
300
|
)
|
301
301
|
return
|
302
302
|
|
303
303
|
liftover_end_i = self.liftover.get_liftover(
|
304
|
-
chromosome,
|
304
|
+
chromosome, coords[1], liftover_to_assembly
|
305
305
|
)
|
306
306
|
if liftover_end_i is None:
|
307
307
|
_logger.warning(
|
308
308
|
"Unable to liftover position %s on %s",
|
309
|
-
|
309
|
+
coords[1],
|
310
310
|
chromosome,
|
311
311
|
)
|
312
312
|
return
|
313
|
-
|
314
|
-
genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
|
313
|
+
setattr(genomic_tx_data, key, (liftover_start_i[1], liftover_end_i[1]))
|
315
314
|
|
316
315
|
async def _get_and_validate_genomic_tx_data(
|
317
316
|
self,
|
@@ -321,7 +320,7 @@ class ManeTranscript:
|
|
321
320
|
| Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.CDNA,
|
322
321
|
coding_start_site: int | None = None,
|
323
322
|
alt_ac: str | None = None,
|
324
|
-
) ->
|
323
|
+
) -> GenomicTxMetadata | None:
|
325
324
|
"""Get and validate genomic_tx_data
|
326
325
|
|
327
326
|
:param tx_ac: Accession on c. coordinate
|
@@ -329,7 +328,8 @@ class ManeTranscript:
|
|
329
328
|
:param annotation_layer: Annotation layer for ``ac`` and ``pos``
|
330
329
|
:param coding_start_site: Coding start site
|
331
330
|
:param alt_ac: Accession on g. coordinate
|
332
|
-
:return:
|
331
|
+
:return: Metadata for genomic and transcript accessions if found and validated,
|
332
|
+
else None
|
333
333
|
"""
|
334
334
|
genomic_tx_data = await self.uta_db.get_genomic_tx_data(
|
335
335
|
tx_ac, pos, annotation_layer, alt_ac=alt_ac
|
@@ -342,14 +342,14 @@ class ManeTranscript:
|
|
342
342
|
annotation_layer,
|
343
343
|
)
|
344
344
|
return None
|
345
|
-
genomic_tx_data
|
345
|
+
genomic_tx_data.coding_start_site = coding_start_site
|
346
346
|
|
347
347
|
if not alt_ac:
|
348
348
|
# Only want to liftover if alt_ac not provided. If alt_ac is provided,
|
349
349
|
# it means user wants to stick with the queried assembly
|
350
|
-
og_alt_exon_id = genomic_tx_data
|
350
|
+
og_alt_exon_id = genomic_tx_data.alt_exon_id
|
351
351
|
await self._liftover_to_38(genomic_tx_data)
|
352
|
-
liftover_alt_exon_id = genomic_tx_data
|
352
|
+
liftover_alt_exon_id = genomic_tx_data.alt_exon_id
|
353
353
|
|
354
354
|
# Validation check: Exon structure
|
355
355
|
if og_alt_exon_id != liftover_alt_exon_id:
|
@@ -467,14 +467,14 @@ class ManeTranscript:
|
|
467
467
|
:return: Transcript data
|
468
468
|
"""
|
469
469
|
if found_result:
|
470
|
-
tx_g_pos = g
|
471
|
-
tx_pos_range = g
|
470
|
+
tx_g_pos = g.alt_pos_range
|
471
|
+
tx_pos_range = g.tx_pos_range
|
472
472
|
else:
|
473
473
|
result = await self.uta_db.get_tx_exon_aln_v_data(
|
474
474
|
refseq_c_ac,
|
475
|
-
g
|
476
|
-
g
|
477
|
-
alt_ac=alt_ac if alt_ac else g
|
475
|
+
g.alt_pos_change_range[0],
|
476
|
+
g.alt_pos_change_range[1],
|
477
|
+
alt_ac=alt_ac if alt_ac else g.alt_ac,
|
478
478
|
use_tx_pos=False,
|
479
479
|
)
|
480
480
|
|
@@ -484,18 +484,18 @@ class ManeTranscript:
|
|
484
484
|
)
|
485
485
|
return None
|
486
486
|
result = result[-1]
|
487
|
-
tx_g_pos = result
|
488
|
-
tx_pos_range = result
|
487
|
+
tx_g_pos = result.alt_start_i, result.alt_end_i
|
488
|
+
tx_pos_range = result.tx_start_i, result.tx_end_i
|
489
489
|
|
490
490
|
cds_start_end = await self.uta_db.get_cds_start_end(refseq_c_ac)
|
491
491
|
if not cds_start_end:
|
492
492
|
return None
|
493
493
|
coding_start_site = cds_start_end[0]
|
494
494
|
|
495
|
-
g_pos = g
|
495
|
+
g_pos = g.alt_pos_change_range # start/end genomic change
|
496
496
|
g_pos_change = g_pos[0] - tx_g_pos[0], tx_g_pos[1] - g_pos[1]
|
497
497
|
|
498
|
-
if g
|
498
|
+
if g.strand == Strand.NEGATIVE:
|
499
499
|
g_pos_change = (tx_g_pos[1] - g_pos[0], g_pos[1] - tx_g_pos[0])
|
500
500
|
|
501
501
|
c_pos_change = (
|
@@ -507,10 +507,10 @@ class ManeTranscript:
|
|
507
507
|
c_pos_change = c_pos_change[1], c_pos_change[0]
|
508
508
|
|
509
509
|
return self._get_c_data(
|
510
|
-
gene=g
|
510
|
+
gene=g.gene,
|
511
511
|
cds_start_end=cds_start_end,
|
512
512
|
c_pos_change=c_pos_change,
|
513
|
-
strand=g
|
513
|
+
strand=g.strand,
|
514
514
|
alt_ac=alt_ac,
|
515
515
|
status=status,
|
516
516
|
refseq_c_ac=refseq_c_ac,
|
@@ -562,7 +562,7 @@ class ManeTranscript:
|
|
562
562
|
| GenomicRepresentation,
|
563
563
|
expected_ref: str,
|
564
564
|
anno: AnnotationLayer,
|
565
|
-
|
565
|
+
coordinate_type: CoordinateType,
|
566
566
|
) -> bool:
|
567
567
|
"""Return whether or not reference changes are the same.
|
568
568
|
|
@@ -574,7 +574,7 @@ class ManeTranscript:
|
|
574
574
|
position change
|
575
575
|
:param expected_ref: Reference at position given during input
|
576
576
|
:param anno: Annotation layer we are starting from
|
577
|
-
:param
|
577
|
+
:param coordinate_type: Coordinate type for ``start_pos`` and ``end_pos``
|
578
578
|
:return: ``True`` if reference check passes. ``False`` otherwise.
|
579
579
|
"""
|
580
580
|
if anno == AnnotationLayer.CDNA:
|
@@ -582,7 +582,7 @@ class ManeTranscript:
|
|
582
582
|
end_pos += coding_start_site
|
583
583
|
|
584
584
|
ref, _ = self.seqrepo_access.get_reference_sequence(
|
585
|
-
ac, start=start_pos, end=end_pos,
|
585
|
+
ac, start=start_pos, end=end_pos, coordinate_type=coordinate_type
|
586
586
|
)
|
587
587
|
if ref is None:
|
588
588
|
return False
|
@@ -598,7 +598,7 @@ class ManeTranscript:
|
|
598
598
|
mane_transcript.refseq,
|
599
599
|
start=mane_start_pos,
|
600
600
|
end=mane_end_pos if mane_start_pos != mane_end_pos else None,
|
601
|
-
|
601
|
+
coordinate_type=coordinate_type,
|
602
602
|
)
|
603
603
|
if not mane_ref:
|
604
604
|
_logger.info("Unable to validate reference for MANE Transcript")
|
@@ -633,7 +633,10 @@ class ManeTranscript:
|
|
633
633
|
end_pos = pos[1] + coding_start_site
|
634
634
|
return bool(
|
635
635
|
self.seqrepo_access.get_reference_sequence(
|
636
|
-
ac,
|
636
|
+
ac,
|
637
|
+
start=start_pos,
|
638
|
+
end=end_pos,
|
639
|
+
coordinate_type=CoordinateType.INTER_RESIDUE,
|
637
640
|
)[0]
|
638
641
|
)
|
639
642
|
|
@@ -690,7 +693,7 @@ class ManeTranscript:
|
|
690
693
|
start_annotation_layer: AnnotationLayer,
|
691
694
|
gene: str | None = None,
|
692
695
|
ref: str | None = None,
|
693
|
-
|
696
|
+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
694
697
|
mane_transcripts: set | None = None,
|
695
698
|
alt_ac: str | None = None,
|
696
699
|
end_annotation_layer: EndAnnotationLayer | None = None,
|
@@ -700,8 +703,8 @@ class ManeTranscript:
|
|
700
703
|
information.
|
701
704
|
|
702
705
|
>>> import asyncio
|
703
|
-
>>> from cool_seq_tool
|
704
|
-
>>> from cool_seq_tool.schemas import AnnotationLayer,
|
706
|
+
>>> from cool_seq_tool import CoolSeqTool
|
707
|
+
>>> from cool_seq_tool.schemas import AnnotationLayer, CoordinateType
|
705
708
|
>>> mane_mapper = CoolSeqTool().mane_transcript
|
706
709
|
>>> mane_transcripts = {
|
707
710
|
... "ENST00000646891.2",
|
@@ -715,7 +718,7 @@ class ManeTranscript:
|
|
715
718
|
... 599,
|
716
719
|
... gene="BRAF",
|
717
720
|
... start_annotation_layer=AnnotationLayer.PROTEIN,
|
718
|
-
...
|
721
|
+
... coordinate_type=CoordinateType.INTER_RESIDUE,
|
719
722
|
... mane_transcripts=mane_transcripts,
|
720
723
|
... )
|
721
724
|
... )
|
@@ -732,7 +735,7 @@ class ManeTranscript:
|
|
732
735
|
:param start_annotation_layer: Starting annotation layer
|
733
736
|
:param gene: HGNC gene symbol
|
734
737
|
:param ref: Reference at position given during input
|
735
|
-
:param
|
738
|
+
:param coordinate_type: Coordinate type for ``start_pos`` and ``end_pos``
|
736
739
|
:param mane_transcripts: Attempted mane transcripts that were not compatible
|
737
740
|
:param alt_ac: Genomic accession
|
738
741
|
:param end_annotation_layer: The end annotation layer. If not provided, will be
|
@@ -768,8 +771,8 @@ class ManeTranscript:
|
|
768
771
|
)
|
769
772
|
|
770
773
|
lcr_result = None
|
771
|
-
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos,
|
772
|
-
|
774
|
+
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
|
775
|
+
coordinate_type = CoordinateType.INTER_RESIDUE
|
773
776
|
|
774
777
|
is_p_or_c_start_anno = True
|
775
778
|
if start_annotation_layer == AnnotationLayer.PROTEIN:
|
@@ -857,7 +860,7 @@ class ManeTranscript:
|
|
857
860
|
{},
|
858
861
|
ref,
|
859
862
|
AnnotationLayer.PROTEIN,
|
860
|
-
|
863
|
+
coordinate_type,
|
861
864
|
)
|
862
865
|
elif start_annotation_layer == AnnotationLayer.CDNA:
|
863
866
|
valid_references = self._validate_references(
|
@@ -868,7 +871,7 @@ class ManeTranscript:
|
|
868
871
|
{},
|
869
872
|
ref,
|
870
873
|
AnnotationLayer.CDNA,
|
871
|
-
|
874
|
+
coordinate_type,
|
872
875
|
)
|
873
876
|
else:
|
874
877
|
valid_references = self._validate_references(
|
@@ -879,7 +882,7 @@ class ManeTranscript:
|
|
879
882
|
{},
|
880
883
|
ref,
|
881
884
|
AnnotationLayer.GENOMIC,
|
882
|
-
|
885
|
+
coordinate_type,
|
883
886
|
)
|
884
887
|
|
885
888
|
if not valid_references:
|
@@ -903,7 +906,7 @@ class ManeTranscript:
|
|
903
906
|
gene,
|
904
907
|
row["pro_ac"],
|
905
908
|
lcr_c_data.pos,
|
906
|
-
g
|
909
|
+
g.strand,
|
907
910
|
lcr_c_data.status,
|
908
911
|
)
|
909
912
|
coding_start_site = 0
|
@@ -925,7 +928,7 @@ class ManeTranscript:
|
|
925
928
|
gene,
|
926
929
|
row["pro_ac"],
|
927
930
|
lcr_c_data.pos,
|
928
|
-
g
|
931
|
+
g.strand,
|
929
932
|
lcr_c_data.status,
|
930
933
|
),
|
931
934
|
cdna=lcr_c_data,
|
@@ -960,8 +963,8 @@ class ManeTranscript:
|
|
960
963
|
gene: str | None = None,
|
961
964
|
ref: str | None = None,
|
962
965
|
try_longest_compatible: bool = False,
|
963
|
-
|
964
|
-
| Literal[
|
966
|
+
coordinate_type: Literal[CoordinateType.RESIDUE]
|
967
|
+
| Literal[CoordinateType.INTER_RESIDUE] = CoordinateType.RESIDUE,
|
965
968
|
) -> DataRepresentation | CdnaRepresentation | None:
|
966
969
|
"""Return MANE representation
|
967
970
|
|
@@ -974,8 +977,8 @@ class ManeTranscript:
|
|
974
977
|
``AnnotationLayer.GENOMIC`` GRCh38 representation if ``gene`` is NOT
|
975
978
|
provided.
|
976
979
|
|
977
|
-
>>> from cool_seq_tool
|
978
|
-
>>> from cool_seq_tool.schemas import AnnotationLayer,
|
980
|
+
>>> from cool_seq_tool import CoolSeqTool
|
981
|
+
>>> from cool_seq_tool.schemas import AnnotationLayer, CoordinateType
|
979
982
|
>>> import asyncio
|
980
983
|
>>> mane_mapper = CoolSeqTool().mane_transcript
|
981
984
|
>>> result = asyncio.run(
|
@@ -983,7 +986,7 @@ class ManeTranscript:
|
|
983
986
|
... "NP_004324.2",
|
984
987
|
... 599,
|
985
988
|
... AnnotationLayer.PROTEIN,
|
986
|
-
...
|
989
|
+
... coordinate_type=CoordinateType.INTER_RESIDUE,
|
987
990
|
... )
|
988
991
|
... )
|
989
992
|
>>> result.gene, result.refseq, result.status
|
@@ -1001,13 +1004,13 @@ class ManeTranscript:
|
|
1001
1004
|
:param ref: Reference at position given during input
|
1002
1005
|
:param try_longest_compatible: ``True`` if should try longest compatible remaining
|
1003
1006
|
if mane transcript was not compatible. ``False`` otherwise.
|
1004
|
-
:param
|
1005
|
-
``end_pos``. Will always return
|
1007
|
+
:param CoordinateType coordinate_type: Starting Coordinate type for
|
1008
|
+
``start_pos`` and ``end_pos``. Will always return inter-residue coordinates
|
1006
1009
|
:return: MANE data or longest transcript compatible data if validation
|
1007
1010
|
checks are correct. Will return inter-residue coordinates. Else, ``None``.
|
1008
1011
|
"""
|
1009
|
-
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos,
|
1010
|
-
|
1012
|
+
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
|
1013
|
+
coordinate_type = CoordinateType.INTER_RESIDUE
|
1011
1014
|
if ref:
|
1012
1015
|
ref = ref[: end_pos - start_pos]
|
1013
1016
|
|
@@ -1026,7 +1029,7 @@ class ManeTranscript:
|
|
1026
1029
|
if g is None:
|
1027
1030
|
return None
|
1028
1031
|
# Get mane data for gene
|
1029
|
-
mane_data = self.mane_transcript_mappings.get_gene_mane_data(g
|
1032
|
+
mane_data = self.mane_transcript_mappings.get_gene_mane_data(g.gene)
|
1030
1033
|
if not mane_data:
|
1031
1034
|
return None
|
1032
1035
|
|
@@ -1053,10 +1056,8 @@ class ManeTranscript:
|
|
1053
1056
|
if not mane:
|
1054
1057
|
continue
|
1055
1058
|
|
1056
|
-
if not mane.alt_ac:
|
1057
|
-
|
1058
|
-
if g_alt_ac:
|
1059
|
-
mane.alt_ac = g_alt_ac
|
1059
|
+
if not mane.alt_ac and g.alt_ac:
|
1060
|
+
mane.alt_ac = g.alt_ac
|
1060
1061
|
|
1061
1062
|
valid_reading_frame = self._validate_reading_frames(
|
1062
1063
|
c_ac, c_pos[0], c_pos[1], mane
|
@@ -1072,13 +1073,13 @@ class ManeTranscript:
|
|
1072
1073
|
if ref:
|
1073
1074
|
valid_references = self._validate_references(
|
1074
1075
|
ac,
|
1075
|
-
g
|
1076
|
+
g.coding_start_site,
|
1076
1077
|
start_pos,
|
1077
1078
|
end_pos,
|
1078
1079
|
mane,
|
1079
1080
|
ref,
|
1080
1081
|
start_annotation_layer,
|
1081
|
-
|
1082
|
+
coordinate_type,
|
1082
1083
|
)
|
1083
1084
|
if not valid_references:
|
1084
1085
|
continue
|
@@ -1092,8 +1093,8 @@ class ManeTranscript:
|
|
1092
1093
|
end_pos,
|
1093
1094
|
AnnotationLayer.PROTEIN,
|
1094
1095
|
ref=ref,
|
1095
|
-
gene=g
|
1096
|
-
|
1096
|
+
gene=g.gene,
|
1097
|
+
coordinate_type=coordinate_type,
|
1097
1098
|
mane_transcripts=mane_transcripts,
|
1098
1099
|
)
|
1099
1100
|
return await self.get_longest_compatible_transcript(
|
@@ -1102,7 +1103,7 @@ class ManeTranscript:
|
|
1102
1103
|
AnnotationLayer.CDNA,
|
1103
1104
|
ref=ref,
|
1104
1105
|
gene=g["gene"],
|
1105
|
-
|
1106
|
+
coordinate_type=coordinate_type,
|
1106
1107
|
mane_transcripts=mane_transcripts,
|
1107
1108
|
)
|
1108
1109
|
return None
|
@@ -1113,11 +1114,11 @@ class ManeTranscript:
|
|
1113
1114
|
start_pos,
|
1114
1115
|
end_pos,
|
1115
1116
|
get_mane_genes=True,
|
1116
|
-
|
1117
|
+
coordinate_type=coordinate_type,
|
1117
1118
|
)
|
1118
1119
|
|
1119
1120
|
return await self.g_to_mane_c(
|
1120
|
-
ac, start_pos, end_pos, gene,
|
1121
|
+
ac, start_pos, end_pos, gene, coordinate_type=coordinate_type
|
1121
1122
|
)
|
1122
1123
|
_logger.warning("Annotation layer not supported: %s", start_annotation_layer)
|
1123
1124
|
return None
|
@@ -1128,7 +1129,7 @@ class ManeTranscript:
|
|
1128
1129
|
start_pos: int,
|
1129
1130
|
end_pos: int,
|
1130
1131
|
get_mane_genes: bool = False,
|
1131
|
-
|
1132
|
+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
1132
1133
|
) -> GenomicRepresentation | None:
|
1133
1134
|
"""Return genomic coordinate on GRCh38 when not given gene context.
|
1134
1135
|
|
@@ -1137,11 +1138,11 @@ class ManeTranscript:
|
|
1137
1138
|
:param end_pos: Genomic end position
|
1138
1139
|
:param get_mane_genes: ``True`` if mane genes for genomic position should be
|
1139
1140
|
included in response. ``False``, otherwise.
|
1140
|
-
:param
|
1141
|
+
:param coordinate_type: Coordinate type for ``start_pos`` and ``end_pos``
|
1141
1142
|
:return: GRCh38 genomic representation (accession and start/end inter-residue
|
1142
1143
|
position)
|
1143
1144
|
"""
|
1144
|
-
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos,
|
1145
|
+
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
|
1145
1146
|
|
1146
1147
|
# Checking to see what chromosome and assembly we're on
|
1147
1148
|
descr = await self.uta_db.get_chr_assembly(ac)
|
@@ -1200,7 +1201,7 @@ class ManeTranscript:
|
|
1200
1201
|
|
1201
1202
|
@staticmethod
|
1202
1203
|
def get_mane_c_pos_change(
|
1203
|
-
mane_tx_genomic_data:
|
1204
|
+
mane_tx_genomic_data: GenomicTxMetadata, coding_start_site: int
|
1204
1205
|
) -> tuple[int, int]:
|
1205
1206
|
"""Get mane c position change
|
1206
1207
|
|
@@ -1208,12 +1209,12 @@ class ManeTranscript:
|
|
1208
1209
|
:param coding_start_site: Coding start site
|
1209
1210
|
:return: cDNA pos start, cDNA pos end
|
1210
1211
|
"""
|
1211
|
-
tx_pos_range = mane_tx_genomic_data
|
1212
|
-
|
1212
|
+
tx_pos_range = mane_tx_genomic_data.tx_pos_range
|
1213
|
+
pos_change = mane_tx_genomic_data.pos_change
|
1213
1214
|
|
1214
1215
|
mane_c_pos_change = (
|
1215
|
-
tx_pos_range[0] +
|
1216
|
-
tx_pos_range[1] -
|
1216
|
+
tx_pos_range[0] + pos_change[0] - coding_start_site,
|
1217
|
+
tx_pos_range[1] - pos_change[1] - coding_start_site,
|
1217
1218
|
)
|
1218
1219
|
|
1219
1220
|
if mane_c_pos_change[0] > mane_c_pos_change[1]:
|
@@ -1226,12 +1227,12 @@ class ManeTranscript:
|
|
1226
1227
|
start_pos: int,
|
1227
1228
|
end_pos: int,
|
1228
1229
|
gene: str,
|
1229
|
-
|
1230
|
+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
1230
1231
|
) -> CdnaRepresentation | None:
|
1231
1232
|
"""Return MANE Transcript on the c. coordinate.
|
1232
1233
|
|
1233
1234
|
>>> import asyncio
|
1234
|
-
>>> from cool_seq_tool
|
1235
|
+
>>> from cool_seq_tool import CoolSeqTool
|
1235
1236
|
>>> cst = CoolSeqTool()
|
1236
1237
|
>>> result = asyncio.run(
|
1237
1238
|
... cst.mane_transcript.g_to_mane_c(
|
@@ -1248,12 +1249,12 @@ class ManeTranscript:
|
|
1248
1249
|
:param start_pos: genomic start position
|
1249
1250
|
:param end_pos: genomic end position
|
1250
1251
|
:param gene: HGNC gene symbol
|
1251
|
-
:param
|
1252
|
-
Will always return
|
1252
|
+
:param coordinate_type: Starting Coordinate type for ``start_pos`` and
|
1253
|
+
``end_pos``. Will always return inter-residue coordinates.
|
1253
1254
|
:return: MANE Transcripts with cDNA change on c. coordinate
|
1254
1255
|
"""
|
1255
|
-
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos,
|
1256
|
-
|
1256
|
+
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
|
1257
|
+
coordinate_type = CoordinateType.INTER_RESIDUE
|
1257
1258
|
|
1258
1259
|
if not await self.uta_db.validate_genomic_ac(ac):
|
1259
1260
|
_logger.warning("Genomic accession does not exist: %s", ac)
|
@@ -1268,7 +1269,11 @@ class ManeTranscript:
|
|
1268
1269
|
|
1269
1270
|
# Liftover to GRCh38
|
1270
1271
|
grch38 = await self.g_to_grch38(
|
1271
|
-
ac,
|
1272
|
+
ac,
|
1273
|
+
start_pos,
|
1274
|
+
end_pos,
|
1275
|
+
get_mane_genes=False,
|
1276
|
+
coordinate_type=coordinate_type,
|
1272
1277
|
)
|
1273
1278
|
mane_tx_genomic_data = None
|
1274
1279
|
if grch38:
|
@@ -1286,8 +1291,8 @@ class ManeTranscript:
|
|
1286
1291
|
continue
|
1287
1292
|
_logger.info("Not using most recent assembly")
|
1288
1293
|
|
1289
|
-
coding_start_site = mane_tx_genomic_data
|
1290
|
-
coding_end_site = mane_tx_genomic_data
|
1294
|
+
coding_start_site = mane_tx_genomic_data.coding_start_site
|
1295
|
+
coding_end_site = mane_tx_genomic_data.coding_end_site
|
1291
1296
|
mane_c_pos_change = self.get_mane_c_pos_change(
|
1292
1297
|
mane_tx_genomic_data, coding_start_site
|
1293
1298
|
)
|
@@ -1323,7 +1328,7 @@ class ManeTranscript:
|
|
1323
1328
|
start_pos: int,
|
1324
1329
|
end_pos: int,
|
1325
1330
|
gene: str | None = None,
|
1326
|
-
|
1331
|
+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
1327
1332
|
try_longest_compatible: bool = False,
|
1328
1333
|
) -> dict | None:
|
1329
1334
|
"""Given GRCh38 genomic representation, return protein representation.
|
@@ -1336,8 +1341,8 @@ class ManeTranscript:
|
|
1336
1341
|
:param start_pos: Start position
|
1337
1342
|
:param end_pos: End position
|
1338
1343
|
:param gene: HGNC gene symbol
|
1339
|
-
:param
|
1340
|
-
always return
|
1344
|
+
:param coordinate_type: Starting Coordinate type for ``start_pos`` and
|
1345
|
+
``end_pos``. Will always return inter-residue coordinates.
|
1341
1346
|
:param try_longest_compatible: ``True`` if should try longest compatible remaining
|
1342
1347
|
if mane transcript(s) not compatible. ``False`` otherwise.
|
1343
1348
|
:return: If successful, return MANE data or longest compatible remaining (if
|
@@ -1356,8 +1361,8 @@ class ManeTranscript:
|
|
1356
1361
|
return None
|
1357
1362
|
|
1358
1363
|
# Step 2: Get inter-residue position
|
1359
|
-
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos,
|
1360
|
-
|
1364
|
+
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
|
1365
|
+
coordinate_type = CoordinateType.INTER_RESIDUE
|
1361
1366
|
|
1362
1367
|
# Step 3: Try getting MANE protein representation
|
1363
1368
|
mane_transcripts = set() # Used if getting longest compatible remaining
|
@@ -1373,8 +1378,8 @@ class ManeTranscript:
|
|
1373
1378
|
continue
|
1374
1379
|
|
1375
1380
|
# Get MANE C positions
|
1376
|
-
coding_start_site = mane_tx_genomic_data
|
1377
|
-
coding_end_site = mane_tx_genomic_data
|
1381
|
+
coding_start_site = mane_tx_genomic_data.coding_start_site
|
1382
|
+
coding_end_site = mane_tx_genomic_data.coding_end_site
|
1378
1383
|
mane_c_pos_change = self.get_mane_c_pos_change(
|
1379
1384
|
mane_tx_genomic_data, coding_start_site
|
1380
1385
|
)
|
@@ -1394,7 +1399,7 @@ class ManeTranscript:
|
|
1394
1399
|
cdna=self._get_c_data(
|
1395
1400
|
(coding_start_site, coding_end_site),
|
1396
1401
|
mane_c_pos_change,
|
1397
|
-
mane_tx_genomic_data
|
1402
|
+
mane_tx_genomic_data.strand,
|
1398
1403
|
TranscriptPriority(
|
1399
1404
|
"_".join(current_mane_data["MANE_status"].split()).lower()
|
1400
1405
|
),
|
@@ -1410,7 +1415,7 @@ class ManeTranscript:
|
|
1410
1415
|
start_pos,
|
1411
1416
|
end_pos,
|
1412
1417
|
AnnotationLayer.GENOMIC,
|
1413
|
-
|
1418
|
+
coordinate_type=coordinate_type,
|
1414
1419
|
alt_ac=alt_ac,
|
1415
1420
|
end_annotation_layer=EndAnnotationLayer.PROTEIN_AND_CDNA,
|
1416
1421
|
mane_transcripts=mane_transcripts,
|