cool-seq-tool 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,9 @@ from cool_seq_tool.mappers.liftover import LiftOver
25
25
  from cool_seq_tool.schemas import (
26
26
  AnnotationLayer,
27
27
  Assembly,
28
- ResidueMode,
28
+ CoordinateType,
29
+ GenomicTxMetadata,
30
+ ManeGeneData,
29
31
  Strand,
30
32
  TranscriptPriority,
31
33
  )
@@ -71,10 +73,10 @@ class CdnaRepresentation(DataRepresentation):
71
73
  class GenomicRepresentation(BaseModel):
72
74
  """Define object model for genomic representation"""
73
75
 
74
- refseq: str
75
76
  pos: tuple[int, int]
76
- status: TranscriptPriority
77
- alt_ac: str
77
+ mane_genes: list[ManeGeneData] = []
78
+ status: Literal["grch38"] = TranscriptPriority.GRCH38.value
79
+ ac: str
78
80
 
79
81
 
80
82
  class ProteinAndCdnaRepresentation(BaseModel):
@@ -100,7 +102,7 @@ class ManeTranscript:
100
102
  A handful of resources are required for initialization, so when defaults are
101
103
  enough, it's easiest to let the core CoolSeqTool class handle it for you:
102
104
 
103
- >>> from cool_seq_tool.app import CoolSeqTool
105
+ >>> from cool_seq_tool import CoolSeqTool
104
106
  >>> mane_mapper = CoolSeqTool().mane_transcript
105
107
 
106
108
  Note that most methods are defined as Python coroutines, so they must be called
@@ -108,7 +110,7 @@ class ManeTranscript:
108
110
 
109
111
  >>> import asyncio
110
112
  >>> result = asyncio.run(mane_mapper.g_to_grch38("NC_000001.11", 100, 200))
111
- >>> result["ac"]
113
+ >>> result.ac
112
114
  'NC_000001.11'
113
115
 
114
116
  See the :ref:`Usage section <async_note>` for more information.
@@ -128,7 +130,7 @@ class ManeTranscript:
128
130
  self.liftover = liftover
129
131
 
130
132
  @staticmethod
131
- def _get_reading_frame(pos: int) -> int:
133
+ def get_reading_frame(pos: int) -> int:
132
134
  """Return reading frame number. Only used on c. coordinate.
133
135
 
134
136
  :param pos: cDNA position
@@ -181,13 +183,12 @@ class ManeTranscript:
181
183
  pos = self._p_to_c_pos(start_pos, end_pos)
182
184
  return ac, pos
183
185
 
184
- async def _c_to_g(self, ac: str, pos: tuple[int, int]) -> dict | None:
186
+ async def _c_to_g(self, ac: str, pos: tuple[int, int]) -> GenomicTxMetadata | None:
185
187
  """Get g. annotation from c. annotation.
186
188
 
187
189
  :param ac: cDNA accession
188
190
  :param pos: [cDNA pos start, cDNA pos end]
189
- :return: Gene, Transcript accession and position change,
190
- Altered transcript accession and position change, Strand
191
+ :return: Metadata for genomic and transcript accessions
191
192
  """
192
193
  # UTA does not store ENST versions
193
194
  # So we want to make sure version is valid
@@ -219,13 +220,13 @@ class ManeTranscript:
219
220
  ac, pos, AnnotationLayer.CDNA, coding_start_site=coding_start_site
220
221
  )
221
222
 
222
- async def _liftover_to_38(self, genomic_tx_data: dict) -> None:
223
+ async def _liftover_to_38(self, genomic_tx_data: GenomicTxMetadata) -> None:
223
224
  """Liftover genomic_tx_data to hg38 assembly.
224
225
 
225
- :param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
226
- strand. This will be mutated in-place if not GRCh38 assembly.
226
+ :param genomic_tx_data: Metadata for genomic and transcript accessions. This
227
+ will be mutated in-place if not GRCh38 assembly.
227
228
  """
228
- descr = await self.uta_db.get_chr_assembly(genomic_tx_data["alt_ac"])
229
+ descr = await self.uta_db.get_chr_assembly(genomic_tx_data.alt_ac)
229
230
  if descr is None:
230
231
  # already grch38
231
232
  return
@@ -234,14 +235,14 @@ class ManeTranscript:
234
235
  query = f"""
235
236
  SELECT DISTINCT alt_ac
236
237
  FROM {self.uta_db.schema}.tx_exon_aln_v
237
- WHERE tx_ac = '{genomic_tx_data['tx_ac']}';
238
+ WHERE tx_ac = '{genomic_tx_data.tx_ac}';
238
239
  """ # noqa: S608
239
240
  nc_acs = await self.uta_db.execute_query(query)
240
241
  nc_acs = [nc_ac[0] for nc_ac in nc_acs]
241
- if nc_acs == [genomic_tx_data["alt_ac"]]:
242
+ if nc_acs == [genomic_tx_data.alt_ac]:
242
243
  _logger.warning(
243
244
  "UTA does not have GRCh38 assembly for %s",
244
- genomic_tx_data["alt_ac"].split(".")[0],
245
+ genomic_tx_data.alt_ac.split(".")[0],
245
246
  )
246
247
  return
247
248
 
@@ -257,7 +258,7 @@ class ManeTranscript:
257
258
  )
258
259
 
259
260
  # Change alt_ac to most recent
260
- if genomic_tx_data["alt_ac"].startswith("EN"):
261
+ if genomic_tx_data.alt_ac.startswith("EN"):
261
262
  order_by_cond = "ORDER BY alt_ac DESC;"
262
263
  else:
263
264
  order_by_cond = """
@@ -267,50 +268,49 @@ class ManeTranscript:
267
268
  query = f"""
268
269
  SELECT alt_ac
269
270
  FROM {self.uta_db.schema}.genomic
270
- WHERE alt_ac LIKE '{genomic_tx_data['alt_ac'].split('.')[0]}%'
271
+ WHERE alt_ac LIKE '{genomic_tx_data.alt_ac.split('.')[0]}%'
271
272
  {order_by_cond}
272
273
  """ # noqa: S608
273
274
  nc_acs = await self.uta_db.execute_query(query)
274
- genomic_tx_data["alt_ac"] = nc_acs[0][0]
275
+ genomic_tx_data.alt_ac = nc_acs[0][0]
275
276
 
276
277
  def _set_liftover(
277
278
  self,
278
- genomic_tx_data: dict,
279
+ genomic_tx_data: GenomicTxMetadata,
279
280
  key: str,
280
281
  chromosome: str,
281
282
  liftover_to_assembly: Assembly,
282
283
  ) -> None:
283
284
  """Update genomic_tx_data to have coordinates for given assembly.
284
285
 
285
- :param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
286
- strand
286
+ :param genomic_tx_data: Metadata for genomic and transcript accessions
287
287
  :param key: Key to access coordinate positions
288
288
  :param chromosome: Chromosome, must be prefixed with ``chr``
289
289
  :param liftover_to_assembly: Assembly to liftover to
290
290
  """
291
+ coords = getattr(genomic_tx_data, key)
291
292
  liftover_start_i = self.liftover.get_liftover(
292
- chromosome, genomic_tx_data[key][0], liftover_to_assembly
293
+ chromosome, coords[0], liftover_to_assembly
293
294
  )
294
295
  if liftover_start_i is None:
295
296
  _logger.warning(
296
297
  "Unable to liftover position %s on %s",
297
- genomic_tx_data[key][0],
298
+ coords[0],
298
299
  chromosome,
299
300
  )
300
301
  return
301
302
 
302
303
  liftover_end_i = self.liftover.get_liftover(
303
- chromosome, genomic_tx_data[key][1], liftover_to_assembly
304
+ chromosome, coords[1], liftover_to_assembly
304
305
  )
305
306
  if liftover_end_i is None:
306
307
  _logger.warning(
307
308
  "Unable to liftover position %s on %s",
308
- genomic_tx_data[key][1],
309
+ coords[1],
309
310
  chromosome,
310
311
  )
311
312
  return
312
-
313
- genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
313
+ setattr(genomic_tx_data, key, (liftover_start_i[1], liftover_end_i[1]))
314
314
 
315
315
  async def _get_and_validate_genomic_tx_data(
316
316
  self,
@@ -320,7 +320,7 @@ class ManeTranscript:
320
320
  | Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.CDNA,
321
321
  coding_start_site: int | None = None,
322
322
  alt_ac: str | None = None,
323
- ) -> dict | None:
323
+ ) -> GenomicTxMetadata | None:
324
324
  """Get and validate genomic_tx_data
325
325
 
326
326
  :param tx_ac: Accession on c. coordinate
@@ -328,7 +328,8 @@ class ManeTranscript:
328
328
  :param annotation_layer: Annotation layer for ``ac`` and ``pos``
329
329
  :param coding_start_site: Coding start site
330
330
  :param alt_ac: Accession on g. coordinate
331
- :return: genomic_tx_data if found and validated, else None
331
+ :return: Metadata for genomic and transcript accessions if found and validated,
332
+ else None
332
333
  """
333
334
  genomic_tx_data = await self.uta_db.get_genomic_tx_data(
334
335
  tx_ac, pos, annotation_layer, alt_ac=alt_ac
@@ -341,14 +342,14 @@ class ManeTranscript:
341
342
  annotation_layer,
342
343
  )
343
344
  return None
344
- genomic_tx_data["coding_start_site"] = coding_start_site
345
+ genomic_tx_data.coding_start_site = coding_start_site
345
346
 
346
347
  if not alt_ac:
347
348
  # Only want to liftover if alt_ac not provided. If alt_ac is provided,
348
349
  # it means user wants to stick with the queried assembly
349
- og_alt_exon_id = genomic_tx_data["alt_exon_id"]
350
+ og_alt_exon_id = genomic_tx_data.alt_exon_id
350
351
  await self._liftover_to_38(genomic_tx_data)
351
- liftover_alt_exon_id = genomic_tx_data["alt_exon_id"]
352
+ liftover_alt_exon_id = genomic_tx_data.alt_exon_id
352
353
 
353
354
  # Validation check: Exon structure
354
355
  if og_alt_exon_id != liftover_alt_exon_id:
@@ -466,14 +467,14 @@ class ManeTranscript:
466
467
  :return: Transcript data
467
468
  """
468
469
  if found_result:
469
- tx_g_pos = g["alt_pos_range"]
470
- tx_pos_range = g["tx_pos_range"]
470
+ tx_g_pos = g.alt_pos_range
471
+ tx_pos_range = g.tx_pos_range
471
472
  else:
472
473
  result = await self.uta_db.get_tx_exon_aln_v_data(
473
474
  refseq_c_ac,
474
- g["alt_pos_change_range"][0],
475
- g["alt_pos_change_range"][1],
476
- alt_ac=alt_ac if alt_ac else g["alt_ac"],
475
+ g.alt_pos_change_range[0],
476
+ g.alt_pos_change_range[1],
477
+ alt_ac=alt_ac if alt_ac else g.alt_ac,
477
478
  use_tx_pos=False,
478
479
  )
479
480
 
@@ -483,18 +484,18 @@ class ManeTranscript:
483
484
  )
484
485
  return None
485
486
  result = result[-1]
486
- tx_g_pos = result[5], result[6] # alt_start_i, alt_end_i
487
- tx_pos_range = result[2], result[3] # tx_start_i, tx_end_i
487
+ tx_g_pos = result.alt_start_i, result.alt_end_i
488
+ tx_pos_range = result.tx_start_i, result.tx_end_i
488
489
 
489
490
  cds_start_end = await self.uta_db.get_cds_start_end(refseq_c_ac)
490
491
  if not cds_start_end:
491
492
  return None
492
493
  coding_start_site = cds_start_end[0]
493
494
 
494
- g_pos = g["alt_pos_change_range"] # start/end genomic change
495
+ g_pos = g.alt_pos_change_range # start/end genomic change
495
496
  g_pos_change = g_pos[0] - tx_g_pos[0], tx_g_pos[1] - g_pos[1]
496
497
 
497
- if g["strand"] == Strand.NEGATIVE:
498
+ if g.strand == Strand.NEGATIVE:
498
499
  g_pos_change = (tx_g_pos[1] - g_pos[0], g_pos[1] - tx_g_pos[0])
499
500
 
500
501
  c_pos_change = (
@@ -506,10 +507,10 @@ class ManeTranscript:
506
507
  c_pos_change = c_pos_change[1], c_pos_change[0]
507
508
 
508
509
  return self._get_c_data(
509
- gene=g["gene"],
510
+ gene=g.gene,
510
511
  cds_start_end=cds_start_end,
511
512
  c_pos_change=c_pos_change,
512
- strand=g["strand"],
513
+ strand=g.strand,
513
514
  alt_ac=alt_ac,
514
515
  status=status,
515
516
  refseq_c_ac=refseq_c_ac,
@@ -531,8 +532,8 @@ class ManeTranscript:
531
532
  """
532
533
  for pos, pos_index in [(start_pos, 0), (end_pos, 1)]:
533
534
  if pos is not None:
534
- og_rf = self._get_reading_frame(pos)
535
- new_rf = self._get_reading_frame(transcript_data.pos[pos_index])
535
+ og_rf = self.get_reading_frame(pos)
536
+ new_rf = self.get_reading_frame(transcript_data.pos[pos_index])
536
537
 
537
538
  if og_rf != new_rf:
538
539
  _logger.warning(
@@ -561,7 +562,7 @@ class ManeTranscript:
561
562
  | GenomicRepresentation,
562
563
  expected_ref: str,
563
564
  anno: AnnotationLayer,
564
- residue_mode: ResidueMode,
565
+ coordinate_type: CoordinateType,
565
566
  ) -> bool:
566
567
  """Return whether or not reference changes are the same.
567
568
 
@@ -573,7 +574,7 @@ class ManeTranscript:
573
574
  position change
574
575
  :param expected_ref: Reference at position given during input
575
576
  :param anno: Annotation layer we are starting from
576
- :param residue_mode: Residue mode for ``start_pos`` and ``end_pos``
577
+ :param coordinate_type: Coordinate type for ``start_pos`` and ``end_pos``
577
578
  :return: ``True`` if reference check passes. ``False`` otherwise.
578
579
  """
579
580
  if anno == AnnotationLayer.CDNA:
@@ -581,7 +582,7 @@ class ManeTranscript:
581
582
  end_pos += coding_start_site
582
583
 
583
584
  ref, _ = self.seqrepo_access.get_reference_sequence(
584
- ac, start=start_pos, end=end_pos, residue_mode=residue_mode
585
+ ac, start=start_pos, end=end_pos, coordinate_type=coordinate_type
585
586
  )
586
587
  if ref is None:
587
588
  return False
@@ -597,7 +598,7 @@ class ManeTranscript:
597
598
  mane_transcript.refseq,
598
599
  start=mane_start_pos,
599
600
  end=mane_end_pos if mane_start_pos != mane_end_pos else None,
600
- residue_mode=residue_mode,
601
+ coordinate_type=coordinate_type,
601
602
  )
602
603
  if not mane_ref:
603
604
  _logger.info("Unable to validate reference for MANE Transcript")
@@ -618,7 +619,7 @@ class ManeTranscript:
618
619
 
619
620
  return True
620
621
 
621
- def _validate_index(
622
+ def validate_index(
622
623
  self, ac: str, pos: tuple[int, int], coding_start_site: int
623
624
  ) -> bool:
624
625
  """Validate that positions actually exist on accession
@@ -632,7 +633,10 @@ class ManeTranscript:
632
633
  end_pos = pos[1] + coding_start_site
633
634
  return bool(
634
635
  self.seqrepo_access.get_reference_sequence(
635
- ac, start=start_pos, end=end_pos, residue_mode=ResidueMode.INTER_RESIDUE
636
+ ac,
637
+ start=start_pos,
638
+ end=end_pos,
639
+ coordinate_type=CoordinateType.INTER_RESIDUE,
636
640
  )[0]
637
641
  )
638
642
 
@@ -689,7 +693,7 @@ class ManeTranscript:
689
693
  start_annotation_layer: AnnotationLayer,
690
694
  gene: str | None = None,
691
695
  ref: str | None = None,
692
- residue_mode: ResidueMode = ResidueMode.RESIDUE,
696
+ coordinate_type: CoordinateType = CoordinateType.RESIDUE,
693
697
  mane_transcripts: set | None = None,
694
698
  alt_ac: str | None = None,
695
699
  end_annotation_layer: EndAnnotationLayer | None = None,
@@ -699,8 +703,8 @@ class ManeTranscript:
699
703
  information.
700
704
 
701
705
  >>> import asyncio
702
- >>> from cool_seq_tool.app import CoolSeqTool
703
- >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
706
+ >>> from cool_seq_tool import CoolSeqTool
707
+ >>> from cool_seq_tool.schemas import AnnotationLayer, CoordinateType
704
708
  >>> mane_mapper = CoolSeqTool().mane_transcript
705
709
  >>> mane_transcripts = {
706
710
  ... "ENST00000646891.2",
@@ -714,7 +718,7 @@ class ManeTranscript:
714
718
  ... 599,
715
719
  ... gene="BRAF",
716
720
  ... start_annotation_layer=AnnotationLayer.PROTEIN,
717
- ... residue_mode=ResidueMode.INTER_RESIDUE,
721
+ ... coordinate_type=CoordinateType.INTER_RESIDUE,
718
722
  ... mane_transcripts=mane_transcripts,
719
723
  ... )
720
724
  ... )
@@ -731,7 +735,7 @@ class ManeTranscript:
731
735
  :param start_annotation_layer: Starting annotation layer
732
736
  :param gene: HGNC gene symbol
733
737
  :param ref: Reference at position given during input
734
- :param residue_mode: Residue mode for ``start_pos`` and ``end_pos``
738
+ :param coordinate_type: Coordinate type for ``start_pos`` and ``end_pos``
735
739
  :param mane_transcripts: Attempted mane transcripts that were not compatible
736
740
  :param alt_ac: Genomic accession
737
741
  :param end_annotation_layer: The end annotation layer. If not provided, will be
@@ -767,8 +771,8 @@ class ManeTranscript:
767
771
  )
768
772
 
769
773
  lcr_result = None
770
- start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
771
- residue_mode = ResidueMode.INTER_RESIDUE
774
+ start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
775
+ coordinate_type = CoordinateType.INTER_RESIDUE
772
776
 
773
777
  is_p_or_c_start_anno = True
774
778
  if start_annotation_layer == AnnotationLayer.PROTEIN:
@@ -856,7 +860,7 @@ class ManeTranscript:
856
860
  {},
857
861
  ref,
858
862
  AnnotationLayer.PROTEIN,
859
- residue_mode,
863
+ coordinate_type,
860
864
  )
861
865
  elif start_annotation_layer == AnnotationLayer.CDNA:
862
866
  valid_references = self._validate_references(
@@ -867,7 +871,7 @@ class ManeTranscript:
867
871
  {},
868
872
  ref,
869
873
  AnnotationLayer.CDNA,
870
- residue_mode,
874
+ coordinate_type,
871
875
  )
872
876
  else:
873
877
  valid_references = self._validate_references(
@@ -878,7 +882,7 @@ class ManeTranscript:
878
882
  {},
879
883
  ref,
880
884
  AnnotationLayer.GENOMIC,
881
- residue_mode,
885
+ coordinate_type,
882
886
  )
883
887
 
884
888
  if not valid_references:
@@ -902,7 +906,7 @@ class ManeTranscript:
902
906
  gene,
903
907
  row["pro_ac"],
904
908
  lcr_c_data.pos,
905
- g["strand"],
909
+ g.strand,
906
910
  lcr_c_data.status,
907
911
  )
908
912
  coding_start_site = 0
@@ -910,7 +914,7 @@ class ManeTranscript:
910
914
  ac = lcr_result.refseq or lcr_result.ensembl
911
915
  pos = lcr_result.pos
912
916
 
913
- if not self._validate_index(ac, pos, coding_start_site):
917
+ if not self.validate_index(ac, pos, coding_start_site):
914
918
  _logger.warning(
915
919
  "%s are not valid positions on %s with coding start site %s",
916
920
  pos,
@@ -924,7 +928,7 @@ class ManeTranscript:
924
928
  gene,
925
929
  row["pro_ac"],
926
930
  lcr_c_data.pos,
927
- g["strand"],
931
+ g.strand,
928
932
  lcr_c_data.status,
929
933
  ),
930
934
  cdna=lcr_c_data,
@@ -936,7 +940,7 @@ class ManeTranscript:
936
940
  cds = lcr_result_dict[k].get("coding_start_site", 0)
937
941
  ac = lcr_result_dict[k]["refseq"] or lcr_result_dict[k]["ensembl"]
938
942
  pos = lcr_result_dict[k]["pos"]
939
- if not self._validate_index(ac, pos, cds):
943
+ if not self.validate_index(ac, pos, cds):
940
944
  valid = False
941
945
  _logger.warning(
942
946
  "%s are not valid positions on %s with coding start site %s",
@@ -959,13 +963,22 @@ class ManeTranscript:
959
963
  gene: str | None = None,
960
964
  ref: str | None = None,
961
965
  try_longest_compatible: bool = False,
962
- residue_mode: Literal[ResidueMode.RESIDUE]
963
- | Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.RESIDUE,
966
+ coordinate_type: Literal[CoordinateType.RESIDUE]
967
+ | Literal[CoordinateType.INTER_RESIDUE] = CoordinateType.RESIDUE,
964
968
  ) -> DataRepresentation | CdnaRepresentation | None:
965
- """Return MANE transcript.
966
-
967
- >>> from cool_seq_tool.app import CoolSeqTool
968
- >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
969
+ """Return MANE representation
970
+
971
+ If ``start_annotation_layer`` is ``AnnotationLayer.PROTEIN``, will return
972
+ ``AnnotationLayer.PROTEIN`` representation.
973
+ If ``start_annotation_layer`` is ``AnnotationLayer.CDNA``, will return
974
+ ``AnnotationLayer.CDNA`` representation.
975
+ If ``start_annotation_layer`` is ``AnnotationLayer.GENOMIC`` will return
976
+ ``AnnotationLayer.CDNA`` representation if ``gene`` is provided and
977
+ ``AnnotationLayer.GENOMIC`` GRCh38 representation if ``gene`` is NOT
978
+ provided.
979
+
980
+ >>> from cool_seq_tool import CoolSeqTool
981
+ >>> from cool_seq_tool.schemas import AnnotationLayer, CoordinateType
969
982
  >>> import asyncio
970
983
  >>> mane_mapper = CoolSeqTool().mane_transcript
971
984
  >>> result = asyncio.run(
@@ -973,7 +986,7 @@ class ManeTranscript:
973
986
  ... "NP_004324.2",
974
987
  ... 599,
975
988
  ... AnnotationLayer.PROTEIN,
976
- ... residue_mode=ResidueMode.INTER_RESIDUE,
989
+ ... coordinate_type=CoordinateType.INTER_RESIDUE,
977
990
  ... )
978
991
  ... )
979
992
  >>> result.gene, result.refseq, result.status
@@ -983,17 +996,21 @@ class ManeTranscript:
983
996
  :param start_pos: Start position change
984
997
  :param end_pos: End position change
985
998
  :param start_annotation_layer: Starting annotation layer.
986
- :param gene: HGNC gene symbol
999
+ :param gene: HGNC gene symbol.
1000
+ If ``gene`` is not provided and ``start_annotation_layer`` is
1001
+ ``AnnotationLayer.GENOMIC``, will return GRCh38 representation.
1002
+ If ``gene`` is provided and ``start_annotation_layer`` is
1003
+ ``AnnotationLayer.GENOMIC``, will return cDNA representation.
987
1004
  :param ref: Reference at position given during input
988
1005
  :param try_longest_compatible: ``True`` if should try longest compatible remaining
989
1006
  if mane transcript was not compatible. ``False`` otherwise.
990
- :param ResidueMode residue_mode: Starting residue mode for ``start_pos`` and
991
- ``end_pos``. Will always return coordinates in inter-residue
1007
+ :param CoordinateType coordinate_type: Starting Coordinate type for
1008
+ ``start_pos`` and ``end_pos``. Will always return inter-residue coordinates
992
1009
  :return: MANE data or longest transcript compatible data if validation
993
1010
  checks are correct. Will return inter-residue coordinates. Else, ``None``.
994
1011
  """
995
- start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
996
- residue_mode = ResidueMode.INTER_RESIDUE
1012
+ start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
1013
+ coordinate_type = CoordinateType.INTER_RESIDUE
997
1014
  if ref:
998
1015
  ref = ref[: end_pos - start_pos]
999
1016
 
@@ -1012,7 +1029,7 @@ class ManeTranscript:
1012
1029
  if g is None:
1013
1030
  return None
1014
1031
  # Get mane data for gene
1015
- mane_data = self.mane_transcript_mappings.get_gene_mane_data(g["gene"])
1032
+ mane_data = self.mane_transcript_mappings.get_gene_mane_data(g.gene)
1016
1033
  if not mane_data:
1017
1034
  return None
1018
1035
 
@@ -1039,10 +1056,8 @@ class ManeTranscript:
1039
1056
  if not mane:
1040
1057
  continue
1041
1058
 
1042
- if not mane.alt_ac:
1043
- g_alt_ac = g.get("alt_ac")
1044
- if g_alt_ac:
1045
- mane.alt_ac = g_alt_ac
1059
+ if not mane.alt_ac and g.alt_ac:
1060
+ mane.alt_ac = g.alt_ac
1046
1061
 
1047
1062
  valid_reading_frame = self._validate_reading_frames(
1048
1063
  c_ac, c_pos[0], c_pos[1], mane
@@ -1058,13 +1073,13 @@ class ManeTranscript:
1058
1073
  if ref:
1059
1074
  valid_references = self._validate_references(
1060
1075
  ac,
1061
- g["coding_start_site"],
1076
+ g.coding_start_site,
1062
1077
  start_pos,
1063
1078
  end_pos,
1064
1079
  mane,
1065
1080
  ref,
1066
1081
  start_annotation_layer,
1067
- residue_mode,
1082
+ coordinate_type,
1068
1083
  )
1069
1084
  if not valid_references:
1070
1085
  continue
@@ -1078,8 +1093,8 @@ class ManeTranscript:
1078
1093
  end_pos,
1079
1094
  AnnotationLayer.PROTEIN,
1080
1095
  ref=ref,
1081
- gene=g["gene"],
1082
- residue_mode=residue_mode,
1096
+ gene=g.gene,
1097
+ coordinate_type=coordinate_type,
1083
1098
  mane_transcripts=mane_transcripts,
1084
1099
  )
1085
1100
  return await self.get_longest_compatible_transcript(
@@ -1088,34 +1103,61 @@ class ManeTranscript:
1088
1103
  AnnotationLayer.CDNA,
1089
1104
  ref=ref,
1090
1105
  gene=g["gene"],
1091
- residue_mode=residue_mode,
1106
+ coordinate_type=coordinate_type,
1092
1107
  mane_transcripts=mane_transcripts,
1093
1108
  )
1094
1109
  return None
1095
1110
  if start_annotation_layer == AnnotationLayer.GENOMIC:
1111
+ if not gene:
1112
+ return await self.g_to_grch38(
1113
+ ac,
1114
+ start_pos,
1115
+ end_pos,
1116
+ get_mane_genes=True,
1117
+ coordinate_type=coordinate_type,
1118
+ )
1119
+
1096
1120
  return await self.g_to_mane_c(
1097
- ac, start_pos, end_pos, gene=gene, residue_mode=residue_mode
1121
+ ac, start_pos, end_pos, gene, coordinate_type=coordinate_type
1098
1122
  )
1099
1123
  _logger.warning("Annotation layer not supported: %s", start_annotation_layer)
1100
1124
  return None
1101
1125
 
1102
- async def g_to_grch38(self, ac: str, start_pos: int, end_pos: int) -> dict | None:
1126
+ async def g_to_grch38(
1127
+ self,
1128
+ ac: str,
1129
+ start_pos: int,
1130
+ end_pos: int,
1131
+ get_mane_genes: bool = False,
1132
+ coordinate_type: CoordinateType = CoordinateType.RESIDUE,
1133
+ ) -> GenomicRepresentation | None:
1103
1134
  """Return genomic coordinate on GRCh38 when not given gene context.
1104
1135
 
1105
1136
  :param ac: Genomic accession
1106
1137
  :param start_pos: Genomic start position
1107
1138
  :param end_pos: Genomic end position
1108
- :return: NC accession, start and end pos on GRCh38 assembly
1139
+ :param get_mane_genes: ``True`` if mane genes for genomic position should be
1140
+ included in response. ``False``, otherwise.
1141
+ :param coordinate_type: Coordinate type for ``start_pos`` and ``end_pos``
1142
+ :return: GRCh38 genomic representation (accession and start/end inter-residue
1143
+ position)
1109
1144
  """
1110
- if end_pos is None:
1111
- end_pos = start_pos
1145
+ start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
1112
1146
 
1113
1147
  # Checking to see what chromosome and assembly we're on
1114
1148
  descr = await self.uta_db.get_chr_assembly(ac)
1115
1149
  if not descr:
1116
1150
  # Already GRCh38 assembly
1117
- if self._validate_index(ac, (start_pos, end_pos), 0):
1118
- return {"ac": ac, "pos": (start_pos, end_pos)}
1151
+ if self.validate_index(ac, (start_pos, end_pos), 0):
1152
+ return GenomicRepresentation(
1153
+ ac=ac,
1154
+ pos=(start_pos, end_pos),
1155
+ mane_genes=self.mane_transcript_mappings.get_genomic_mane_genes(
1156
+ ac, start_pos + 1, end_pos
1157
+ )
1158
+ if get_mane_genes
1159
+ else [],
1160
+ )
1119
1161
  return None
1120
1162
  chromosome, assembly = descr
1121
1163
  is_same_pos = start_pos == end_pos
@@ -1145,13 +1187,21 @@ class ManeTranscript:
1145
1187
  newest_ac = await self.uta_db.get_newest_assembly_ac(ac)
1146
1188
  if newest_ac:
1147
1189
  ac = newest_ac[0]
1148
- if self._validate_index(ac, (start_pos, end_pos), 0):
1149
- return {"ac": ac, "pos": (start_pos, end_pos)}
1190
+ if self.validate_index(ac, (start_pos, end_pos), 0):
1191
+ return GenomicRepresentation(
1192
+ ac=ac,
1193
+ pos=(start_pos, end_pos),
1194
+ mane_genes=self.mane_transcript_mappings.get_genomic_mane_genes(
1195
+ ac, start_pos + 1, end_pos
1196
+ )
1197
+ if get_mane_genes
1198
+ else [],
1199
+ )
1150
1200
  return None
1151
1201
 
1152
1202
  @staticmethod
1153
1203
  def get_mane_c_pos_change(
1154
- mane_tx_genomic_data: dict, coding_start_site: int
1204
+ mane_tx_genomic_data: GenomicTxMetadata, coding_start_site: int
1155
1205
  ) -> tuple[int, int]:
1156
1206
  """Get mane c position change
1157
1207
 
@@ -1159,12 +1209,12 @@ class ManeTranscript:
1159
1209
  :param coding_start_site: Coding start site
1160
1210
  :return: cDNA pos start, cDNA pos end
1161
1211
  """
1162
- tx_pos_range = mane_tx_genomic_data["tx_pos_range"]
1163
- alt_pos_change = mane_tx_genomic_data["alt_pos_change"]
1212
+ tx_pos_range = mane_tx_genomic_data.tx_pos_range
1213
+ pos_change = mane_tx_genomic_data.pos_change
1164
1214
 
1165
1215
  mane_c_pos_change = (
1166
- tx_pos_range[0] + alt_pos_change[0] - coding_start_site,
1167
- tx_pos_range[1] - alt_pos_change[1] - coding_start_site,
1216
+ tx_pos_range[0] + pos_change[0] - coding_start_site,
1217
+ tx_pos_range[1] - pos_change[1] - coding_start_site,
1168
1218
  )
1169
1219
 
1170
1220
  if mane_c_pos_change[0] > mane_c_pos_change[1]:
@@ -1176,16 +1226,13 @@ class ManeTranscript:
1176
1226
  ac: str,
1177
1227
  start_pos: int,
1178
1228
  end_pos: int,
1179
- gene: str | None = None,
1180
- residue_mode: ResidueMode = ResidueMode.RESIDUE,
1181
- ) -> GenomicRepresentation | CdnaRepresentation | None:
1229
+ gene: str,
1230
+ coordinate_type: CoordinateType = CoordinateType.RESIDUE,
1231
+ ) -> CdnaRepresentation | None:
1182
1232
  """Return MANE Transcript on the c. coordinate.
1183
1233
 
1184
- If an arg for ``gene`` is provided, lifts to GRCh38, then gets MANE cDNA
1185
- representation.
1186
-
1187
1234
  >>> import asyncio
1188
- >>> from cool_seq_tool.app import CoolSeqTool
1235
+ >>> from cool_seq_tool import CoolSeqTool
1189
1236
  >>> cst = CoolSeqTool()
1190
1237
  >>> result = asyncio.run(
1191
1238
  ... cst.mane_transcript.g_to_mane_c(
@@ -1198,33 +1245,16 @@ class ManeTranscript:
1198
1245
  <TranscriptPriority.MANE_SELECT: 'mane_select'>
1199
1246
  >>> del cst
1200
1247
 
1201
- Locating a MANE transcript requires a ``gene`` symbol argument -- if none is
1202
- given, this method will only lift over to genomic coordinates on GRCh38.
1203
-
1204
1248
  :param ac: Transcript accession on g. coordinate
1205
1249
  :param start_pos: genomic start position
1206
1250
  :param end_pos: genomic end position
1207
1251
  :param gene: HGNC gene symbol
1208
- :param residue_mode: Starting residue mode for ``start_pos`` and ``end_pos``.
1209
- Will always return coordinates in inter-residue.
1210
- :return: MANE Transcripts with cDNA change on c. coordinate if gene
1211
- is provided. Else, GRCh38 data
1252
+ :param coordinate_type: Starting Coordinate type for ``start_pos`` and
1253
+ ``end_pos``. Will always return inter-residue coordinates.
1254
+ :return: MANE Transcripts with cDNA change on c. coordinate
1212
1255
  """
1213
- start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
1214
- residue_mode = ResidueMode.INTER_RESIDUE
1215
-
1216
- # If gene not provided, return GRCh38
1217
- if not gene:
1218
- grch38 = await self.g_to_grch38(ac, start_pos, end_pos)
1219
- if not grch38:
1220
- return None
1221
-
1222
- return GenomicRepresentation(
1223
- refseq=grch38["ac"],
1224
- pos=grch38["pos"],
1225
- status=TranscriptPriority.GRCH38,
1226
- alt_ac=grch38["ac"],
1227
- )
1256
+ start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
1257
+ coordinate_type = CoordinateType.INTER_RESIDUE
1228
1258
 
1229
1259
  if not await self.uta_db.validate_genomic_ac(ac):
1230
1260
  _logger.warning("Genomic accession does not exist: %s", ac)
@@ -1238,12 +1268,18 @@ class ManeTranscript:
1238
1268
  mane_c_ac = current_mane_data["RefSeq_nuc"]
1239
1269
 
1240
1270
  # Liftover to GRCh38
1241
- grch38 = await self.g_to_grch38(ac, start_pos, end_pos)
1271
+ grch38 = await self.g_to_grch38(
1272
+ ac,
1273
+ start_pos,
1274
+ end_pos,
1275
+ get_mane_genes=False,
1276
+ coordinate_type=coordinate_type,
1277
+ )
1242
1278
  mane_tx_genomic_data = None
1243
1279
  if grch38:
1244
1280
  # GRCh38 -> MANE C
1245
1281
  mane_tx_genomic_data = await self.uta_db.get_mane_c_genomic_data(
1246
- mane_c_ac, grch38["ac"], grch38["pos"][0], grch38["pos"][1]
1282
+ mane_c_ac, grch38.ac, grch38.pos[0], grch38.pos[1]
1247
1283
  )
1248
1284
 
1249
1285
  if not grch38 or not mane_tx_genomic_data:
@@ -1255,15 +1291,13 @@ class ManeTranscript:
1255
1291
  continue
1256
1292
  _logger.info("Not using most recent assembly")
1257
1293
 
1258
- coding_start_site = mane_tx_genomic_data["coding_start_site"]
1259
- coding_end_site = mane_tx_genomic_data["coding_end_site"]
1294
+ coding_start_site = mane_tx_genomic_data.coding_start_site
1295
+ coding_end_site = mane_tx_genomic_data.coding_end_site
1260
1296
  mane_c_pos_change = self.get_mane_c_pos_change(
1261
1297
  mane_tx_genomic_data, coding_start_site
1262
1298
  )
1263
1299
 
1264
- if not self._validate_index(
1265
- mane_c_ac, mane_c_pos_change, coding_start_site
1266
- ):
1300
+ if not self.validate_index(mane_c_ac, mane_c_pos_change, coding_start_site):
1267
1301
  _logger.warning(
1268
1302
  "%s are not valid positions on %s with coding start site %s",
1269
1303
  mane_c_pos_change,
@@ -1284,7 +1318,7 @@ class ManeTranscript:
1284
1318
  ),
1285
1319
  refseq_c_ac=current_mane_data["RefSeq_nuc"],
1286
1320
  ensembl_c_ac=current_mane_data["Ensembl_nuc"],
1287
- alt_ac=grch38["ac"] if grch38 else None,
1321
+ alt_ac=grch38.ac if grch38 else None,
1288
1322
  )
1289
1323
  return None
1290
1324
 
@@ -1294,7 +1328,7 @@ class ManeTranscript:
1294
1328
  start_pos: int,
1295
1329
  end_pos: int,
1296
1330
  gene: str | None = None,
1297
- residue_mode: ResidueMode = ResidueMode.RESIDUE,
1331
+ coordinate_type: CoordinateType = CoordinateType.RESIDUE,
1298
1332
  try_longest_compatible: bool = False,
1299
1333
  ) -> dict | None:
1300
1334
  """Given GRCh38 genomic representation, return protein representation.
@@ -1307,8 +1341,8 @@ class ManeTranscript:
1307
1341
  :param start_pos: Start position
1308
1342
  :param end_pos: End position
1309
1343
  :param gene: HGNC gene symbol
1310
- :param residue_mode: Starting residue mode for ``start_pos`` and ``end_pos``. Will
1311
- always return coordinates as inter-residue.
1344
+ :param coordinate_type: Starting Coordinate type for ``start_pos`` and
1345
+ ``end_pos``. Will always return inter-residue coordinates.
1312
1346
  :param try_longest_compatible: ``True`` if should try longest compatible remaining
1313
1347
  if mane transcript(s) not compatible. ``False`` otherwise.
1314
1348
  :return: If successful, return MANE data or longest compatible remaining (if
@@ -1327,8 +1361,8 @@ class ManeTranscript:
1327
1361
  return None
1328
1362
 
1329
1363
  # Step 2: Get inter-residue position
1330
- start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
1331
- residue_mode = ResidueMode.INTER_RESIDUE
1364
+ start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, coordinate_type)
1365
+ coordinate_type = CoordinateType.INTER_RESIDUE
1332
1366
 
1333
1367
  # Step 3: Try getting MANE protein representation
1334
1368
  mane_transcripts = set() # Used if getting longest compatible remaining
@@ -1344,16 +1378,14 @@ class ManeTranscript:
1344
1378
  continue
1345
1379
 
1346
1380
  # Get MANE C positions
1347
- coding_start_site = mane_tx_genomic_data["coding_start_site"]
1348
- coding_end_site = mane_tx_genomic_data["coding_end_site"]
1381
+ coding_start_site = mane_tx_genomic_data.coding_start_site
1382
+ coding_end_site = mane_tx_genomic_data.coding_end_site
1349
1383
  mane_c_pos_change = self.get_mane_c_pos_change(
1350
1384
  mane_tx_genomic_data, coding_start_site
1351
1385
  )
1352
1386
 
1353
1387
  # Validate MANE C positions
1354
- if not self._validate_index(
1355
- mane_c_ac, mane_c_pos_change, coding_start_site
1356
- ):
1388
+ if not self.validate_index(mane_c_ac, mane_c_pos_change, coding_start_site):
1357
1389
  _logger.warning(
1358
1390
  "%s are not valid positions on %s with coding start site %s",
1359
1391
  mane_c_pos_change,
@@ -1367,7 +1399,7 @@ class ManeTranscript:
1367
1399
  cdna=self._get_c_data(
1368
1400
  (coding_start_site, coding_end_site),
1369
1401
  mane_c_pos_change,
1370
- mane_tx_genomic_data["strand"],
1402
+ mane_tx_genomic_data.strand,
1371
1403
  TranscriptPriority(
1372
1404
  "_".join(current_mane_data["MANE_status"].split()).lower()
1373
1405
  ),
@@ -1383,7 +1415,7 @@ class ManeTranscript:
1383
1415
  start_pos,
1384
1416
  end_pos,
1385
1417
  AnnotationLayer.GENOMIC,
1386
- residue_mode=residue_mode,
1418
+ coordinate_type=coordinate_type,
1387
1419
  alt_ac=alt_ac,
1388
1420
  end_annotation_layer=EndAnnotationLayer.PROTEIN_AND_CDNA,
1389
1421
  mane_transcripts=mane_transcripts,