cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. cool_seq_tool/__init__.py +7 -11
  2. cool_seq_tool/app.py +44 -24
  3. cool_seq_tool/handlers/__init__.py +1 -0
  4. cool_seq_tool/handlers/seqrepo_access.py +27 -25
  5. cool_seq_tool/mappers/__init__.py +3 -1
  6. cool_seq_tool/mappers/alignment.py +5 -6
  7. cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
  8. cool_seq_tool/mappers/liftover.py +90 -0
  9. cool_seq_tool/mappers/mane_transcript.py +208 -113
  10. cool_seq_tool/resources/__init__.py +1 -0
  11. cool_seq_tool/resources/data_files.py +93 -0
  12. cool_seq_tool/resources/status.py +153 -0
  13. cool_seq_tool/schemas.py +92 -54
  14. cool_seq_tool/sources/__init__.py +1 -0
  15. cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
  16. cool_seq_tool/sources/transcript_mappings.py +41 -32
  17. cool_seq_tool/sources/uta_database.py +96 -249
  18. cool_seq_tool/utils.py +44 -4
  19. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
  20. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
  21. cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
  22. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
  23. cool_seq_tool/api.py +0 -42
  24. cool_seq_tool/data/__init__.py +0 -2
  25. cool_seq_tool/data/data_downloads.py +0 -89
  26. cool_seq_tool/paths.py +0 -28
  27. cool_seq_tool/routers/__init__.py +0 -16
  28. cool_seq_tool/routers/default.py +0 -125
  29. cool_seq_tool/routers/mane.py +0 -98
  30. cool_seq_tool/routers/mappings.py +0 -155
  31. cool_seq_tool/version.py +0 -2
  32. cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
  33. /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
  34. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
@@ -11,15 +11,17 @@ Steps:
11
11
  In addition to a mapper utility class, this module also defines several vocabulary
12
12
  constraints and data models for coordinate representation.
13
13
  """
14
+
14
15
  import logging
15
16
  import math
16
17
  from enum import Enum
17
- from typing import Dict, List, Optional, Set, Tuple, Union
18
+ from typing import Literal
18
19
 
19
20
  import polars as pl
20
21
  from pydantic import BaseModel
21
22
 
22
23
  from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
24
+ from cool_seq_tool.mappers.liftover import LiftOver
23
25
  from cool_seq_tool.schemas import (
24
26
  AnnotationLayer,
25
27
  Assembly,
@@ -34,7 +36,7 @@ from cool_seq_tool.sources import (
34
36
  )
35
37
  from cool_seq_tool.utils import get_inter_residue_pos
36
38
 
37
- logger = logging.getLogger(__name__)
39
+ _logger = logging.getLogger(__name__)
38
40
 
39
41
 
40
42
  class EndAnnotationLayer(str, Enum):
@@ -50,10 +52,10 @@ class EndAnnotationLayer(str, Enum):
50
52
  class DataRepresentation(BaseModel):
51
53
  """Define object model for final output representation"""
52
54
 
53
- gene: Optional[str] = None
55
+ gene: str | None = None
54
56
  refseq: str
55
- ensembl: Optional[str] = None
56
- pos: Tuple[int, int]
57
+ ensembl: str | None = None
58
+ pos: tuple[int, int]
57
59
  strand: Strand
58
60
  status: TranscriptPriority
59
61
 
@@ -63,14 +65,14 @@ class CdnaRepresentation(DataRepresentation):
63
65
 
64
66
  coding_start_site: int
65
67
  coding_end_site: int
66
- alt_ac: Optional[str] = None
68
+ alt_ac: str | None = None
67
69
 
68
70
 
69
71
  class GenomicRepresentation(BaseModel):
70
72
  """Define object model for genomic representation"""
71
73
 
72
74
  refseq: str
73
- pos: Tuple[int, int]
75
+ pos: tuple[int, int]
74
76
  status: TranscriptPriority
75
77
  alt_ac: str
76
78
 
@@ -91,6 +93,7 @@ class ManeTranscript:
91
93
  transcript_mappings: TranscriptMappings,
92
94
  mane_transcript_mappings: ManeTranscriptMappings,
93
95
  uta_db: UtaDatabase,
96
+ liftover: LiftOver,
94
97
  ) -> None:
95
98
  """Initialize the ManeTranscript class.
96
99
 
@@ -105,7 +108,7 @@ class ManeTranscript:
105
108
 
106
109
  >>> import asyncio
107
110
  >>> result = asyncio.run(mane_mapper.g_to_grch38("NC_000001.11", 100, 200))
108
- >>> result['ac']
111
+ >>> result["ac"]
109
112
  'NC_000001.11'
110
113
 
111
114
  See the :ref:`Usage section <async_note>` for more information.
@@ -116,11 +119,13 @@ class ManeTranscript:
116
119
  :param mane_transcript_mappings: Access to MANE Transcript accession mapping
117
120
  data
118
121
  :param uta_db: UtaDatabase instance to give access to query UTA database
122
+ :param liftover: Instance to provide mapping between human genome assemblies
119
123
  """
120
124
  self.seqrepo_access = seqrepo_access
121
125
  self.transcript_mappings = transcript_mappings
122
126
  self.mane_transcript_mappings = mane_transcript_mappings
123
127
  self.uta_db = uta_db
128
+ self.liftover = liftover
124
129
 
125
130
  @staticmethod
126
131
  def _get_reading_frame(pos: int) -> int:
@@ -135,7 +140,7 @@ class ManeTranscript:
135
140
  return pos_mod_3
136
141
 
137
142
  @staticmethod
138
- def _p_to_c_pos(start: int, end: int) -> Tuple[int, int]:
143
+ def _p_to_c_pos(start: int, end: int) -> tuple[int, int]:
139
144
  """Return cDNA position given a protein position.
140
145
 
141
146
  :param start: Start protein position. Inter-residue coordinates
@@ -148,7 +153,7 @@ class ManeTranscript:
148
153
 
149
154
  async def _p_to_c(
150
155
  self, ac: str, start_pos: int, end_pos: int
151
- ) -> Optional[Tuple[str, Tuple[int, int]]]:
156
+ ) -> tuple[str, tuple[int, int]] | None:
152
157
  """Convert protein (p.) annotation to cDNA (c.) annotation.
153
158
 
154
159
  :param ac: Protein accession
@@ -167,16 +172,16 @@ class ManeTranscript:
167
172
  elif ac.startswith("ENSP"):
168
173
  ac = self.transcript_mappings.ensp_to_enst[ac]
169
174
  else:
170
- logger.warning("Unable to find accession: %s", ac)
175
+ _logger.warning("Unable to find accession: %s", ac)
171
176
  return None
172
177
  except KeyError:
173
- logger.warning("%s not found in transcript_mappings", ac)
178
+ _logger.warning("%s not found in transcript_mappings", ac)
174
179
  return None
175
180
 
176
181
  pos = self._p_to_c_pos(start_pos, end_pos)
177
182
  return ac, pos
178
183
 
179
- async def _c_to_g(self, ac: str, pos: Tuple[int, int]) -> Optional[Dict]:
184
+ async def _c_to_g(self, ac: str, pos: tuple[int, int]) -> dict | None:
180
185
  """Get g. annotation from c. annotation.
181
186
 
182
187
  :param ac: cDNA accession
@@ -195,7 +200,7 @@ class ManeTranscript:
195
200
  0
196
201
  ]
197
202
  ):
198
- logger.warning("Ensembl transcript not found: %s", ac)
203
+ _logger.warning("Ensembl transcript not found: %s", ac)
199
204
  return None
200
205
 
201
206
  temp_ac = ac.split(".")[0]
@@ -205,7 +210,7 @@ class ManeTranscript:
205
210
  # c. coordinate does not contain cds start, so we need to add it
206
211
  cds_start_end = await self.uta_db.get_cds_start_end(temp_ac)
207
212
  if not cds_start_end:
208
- logger.warning("Accession %s not found in UTA", temp_ac)
213
+ _logger.warning("Accession %s not found in UTA", temp_ac)
209
214
  return None
210
215
  coding_start_site = cds_start_end[0]
211
216
  pos = pos[0] + coding_start_site, pos[1] + coding_start_site
@@ -214,16 +219,108 @@ class ManeTranscript:
214
219
  ac, pos, AnnotationLayer.CDNA, coding_start_site=coding_start_site
215
220
  )
216
221
 
222
+ async def _liftover_to_38(self, genomic_tx_data: dict) -> None:
223
+ """Liftover genomic_tx_data to hg38 assembly.
224
+
225
+ :param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
226
+ strand. This will be mutated in-place if not GRCh38 assembly.
227
+ """
228
+ descr = await self.uta_db.get_chr_assembly(genomic_tx_data["alt_ac"])
229
+ if descr is None:
230
+ # already grch38
231
+ return
232
+ chromosome, _ = descr
233
+
234
+ query = f"""
235
+ SELECT DISTINCT alt_ac
236
+ FROM {self.uta_db.schema}.tx_exon_aln_v
237
+ WHERE tx_ac = '{genomic_tx_data['tx_ac']}';
238
+ """ # noqa: S608
239
+ nc_acs = await self.uta_db.execute_query(query)
240
+ nc_acs = [nc_ac[0] for nc_ac in nc_acs]
241
+ if nc_acs == [genomic_tx_data["alt_ac"]]:
242
+ _logger.warning(
243
+ "UTA does not have GRCh38 assembly for %s",
244
+ genomic_tx_data["alt_ac"].split(".")[0],
245
+ )
246
+ return
247
+
248
+ # Get most recent assembly version position
249
+ # Liftover range
250
+ self._set_liftover(
251
+ genomic_tx_data, "alt_pos_range", chromosome, Assembly.GRCH38
252
+ )
253
+
254
+ # Liftover changes range
255
+ self._set_liftover(
256
+ genomic_tx_data, "alt_pos_change_range", chromosome, Assembly.GRCH38
257
+ )
258
+
259
+ # Change alt_ac to most recent
260
+ if genomic_tx_data["alt_ac"].startswith("EN"):
261
+ order_by_cond = "ORDER BY alt_ac DESC;"
262
+ else:
263
+ order_by_cond = """
264
+ ORDER BY CAST(SUBSTR(alt_ac, position('.' in alt_ac) + 1,
265
+ LENGTH(alt_ac)) AS INT) DESC;
266
+ """
267
+ query = f"""
268
+ SELECT alt_ac
269
+ FROM {self.uta_db.schema}.genomic
270
+ WHERE alt_ac LIKE '{genomic_tx_data['alt_ac'].split('.')[0]}%'
271
+ {order_by_cond}
272
+ """ # noqa: S608
273
+ nc_acs = await self.uta_db.execute_query(query)
274
+ genomic_tx_data["alt_ac"] = nc_acs[0][0]
275
+
276
+ def _set_liftover(
277
+ self,
278
+ genomic_tx_data: dict,
279
+ key: str,
280
+ chromosome: str,
281
+ liftover_to_assembly: Assembly,
282
+ ) -> None:
283
+ """Update genomic_tx_data to have coordinates for given assembly.
284
+
285
+ :param genomic_tx_data: Dictionary containing gene, nc_accession, alt_pos, and
286
+ strand
287
+ :param key: Key to access coordinate positions
288
+ :param chromosome: Chromosome, must be prefixed with ``chr``
289
+ :param liftover_to_assembly: Assembly to liftover to
290
+ """
291
+ liftover_start_i = self.liftover.get_liftover(
292
+ chromosome, genomic_tx_data[key][0], liftover_to_assembly
293
+ )
294
+ if liftover_start_i is None:
295
+ _logger.warning(
296
+ "Unable to liftover position %s on %s",
297
+ genomic_tx_data[key][0],
298
+ chromosome,
299
+ )
300
+ return
301
+
302
+ liftover_end_i = self.liftover.get_liftover(
303
+ chromosome, genomic_tx_data[key][1], liftover_to_assembly
304
+ )
305
+ if liftover_end_i is None:
306
+ _logger.warning(
307
+ "Unable to liftover position %s on %s",
308
+ genomic_tx_data[key][1],
309
+ chromosome,
310
+ )
311
+ return
312
+
313
+ genomic_tx_data[key] = liftover_start_i[1], liftover_end_i[1]
314
+
217
315
  async def _get_and_validate_genomic_tx_data(
218
316
  self,
219
317
  tx_ac: str,
220
- pos: Tuple[int, int],
221
- annotation_layer: Union[
222
- AnnotationLayer.CDNA, AnnotationLayer.GENOMIC
223
- ] = AnnotationLayer.CDNA,
224
- coding_start_site: Optional[int] = None,
225
- alt_ac: Optional[str] = None,
226
- ) -> Optional[Dict]:
318
+ pos: tuple[int, int],
319
+ annotation_layer: Literal[AnnotationLayer.CDNA]
320
+ | Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.CDNA,
321
+ coding_start_site: int | None = None,
322
+ alt_ac: str | None = None,
323
+ ) -> dict | None:
227
324
  """Get and validate genomic_tx_data
228
325
 
229
326
  :param tx_ac: Accession on c. coordinate
@@ -237,7 +334,7 @@ class ManeTranscript:
237
334
  tx_ac, pos, annotation_layer, alt_ac=alt_ac
238
335
  )
239
336
  if not genomic_tx_data:
240
- logger.warning(
337
+ _logger.warning(
241
338
  "Unable to find genomic_tx_data for %s at position %s on annotation layer %s",
242
339
  alt_ac,
243
340
  pos,
@@ -250,12 +347,12 @@ class ManeTranscript:
250
347
  # Only want to liftover if alt_ac not provided. If alt_ac is provided,
251
348
  # it means user wants to stick with the queried assembly
252
349
  og_alt_exon_id = genomic_tx_data["alt_exon_id"]
253
- await self.uta_db.liftover_to_38(genomic_tx_data)
350
+ await self._liftover_to_38(genomic_tx_data)
254
351
  liftover_alt_exon_id = genomic_tx_data["alt_exon_id"]
255
352
 
256
353
  # Validation check: Exon structure
257
354
  if og_alt_exon_id != liftover_alt_exon_id:
258
- logger.warning(
355
+ _logger.warning(
259
356
  "Original alt_exon_id %s does not match liftover alt_exon_id %s",
260
357
  og_alt_exon_id,
261
358
  liftover_alt_exon_id,
@@ -266,14 +363,14 @@ class ManeTranscript:
266
363
 
267
364
  @staticmethod
268
365
  def _get_c_data(
269
- cds_start_end: Tuple[int, int],
270
- c_pos_change: Tuple[int, int],
366
+ cds_start_end: tuple[int, int],
367
+ c_pos_change: tuple[int, int],
271
368
  strand: Strand,
272
369
  status: TranscriptPriority,
273
370
  refseq_c_ac: str,
274
- gene: Optional[str] = None,
275
- ensembl_c_ac: Optional[str] = None,
276
- alt_ac: Optional[str] = None,
371
+ gene: str | None = None,
372
+ ensembl_c_ac: str | None = None,
373
+ alt_ac: str | None = None,
277
374
  ) -> CdnaRepresentation:
278
375
  """Return transcript data on c. coordinate.
279
376
 
@@ -293,7 +390,7 @@ class ManeTranscript:
293
390
  gt_cds_end = c_pos_change[1] > cds_end and c_pos_change[1] > cds_end
294
391
 
295
392
  if lt_cds_start or gt_cds_end:
296
- logger.info(
393
+ _logger.info(
297
394
  "%s with position %s is not within CDS start/end",
298
395
  refseq_c_ac,
299
396
  c_pos_change,
@@ -311,7 +408,7 @@ class ManeTranscript:
311
408
  alt_ac=alt_ac,
312
409
  )
313
410
 
314
- def _c_to_p_pos(self, c_pos: Tuple[int, int]) -> Tuple[int, int]:
411
+ def _c_to_p_pos(self, c_pos: tuple[int, int]) -> tuple[int, int]:
315
412
  """Get protein position from cdna position
316
413
 
317
414
  :param c_pos: cdna position. inter-residue coordinates
@@ -325,7 +422,7 @@ class ManeTranscript:
325
422
  return start, end
326
423
 
327
424
  def _get_mane_p(
328
- self, mane_data: Dict, mane_c_pos_range: Tuple[int, int]
425
+ self, mane_data: dict, mane_c_pos_range: tuple[int, int]
329
426
  ) -> DataRepresentation:
330
427
  """Translate MANE Transcript c. annotation to p. annotation
331
428
 
@@ -349,13 +446,13 @@ class ManeTranscript:
349
446
 
350
447
  async def _g_to_c(
351
448
  self,
352
- g: Dict,
449
+ g: dict,
353
450
  refseq_c_ac: str,
354
451
  status: TranscriptPriority,
355
- ensembl_c_ac: Optional[str] = None,
356
- alt_ac: Optional[str] = None,
452
+ ensembl_c_ac: str | None = None,
453
+ alt_ac: str | None = None,
357
454
  found_result: bool = False,
358
- ) -> Optional[CdnaRepresentation]:
455
+ ) -> CdnaRepresentation | None:
359
456
  """Get transcript c. annotation data from g. annotation.
360
457
 
361
458
  :param g: Genomic data
@@ -381,7 +478,7 @@ class ManeTranscript:
381
478
  )
382
479
 
383
480
  if not result:
384
- logger.warning(
481
+ _logger.warning(
385
482
  "Unable to find transcript, %s, position change", refseq_c_ac
386
483
  )
387
484
  return None
@@ -438,7 +535,7 @@ class ManeTranscript:
438
535
  new_rf = self._get_reading_frame(transcript_data.pos[pos_index])
439
536
 
440
537
  if og_rf != new_rf:
441
- logger.warning(
538
+ _logger.warning(
442
539
  "%s original reading frame (%s) does not match new %s, %s reading frame (%s)",
443
540
  ac,
444
541
  og_rf,
@@ -449,7 +546,7 @@ class ManeTranscript:
449
546
  return False
450
547
  else:
451
548
  if pos_index == 0:
452
- logger.warning("%s must having start position", ac)
549
+ _logger.warning("%s must having start position", ac)
453
550
  return False
454
551
  return True
455
552
 
@@ -459,9 +556,9 @@ class ManeTranscript:
459
556
  coding_start_site: int,
460
557
  start_pos: int,
461
558
  end_pos: int,
462
- mane_transcript: Union[
463
- DataRepresentation, CdnaRepresentation, GenomicRepresentation
464
- ],
559
+ mane_transcript: DataRepresentation
560
+ | CdnaRepresentation
561
+ | GenomicRepresentation,
465
562
  expected_ref: str,
466
563
  anno: AnnotationLayer,
467
564
  residue_mode: ResidueMode,
@@ -503,10 +600,10 @@ class ManeTranscript:
503
600
  residue_mode=residue_mode,
504
601
  )
505
602
  if not mane_ref:
506
- logger.info("Unable to validate reference for MANE Transcript")
603
+ _logger.info("Unable to validate reference for MANE Transcript")
507
604
 
508
605
  if expected_ref != mane_ref:
509
- logger.info(
606
+ _logger.info(
510
607
  "Expected ref, %s, but got %s on MANE accession, %s",
511
608
  expected_ref,
512
609
  mane_ref,
@@ -514,7 +611,7 @@ class ManeTranscript:
514
611
  )
515
612
 
516
613
  if expected_ref != ref:
517
- logger.warning(
614
+ _logger.warning(
518
615
  "Expected ref, %s, but got %s on accession, %s", expected_ref, ref, ac
519
616
  )
520
617
  return False
@@ -522,7 +619,7 @@ class ManeTranscript:
522
619
  return True
523
620
 
524
621
  def _validate_index(
525
- self, ac: str, pos: Tuple[int, int], coding_start_site: int
622
+ self, ac: str, pos: tuple[int, int], coding_start_site: int
526
623
  ) -> bool:
527
624
  """Validate that positions actually exist on accession
528
625
 
@@ -533,13 +630,13 @@ class ManeTranscript:
533
630
  """
534
631
  start_pos = pos[0] + coding_start_site
535
632
  end_pos = pos[1] + coding_start_site
536
- if self.seqrepo_access.get_reference_sequence(
537
- ac, start=start_pos, end=end_pos, residue_mode=ResidueMode.INTER_RESIDUE
538
- )[0]:
539
- return True
540
- return False
633
+ return bool(
634
+ self.seqrepo_access.get_reference_sequence(
635
+ ac, start=start_pos, end=end_pos, residue_mode=ResidueMode.INTER_RESIDUE
636
+ )[0]
637
+ )
541
638
 
542
- def _get_prioritized_transcripts_from_gene(self, df: pl.DataFrame) -> List:
639
+ def _get_prioritized_transcripts_from_gene(self, df: pl.DataFrame) -> list:
543
640
  """Sort and filter transcripts from gene to get priority list
544
641
 
545
642
  :param df: Data frame containing transcripts from gene
@@ -550,7 +647,7 @@ class ManeTranscript:
550
647
  most recent version of a transcript associated with an assembly will be kept
551
648
  """
552
649
  copy_df = df.clone()
553
- copy_df = copy_df.drop(columns="alt_ac").unique()
650
+ copy_df = copy_df.drop("alt_ac").unique()
554
651
  copy_df = copy_df.with_columns(
555
652
  [
556
653
  pl.col("tx_ac")
@@ -590,15 +687,13 @@ class ManeTranscript:
590
687
  start_pos: int,
591
688
  end_pos: int,
592
689
  start_annotation_layer: AnnotationLayer,
593
- gene: Optional[str] = None,
594
- ref: Optional[str] = None,
690
+ gene: str | None = None,
691
+ ref: str | None = None,
595
692
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
596
- mane_transcripts: Optional[Set] = None,
597
- alt_ac: Optional[str] = None,
598
- end_annotation_layer: Optional[EndAnnotationLayer] = None,
599
- ) -> Optional[
600
- Union[DataRepresentation, CdnaRepresentation, ProteinAndCdnaRepresentation]
601
- ]:
693
+ mane_transcripts: set | None = None,
694
+ alt_ac: str | None = None,
695
+ end_annotation_layer: EndAnnotationLayer | None = None,
696
+ ) -> DataRepresentation | CdnaRepresentation | ProteinAndCdnaRepresentation | None:
602
697
  """Get longest compatible transcript from a gene. See the documentation for
603
698
  the :ref:`transcript compatibility policy <transcript_compatibility>` for more
604
699
  information.
@@ -613,14 +708,16 @@ class ManeTranscript:
613
708
  ... "NM_004333.6",
614
709
  ... "ENST00000644969.2",
615
710
  ... }
616
- >>> result = asyncio.run(mane_mapper.get_longest_compatible_transcript(
617
- ... 599,
618
- ... 599,
619
- ... gene="BRAF",
620
- ... start_annotation_layer=AnnotationLayer.PROTEIN,
621
- ... residue_mode=ResidueMode.INTER_RESIDUE,
622
- ... mane_transcripts=mane_transcripts,
623
- ... ))
711
+ >>> result = asyncio.run(
712
+ ... mane_mapper.get_longest_compatible_transcript(
713
+ ... 599,
714
+ ... 599,
715
+ ... gene="BRAF",
716
+ ... start_annotation_layer=AnnotationLayer.PROTEIN,
717
+ ... residue_mode=ResidueMode.INTER_RESIDUE,
718
+ ... mane_transcripts=mane_transcripts,
719
+ ... )
720
+ ... )
624
721
  >>> result.refseq
625
722
  'NP_001365396.1'
626
723
 
@@ -645,9 +742,9 @@ class ManeTranscript:
645
742
  """
646
743
 
647
744
  def _get_protein_rep(
648
- gene: Optional[str],
745
+ gene: str | None,
649
746
  pro_ac: str,
650
- lcr_c_data_pos: Tuple[int, int],
747
+ lcr_c_data_pos: tuple[int, int],
651
748
  strand: Strand,
652
749
  status: TranscriptPriority,
653
750
  ) -> DataRepresentation:
@@ -692,7 +789,7 @@ class ManeTranscript:
692
789
  )
693
790
 
694
791
  if df.is_empty():
695
- logger.warning("Unable to get transcripts from gene %s", gene)
792
+ _logger.warning("Unable to get transcripts from gene %s", gene)
696
793
  return lcr_result
697
794
 
698
795
  prioritized_tx_acs = self._get_prioritized_transcripts_from_gene(df)
@@ -731,7 +828,7 @@ class ManeTranscript:
731
828
 
732
829
  # Get prioritized transcript data for gene
733
830
  # grch38 -> c
734
- lcr_c_data: Optional[CdnaRepresentation] = await self._g_to_c(
831
+ lcr_c_data: CdnaRepresentation | None = await self._g_to_c(
735
832
  g=g,
736
833
  refseq_c_ac=tx_ac,
737
834
  status=TranscriptPriority.LONGEST_COMPATIBLE_REMAINING,
@@ -814,7 +911,7 @@ class ManeTranscript:
814
911
  pos = lcr_result.pos
815
912
 
816
913
  if not self._validate_index(ac, pos, coding_start_site):
817
- logger.warning(
914
+ _logger.warning(
818
915
  "%s are not valid positions on %s with coding start site %s",
819
916
  pos,
820
917
  ac,
@@ -841,7 +938,7 @@ class ManeTranscript:
841
938
  pos = lcr_result_dict[k]["pos"]
842
939
  if not self._validate_index(ac, pos, cds):
843
940
  valid = False
844
- logger.warning(
941
+ _logger.warning(
845
942
  "%s are not valid positions on %s with coding start site %s",
846
943
  pos,
847
944
  ac,
@@ -859,25 +956,26 @@ class ManeTranscript:
859
956
  start_pos: int,
860
957
  end_pos: int,
861
958
  start_annotation_layer: AnnotationLayer,
862
- gene: Optional[str] = None,
863
- ref: Optional[str] = None,
959
+ gene: str | None = None,
960
+ ref: str | None = None,
864
961
  try_longest_compatible: bool = False,
865
- residue_mode: Union[
866
- ResidueMode.RESIDUE, ResidueMode.INTER_RESIDUE
867
- ] = ResidueMode.RESIDUE,
868
- ) -> Optional[Union[DataRepresentation, CdnaRepresentation]]:
962
+ residue_mode: Literal[ResidueMode.RESIDUE]
963
+ | Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.RESIDUE,
964
+ ) -> DataRepresentation | CdnaRepresentation | None:
869
965
  """Return MANE transcript.
870
966
 
871
967
  >>> from cool_seq_tool.app import CoolSeqTool
872
968
  >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
873
969
  >>> import asyncio
874
970
  >>> mane_mapper = CoolSeqTool().mane_transcript
875
- >>> result = asyncio.run(mane_mapper.get_mane_transcript(
876
- ... "NP_004324.2",
877
- ... 599,
878
- ... AnnotationLayer.PROTEIN,
879
- ... residue_mode=ResidueMode.INTER_RESIDUE,
880
- ... ))
971
+ >>> result = asyncio.run(
972
+ ... mane_mapper.get_mane_transcript(
973
+ ... "NP_004324.2",
974
+ ... 599,
975
+ ... AnnotationLayer.PROTEIN,
976
+ ... residue_mode=ResidueMode.INTER_RESIDUE,
977
+ ... )
978
+ ... )
881
979
  >>> result.gene, result.refseq, result.status
882
980
  ('BRAF', 'NP_004324.2', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
883
981
 
@@ -930,7 +1028,7 @@ class ManeTranscript:
930
1028
  current_mane_data["RefSeq_nuc"],
931
1029
  current_mane_data["Ensembl_nuc"],
932
1030
  }
933
- mane: Optional[CdnaRepresentation] = await self._g_to_c(
1031
+ mane: CdnaRepresentation | None = await self._g_to_c(
934
1032
  g=g,
935
1033
  refseq_c_ac=current_mane_data["RefSeq_nuc"],
936
1034
  status=TranscriptPriority(
@@ -998,12 +1096,10 @@ class ManeTranscript:
998
1096
  return await self.g_to_mane_c(
999
1097
  ac, start_pos, end_pos, gene=gene, residue_mode=residue_mode
1000
1098
  )
1001
- logger.warning("Annotation layer not supported: %s", start_annotation_layer)
1099
+ _logger.warning("Annotation layer not supported: %s", start_annotation_layer)
1002
1100
  return None
1003
1101
 
1004
- async def g_to_grch38(
1005
- self, ac: str, start_pos: int, end_pos: int
1006
- ) -> Optional[Dict]:
1102
+ async def g_to_grch38(self, ac: str, start_pos: int, end_pos: int) -> dict | None:
1007
1103
  """Return genomic coordinate on GRCh38 when not given gene context.
1008
1104
 
1009
1105
  :param ac: Genomic accession
@@ -1025,11 +1121,11 @@ class ManeTranscript:
1025
1121
  is_same_pos = start_pos == end_pos
1026
1122
 
1027
1123
  # Coordinate liftover
1028
- if assembly < "GRCh37":
1029
- logger.warning("Liftover only supported for GRCh37")
1124
+ if assembly < Assembly.GRCH37:
1125
+ _logger.warning("Liftover only supported for GRCh37")
1030
1126
  return None
1031
1127
 
1032
- liftover_start_i = self.uta_db.get_liftover(
1128
+ liftover_start_i = self.liftover.get_liftover(
1033
1129
  chromosome, start_pos, Assembly.GRCH38
1034
1130
  )
1035
1131
  if liftover_start_i is None:
@@ -1037,7 +1133,7 @@ class ManeTranscript:
1037
1133
  start_pos = liftover_start_i[1]
1038
1134
 
1039
1135
  if not is_same_pos:
1040
- liftover_end_i = self.uta_db.get_liftover(
1136
+ liftover_end_i = self.liftover.get_liftover(
1041
1137
  chromosome, end_pos, Assembly.GRCH38
1042
1138
  )
1043
1139
  if liftover_end_i is None:
@@ -1055,8 +1151,8 @@ class ManeTranscript:
1055
1151
 
1056
1152
  @staticmethod
1057
1153
  def get_mane_c_pos_change(
1058
- mane_tx_genomic_data: Dict, coding_start_site: int
1059
- ) -> Tuple[int, int]:
1154
+ mane_tx_genomic_data: dict, coding_start_site: int
1155
+ ) -> tuple[int, int]:
1060
1156
  """Get mane c position change
1061
1157
 
1062
1158
  :param mane_tx_genomic_data: MANE transcript and genomic data
@@ -1080,9 +1176,9 @@ class ManeTranscript:
1080
1176
  ac: str,
1081
1177
  start_pos: int,
1082
1178
  end_pos: int,
1083
- gene: Optional[str] = None,
1179
+ gene: str | None = None,
1084
1180
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
1085
- ) -> Optional[Union[GenomicRepresentation, CdnaRepresentation]]:
1181
+ ) -> GenomicRepresentation | CdnaRepresentation | None:
1086
1182
  """Return MANE Transcript on the c. coordinate.
1087
1183
 
1088
1184
  If an arg for ``gene`` is provided, lifts to GRCh38, then gets MANE cDNA
@@ -1091,12 +1187,11 @@ class ManeTranscript:
1091
1187
  >>> import asyncio
1092
1188
  >>> from cool_seq_tool.app import CoolSeqTool
1093
1189
  >>> cst = CoolSeqTool()
1094
- >>> result = asyncio.run(cst.mane_transcript.g_to_mane_c(
1095
- ... "NC_000007.13",
1096
- ... 55259515,
1097
- ... None,
1098
- ... gene="EGFR"
1099
- ... ))
1190
+ >>> result = asyncio.run(
1191
+ ... cst.mane_transcript.g_to_mane_c(
1192
+ ... "NC_000007.13", 55259515, None, gene="EGFR"
1193
+ ... )
1194
+ ... )
1100
1195
  >>> type(result)
1101
1196
  <class 'cool_seq_tool.mappers.mane_transcript.CdnaRepresentation'>
1102
1197
  >>> result.status
@@ -1132,7 +1227,7 @@ class ManeTranscript:
1132
1227
  )
1133
1228
 
1134
1229
  if not await self.uta_db.validate_genomic_ac(ac):
1135
- logger.warning("Genomic accession does not exist: %s", ac)
1230
+ _logger.warning("Genomic accession does not exist: %s", ac)
1136
1231
  return None
1137
1232
 
1138
1233
  mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene)
@@ -1158,7 +1253,7 @@ class ManeTranscript:
1158
1253
  )
1159
1254
  if not mane_tx_genomic_data:
1160
1255
  continue
1161
- logger.info("Not using most recent assembly")
1256
+ _logger.info("Not using most recent assembly")
1162
1257
 
1163
1258
  coding_start_site = mane_tx_genomic_data["coding_start_site"]
1164
1259
  coding_end_site = mane_tx_genomic_data["coding_end_site"]
@@ -1169,7 +1264,7 @@ class ManeTranscript:
1169
1264
  if not self._validate_index(
1170
1265
  mane_c_ac, mane_c_pos_change, coding_start_site
1171
1266
  ):
1172
- logger.warning(
1267
+ _logger.warning(
1173
1268
  "%s are not valid positions on %s with coding start site %s",
1174
1269
  mane_c_pos_change,
1175
1270
  mane_c_ac,
@@ -1198,10 +1293,10 @@ class ManeTranscript:
1198
1293
  alt_ac: str,
1199
1294
  start_pos: int,
1200
1295
  end_pos: int,
1201
- gene: Optional[str] = None,
1296
+ gene: str | None = None,
1202
1297
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
1203
1298
  try_longest_compatible: bool = False,
1204
- ) -> Optional[Dict]:
1299
+ ) -> dict | None:
1205
1300
  """Given GRCh38 genomic representation, return protein representation.
1206
1301
 
1207
1302
  Will try MANE Select and then MANE Plus Clinical. If neither is found and
@@ -1259,7 +1354,7 @@ class ManeTranscript:
1259
1354
  if not self._validate_index(
1260
1355
  mane_c_ac, mane_c_pos_change, coding_start_site
1261
1356
  ):
1262
- logger.warning(
1357
+ _logger.warning(
1263
1358
  "%s are not valid positions on %s with coding start site %s",
1264
1359
  mane_c_pos_change,
1265
1360
  mane_c_ac,