cool-seq-tool 0.3.0.dev1__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,20 @@
1
- """Module for mapping transcript exon to and from genomic coordinates"""
1
+ """Provide mapping capabilities between transcript exon and genomic coordinates."""
2
2
  import logging
3
3
  from typing import Dict, List, Optional, Tuple, TypeVar, Union
4
4
 
5
- from cool_seq_tool.mappers import MANETranscript
5
+ from cool_seq_tool.mappers.mane_transcript import CdnaRepresentation, ManeTranscript
6
6
  from cool_seq_tool.schemas import (
7
7
  AnnotationLayer,
8
8
  Assembly,
9
9
  GenomicData,
10
10
  GenomicDataResponse,
11
11
  ResidueMode,
12
+ Strand,
12
13
  TranscriptExonData,
13
14
  TranscriptExonDataResponse,
14
15
  )
15
- from cool_seq_tool.sources.uta_database import UTADatabase
16
- from cool_seq_tool.utils import service_meta
16
+ from cool_seq_tool.sources.uta_database import UtaDatabase
17
+ from cool_seq_tool.utils import get_inter_residue_pos, service_meta
17
18
 
18
19
  CoordinatesResponseType = TypeVar(
19
20
  "CoordinatesResponseType", GenomicDataResponse, TranscriptExonDataResponse
@@ -23,14 +24,33 @@ logger = logging.getLogger(__name__)
23
24
 
24
25
 
25
26
  class ExonGenomicCoordsMapper:
26
- """Class for mapping transcript exon representation to/from genomic coordinate
27
- representation
27
+ """Provide capabilities for mapping transcript exon representation to/from genomic
28
+ coordinate representation.
28
29
  """
29
30
 
30
- def __init__(self, uta_db: UTADatabase, mane_transcript: MANETranscript) -> None:
31
- """Initialize ExonGenomicCoordsMapper class
31
+ def __init__(self, uta_db: UtaDatabase, mane_transcript: ManeTranscript) -> None:
32
+ """Initialize ExonGenomicCoordsMapper class.
32
33
 
33
- :param uta_db: UTADatabase instance to give access to query UTA database
34
+ A lot of resources are required for initialization, so when defaults are enough,
35
+ it's easiest to let the core CoolSeqTool class handle it for you:
36
+
37
+ >>> from cool_seq_tool.app import CoolSeqTool
38
+ >>> egc = CoolSeqTool().ex_g_coords_mapper
39
+
40
+ Note that this class's public methods are all defined as ``async``, so they will
41
+ need to be called with ``await`` when called from a function, or run from an
42
+ event loop. See the :ref:`Usage section <async_note>` for more information.
43
+
44
+ >>> import asyncio
45
+ >>> result = asyncio.run(egc.transcript_to_genomic_coordinates(
46
+ ... "NM_002529.3",
47
+ ... exon_start=2,
48
+ ... exon_end=17
49
+ ... ))
50
+ >>> result.genomic_data.start, result.genomic_data.end
51
+ (156864428, 156881456)
52
+
53
+ :param uta_db: UtaDatabase instance to give access to query UTA database
34
54
  :param mane_transcript: Instance to align to MANE or compatible representation
35
55
  """
36
56
  self.uta_db = uta_db
@@ -43,8 +63,8 @@ class ExonGenomicCoordsMapper:
43
63
  """Add warnings to response object
44
64
 
45
65
  :param resp: Response object
46
- :param warning_msg: Warning message on why `transcript_exon_data` or
47
- `genomic_data` field is None
66
+ :param warning_msg: Warning message on why ``transcript_exon_data`` or
67
+ ``genomic_data`` field is ``None``
48
68
  :return: Response object with warning message
49
69
  """
50
70
  logger.warning(warning_msg)
@@ -53,22 +73,35 @@ class ExonGenomicCoordsMapper:
53
73
 
54
74
  async def transcript_to_genomic_coordinates(
55
75
  self,
76
+ transcript: str,
56
77
  gene: Optional[str] = None,
57
- transcript: Optional[str] = None,
58
78
  exon_start: Optional[int] = None,
59
79
  exon_start_offset: int = 0,
60
80
  exon_end: Optional[int] = None,
61
81
  exon_end_offset: int = 0,
62
- **kwargs,
63
82
  ) -> GenomicDataResponse:
64
83
  """Get genomic data given transcript data.
65
- Will use GRCh38 coordinates if possible
66
84
 
67
- :param gene: Gene symbol
85
+ By default, transcript data is aligned to the GRCh38 assembly.
86
+
87
+ >>> import asyncio
88
+ >>> from cool_seq_tool.app import CoolSeqTool
89
+ >>> egc = CoolSeqTool().ex_g_coords_mapper
90
+ >>> tpm3 = asyncio.run(egc.transcript_to_genomic_coordinates(
91
+ ... "NM_152263.3",
92
+ ... gene="TPM3",
93
+ ... exon_start=1, exon_end=8,
94
+ ... ))
95
+ >>> tpm3.genomic_data.chr, tpm3.genomic_data.start, tpm3.genomic_data.end
96
+ ('NC_000001.11', 154192135, 154170399)
97
+
68
98
  :param transcript: Transcript accession
69
- :param exon_start: Starting transcript exon number
70
- :param exon_end: Ending transcript exon number
99
+ :param gene: HGNC gene symbol
100
+ :param exon_start: Starting transcript exon number (1-based). If not provided,
101
+ must provide ``exon_end``
71
102
  :param exon_start_offset: Starting exon offset
103
+ :param exon_end: Ending transcript exon number (1-based). If not provided, must
104
+ provide ``exon_start``
72
105
  :param exon_end_offset: Ending exon offset
73
106
  :return: GRCh38 genomic data (inter-residue coordinates)
74
107
  """
@@ -76,80 +109,102 @@ class ExonGenomicCoordsMapper:
76
109
  genomic_data=None, warnings=[], service_meta=service_meta()
77
110
  )
78
111
 
112
+ # Ensure valid inputs
79
113
  if not transcript:
80
114
  return self._return_warnings(resp, "Must provide `transcript`")
81
115
  else:
82
116
  transcript = transcript.strip()
83
117
 
84
- if exon_start is None and exon_end is None:
118
+ exon_start_exists, exon_end_exists = False, False
119
+ if exon_start is not None:
120
+ if exon_start < 1:
121
+ return self._return_warnings(resp, "`exon_start` cannot be less than 1")
122
+ exon_start_exists = True
123
+
124
+ if exon_end is not None:
125
+ if exon_end < 1:
126
+ return self._return_warnings(resp, "`exon_end` cannot be less than 1")
127
+ exon_end_exists = True
128
+
129
+ if not exon_start_exists and not exon_end_exists:
85
130
  return self._return_warnings(
86
131
  resp, "Must provide either `exon_start` or `exon_end`"
87
132
  )
88
-
89
- if gene:
90
- gene = gene.upper().strip()
91
-
92
- if exon_start and exon_end:
133
+ elif exon_start_exists and exon_end_exists:
93
134
  if exon_start > exon_end:
94
135
  return self._return_warnings(
95
136
  resp,
96
137
  f"Start exon {exon_start} is greater than end exon {exon_end}",
97
138
  )
98
139
 
140
+ # Get all exons and associated start/end coordinates for transcript
99
141
  tx_exons, warning = await self.uta_db.get_tx_exons(transcript)
100
142
  if not tx_exons:
101
143
  return self._return_warnings(resp, warning or "")
102
144
 
103
- tx_exon_coords, warning = self.uta_db.get_tx_exon_coords(
145
+ # Get exon start and exon end coordinates
146
+ tx_exon_coords, warning = self.get_tx_exon_coords(
104
147
  transcript, tx_exons, exon_start, exon_end
105
148
  )
106
149
  if not tx_exon_coords:
107
150
  return self._return_warnings(resp, warning or "")
108
- tx_exon_start, tx_exon_end = tx_exon_coords
151
+ tx_exon_start_coords, tx_exon_end_coords = tx_exon_coords
109
152
 
110
- alt_ac_start_end, warning = await self.uta_db.get_alt_ac_start_and_end(
111
- transcript, tx_exon_start, tx_exon_end, gene=gene
153
+ if gene:
154
+ gene = gene.upper().strip()
155
+
156
+ # Get aligned genomic data (hgnc gene, alt_ac, alt_start_i, alt_end_i, strand)
157
+ # for exon(s)
158
+ alt_ac_start_end, warning = await self._get_alt_ac_start_and_end(
159
+ transcript, tx_exon_start_coords, tx_exon_end_coords, gene=gene
112
160
  )
113
161
  if not alt_ac_start_end:
114
162
  return self._return_warnings(resp, warning or "")
115
- alt_ac_start, alt_ac_end = alt_ac_start_end
163
+ alt_ac_start_data, alt_ac_end_data = alt_ac_start_end
116
164
 
117
- gene = alt_ac_start[0] if alt_ac_start else alt_ac_end[0]
118
- chromosome = alt_ac_start[1] if alt_ac_start else alt_ac_end[1]
165
+ # Get gene and chromosome data, check that at least one was retrieved
166
+ gene = alt_ac_start_data[0] if alt_ac_start_data else alt_ac_end_data[0]
167
+ chromosome = alt_ac_start_data[1] if alt_ac_start_data else alt_ac_end_data[1]
119
168
  if gene is None or chromosome is None:
120
169
  return self._return_warnings(
121
170
  resp,
122
- "Unable to retrieve `gene` or `chromosome` from "
123
- "genomic start or end data",
171
+ "Unable to retrieve `gene` or `chromosome` from genomic start and "
172
+ "genomic end data",
124
173
  )
125
174
 
126
- start = alt_ac_start[3] if alt_ac_start else None
127
- end = alt_ac_end[2] if alt_ac_end else None
128
- strand = alt_ac_start[4] if alt_ac_start else alt_ac_end[4]
175
+ g_start = alt_ac_start_data[3] - 1 if alt_ac_start_data else None
176
+ g_end = alt_ac_end_data[2] + 1 if alt_ac_end_data else None
177
+ strand = (
178
+ Strand(alt_ac_start_data[4])
179
+ if alt_ac_start_data
180
+ else Strand(alt_ac_end_data[4])
181
+ )
129
182
 
130
183
  # Using none since could set to 0
131
- start_exits = start is not None
132
- end_exists = end is not None
184
+ start_exits = g_start is not None
185
+ end_exists = g_end is not None
133
186
 
134
- if strand == -1:
187
+ # Calculate offsets
188
+ if strand == Strand.NEGATIVE:
135
189
  start_offset = exon_start_offset * -1 if start_exits else None
136
- end_offset = exon_end_offset * -1 if end_exists else None
190
+ end_offset = exon_end_offset * -1 if end_exists else 0
137
191
  else:
138
- start_offset = exon_start_offset if start_exits else None
139
- end_offset = exon_end_offset if end_exists else None
192
+ start_offset = exon_start_offset if start_exits else 0
193
+ end_offset = exon_end_offset if end_exists else 0
140
194
 
141
- start = start + start_offset if start_exits else None
142
- end = end + end_offset if end_exists else None
195
+ # Get genomic coordinates with offsets included
196
+ g_start = g_start + start_offset if start_exits else None
197
+ g_end = g_end + end_offset if end_exists else None
143
198
 
144
199
  resp.genomic_data = GenomicData(
145
200
  gene=gene,
146
201
  chr=chromosome,
147
- start=start,
148
- end=end,
202
+ start=g_start,
203
+ end=g_end,
149
204
  exon_start=exon_start if start_exits else None,
150
- exon_start_offset=exon_start_offset if start_exits else None,
205
+ exon_start_offset=exon_start_offset,
151
206
  exon_end=exon_end if end_exists else None,
152
- exon_end_offset=exon_end_offset if end_exists else None,
207
+ exon_end_offset=exon_end_offset,
153
208
  transcript=transcript,
154
209
  strand=strand,
155
210
  )
@@ -158,31 +213,51 @@ class ExonGenomicCoordsMapper:
158
213
 
159
214
  async def genomic_to_transcript_exon_coordinates(
160
215
  self,
161
- chromosome: Union[str, int],
216
+ chromosome: Optional[str] = None,
217
+ alt_ac: Optional[str] = None,
162
218
  start: Optional[int] = None,
163
219
  end: Optional[int] = None,
164
- strand: Optional[int] = None,
220
+ strand: Optional[Strand] = None,
165
221
  transcript: Optional[str] = None,
166
222
  gene: Optional[str] = None,
167
- residue_mode: ResidueMode = ResidueMode.RESIDUE,
168
- **kwargs,
223
+ residue_mode: Union[
224
+ ResidueMode.INTER_RESIDUE, ResidueMode.RESIDUE
225
+ ] = ResidueMode.RESIDUE,
169
226
  ) -> GenomicDataResponse:
170
- """Get transcript data for genomic data.
171
- MANE Transcript data will be returned iff `transcript` is not supplied.
172
- `gene` must be supplied in order to retrieve MANE Transcript data.
173
- Liftovers genomic coordinates to GRCh38
174
-
175
- :param chromosome: Chromosome. Must either give chromosome number (i.e. `1`) or
176
- accession (i.e. `NC_000001.11`).
227
+ """Get transcript data for genomic data, lifted over to GRCh38.
228
+
229
+ MANE Transcript data will be returned if and only if ``transcript`` is not
230
+ supplied. ``gene`` must be given in order to retrieve MANE Transcript data.
231
+
232
+ >>> import asyncio
233
+ >>> from cool_seq_tool.app import CoolSeqTool
234
+ >>> from cool_seq_tool.schemas import Strand
235
+ >>> egc = CoolSeqTool().ex_g_coords_mapper
236
+ >>> result = asyncio.run(egc.genomic_to_transcript_exon_coordinates(
237
+ ... chromosome="NC_000001.11",
238
+ ... start=154192136,
239
+ ... end=154170400,
240
+ ... strand=Strand.NEGATIVE,
241
+ ... transcript="NM_152263.3"
242
+ ... ))
243
+ >>> result.genomic_data.exon_start, result.genomic_data.exon_end
244
+ (1, 8)
245
+
246
+ :param chromosome: Chromosome. Must give chromosome without a prefix
247
+ (i.e. ``1`` or ``X``). If not provided, must provide ``alt_ac``.
248
+ If ``alt_ac`` is also provided, ``alt_ac`` will be used.
249
+ :param alt_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
250
+ must provide ``chromosome``. If ``chromosome`` is also provided, ``alt_ac``
251
+ will be used.
177
252
  :param start: Start genomic position
178
253
  :param end: End genomic position
179
- :param strand: Strand. Must be either `-1` or `1`.
254
+ :param strand: Strand
180
255
  :param transcript: The transcript to use. If this is not given, we will try the
181
256
  following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
182
- Compatible Transcript
183
- :param gene: Gene symbol
184
- :param residue_mode: Default is `resiude` (1-based). Must be either `residue` or
185
- `inter-residue` (0-based).
257
+ Compatible Transcript. See the :ref:`Transcript Selection policy <transcript_selection_policy>`
258
+ page.
259
+ :param gene: HGNC gene symbol
260
+ :param residue_mode: Residue mode for ``start`` and ``end``
186
261
  :return: Genomic data (inter-residue coordinates)
187
262
  """
188
263
  resp = GenomicDataResponse(
@@ -191,21 +266,23 @@ class ExonGenomicCoordsMapper:
191
266
  if start is None and end is None:
192
267
  return self._return_warnings(resp, "Must provide either `start` or `end`")
193
268
 
194
- params = {key: None for key in GenomicData.__fields__.keys()}
269
+ params = {key: None for key in GenomicData.model_fields.keys()}
195
270
  if gene is not None:
196
271
  gene = gene.upper().strip()
197
272
 
198
273
  if start:
199
274
  if residue_mode == ResidueMode.RESIDUE:
275
+ # zero-based for UTA
200
276
  start -= 1
277
+ residue_mode = ResidueMode.ZERO
201
278
  start_data = await self._genomic_to_transcript_exon_coordinate(
202
- chromosome,
203
279
  start,
280
+ chromosome=chromosome,
281
+ alt_ac=alt_ac,
204
282
  strand=strand,
205
283
  transcript=transcript,
206
284
  gene=gene,
207
285
  is_start=True,
208
- residue_mode=ResidueMode.INTER_RESIDUE,
209
286
  )
210
287
  if start_data.transcript_exon_data:
211
288
  start_data = start_data.transcript_exon_data.model_dump()
@@ -215,16 +292,16 @@ class ExonGenomicCoordsMapper:
215
292
  start_data = None
216
293
 
217
294
  if end:
218
- if residue_mode == ResidueMode.RESIDUE:
219
- end -= 1
295
+ end -= 1
296
+ residue_mode = ResidueMode.ZERO
220
297
  end_data = await self._genomic_to_transcript_exon_coordinate(
221
- chromosome,
222
298
  end,
299
+ chromosome=chromosome,
300
+ alt_ac=alt_ac,
223
301
  strand=strand,
224
302
  transcript=transcript,
225
303
  gene=gene,
226
304
  is_start=False,
227
- residue_mode=ResidueMode.INTER_RESIDUE,
228
305
  )
229
306
  if end_data.transcript_exon_data:
230
307
  end_data = end_data.transcript_exon_data.model_dump()
@@ -261,30 +338,139 @@ class ExonGenomicCoordsMapper:
261
338
  resp.genomic_data = GenomicData(**params)
262
339
  return resp
263
340
 
341
+ @staticmethod
342
+ def _validate_exon(
343
+ transcript: str, tx_exons: List[Tuple[int, int]], exon_number: int
344
+ ) -> Tuple[Optional[Tuple[int, int]], Optional[str]]:
345
+ """Validate that exon number exists on a given transcript
346
+
347
+ :param transcript: Transcript accession
348
+ :param tx_exons: List of transcript's exons and associated coordinates
349
+ :param exon_number: Exon number to validate
350
+ :return: Exon coordinates for a given exon number and warnings if found
351
+ """
352
+ msg = f"Exon {exon_number} does not exist on {transcript}"
353
+ try:
354
+ if exon_number < 1:
355
+ return None, msg
356
+ exon = tx_exons[exon_number - 1]
357
+ except IndexError:
358
+ return None, msg
359
+ return exon, None
360
+
361
+ def get_tx_exon_coords(
362
+ self,
363
+ transcript: str,
364
+ tx_exons: List[Tuple[int, int]],
365
+ exon_start: Optional[int] = None,
366
+ exon_end: Optional[int] = None,
367
+ ) -> Tuple[
368
+ Optional[Tuple[Optional[Tuple[int, int]], Optional[Tuple[int, int]]]],
369
+ Optional[str],
370
+ ]:
371
+ """Get exon coordinates for ``exon_start`` and ``exon_end``
372
+
373
+ :param transcript: Transcript accession
374
+ :param tx_exons: List of all transcript exons and coordinates
375
+ :param exon_start: Start exon number
376
+ :param exon_end: End exon number
377
+ :return: [Transcript start exon coords, Transcript end exon coords],
378
+ and warnings if found
379
+ """
380
+ if exon_start is not None:
381
+ tx_exon_start, warning = self._validate_exon(
382
+ transcript, tx_exons, exon_start
383
+ )
384
+ if not tx_exon_start:
385
+ return None, warning
386
+ else:
387
+ tx_exon_start = None
388
+
389
+ if exon_end is not None:
390
+ tx_exon_end, warning = self._validate_exon(transcript, tx_exons, exon_end)
391
+ if not tx_exon_end:
392
+ return None, warning
393
+ else:
394
+ tx_exon_end = None
395
+ return (tx_exon_start, tx_exon_end), None
396
+
397
+ async def _get_alt_ac_start_and_end(
398
+ self,
399
+ tx_ac: str,
400
+ tx_exon_start: Optional[Tuple[int, int]] = None,
401
+ tx_exon_end: Optional[Tuple[int, int]] = None,
402
+ gene: Optional[str] = None,
403
+ ) -> Tuple[Optional[Tuple[Tuple[int, int], Tuple[int, int]]], Optional[str]]:
404
+ """Get aligned genomic coordinates for transcript exon start and end.
405
+
406
+ :param tx_ac: Transcript accession
407
+ :param tx_exon_start: Transcript's exon start coordinates. If not provided,
408
+ must provide ``tx_exon_end``
409
+ :param tx_exon_end: Transcript's exon end coordinates. If not provided, must
410
+ provide ``tx_exon_start``
411
+ :param gene: HGNC gene symbol
412
+ :return: Aligned genomic data, and warnings if found
413
+ """
414
+ if tx_exon_start is None and tx_exon_end is None:
415
+ msg = "Must provide either `tx_exon_start` or `tx_exon_end` or both"
416
+ logger.warning(msg)
417
+ return None, msg
418
+
419
+ alt_ac_data = {"start": None, "end": None}
420
+ for exon, key in [(tx_exon_start, "start"), (tx_exon_end, "end")]:
421
+ if exon:
422
+ alt_ac_val, warning = await self.uta_db.get_alt_ac_start_or_end(
423
+ tx_ac, exon[0], exon[1], gene=gene
424
+ )
425
+ if alt_ac_val:
426
+ alt_ac_data[key] = alt_ac_val
427
+ else:
428
+ return None, warning
429
+
430
+ alt_ac_data_values = alt_ac_data.values()
431
+ # Validate that start and end alignments have matching gene, genomic accession,
432
+ # and strand
433
+ if all(alt_ac_data_values):
434
+ for i in (0, 1, 4):
435
+ if alt_ac_data["start"][i] != alt_ac_data["end"][i]:
436
+ if i == 0:
437
+ error = "HGNC gene symbol does not match"
438
+ elif i == 1:
439
+ error = "Genomic accession does not match"
440
+ else:
441
+ error = "Strand does not match"
442
+ logger.warning(
443
+ f"{error}: {alt_ac_data['start'][i]} != {alt_ac_data['end'][i]}"
444
+ )
445
+ return None, error
446
+ return tuple(alt_ac_data_values), None
447
+
264
448
  async def _genomic_to_transcript_exon_coordinate(
265
449
  self,
266
- chromosome: Union[str, int],
267
450
  pos: int,
268
- strand: int = None,
269
- transcript: str = None,
270
- gene: str = None,
451
+ chromosome: Optional[str] = None,
452
+ alt_ac: Optional[str] = None,
453
+ strand: Optional[Strand] = None,
454
+ transcript: Optional[str] = None,
455
+ gene: Optional[str] = None,
271
456
  is_start: bool = True,
272
- residue_mode: ResidueMode = ResidueMode.RESIDUE,
273
457
  ) -> TranscriptExonDataResponse:
274
458
  """Convert individual genomic data to transcript data
275
459
 
276
- :param chromosome: Chromosome. Must either give chromosome number (i.e. `1`) or
277
- accession (i.e. `NC_000001.11`).
278
- :param pos: Genomic position
279
- :param strand: Strand. Must be either `-1` or `1`.
460
+ :param pos: Genomic position (zero-based)
461
+ :param chromosome: Chromosome. Must give chromosome without a prefix
462
+ (i.e. ``1`` or ``X``). If not provided, must provide ``alt_ac``.
463
+ If ``alt_ac`` is also provided, ``alt_ac`` will be used.
464
+ :param alt_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
465
+ must provide ``chromosome``. If ``chromosome`` is also provided, ``alt_ac``
466
+ will be used.
467
+ :param strand: Strand
280
468
  :param transcript: The transcript to use. If this is not given, we will try the
281
469
  following transcripts: MANE Select, MANE Clinical Plus, Longest Remaining
282
470
  Compatible Transcript
283
- :param gene: Gene symbol
284
- :param is_start: `True` if `pos` is start position. `False` if `pos` is end
285
- position.
286
- :param residue_mode: Default is `resiude` (1-based). Must be either `residue`
287
- or `inter-residue` (0-based).
471
+ :param gene: HGNC gene symbol
472
+ :param is_start: ``True`` if ``pos`` is start position. ``False`` if ``pos`` is
473
+ end position.
288
474
  :return: Transcript data (inter-residue coordinates)
289
475
  """
290
476
  resp = TranscriptExonDataResponse(
@@ -296,9 +482,19 @@ class ExonGenomicCoordsMapper:
296
482
  resp, "Must provide either `gene` or `transcript`"
297
483
  )
298
484
 
299
- params = {key: None for key in TranscriptExonData.__fields__.keys()}
485
+ params = {key: None for key in TranscriptExonData.model_fields.keys()}
300
486
 
301
- try:
487
+ if alt_ac:
488
+ # Check if valid accession is given
489
+ if not await self.uta_db.validate_genomic_ac(alt_ac):
490
+ return self._return_warnings(
491
+ resp, f"Invalid genomic accession: {alt_ac}"
492
+ )
493
+
494
+ genes_alt_acs, warning = await self.uta_db.get_genes_and_alt_acs(
495
+ pos, strand=strand, alt_ac=alt_ac, gene=gene
496
+ )
497
+ elif chromosome:
302
498
  # Check if just chromosome is given. If it is, we should
303
499
  # convert this to the correct accession version
304
500
  if chromosome == "X":
@@ -307,21 +503,13 @@ class ExonGenomicCoordsMapper:
307
503
  chromosome = 24
308
504
  else:
309
505
  chromosome = int(chromosome)
310
- except ValueError:
311
- # Check if valid accession is given
312
- if not await self.uta_db.validate_genomic_ac(chromosome):
313
- return self._return_warnings(resp, f"Invalid chromosome: {chromosome}")
314
506
 
315
- if isinstance(chromosome, str):
316
- # Accession given
317
- genes_alt_acs, warning = await self.uta_db.chr_to_gene_and_accessions(
318
- chromosome, pos, strand=strand, alt_ac=chromosome, gene=gene
507
+ genes_alt_acs, warning = await self.uta_db.get_genes_and_alt_acs(
508
+ pos, strand=strand, chromosome=chromosome, gene=gene
319
509
  )
320
510
  else:
321
- # Number given
322
- genes_alt_acs, warning = await self.uta_db.chr_to_gene_and_accessions(
323
- chromosome, pos, strand=strand, alt_ac=None, gene=gene
324
- )
511
+ genes_alt_acs = None
512
+
325
513
  if not genes_alt_acs:
326
514
  return self._return_warnings(resp, warning)
327
515
 
@@ -332,7 +520,7 @@ class ExonGenomicCoordsMapper:
332
520
 
333
521
  if transcript is None:
334
522
  warnings = await self._set_mane_genomic_data(
335
- params, gene, alt_ac, pos, strand, is_start, residue_mode
523
+ params, gene, alt_ac, pos, strand, is_start
336
524
  )
337
525
  if warnings:
338
526
  return self._return_warnings(resp, warnings)
@@ -394,9 +582,8 @@ class ExonGenomicCoordsMapper:
394
582
  gene: str,
395
583
  alt_ac: str,
396
584
  pos: int,
397
- strand: int,
585
+ strand: Strand,
398
586
  is_start: bool,
399
- residue_mode: ResidueMode,
400
587
  ) -> Optional[str]:
401
588
  """Set genomic data in `params` found from MANE.
402
589
 
@@ -407,16 +594,19 @@ class ExonGenomicCoordsMapper:
407
594
  :param strand: Strand
408
595
  :param is_start: `True` if `pos` is start position. `False` if `pos` is end
409
596
  position.
410
- :param residue_mode: Residue mode for `pos`
411
597
  :return: Warnings if found
412
598
  """
413
- mane_data = await self.mane_transcript.get_mane_transcript(
599
+ start, end = get_inter_residue_pos(pos, pos, residue_mode=ResidueMode.ZERO)
600
+ mane_data: Optional[
601
+ CdnaRepresentation
602
+ ] = await self.mane_transcript.get_mane_transcript(
414
603
  alt_ac,
415
- pos,
604
+ start,
605
+ end,
416
606
  AnnotationLayer.GENOMIC,
417
607
  gene=gene,
418
608
  try_longest_compatible=True,
419
- residue_mode=residue_mode,
609
+ residue_mode=ResidueMode.INTER_RESIDUE,
420
610
  )
421
611
  if not mane_data:
422
612
  msg = f"Unable to find mane data for {alt_ac} with position {pos}"
@@ -425,23 +615,18 @@ class ExonGenomicCoordsMapper:
425
615
  logger.warning(msg)
426
616
  return msg
427
617
 
428
- if mane_data["strand"] == "-":
429
- mane_data["strand"] = -1
430
- elif mane_data["strand"] == "+":
431
- mane_data["strand"] = 1
432
-
433
- params["gene"] = mane_data["gene"]
618
+ params["gene"] = mane_data.gene
434
619
  params["transcript"] = (
435
- mane_data["refseq"]
436
- if mane_data["refseq"]
437
- else mane_data["ensembl"]
438
- if mane_data["ensembl"]
620
+ mane_data.refseq
621
+ if mane_data.refseq
622
+ else mane_data.ensembl
623
+ if mane_data.ensembl
439
624
  else None
440
625
  )
441
626
  tx_exons = await self._structure_exons(params["transcript"], alt_ac=alt_ac)
442
627
  if not tx_exons:
443
628
  return f"Unable to get exons for {params['transcript']}"
444
- tx_pos = mane_data["pos"][0] + mane_data["coding_start_site"]
629
+ tx_pos = mane_data.pos[0] + mane_data.coding_start_site
445
630
  params["exon"] = self._get_exon_number(tx_exons, tx_pos)
446
631
 
447
632
  try:
@@ -454,7 +639,7 @@ class ExonGenomicCoordsMapper:
454
639
  logger.warning(msg)
455
640
  return msg
456
641
 
457
- strand_to_use = strand if strand is not None else mane_data["strand"]
642
+ strand_to_use = strand if strand is not None else mane_data.strand
458
643
  params["strand"] = strand_to_use
459
644
  self._set_exon_offset(
460
645
  params,
@@ -474,7 +659,7 @@ class ExonGenomicCoordsMapper:
474
659
 
475
660
  params["chr"] = genomic_data[1]
476
661
  genomic_coords = genomic_data[2], genomic_data[3]
477
- genomic_pos = genomic_coords[1] if is_start else genomic_coords[0]
662
+ genomic_pos = genomic_coords[1] - 1 if is_start else genomic_coords[0] + 1
478
663
  params["pos"] = (
479
664
  genomic_pos - params["exon_offset"]
480
665
  if strand_to_use == -1
@@ -483,14 +668,14 @@ class ExonGenomicCoordsMapper:
483
668
  return None
484
669
 
485
670
  async def _set_genomic_data(
486
- self, params: Dict, strand: int, is_start: bool
671
+ self, params: Dict, strand: Strand, is_start: bool
487
672
  ) -> Optional[str]:
488
- """Set genomic data in `params`
673
+ """Set genomic data in ``params``
489
674
 
490
675
  :param params: Parameters for response
491
676
  :param strand: Strand
492
- :param is_start: `True` if `pos` is start position. `False` if `pos` is end
493
- position.
677
+ :param is_start: ``True`` if ``pos`` is start position. ``False`` if ``pos`` is
678
+ end position.
494
679
  :return: Warnings if found
495
680
  """
496
681
  # We should always try to liftover
@@ -521,6 +706,7 @@ class ExonGenomicCoordsMapper:
521
706
  tx_exons = await self._structure_exons(params["transcript"], alt_ac=grch38_ac)
522
707
  if not tx_exons:
523
708
  return f"Unable to get exons for {params['transcript']}"
709
+
524
710
  data = await self.uta_db.get_tx_exon_aln_v_data(
525
711
  params["transcript"],
526
712
  params["pos"],
@@ -549,12 +735,15 @@ class ExonGenomicCoordsMapper:
549
735
  i = 1 if data_exons == (0, tx_exons[0][1]) else i - 1
550
736
  params["exon"] = i
551
737
 
552
- strand_to_use = strand if strand is not None else data[7]
738
+ strand_to_use = strand if strand is not None else Strand(data[7])
553
739
  params["strand"] = strand_to_use
740
+ if not is_start:
741
+ # convert back to inter-residue for end position
742
+ params["pos"] += 1
554
743
  self._set_exon_offset(
555
744
  params,
556
- data[5],
557
- data[6],
745
+ data[5] if is_start else data[5] + 1, # need to convert to inter-residue
746
+ data[6] - 1 if is_start else data[6], # need to convert to inter-residue
558
747
  params["pos"],
559
748
  is_start=is_start,
560
749
  strand=strand_to_use,
@@ -563,25 +752,25 @@ class ExonGenomicCoordsMapper:
563
752
 
564
753
  @staticmethod
565
754
  def _set_exon_offset(
566
- params: Dict, start: int, end: int, pos: int, is_start: bool, strand: int
755
+ params: Dict, start: int, end: int, pos: int, is_start: bool, strand: Strand
567
756
  ) -> None:
568
- """Set `exon_offset` in params.
757
+ """Set value for ``exon_offset`` in ``params``.
569
758
 
570
759
  :param params: Parameters for response
571
- :param start: Start exon coord (can be transcript or genomic)
572
- :param end: End exon coord (can be transcript or genomic)
760
+ :param start: Start exon coord (can be transcript or aligned genomic)
761
+ :param end: End exon coord (can be transcript or aligned genomic)
573
762
  :param pos: Position change (can be transcript or genomic)
574
- :param is_start: `True` if `pos` is start position. `False` if `pos` is end
575
- position
576
- :param int strand: Strand
763
+ :param is_start: ``True`` if ``pos`` is start position. ``False`` if ``pos`` is
764
+ end position
765
+ :param strand: Strand
577
766
  """
578
767
  if is_start:
579
- if strand == -1:
768
+ if strand == Strand.NEGATIVE:
580
769
  params["exon_offset"] = end - pos
581
770
  else:
582
771
  params["exon_offset"] = pos - end
583
772
  else:
584
- if strand == -1:
773
+ if strand == Strand.NEGATIVE:
585
774
  params["exon_offset"] = start - pos
586
775
  else:
587
776
  params["exon_offset"] = pos - start
@@ -595,21 +784,23 @@ class ExonGenomicCoordsMapper:
595
784
  :param alt_ac: Genomic accession
596
785
  :return: List of tuples containing transcript exon coordinates
597
786
  """
598
- result = list()
787
+ result = []
599
788
  tx_exons, _ = await self.uta_db.get_tx_exons(transcript, alt_ac=alt_ac)
789
+
600
790
  if not tx_exons:
601
791
  return result
792
+
602
793
  for coords in tx_exons:
603
794
  result.append((coords[0], coords[1]))
604
795
  return result
605
796
 
606
797
  @staticmethod
607
798
  def _get_exon_number(tx_exons: List, tx_pos: int) -> int:
608
- """Find exon number.
799
+ """Find related exon number for a position
609
800
 
610
- :param tx_exons: List of exon coordinates
801
+ :param tx_exons: List of exon coordinates for a transcript
611
802
  :param tx_pos: Transcript position change
612
- :return: Exon number associated to transcript position change
803
+ :return: Exon number associated to transcript position change. Will be 1-based
613
804
  """
614
805
  i = 1
615
806
  for coords in tx_exons: