cool-seq-tool 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +7 -9
- cool_seq_tool/app.py +6 -1
- cool_seq_tool/handlers/seqrepo_access.py +14 -10
- cool_seq_tool/mappers/__init__.py +2 -1
- cool_seq_tool/mappers/exon_genomic_coords.py +64 -51
- cool_seq_tool/mappers/liftover.py +90 -0
- cool_seq_tool/mappers/mane_transcript.py +124 -27
- cool_seq_tool/resources/status.py +7 -5
- cool_seq_tool/schemas.py +9 -17
- cool_seq_tool/sources/mane_transcript_mappings.py +2 -2
- cool_seq_tool/sources/uta_database.py +45 -219
- cool_seq_tool/utils.py +42 -2
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +2 -4
- cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
- cool_seq_tool/api.py +0 -41
- cool_seq_tool/routers/__init__.py +0 -17
- cool_seq_tool/routers/default.py +0 -126
- cool_seq_tool/routers/mane.py +0 -98
- cool_seq_tool/routers/mappings.py +0 -155
- cool_seq_tool/version.py +0 -3
- cool_seq_tool-0.4.1.dist-info/RECORD +0 -29
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.4.1.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
cool_seq_tool/__init__.py
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
"""The cool_seq_tool package"""
|
2
2
|
|
3
|
-
import
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
LOG_FN = "cool_seq_tool.log"
|
5
|
+
try:
|
6
|
+
__version__ = version("cool_seq_tool")
|
7
|
+
except PackageNotFoundError:
|
8
|
+
__version__ = "unknown"
|
9
|
+
finally:
|
10
|
+
del version, PackageNotFoundError
|
cool_seq_tool/app.py
CHANGED
@@ -11,13 +11,14 @@ from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAcces
|
|
11
11
|
from cool_seq_tool.mappers import (
|
12
12
|
AlignmentMapper,
|
13
13
|
ExonGenomicCoordsMapper,
|
14
|
+
LiftOver,
|
14
15
|
ManeTranscript,
|
15
16
|
)
|
16
17
|
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
17
18
|
from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
|
18
19
|
from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
|
19
20
|
|
20
|
-
|
21
|
+
_logger = logging.getLogger(__name__)
|
21
22
|
|
22
23
|
|
23
24
|
class CoolSeqTool:
|
@@ -29,6 +30,7 @@ class CoolSeqTool:
|
|
29
30
|
* ``self.mane_transcript_mappings``: :py:class:`ManeTranscriptMappings <cool_seq_tool.sources.mane_transcript_mappings.ManeTranscriptMappings>`
|
30
31
|
* ``self.uta_db``: :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>`
|
31
32
|
* ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
|
33
|
+
* ``self.liftover``: :py:class:`LiftOver <cool_seq_tool.mappers.liftover.LiftOver>`
|
32
34
|
* ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
|
33
35
|
* ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
|
34
36
|
"""
|
@@ -94,15 +96,18 @@ class CoolSeqTool:
|
|
94
96
|
self.alignment_mapper = AlignmentMapper(
|
95
97
|
self.seqrepo_access, self.transcript_mappings, self.uta_db
|
96
98
|
)
|
99
|
+
self.liftover = LiftOver()
|
97
100
|
self.mane_transcript = ManeTranscript(
|
98
101
|
self.seqrepo_access,
|
99
102
|
self.transcript_mappings,
|
100
103
|
self.mane_transcript_mappings,
|
101
104
|
self.uta_db,
|
105
|
+
self.liftover,
|
102
106
|
)
|
103
107
|
self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
|
104
108
|
self.seqrepo_access,
|
105
109
|
self.uta_db,
|
106
110
|
self.mane_transcript,
|
107
111
|
self.mane_transcript_mappings,
|
112
|
+
self.liftover,
|
108
113
|
)
|
@@ -8,10 +8,10 @@ from pathlib import Path
|
|
8
8
|
|
9
9
|
from ga4gh.vrs.dataproxy import SeqRepoDataProxy
|
10
10
|
|
11
|
-
from cool_seq_tool.schemas import ResidueMode
|
12
|
-
from cool_seq_tool.utils import get_inter_residue_pos
|
11
|
+
from cool_seq_tool.schemas import Assembly, ResidueMode
|
12
|
+
from cool_seq_tool.utils import get_inter_residue_pos, process_chromosome_input
|
13
13
|
|
14
|
-
|
14
|
+
_logger = logging.getLogger(__name__)
|
15
15
|
|
16
16
|
|
17
17
|
SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
|
@@ -66,7 +66,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
66
66
|
sequence = self.sr.fetch(ac, start=start, end=end)
|
67
67
|
except KeyError:
|
68
68
|
msg = f"Accession, {ac}, not found in SeqRepo"
|
69
|
-
|
69
|
+
_logger.warning(msg)
|
70
70
|
return "", msg
|
71
71
|
except ValueError as e:
|
72
72
|
error = str(e)
|
@@ -80,7 +80,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
80
80
|
)
|
81
81
|
else:
|
82
82
|
msg = f"{e}"
|
83
|
-
|
83
|
+
_logger.warning(msg)
|
84
84
|
return "", msg
|
85
85
|
else:
|
86
86
|
# If start is valid, but end is invalid, SeqRepo still returns
|
@@ -118,7 +118,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
118
118
|
)
|
119
119
|
except KeyError:
|
120
120
|
msg = f"SeqRepo unable to get translated identifiers for {ac}"
|
121
|
-
|
121
|
+
_logger.warning(msg)
|
122
122
|
return [], msg
|
123
123
|
else:
|
124
124
|
return ga4gh_identifiers, None
|
@@ -133,7 +133,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
133
133
|
return self.sr.translate_alias(input_str), None
|
134
134
|
except KeyError:
|
135
135
|
msg = f"SeqRepo could not translate alias {input_str}"
|
136
|
-
|
136
|
+
_logger.warning(msg)
|
137
137
|
return [], msg
|
138
138
|
|
139
139
|
def chromosome_to_acs(self, chromosome: str) -> tuple[list[str] | None, str | None]:
|
@@ -143,14 +143,18 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
143
143
|
:return: Accessions for chromosome (ordered by latest assembly)
|
144
144
|
"""
|
145
145
|
acs = []
|
146
|
-
for assembly in
|
146
|
+
for assembly in reversed(Assembly.values()):
|
147
147
|
tmp_acs, _ = self.translate_identifier(
|
148
|
-
f"{assembly}:
|
148
|
+
f"{assembly}:{process_chromosome_input(chromosome)}",
|
149
|
+
target_namespaces="refseq",
|
149
150
|
)
|
150
151
|
acs += [ac.split("refseq:")[-1] for ac in tmp_acs]
|
151
152
|
if acs:
|
152
153
|
return acs, None
|
153
|
-
return
|
154
|
+
return (
|
155
|
+
None,
|
156
|
+
f'Unable to find matching accessions for "{chromosome}" in SeqRepo.',
|
157
|
+
)
|
154
158
|
|
155
159
|
def ac_to_chromosome(self, ac: str) -> tuple[str | None, str | None]:
|
156
160
|
"""Get chromosome for accession.
|
@@ -1,8 +1,9 @@
|
|
1
1
|
"""Module for mapping data"""
|
2
2
|
|
3
3
|
from .alignment import AlignmentMapper # noqa: I001
|
4
|
+
from .liftover import LiftOver
|
4
5
|
from .mane_transcript import ManeTranscript
|
5
6
|
from .exon_genomic_coords import ExonGenomicCoordsMapper
|
6
7
|
|
7
8
|
|
8
|
-
__all__ = ["AlignmentMapper", "ManeTranscript", "ExonGenomicCoordsMapper"]
|
9
|
+
__all__ = ["AlignmentMapper", "LiftOver", "ManeTranscript", "ExonGenomicCoordsMapper"]
|
@@ -4,6 +4,7 @@ import logging
|
|
4
4
|
from typing import Literal, TypeVar
|
5
5
|
|
6
6
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
7
|
+
from cool_seq_tool.mappers.liftover import LiftOver
|
7
8
|
from cool_seq_tool.mappers.mane_transcript import CdnaRepresentation, ManeTranscript
|
8
9
|
from cool_seq_tool.schemas import (
|
9
10
|
AnnotationLayer,
|
@@ -23,7 +24,7 @@ CoordinatesResponseType = TypeVar(
|
|
23
24
|
"CoordinatesResponseType", GenomicDataResponse, TranscriptExonDataResponse
|
24
25
|
)
|
25
26
|
|
26
|
-
|
27
|
+
_logger = logging.getLogger(__name__)
|
27
28
|
|
28
29
|
|
29
30
|
class ExonGenomicCoordsMapper:
|
@@ -37,6 +38,7 @@ class ExonGenomicCoordsMapper:
|
|
37
38
|
uta_db: UtaDatabase,
|
38
39
|
mane_transcript: ManeTranscript,
|
39
40
|
mane_transcript_mappings: ManeTranscriptMappings,
|
41
|
+
liftover: LiftOver,
|
40
42
|
) -> None:
|
41
43
|
"""Initialize ExonGenomicCoordsMapper class.
|
42
44
|
|
@@ -63,25 +65,28 @@ class ExonGenomicCoordsMapper:
|
|
63
65
|
:param uta_db: UtaDatabase instance to give access to query UTA database
|
64
66
|
:param mane_transcript: Instance to align to MANE or compatible representation
|
65
67
|
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
|
68
|
+
:param liftover: Instance to provide mapping between human genome assemblies
|
66
69
|
"""
|
67
70
|
self.seqrepo_access = seqrepo_access
|
68
71
|
self.uta_db = uta_db
|
69
72
|
self.mane_transcript = mane_transcript
|
70
73
|
self.mane_transcript_mappings = mane_transcript_mappings
|
74
|
+
self.liftover = liftover
|
71
75
|
|
72
76
|
@staticmethod
|
73
77
|
def _return_warnings(
|
74
|
-
resp: CoordinatesResponseType, warning_msg: str
|
78
|
+
resp: CoordinatesResponseType, warning_msg: list[str]
|
75
79
|
) -> CoordinatesResponseType:
|
76
80
|
"""Add warnings to response object
|
77
81
|
|
78
82
|
:param resp: Response object
|
79
|
-
:param warning_msg: Warning message on why ``transcript_exon_data`` or
|
83
|
+
:param warning_msg: Warning message(s) on why ``transcript_exon_data`` or
|
80
84
|
``genomic_data`` field is ``None``
|
81
85
|
:return: Response object with warning message
|
82
86
|
"""
|
83
|
-
|
84
|
-
|
87
|
+
for msg in warning_msg:
|
88
|
+
_logger.warning(msg)
|
89
|
+
resp.warnings.append(msg)
|
85
90
|
return resp
|
86
91
|
|
87
92
|
async def transcript_to_genomic_coordinates(
|
@@ -126,42 +131,44 @@ class ExonGenomicCoordsMapper:
|
|
126
131
|
)
|
127
132
|
|
128
133
|
# Ensure valid inputs
|
134
|
+
warnings = []
|
129
135
|
if not transcript:
|
130
|
-
|
131
|
-
|
136
|
+
warnings.append("Must provide `transcript`")
|
137
|
+
else:
|
138
|
+
transcript = transcript.strip()
|
132
139
|
|
133
140
|
exon_start_exists, exon_end_exists = False, False
|
134
141
|
if exon_start is not None:
|
135
142
|
if exon_start < 1:
|
136
|
-
|
143
|
+
warnings.append("`exon_start` cannot be less than 1")
|
137
144
|
exon_start_exists = True
|
138
145
|
|
139
146
|
if exon_end is not None:
|
140
147
|
if exon_end < 1:
|
141
|
-
|
148
|
+
warnings.append("`exon_end` cannot be less than 1")
|
142
149
|
exon_end_exists = True
|
143
150
|
|
144
151
|
if not exon_start_exists and not exon_end_exists:
|
145
|
-
|
146
|
-
resp, "Must provide either `exon_start` or `exon_end`"
|
147
|
-
)
|
152
|
+
warnings.append("Must provide either `exon_start` or `exon_end`")
|
148
153
|
if exon_start_exists and exon_end_exists and (exon_start > exon_end):
|
149
|
-
|
150
|
-
|
151
|
-
f"Start exon {exon_start} is greater than end exon {exon_end}",
|
154
|
+
warnings.append(
|
155
|
+
f"Start exon {exon_start} is greater than end exon {exon_end}"
|
152
156
|
)
|
153
157
|
|
158
|
+
if warnings:
|
159
|
+
return self._return_warnings(resp, warnings)
|
160
|
+
|
154
161
|
# Get all exons and associated start/end coordinates for transcript
|
155
162
|
tx_exons, warning = await self.uta_db.get_tx_exons(transcript)
|
156
163
|
if not tx_exons:
|
157
|
-
return self._return_warnings(resp, warning
|
164
|
+
return self._return_warnings(resp, [warning] if warning else [])
|
158
165
|
|
159
166
|
# Get exon start and exon end coordinates
|
160
167
|
tx_exon_coords, warning = self.get_tx_exon_coords(
|
161
168
|
transcript, tx_exons, exon_start, exon_end
|
162
169
|
)
|
163
170
|
if not tx_exon_coords:
|
164
|
-
return self._return_warnings(resp, warning
|
171
|
+
return self._return_warnings(resp, [warning] if warning else [])
|
165
172
|
tx_exon_start_coords, tx_exon_end_coords = tx_exon_coords
|
166
173
|
|
167
174
|
if gene:
|
@@ -173,7 +180,7 @@ class ExonGenomicCoordsMapper:
|
|
173
180
|
transcript, tx_exon_start_coords, tx_exon_end_coords, gene=gene
|
174
181
|
)
|
175
182
|
if not alt_ac_start_end:
|
176
|
-
return self._return_warnings(resp, warning
|
183
|
+
return self._return_warnings(resp, [warning] if warning else [])
|
177
184
|
alt_ac_start_data, alt_ac_end_data = alt_ac_start_end
|
178
185
|
|
179
186
|
# Get gene and chromosome data, check that at least one was retrieved
|
@@ -182,8 +189,9 @@ class ExonGenomicCoordsMapper:
|
|
182
189
|
if gene is None or chromosome is None:
|
183
190
|
return self._return_warnings(
|
184
191
|
resp,
|
185
|
-
|
186
|
-
|
192
|
+
[
|
193
|
+
"Unable to retrieve `gene` or `chromosome` from genomic start and genomic end data"
|
194
|
+
],
|
187
195
|
)
|
188
196
|
|
189
197
|
g_start = alt_ac_start_data[3] - 1 if alt_ac_start_data else None
|
@@ -259,9 +267,8 @@ class ExonGenomicCoordsMapper:
|
|
259
267
|
>>> result.genomic_data.exon_start, result.genomic_data.exon_end
|
260
268
|
(1, 8)
|
261
269
|
|
262
|
-
:param chromosome:
|
263
|
-
|
264
|
-
If ``alt_ac`` is also provided, ``alt_ac`` will be used.
|
270
|
+
:param chromosome: e.g. ``"1"`` or ``"chr1"``. If not provided, must provide
|
271
|
+
``alt_ac``. If ``alt_ac`` is also provided, ``alt_ac`` will be used.
|
265
272
|
:param alt_ac: Genomic accession (i.e. ``NC_000001.11``). If not provided,
|
266
273
|
must provide ``chromosome. If ``chromosome`` is also provided, ``alt_ac``
|
267
274
|
will be used.
|
@@ -279,14 +286,23 @@ class ExonGenomicCoordsMapper:
|
|
279
286
|
breakpoint for the 3' end. For the negative strand, adjacent is defined as
|
280
287
|
the exon following the breakpoint for the 5' end and the exon preceding the
|
281
288
|
breakpoint for the 3' end.
|
289
|
+
:param gene: gene name. Ideally, HGNC symbol. Must be given if no ``transcript``
|
290
|
+
value is provided.
|
282
291
|
:param residue_mode: Residue mode for ``start`` and ``end``
|
283
292
|
:return: Genomic data (inter-residue coordinates)
|
284
293
|
"""
|
285
294
|
resp = GenomicDataResponse(
|
286
295
|
genomic_data=None, warnings=[], service_meta=service_meta()
|
287
296
|
)
|
297
|
+
warnings = []
|
288
298
|
if start is None and end is None:
|
289
|
-
|
299
|
+
warnings.append("Must provide either `start` or `end`")
|
300
|
+
if chromosome is None and alt_ac is None:
|
301
|
+
warnings.append("Must provide either `chromosome` or `alt_ac`")
|
302
|
+
if transcript is None and gene is None:
|
303
|
+
warnings.append("Must provide either `gene` or `transcript`")
|
304
|
+
if warnings:
|
305
|
+
return self._return_warnings(resp, warnings)
|
290
306
|
|
291
307
|
params = {key: None for key in GenomicData.model_fields}
|
292
308
|
if gene is not None:
|
@@ -310,7 +326,7 @@ class ExonGenomicCoordsMapper:
|
|
310
326
|
if start_data.transcript_exon_data:
|
311
327
|
start_data = start_data.transcript_exon_data.model_dump()
|
312
328
|
else:
|
313
|
-
return self._return_warnings(resp, start_data.warnings[0])
|
329
|
+
return self._return_warnings(resp, [start_data.warnings[0]])
|
314
330
|
else:
|
315
331
|
start_data = None
|
316
332
|
|
@@ -330,7 +346,7 @@ class ExonGenomicCoordsMapper:
|
|
330
346
|
if end_data.transcript_exon_data:
|
331
347
|
end_data = end_data.transcript_exon_data.model_dump()
|
332
348
|
else:
|
333
|
-
return self._return_warnings(resp, end_data.warnings[0])
|
349
|
+
return self._return_warnings(resp, [end_data.warnings[0]])
|
334
350
|
else:
|
335
351
|
end_data = None
|
336
352
|
|
@@ -341,7 +357,7 @@ class ExonGenomicCoordsMapper:
|
|
341
357
|
f"Start `{field}`, {start_data[field]}, does "
|
342
358
|
f"not match End `{field}`, {end_data[field]}"
|
343
359
|
)
|
344
|
-
return self._return_warnings(resp, msg)
|
360
|
+
return self._return_warnings(resp, [msg])
|
345
361
|
params[field] = start_data[field]
|
346
362
|
else:
|
347
363
|
params[field] = end_data[field]
|
@@ -351,7 +367,7 @@ class ExonGenomicCoordsMapper:
|
|
351
367
|
f"Input gene, {gene}, does not match expected output"
|
352
368
|
f"gene, {params['gene']}"
|
353
369
|
)
|
354
|
-
return self._return_warnings(resp, msg)
|
370
|
+
return self._return_warnings(resp, [msg])
|
355
371
|
|
356
372
|
for label, data in [("start", start_data), ("end", end_data)]:
|
357
373
|
if data:
|
@@ -436,7 +452,7 @@ class ExonGenomicCoordsMapper:
|
|
436
452
|
"""
|
437
453
|
if tx_exon_start is None and tx_exon_end is None:
|
438
454
|
msg = "Must provide either `tx_exon_start` or `tx_exon_end` or both"
|
439
|
-
|
455
|
+
_logger.warning(msg)
|
440
456
|
return None, msg
|
441
457
|
|
442
458
|
alt_ac_data = {"start": None, "end": None}
|
@@ -462,7 +478,7 @@ class ExonGenomicCoordsMapper:
|
|
462
478
|
error = "Genomic accession does not match"
|
463
479
|
else:
|
464
480
|
error = "Strand does not match"
|
465
|
-
|
481
|
+
_logger.warning(
|
466
482
|
"%s: %s != %s",
|
467
483
|
error,
|
468
484
|
alt_ac_data["start"][i],
|
@@ -510,25 +526,22 @@ class ExonGenomicCoordsMapper:
|
|
510
526
|
resp = TranscriptExonDataResponse(
|
511
527
|
transcript_exon_data=None, warnings=[], service_meta=service_meta()
|
512
528
|
)
|
513
|
-
|
514
|
-
if transcript is None and gene is None:
|
515
|
-
return self._return_warnings(
|
516
|
-
resp, "Must provide either `gene` or `transcript`"
|
517
|
-
)
|
518
|
-
|
519
529
|
params = {key: None for key in TranscriptExonData.model_fields}
|
520
530
|
|
521
531
|
if get_nearest_transcript_junction:
|
522
532
|
if not gene or not strand:
|
523
533
|
return self._return_warnings(
|
524
534
|
resp,
|
525
|
-
|
535
|
+
[
|
536
|
+
"Gene or strand must be provided to select the adjacent transcript junction"
|
537
|
+
],
|
526
538
|
)
|
527
|
-
|
539
|
+
if not alt_ac:
|
540
|
+
alt_acs, w = self.seqrepo_access.chromosome_to_acs(chromosome)
|
528
541
|
|
529
|
-
|
530
|
-
|
531
|
-
|
542
|
+
if not alt_acs:
|
543
|
+
return self._return_warnings(resp, [w])
|
544
|
+
alt_ac = alt_acs[0]
|
532
545
|
|
533
546
|
if not transcript:
|
534
547
|
# Select a transcript if not provided
|
@@ -562,14 +575,14 @@ class ExonGenomicCoordsMapper:
|
|
562
575
|
else:
|
563
576
|
return self._return_warnings(
|
564
577
|
resp,
|
565
|
-
f"Could not find a transcript for {gene} on {alt_ac}",
|
578
|
+
[f"Could not find a transcript for {gene} on {alt_ac}"],
|
566
579
|
)
|
567
580
|
|
568
581
|
tx_genomic_coords, w = await self.uta_db.get_tx_exons_genomic_coords(
|
569
582
|
tx_ac=transcript, alt_ac=alt_ac
|
570
583
|
)
|
571
584
|
if not tx_genomic_coords:
|
572
|
-
return self._return_warnings(resp, w)
|
585
|
+
return self._return_warnings(resp, [w])
|
573
586
|
|
574
587
|
# Check if breakpoint occurs on an exon.
|
575
588
|
# If not, determine the adjacent exon given the selected transcript
|
@@ -603,7 +616,7 @@ class ExonGenomicCoordsMapper:
|
|
603
616
|
# Check if valid accession is given
|
604
617
|
if not await self.uta_db.validate_genomic_ac(alt_ac):
|
605
618
|
return self._return_warnings(
|
606
|
-
resp, f"Invalid genomic accession: {alt_ac}"
|
619
|
+
resp, [f"Invalid genomic accession: {alt_ac}"]
|
607
620
|
)
|
608
621
|
|
609
622
|
genes_alt_acs, warning = await self.uta_db.get_genes_and_alt_acs(
|
@@ -626,11 +639,11 @@ class ExonGenomicCoordsMapper:
|
|
626
639
|
genes_alt_acs = None
|
627
640
|
|
628
641
|
if not genes_alt_acs:
|
629
|
-
return self._return_warnings(resp, warning)
|
642
|
+
return self._return_warnings(resp, [warning])
|
630
643
|
|
631
644
|
gene_alt_ac, warning = self._get_gene_and_alt_ac(genes_alt_acs, gene)
|
632
645
|
if not gene_alt_ac:
|
633
|
-
return self._return_warnings(resp, warning)
|
646
|
+
return self._return_warnings(resp, [warning])
|
634
647
|
gene, alt_ac = gene_alt_ac
|
635
648
|
|
636
649
|
if transcript is None:
|
@@ -638,7 +651,7 @@ class ExonGenomicCoordsMapper:
|
|
638
651
|
params, gene, alt_ac, pos, strand, is_start
|
639
652
|
)
|
640
653
|
if warnings:
|
641
|
-
return self._return_warnings(resp, warnings)
|
654
|
+
return self._return_warnings(resp, [warnings])
|
642
655
|
else:
|
643
656
|
params["transcript"] = transcript
|
644
657
|
params["gene"] = gene
|
@@ -646,7 +659,7 @@ class ExonGenomicCoordsMapper:
|
|
646
659
|
params["chr"] = alt_ac
|
647
660
|
warning = await self._set_genomic_data(params, strand, is_start)
|
648
661
|
if warning:
|
649
|
-
return self._return_warnings(resp, warning)
|
662
|
+
return self._return_warnings(resp, [warning])
|
650
663
|
|
651
664
|
resp.transcript_exon_data = TranscriptExonData(**params)
|
652
665
|
return resp
|
@@ -726,7 +739,7 @@ class ExonGenomicCoordsMapper:
|
|
726
739
|
msg = f"Unable to find mane data for {alt_ac} with position {pos}"
|
727
740
|
if gene:
|
728
741
|
msg += f" on gene {gene}"
|
729
|
-
|
742
|
+
_logger.warning(msg)
|
730
743
|
return msg
|
731
744
|
|
732
745
|
params["gene"] = mane_data.gene
|
@@ -750,7 +763,7 @@ class ExonGenomicCoordsMapper:
|
|
750
763
|
f"{params['transcript']} with position {tx_pos} "
|
751
764
|
f"does not exist on exons: {tx_exons}"
|
752
765
|
)
|
753
|
-
|
766
|
+
_logger.warning(msg)
|
754
767
|
return msg
|
755
768
|
|
756
769
|
strand_to_use = strand if strand is not None else mane_data.strand
|
@@ -805,7 +818,7 @@ class ExonGenomicCoordsMapper:
|
|
805
818
|
return f"Unable to get chromosome and assembly for " f"{params['chr']}"
|
806
819
|
|
807
820
|
chromosome_number, assembly = descr
|
808
|
-
liftover_data = self.
|
821
|
+
liftover_data = self.liftover.get_liftover(
|
809
822
|
chromosome_number, params["pos"], Assembly.GRCH38
|
810
823
|
)
|
811
824
|
if liftover_data is None:
|
@@ -0,0 +1,90 @@
|
|
1
|
+
"""Module for mapping to/from human genome assemblies.
|
2
|
+
|
3
|
+
Currently only supports GRCh37 <-> GRCh38
|
4
|
+
"""
|
5
|
+
|
6
|
+
import logging
|
7
|
+
from os import environ
|
8
|
+
|
9
|
+
from agct import Converter, Genome
|
10
|
+
|
11
|
+
from cool_seq_tool.schemas import Assembly
|
12
|
+
from cool_seq_tool.utils import process_chromosome_input
|
13
|
+
|
14
|
+
# Environment variables for paths to chain files for agct
|
15
|
+
LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38")
|
16
|
+
LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37")
|
17
|
+
|
18
|
+
|
19
|
+
_logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
class LiftOver:
    """Class for mapping to/from human genome assemblies

    Currently only supports GRCh37 <-> GRCh38. One ``agct`` ``Converter`` is built
    per direction when the class is instantiated.
    """

    def __init__(
        self,
        chain_file_37_to_38: str | None = None,
        chain_file_38_to_37: str | None = None,
    ) -> None:
        """Initialize liftover class

        :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
            This is used for ``agct``. If this is not provided, will check to see
            if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
            allow ``agct`` to download a chain file from UCSC
        :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
            This is used for ``agct``. If this is not provided, will check to see
            if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
            allow ``agct`` to download a chain file from UCSC
        """
        # An explicit argument takes precedence over the environment variable; when
        # both are unset, ``chainfile`` is passed as ``None``.
        self.from_37_to_38 = Converter(
            chainfile=chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38,
            from_db=Genome.HG19,
            to_db=Genome.HG38,
        )
        self.from_38_to_37 = Converter(
            chainfile=chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37,
            from_db=Genome.HG38,
            to_db=Genome.HG19,
        )

    def get_liftover(
        self, chromosome: str, pos: int, liftover_to_assembly: Assembly
    ) -> tuple[str, int] | None:
        """Get new genome assembly data for a position on a chromosome.

        Use a UCSC-style chromosome name:

        >>> from cool_seq_tool.mappers import LiftOver
        >>> from cool_seq_tool.schemas import Assembly
        >>> lo = LiftOver()
        >>> lo.get_liftover("chr7", 140453136, Assembly.GRCH38)
        ('chr7', 140753336)

        Chromosome names can also be NCBI-style, without prefixes:

        >>> lo.get_liftover("7", 140453136, Assembly.GRCH38)
        ('chr7', 140753336)

        :param chromosome: The chromosome number, e.g. ``"chr7"``, ``"chrX"``, ``"5"``.
        :param pos: Position on the chromosome
        :param liftover_to_assembly: Assembly to liftover to
        :return: Target chromosome and target position for assembly, or ``None`` if
            the target assembly is not supported or the conversion yields no result
        """
        chromosome = process_chromosome_input(chromosome, "LiftOver.get_liftover()")

        if liftover_to_assembly == Assembly.GRCH38:
            converter = self.from_37_to_38
        elif liftover_to_assembly == Assembly.GRCH37:
            converter = self.from_38_to_37
        else:
            # Return immediately so that only the accurate warning is logged; the
            # "does not exist" message below describes a failed conversion, which
            # was never attempted on this path.
            _logger.warning("%s assembly not supported", liftover_to_assembly)
            return None

        liftover = converter.convert_coordinate(chromosome, pos)
        if not liftover:
            _logger.warning("%s does not exist on %s", pos, chromosome)
            return None

        # Only the first conversion result is used; its first two elements are
        # returned as the (chromosome, position) pair.
        return liftover[0][:2]
|