cool-seq-tool 0.3.0.dev1__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- cool_seq_tool/api.py +3 -3
- cool_seq_tool/app.py +32 -11
- cool_seq_tool/data/data_downloads.py +8 -5
- cool_seq_tool/handlers/seqrepo_access.py +55 -27
- cool_seq_tool/mappers/__init__.py +4 -1
- cool_seq_tool/mappers/alignment.py +40 -37
- cool_seq_tool/mappers/exon_genomic_coords.py +329 -138
- cool_seq_tool/mappers/mane_transcript.py +402 -227
- cool_seq_tool/routers/mane.py +1 -1
- cool_seq_tool/routers/mappings.py +1 -1
- cool_seq_tool/schemas.py +31 -24
- cool_seq_tool/sources/__init__.py +4 -2
- cool_seq_tool/sources/mane_transcript_mappings.py +28 -7
- cool_seq_tool/sources/transcript_mappings.py +27 -11
- cool_seq_tool/sources/uta_database.py +179 -232
- cool_seq_tool/utils.py +22 -24
- cool_seq_tool/version.py +1 -1
- {cool_seq_tool-0.3.0.dev1.dist-info → cool_seq_tool-0.4.0.dev0.dist-info}/LICENSE +1 -1
- cool_seq_tool-0.4.0.dev0.dist-info/METADATA +130 -0
- cool_seq_tool-0.4.0.dev0.dist-info/RECORD +28 -0
- {cool_seq_tool-0.3.0.dev1.dist-info → cool_seq_tool-0.4.0.dev0.dist-info}/WHEEL +1 -1
- cool_seq_tool/data/transcript_mapping.tsv +0 -256226
- cool_seq_tool-0.3.0.dev1.dist-info/METADATA +0 -187
- cool_seq_tool-0.3.0.dev1.dist-info/RECORD +0 -29
- {cool_seq_tool-0.3.0.dev1.dist-info → cool_seq_tool-0.4.0.dev0.dist-info}/top_level.txt +0 -0
cool_seq_tool/api.py
CHANGED
@@ -24,16 +24,16 @@ def custom_openapi() -> Dict:
|
|
24
24
|
if app.openapi_schema:
|
25
25
|
return app.openapi_schema
|
26
26
|
openapi_schema = get_openapi(
|
27
|
-
title="The GenomicMedLab Cool
|
27
|
+
title="The GenomicMedLab Cool-Seq-Tool",
|
28
28
|
version=__version__,
|
29
|
-
description="Common Operations On Lots
|
29
|
+
description="Common Operations On Lots of Sequences Tool.",
|
30
30
|
routes=app.routes,
|
31
31
|
)
|
32
32
|
|
33
33
|
openapi_schema["info"]["contact"] = {
|
34
34
|
"name": "Alex H. Wagner",
|
35
35
|
"email": "Alex.Wagner@nationwidechildrens.org",
|
36
|
-
"url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",
|
36
|
+
"url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",
|
37
37
|
}
|
38
38
|
app.openapi_schema = openapi_schema
|
39
39
|
return app.openapi_schema
|
cool_seq_tool/app.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
"""
|
1
|
+
"""Provides core CoolSeqTool class, which non-redundantly initializes all Cool-Seq-Tool
|
2
|
+
data handler and mapping resources for straightforward access.
|
3
|
+
"""
|
2
4
|
import logging
|
3
5
|
from pathlib import Path
|
4
6
|
from typing import Optional
|
@@ -9,7 +11,7 @@ from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
|
9
11
|
from cool_seq_tool.mappers import (
|
10
12
|
AlignmentMapper,
|
11
13
|
ExonGenomicCoordsMapper,
|
12
|
-
|
14
|
+
ManeTranscript,
|
13
15
|
)
|
14
16
|
from cool_seq_tool.paths import (
|
15
17
|
LRG_REFSEQGENE_PATH,
|
@@ -17,15 +19,34 @@ from cool_seq_tool.paths import (
|
|
17
19
|
SEQREPO_ROOT_DIR,
|
18
20
|
TRANSCRIPT_MAPPINGS_PATH,
|
19
21
|
)
|
20
|
-
from cool_seq_tool.sources.mane_transcript_mappings import
|
22
|
+
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
21
23
|
from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
|
22
|
-
from cool_seq_tool.sources.uta_database import UTA_DB_URL,
|
24
|
+
from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
|
23
25
|
|
24
26
|
logger = logging.getLogger(__name__)
|
25
27
|
|
26
28
|
|
27
29
|
class CoolSeqTool:
|
28
|
-
"""
|
30
|
+
"""Non-redundantly initialize all Cool-Seq-Tool data resources, available under the
|
31
|
+
following attribute names:
|
32
|
+
|
33
|
+
* ``self.seqrepo_access``: :py:class:`SeqRepoAccess <cool_seq_tool.handlers.seqrepo_access.SeqRepoAccess>`
|
34
|
+
* ``self.transcript_mappings``: :py:class:`TranscriptMappings <cool_seq_tool.sources.transcript_mappings.TranscriptMappings>`
|
35
|
+
* ``self.mane_transcript_mappings``: :py:class:`ManeTranscriptMappings <cool_seq_tool.sources.mane_transcript_mappings.ManeTranscriptMappings>`
|
36
|
+
* ``self.uta_db``: :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>`
|
37
|
+
* ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
|
38
|
+
* ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
|
39
|
+
* ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
|
40
|
+
|
41
|
+
Initialization with default resource locations is straightforward:
|
42
|
+
|
43
|
+
.. code-block:: pycon
|
44
|
+
|
45
|
+
>>> from cool_seq_tool.app import CoolSeqTool
|
46
|
+
>>> cst = CoolSeqTool()
|
47
|
+
|
48
|
+
See the :ref:`configuration <configuration>` section for more information.
|
49
|
+
"""
|
29
50
|
|
30
51
|
def __init__(
|
31
52
|
self,
|
@@ -37,11 +58,11 @@ class CoolSeqTool:
|
|
37
58
|
) -> None:
|
38
59
|
"""Initialize CoolSeqTool class
|
39
60
|
|
40
|
-
:param transcript_file_path: The path to transcript_mapping.tsv
|
41
|
-
:param lrg_refseqgene_path: The path to LRG_RefSeqGene
|
61
|
+
:param transcript_file_path: The path to ``transcript_mapping.tsv``
|
62
|
+
:param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
|
42
63
|
:param mane_data_path: Path to RefSeq MANE summary data
|
43
64
|
:param db_url: PostgreSQL connection URL
|
44
|
-
Format:
|
65
|
+
Format: ``driver://user:password@host/database/schema``
|
45
66
|
:param sr: SeqRepo instance. If this is not provided, will create a new instance
|
46
67
|
"""
|
47
68
|
if not sr:
|
@@ -51,14 +72,14 @@ class CoolSeqTool:
|
|
51
72
|
transcript_file_path=transcript_file_path,
|
52
73
|
lrg_refseqgene_path=lrg_refseqgene_path,
|
53
74
|
)
|
54
|
-
self.mane_transcript_mappings =
|
75
|
+
self.mane_transcript_mappings = ManeTranscriptMappings(
|
55
76
|
mane_data_path=mane_data_path
|
56
77
|
)
|
57
|
-
self.uta_db =
|
78
|
+
self.uta_db = UtaDatabase(db_url=db_url)
|
58
79
|
self.alignment_mapper = AlignmentMapper(
|
59
80
|
self.seqrepo_access, self.transcript_mappings, self.uta_db
|
60
81
|
)
|
61
|
-
self.mane_transcript =
|
82
|
+
self.mane_transcript = ManeTranscript(
|
62
83
|
self.seqrepo_access,
|
63
84
|
self.transcript_mappings,
|
64
85
|
self.mane_transcript_mappings,
|
@@ -1,4 +1,4 @@
|
|
1
|
-
"""
|
1
|
+
"""Handle acquisition of external data."""
|
2
2
|
import datetime
|
3
3
|
import gzip
|
4
4
|
import logging
|
@@ -15,8 +15,11 @@ logger = logging.getLogger("cool_seq_tool")
|
|
15
15
|
|
16
16
|
|
17
17
|
class DataDownload:
|
18
|
-
"""
|
19
|
-
|
18
|
+
"""Manage downloadable data files. Responsible for checking if files are available
|
19
|
+
under expected locations, and fetching them if not.
|
20
|
+
|
21
|
+
Relevant methods are called automatically by data classes; users should not have
|
22
|
+
to interact with this class under normal circumstances.
|
20
23
|
"""
|
21
24
|
|
22
25
|
def __init__(self) -> None:
|
@@ -25,7 +28,7 @@ class DataDownload:
|
|
25
28
|
|
26
29
|
def get_mane_summary(self) -> Path:
|
27
30
|
"""Identify latest MANE summary data. If unavailable locally, download from
|
28
|
-
|
31
|
+
`NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/current/>`_.
|
29
32
|
|
30
33
|
:return: path to MANE summary file
|
31
34
|
"""
|
@@ -52,7 +55,7 @@ class DataDownload:
|
|
52
55
|
|
53
56
|
def get_lrg_refseq_gene_data(self) -> Path:
|
54
57
|
"""Identify latest LRG RefSeq Gene file. If unavailable locally, download from
|
55
|
-
|
58
|
+
`NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/>`_.
|
56
59
|
|
57
60
|
:return: path to acquired LRG RefSeq Gene data file
|
58
61
|
"""
|
@@ -1,4 +1,6 @@
|
|
1
|
-
"""
|
1
|
+
"""Wrap SeqRepo to provide additional lookup and identification methods on top of basic
|
2
|
+
dereferencing functions.
|
3
|
+
"""
|
2
4
|
import logging
|
3
5
|
from os import environ
|
4
6
|
from pathlib import Path
|
@@ -13,7 +15,9 @@ logger = logging.getLogger(__name__)
|
|
13
15
|
|
14
16
|
|
15
17
|
class SeqRepoAccess(SeqRepoDataProxy):
|
16
|
-
"""
|
18
|
+
"""Provide a wrapper around the base SeqRepoDataProxy class from ``VRS-Python`` to
|
19
|
+
provide additional lookup and identification methods.
|
20
|
+
"""
|
17
21
|
|
18
22
|
environ["SEQREPO_LRU_CACHE_MAXSIZE"] = "none"
|
19
23
|
|
@@ -24,25 +28,37 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
24
28
|
end: Optional[int] = None,
|
25
29
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
26
30
|
) -> Tuple[str, Optional[str]]:
|
27
|
-
"""Get reference sequence for an accession given a start and end position.
|
28
|
-
|
31
|
+
"""Get reference sequence for an accession given a start and end position. If
|
32
|
+
``start`` and ``end`` are not given, returns the entire reference sequence.
|
33
|
+
|
34
|
+
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
35
|
+
>>> from biocommons.seqrepo import SeqRepo
|
36
|
+
>>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
|
37
|
+
>>> sr.get_reference_sequence("NM_002529.3", 1, 10)[0]
|
38
|
+
'TGCAGCTGG'
|
39
|
+
>>> sr.get_reference_sequence("NP_001341538.1", 1, 10)[0]
|
40
|
+
'MAALSGGGG'
|
29
41
|
|
30
42
|
:param ac: Accession
|
31
43
|
:param start: Start pos change
|
32
|
-
:param end: End pos change. If
|
33
|
-
values, if
|
34
|
-
:param residue_mode: Residue mode for
|
44
|
+
:param end: End pos change. If ``None`` assumes both ``start`` and ``end`` have
|
45
|
+
same values, if ``start`` exists.
|
46
|
+
:param residue_mode: Residue mode for ``start`` and ``end``
|
35
47
|
:return: Sequence at position (if accession and positions actually
|
36
48
|
exist, else return empty string), warning if any
|
37
49
|
"""
|
38
|
-
if start
|
39
|
-
|
40
|
-
|
41
|
-
return "",
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
50
|
+
if start and end:
|
51
|
+
if start > end:
|
52
|
+
msg = f"start ({start}) cannot be greater than end ({end})"
|
53
|
+
return "", msg
|
54
|
+
|
55
|
+
start, end = get_inter_residue_pos(start, end, residue_mode)
|
56
|
+
if start == end:
|
57
|
+
end += 1
|
58
|
+
else:
|
59
|
+
if start is not None and residue_mode == ResidueMode.RESIDUE:
|
60
|
+
start -= 1
|
61
|
+
|
46
62
|
try:
|
47
63
|
sequence = self.sr.fetch(ac, start=start, end=end)
|
48
64
|
except KeyError:
|
@@ -53,18 +69,12 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
53
69
|
error = str(e)
|
54
70
|
if error.startswith("start out of range"):
|
55
71
|
msg = (
|
56
|
-
f"Start inter-residue coordinate ({start}) is out of "
|
57
|
-
f"index on {ac}"
|
72
|
+
f"Start inter-residue coordinate ({start}) is out of index on {ac}"
|
58
73
|
)
|
59
74
|
elif error.startswith("stop out of range"):
|
60
75
|
msg = (
|
61
76
|
f"End inter-residue coordinate ({end}) is out of " f"index on {ac}"
|
62
77
|
)
|
63
|
-
elif error.startswith("invalid coordinates") and ">" in error:
|
64
|
-
msg = (
|
65
|
-
f"Invalid inter-residue coordinates: start ({start}) "
|
66
|
-
f"cannot be greater than end ({end})"
|
67
|
-
)
|
68
78
|
else:
|
69
79
|
msg = f"{e}"
|
70
80
|
logger.warning(msg)
|
@@ -78,8 +88,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
78
88
|
if len(sequence) != expected_len_of_seq:
|
79
89
|
return (
|
80
90
|
"",
|
81
|
-
f"End inter-residue coordinate ({end})"
|
82
|
-
f" is out of index on {ac}",
|
91
|
+
f"End inter-residue coordinate ({end}) is out of index on {ac}",
|
83
92
|
)
|
84
93
|
return sequence, None
|
85
94
|
|
@@ -88,6 +97,14 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
88
97
|
) -> Tuple[List[str], Optional[str]]:
|
89
98
|
"""Return list of identifiers for accession.
|
90
99
|
|
100
|
+
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
101
|
+
>>> from biocommons.seqrepo import SeqRepo
|
102
|
+
>>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
|
103
|
+
>>> sr.translate_identifier("NM_002529.3")[0]
|
104
|
+
['MD5:18f0a6e3af9e1bbd8fef1948c7156012', 'NCBI:NM_002529.3', 'refseq:NM_002529.3', 'SEGUID:dEJQBkga9d9VeBHTyTbg6JEtTGQ', 'SHA1:74425006481af5df557811d3c936e0e8912d4c64', 'VMC:GS_RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA', 'sha512t24u:RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA', 'ga4gh:SQ.RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA']
|
105
|
+
>>> sr.translate_identifier("NM_002529.3", "ga4gh")[0]
|
106
|
+
['ga4gh:SQ.RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA']
|
107
|
+
|
91
108
|
:param ac: Identifier accession
|
92
109
|
:param target_namespace: The namespace(s) of identifier to return
|
93
110
|
:return: List of identifiers, warning
|
@@ -123,7 +140,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
123
140
|
) -> Tuple[Optional[List[str]], Optional[str]]:
|
124
141
|
"""Get accessions for a chromosome
|
125
142
|
|
126
|
-
:param
|
143
|
+
:param chromosome: Chromosome number. Must be one of 1-22, X, or Y
|
127
144
|
:return: Accessions for chromosome (ordered by latest assembly)
|
128
145
|
"""
|
129
146
|
acs = []
|
@@ -160,9 +177,20 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
160
177
|
|
161
178
|
def get_fasta_file(self, sequence_id: str, outfile_path: Path) -> None:
|
162
179
|
"""Retrieve FASTA file containing sequence for requested sequence ID.
|
163
|
-
|
180
|
+
|
181
|
+
>>> from pathlib import Path
|
182
|
+
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
183
|
+
>>> from biocommons.seqrepo import SeqRepo
|
184
|
+
>>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
|
185
|
+
>>> # write to local file tpm3.fasta:
|
186
|
+
>>> sr.get_fasta_file("NM_002529.3", Path("tpm3.fasta"))
|
187
|
+
|
188
|
+
FASTA file headers will include GA4GH sequence digest, Ensembl accession ID,
|
189
|
+
and RefSeq accession ID.
|
190
|
+
|
191
|
+
:param sequence_id: accession ID, sans namespace, e.g. ``NM_152263.3``
|
164
192
|
:param outfile_path: path to save file to
|
165
|
-
:return: None, but saves sequence data to
|
193
|
+
:return: None, but saves sequence data to ``outfile_path`` if successful
|
166
194
|
:raise KeyError: if SeqRepo doesn't have sequence data for the given ID
|
167
195
|
"""
|
168
196
|
sequence = self.get_reference_sequence(sequence_id)[0]
|
@@ -1,4 +1,7 @@
|
|
1
1
|
"""Module for mapping data"""
|
2
2
|
from .alignment import AlignmentMapper # noqa: I001
|
3
|
-
from .mane_transcript import
|
3
|
+
from .mane_transcript import ManeTranscript
|
4
4
|
from .exon_genomic_coords import ExonGenomicCoordsMapper
|
5
|
+
|
6
|
+
|
7
|
+
__all__ = ["AlignmentMapper", "ManeTranscript", "ExonGenomicCoordsMapper"]
|
@@ -5,7 +5,7 @@ from typing import Dict, Optional, Tuple
|
|
5
5
|
|
6
6
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
7
7
|
from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode
|
8
|
-
from cool_seq_tool.sources import TranscriptMappings,
|
8
|
+
from cool_seq_tool.sources import TranscriptMappings, UtaDatabase
|
9
9
|
|
10
10
|
|
11
11
|
class AlignmentMapper:
|
@@ -15,15 +15,14 @@ class AlignmentMapper:
|
|
15
15
|
self,
|
16
16
|
seqrepo_access: SeqRepoAccess,
|
17
17
|
transcript_mappings: TranscriptMappings,
|
18
|
-
uta_db:
|
18
|
+
uta_db: UtaDatabase,
|
19
19
|
) -> None:
|
20
20
|
"""Initialize the AlignmentMapper class.
|
21
21
|
|
22
|
-
:param
|
23
|
-
:param
|
24
|
-
|
25
|
-
:param
|
26
|
-
UTA database
|
22
|
+
:param seqrepo_access: Access to seqrepo queries
|
23
|
+
:param transcript_mappings: Access to transcript accession mappings and
|
24
|
+
conversions
|
25
|
+
:param uta_db: UtaDatabase instance to give access to query UTA database
|
27
26
|
"""
|
28
27
|
self.seqrepo_access = seqrepo_access
|
29
28
|
self.transcript_mappings = transcript_mappings
|
@@ -38,15 +37,16 @@ class AlignmentMapper:
|
|
38
37
|
) -> Tuple[Optional[Dict], Optional[str]]:
|
39
38
|
"""Translate protein representation to cDNA representation.
|
40
39
|
|
41
|
-
:param
|
42
|
-
:param
|
43
|
-
:param
|
44
|
-
:param
|
40
|
+
:param p_ac: Protein RefSeq accession
|
41
|
+
:param p_start_pos: Protein start position
|
42
|
+
:param p_end_pos: Protein end position
|
43
|
+
:param residue_mode: Residue mode for ``p_start_pos`` and ``p_end_pos``
|
45
44
|
:return: Tuple containing:
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
|
46
|
+
* cDNA representation (accession, codon range positions for corresponding
|
47
|
+
change, cds start site) if able to translate. Will return positions as
|
48
|
+
inter-residue coordinates. If unable to translate, returns ``None``.
|
49
|
+
* Warning, if unable to translate to cDNA representation. Else ``None``
|
50
50
|
"""
|
51
51
|
# Get cDNA accession
|
52
52
|
temp_c_ac = await self.uta_db.p_to_c_ac(p_ac)
|
@@ -86,10 +86,10 @@ class AlignmentMapper:
|
|
86
86
|
async def _get_cds_start(self, c_ac: str) -> Tuple[Optional[int], Optional[str]]:
|
87
87
|
"""Get CDS start for a given cDNA RefSeq accession
|
88
88
|
|
89
|
-
:param
|
89
|
+
:param c_ac: cDNA RefSeq accession
|
90
90
|
:return: Tuple containing:
|
91
|
-
- CDS start site if found. Else
|
92
|
-
- Warning, if unable to get CDS start. Else
|
91
|
+
- CDS start site if found. Else ``None``
|
92
|
+
- Warning, if unable to get CDS start. Else ``None``
|
93
93
|
"""
|
94
94
|
cds_start_end = await self.uta_db.get_cds_start_end(c_ac)
|
95
95
|
if not cds_start_end:
|
@@ -111,16 +111,17 @@ class AlignmentMapper:
|
|
111
111
|
) -> Tuple[Optional[Dict], Optional[str]]:
|
112
112
|
"""Translate cDNA representation to genomic representation
|
113
113
|
|
114
|
-
:param
|
115
|
-
:param
|
116
|
-
:param
|
117
|
-
:param
|
118
|
-
|
119
|
-
:param
|
114
|
+
:param c_ac: cDNA RefSeq accession
|
115
|
+
:param c_start_pos: cDNA start position for codon
|
116
|
+
:param c_end_pos: cDNA end position for codon
|
117
|
+
:param coding_start_site: Coding start site. If not provided, this will be
|
118
|
+
computed.
|
119
|
+
:param target_genome_assembly: Genome assembly to get genomic data for
|
120
120
|
:return: Tuple containing:
|
121
|
-
|
122
|
-
|
123
|
-
|
121
|
+
|
122
|
+
* Genomic representation (ac, positions) if able to translate. Will return
|
123
|
+
positions as inter-residue coordinates. Else ``None``.
|
124
|
+
* Warning, if unable to translate to genomic representation. Else ``None``
|
124
125
|
"""
|
125
126
|
if any(
|
126
127
|
(
|
@@ -212,17 +213,19 @@ class AlignmentMapper:
|
|
212
213
|
residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE,
|
213
214
|
target_genome_assembly: Assembly = Assembly.GRCH38,
|
214
215
|
) -> Tuple[Optional[Dict], Optional[str]]:
|
215
|
-
"""Translate protein representation to genomic representation
|
216
|
-
|
217
|
-
|
218
|
-
:param
|
219
|
-
:param
|
220
|
-
:param
|
221
|
-
:param
|
216
|
+
"""Translate protein representation to genomic representation, by way of
|
217
|
+
intermediary conversion into cDNA coordinates.
|
218
|
+
|
219
|
+
:param p_ac: Protein RefSeq accession
|
220
|
+
:param p_start_pos: Protein start position
|
221
|
+
:param p_end_pos: Protein end position
|
222
|
+
:param residue_mode: Residue mode for ``p_start_pos`` and ``p_end_pos``.
|
223
|
+
:param target_genome_assembly: Genome assembly to get genomic data for
|
222
224
|
:return: Tuple containing:
|
223
|
-
|
224
|
-
|
225
|
-
|
225
|
+
|
226
|
+
* Genomic representation (ac, positions) if able to translate. Will return
|
227
|
+
positions as inter-residue coordinates. Else ``None``.
|
228
|
+
* Warnings, if conversion to cDNA or genomic coordinates fails.
|
226
229
|
"""
|
227
230
|
c_data, warning = await self.p_to_c(
|
228
231
|
p_ac, p_start_pos, p_end_pos, residue_mode=residue_mode
|