cool-seq-tool 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +6 -0
- cool_seq_tool/app.py +1 -2
- cool_seq_tool/handlers/seqrepo_access.py +5 -5
- cool_seq_tool/mappers/alignment.py +16 -16
- cool_seq_tool/mappers/exon_genomic_coords.py +911 -667
- cool_seq_tool/mappers/mane_transcript.py +109 -104
- cool_seq_tool/schemas.py +30 -165
- cool_seq_tool/sources/uta_database.py +149 -229
- cool_seq_tool/utils.py +9 -9
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.1.dist-info}/METADATA +8 -8
- cool_seq_tool-0.7.1.dist-info/RECORD +24 -0
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.1.dist-info}/WHEEL +1 -1
- cool_seq_tool-0.6.0.dist-info/RECORD +0 -24
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.1.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.6.0.dist-info → cool_seq_tool-0.7.1.dist-info}/top_level.txt +0 -0
cool_seq_tool/__init__.py
CHANGED
@@ -8,3 +8,9 @@ except PackageNotFoundError:
|
|
8
8
|
__version__ = "unknown"
|
9
9
|
finally:
|
10
10
|
del version, PackageNotFoundError
|
11
|
+
|
12
|
+
|
13
|
+
# must import after __version__ declaration to prevent ImportError
|
14
|
+
from cool_seq_tool.app import CoolSeqTool
|
15
|
+
|
16
|
+
__all__ = ["CoolSeqTool", "__version__"]
|
cool_seq_tool/app.py
CHANGED
@@ -48,7 +48,7 @@ class CoolSeqTool:
|
|
48
48
|
|
49
49
|
Initialization with default resource locations is straightforward:
|
50
50
|
|
51
|
-
>>> from cool_seq_tool.app import CoolSeqTool
|
51
|
+
>>> from cool_seq_tool import CoolSeqTool
|
52
52
|
>>> cst = CoolSeqTool()
|
53
53
|
|
54
54
|
By default, this will attempt to fetch the latest versions of static resources,
|
@@ -107,7 +107,6 @@ class CoolSeqTool:
|
|
107
107
|
self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
|
108
108
|
self.seqrepo_access,
|
109
109
|
self.uta_db,
|
110
|
-
self.mane_transcript,
|
111
110
|
self.mane_transcript_mappings,
|
112
111
|
self.liftover,
|
113
112
|
)
|
cool_seq_tool/handlers/seqrepo_access.py
CHANGED
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
8
8
|
|
9
9
|
from ga4gh.vrs.dataproxy import SeqRepoDataProxy
|
10
10
|
|
11
|
-
from cool_seq_tool.schemas import Assembly,
|
11
|
+
from cool_seq_tool.schemas import Assembly, CoordinateType
|
12
12
|
from cool_seq_tool.utils import get_inter_residue_pos, process_chromosome_input
|
13
13
|
|
14
14
|
_logger = logging.getLogger(__name__)
|
@@ -29,7 +29,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
29
29
|
ac: str,
|
30
30
|
start: int | None = None,
|
31
31
|
end: int | None = None,
|
32
|
-
|
32
|
+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
33
33
|
) -> tuple[str, str | None]:
|
34
34
|
"""Get reference sequence for an accession given a start and end position. If
|
35
35
|
``start`` and ``end`` are not given, returns the entire reference sequence.
|
@@ -46,7 +46,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
46
46
|
:param start: Start pos change
|
47
47
|
:param end: End pos change. If ``None`` assumes both ``start`` and ``end`` have
|
48
48
|
same values, if ``start`` exists.
|
49
|
-
:param
|
49
|
+
:param coordinate_type: Coordinate type for ``start`` and ``end``
|
50
50
|
:return: Sequence at position (if accession and positions actually
|
51
51
|
exist, else return empty string), warning if any
|
52
52
|
"""
|
@@ -55,11 +55,11 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
55
55
|
msg = f"start ({start}) cannot be greater than end ({end})"
|
56
56
|
return "", msg
|
57
57
|
|
58
|
-
start, end = get_inter_residue_pos(start, end,
|
58
|
+
start, end = get_inter_residue_pos(start, end, coordinate_type)
|
59
59
|
if start == end:
|
60
60
|
end += 1
|
61
61
|
else:
|
62
|
-
if start is not None and
|
62
|
+
if start is not None and coordinate_type == CoordinateType.RESIDUE:
|
63
63
|
start -= 1
|
64
64
|
|
65
65
|
try:
|
cool_seq_tool/mappers/alignment.py
CHANGED
@@ -3,7 +3,7 @@ reference sequences.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
6
|
-
from cool_seq_tool.schemas import AnnotationLayer, Assembly,
|
6
|
+
from cool_seq_tool.schemas import AnnotationLayer, Assembly, CoordinateType
|
7
7
|
from cool_seq_tool.sources import TranscriptMappings, UtaDatabase
|
8
8
|
|
9
9
|
|
@@ -32,14 +32,14 @@ class AlignmentMapper:
|
|
32
32
|
p_ac: str,
|
33
33
|
p_start_pos: int,
|
34
34
|
p_end_pos: int,
|
35
|
-
|
35
|
+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
36
36
|
) -> tuple[dict | None, str | None]:
|
37
37
|
"""Translate protein representation to cDNA representation.
|
38
38
|
|
39
39
|
:param p_ac: Protein RefSeq accession
|
40
40
|
:param p_start_pos: Protein start position
|
41
41
|
:param p_end_pos: Protein end position
|
42
|
-
:param
|
42
|
+
:param coordinate_type: Coordinate type for ``p_start_pos`` and ``p_end_pos``
|
43
43
|
:return: Tuple containing:
|
44
44
|
|
45
45
|
* cDNA representation (accession, codon range positions for corresponding
|
@@ -66,7 +66,7 @@ class AlignmentMapper:
|
|
66
66
|
# 1 amino acid maps to 3 nucleotides in the codon
|
67
67
|
# Since we have the end of the codon, we will subtract 2 to get the start of the
|
68
68
|
# codon. We want to return inter-residue (0-based), so we subtract 1 from this.
|
69
|
-
if
|
69
|
+
if coordinate_type == CoordinateType.RESIDUE:
|
70
70
|
c_pos = (p_start_pos * 3) - 3, p_end_pos * 3
|
71
71
|
else:
|
72
72
|
if p_start_pos == p_end_pos:
|
@@ -79,7 +79,7 @@ class AlignmentMapper:
|
|
79
79
|
"c_start_pos": c_pos[0],
|
80
80
|
"c_end_pos": c_pos[1],
|
81
81
|
"cds_start": cds_start,
|
82
|
-
"
|
82
|
+
"coordinate_type": CoordinateType.INTER_RESIDUE.value,
|
83
83
|
}, None
|
84
84
|
|
85
85
|
async def _get_cds_start(self, c_ac: str) -> tuple[int | None, str | None]:
|
@@ -105,7 +105,7 @@ class AlignmentMapper:
|
|
105
105
|
c_start_pos: int,
|
106
106
|
c_end_pos: int,
|
107
107
|
cds_start: int | None = None,
|
108
|
-
|
108
|
+
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
|
109
109
|
target_genome_assembly: bool = Assembly.GRCH38,
|
110
110
|
) -> tuple[dict | None, str | None]:
|
111
111
|
"""Translate cDNA representation to genomic representation
|
@@ -125,9 +125,9 @@ class AlignmentMapper:
|
|
125
125
|
if any(
|
126
126
|
(
|
127
127
|
c_start_pos == c_end_pos,
|
128
|
-
(
|
128
|
+
(coordinate_type == CoordinateType.INTER_RESIDUE)
|
129
129
|
and ((c_end_pos - c_start_pos) % 3 != 0),
|
130
|
-
(
|
130
|
+
(coordinate_type == CoordinateType.RESIDUE)
|
131
131
|
and ((c_end_pos - (c_start_pos - 1)) % 3 != 0),
|
132
132
|
)
|
133
133
|
):
|
@@ -146,7 +146,7 @@ class AlignmentMapper:
|
|
146
146
|
return None, warning
|
147
147
|
|
148
148
|
# Change to inter-residue
|
149
|
-
if
|
149
|
+
if coordinate_type == CoordinateType.RESIDUE:
|
150
150
|
c_start_pos -= 1
|
151
151
|
|
152
152
|
# Get aligned genomic and transcript data
|
@@ -163,7 +163,7 @@ class AlignmentMapper:
|
|
163
163
|
f"position ({c_start_pos}, {c_end_pos})"
|
164
164
|
)
|
165
165
|
else:
|
166
|
-
alt_ac = genomic_tx_data
|
166
|
+
alt_ac = genomic_tx_data.alt_ac
|
167
167
|
|
168
168
|
# Validate that genomic accession assembly == target_genome_assembly
|
169
169
|
aliases, _ = self.seqrepo_access.translate_identifier(alt_ac)
|
@@ -180,7 +180,7 @@ class AlignmentMapper:
|
|
180
180
|
f"{target_genome_assembly}"
|
181
181
|
)
|
182
182
|
else:
|
183
|
-
g_pos = genomic_tx_data
|
183
|
+
g_pos = genomic_tx_data.alt_pos_change_range
|
184
184
|
|
185
185
|
# start pos should be less than end pos in response
|
186
186
|
if g_pos[0] > g_pos[1]:
|
@@ -194,7 +194,7 @@ class AlignmentMapper:
|
|
194
194
|
"g_ac": alt_ac,
|
195
195
|
"g_start_pos": g_start_pos,
|
196
196
|
"g_end_pos": g_end_pos,
|
197
|
-
"
|
197
|
+
"coordinate_type": CoordinateType.INTER_RESIDUE.value,
|
198
198
|
}
|
199
199
|
else:
|
200
200
|
warning = (
|
@@ -209,7 +209,7 @@ class AlignmentMapper:
|
|
209
209
|
p_ac: str,
|
210
210
|
p_start_pos: int,
|
211
211
|
p_end_pos: int,
|
212
|
-
|
212
|
+
coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
|
213
213
|
target_genome_assembly: Assembly = Assembly.GRCH38,
|
214
214
|
) -> tuple[dict | None, str | None]:
|
215
215
|
"""Translate protein representation to genomic representation, by way of
|
@@ -218,7 +218,7 @@ class AlignmentMapper:
|
|
218
218
|
:param p_ac: Protein RefSeq accession
|
219
219
|
:param p_start_pos: Protein start position
|
220
220
|
:param p_end_pos: Protein end position
|
221
|
-
:param
|
221
|
+
:param coordinate_type: Coordinate type for ``p_start_pos`` and ``p_end_pos``.
|
222
222
|
:param target_genome_assembly: Genome assembly to get genomic data for
|
223
223
|
:return: Tuple containing:
|
224
224
|
|
@@ -227,7 +227,7 @@ class AlignmentMapper:
|
|
227
227
|
* Warnings, if conversion to cDNA or genomic coordinates fails.
|
228
228
|
"""
|
229
229
|
c_data, warning = await self.p_to_c(
|
230
|
-
p_ac, p_start_pos, p_end_pos,
|
230
|
+
p_ac, p_start_pos, p_end_pos, coordinate_type=coordinate_type
|
231
231
|
)
|
232
232
|
if not c_data:
|
233
233
|
return None, warning
|
@@ -238,7 +238,7 @@ class AlignmentMapper:
|
|
238
238
|
c_data["c_start_pos"],
|
239
239
|
c_data["c_end_pos"],
|
240
240
|
c_data["cds_start"],
|
241
|
-
|
241
|
+
coordinate_type=CoordinateType.INTER_RESIDUE,
|
242
242
|
target_genome_assembly=target_genome_assembly,
|
243
243
|
)
|
244
244
|
return g_data, warning
|