cool-seq-tool 0.14.3__py3-none-any.whl → 0.14.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/mappers/exon_genomic_coords.py +35 -2
- cool_seq_tool/resources/status.py +10 -3
- cool_seq_tool/sources/uta_database.py +27 -3
- {cool_seq_tool-0.14.3.dist-info → cool_seq_tool-0.14.5.dist-info}/METADATA +1 -1
- {cool_seq_tool-0.14.3.dist-info → cool_seq_tool-0.14.5.dist-info}/RECORD +8 -8
- {cool_seq_tool-0.14.3.dist-info → cool_seq_tool-0.14.5.dist-info}/WHEEL +0 -0
- {cool_seq_tool-0.14.3.dist-info → cool_seq_tool-0.14.5.dist-info}/licenses/LICENSE +0 -0
- {cool_seq_tool-0.14.3.dist-info → cool_seq_tool-0.14.5.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
|
5
|
+
from ga4gh.core.models import Extension
|
5
6
|
from ga4gh.vrs.models import SequenceLocation, SequenceReference
|
6
7
|
from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
|
7
8
|
|
@@ -66,6 +67,24 @@ class TxSegment(BaseModelForbidExtra):
|
|
66
67
|
..., description="The genomic position of a transcript segment."
|
67
68
|
)
|
68
69
|
|
70
|
+
@model_validator(mode="before")
|
71
|
+
def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
|
72
|
+
"""Ensure that only one of `start` or `end` is set in the
|
73
|
+
genomic_location field
|
74
|
+
|
75
|
+
:param values: The values in the TxSegment class
|
76
|
+
:raises ValueError: If `start` and `end` are both set in
|
77
|
+
`genomic_location`
|
78
|
+
:return: Values in model
|
79
|
+
"""
|
80
|
+
loc = values.get("genomic_location")
|
81
|
+
start = getattr(loc, "start", None)
|
82
|
+
end = getattr(loc, "end", None)
|
83
|
+
if start and end:
|
84
|
+
err_msg = "Only one of `start` or `end` may be set as this describes the start or end of a transcript segment"
|
85
|
+
raise ValueError(err_msg)
|
86
|
+
return values
|
87
|
+
|
69
88
|
model_config = ConfigDict(
|
70
89
|
json_schema_extra={
|
71
90
|
"example": {
|
@@ -78,6 +97,7 @@ class TxSegment(BaseModelForbidExtra):
|
|
78
97
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
79
98
|
},
|
80
99
|
"end": 154192135,
|
100
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
81
101
|
},
|
82
102
|
}
|
83
103
|
}
|
@@ -135,6 +155,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
135
155
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
136
156
|
},
|
137
157
|
"end": 154192135,
|
158
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
138
159
|
},
|
139
160
|
},
|
140
161
|
"errors": [],
|
@@ -201,6 +222,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
201
222
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
202
223
|
},
|
203
224
|
"end": 154192135,
|
225
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
204
226
|
},
|
205
227
|
},
|
206
228
|
"seg_end": {
|
@@ -213,6 +235,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
213
235
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
214
236
|
},
|
215
237
|
"start": 154170399,
|
238
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
216
239
|
},
|
217
240
|
},
|
218
241
|
}
|
@@ -705,7 +728,12 @@ class ExonGenomicCoordsMapper:
|
|
705
728
|
), None
|
706
729
|
|
707
730
|
def _get_vrs_seq_loc(
|
708
|
-
self,
|
731
|
+
self,
|
732
|
+
genomic_ac: str,
|
733
|
+
genomic_pos: int,
|
734
|
+
is_seg_start: bool,
|
735
|
+
strand: Strand,
|
736
|
+
is_exonic: bool = True,
|
709
737
|
) -> tuple[SequenceLocation | None, str | None]:
|
710
738
|
"""Create VRS Sequence Location for genomic position where transcript segment
|
711
739
|
occurs
|
@@ -715,6 +743,8 @@ class ExonGenomicCoordsMapper:
|
|
715
743
|
:param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment
|
716
744
|
starts. ``False`` if ``genomic_pos`` is where the transcript segment ends.
|
717
745
|
:param strand: Strand
|
746
|
+
:param is_exonic: A boolean indicating if the genomic breakpoint occurs
|
747
|
+
on an exon. By default, this is set to ``True``.
|
718
748
|
:return: Tuple containing VRS location (if successful) and error message (if
|
719
749
|
unable to get GA4GH identifier for ``genomic_ac``).
|
720
750
|
"""
|
@@ -734,6 +764,7 @@ class ExonGenomicCoordsMapper:
|
|
734
764
|
),
|
735
765
|
start=genomic_pos if use_start else None,
|
736
766
|
end=genomic_pos if not use_start else None,
|
767
|
+
extensions=[Extension(name="is_exonic", value=is_exonic)],
|
737
768
|
), None
|
738
769
|
|
739
770
|
async def _genomic_to_tx_segment(
|
@@ -895,6 +926,7 @@ class ExonGenomicCoordsMapper:
|
|
895
926
|
# Check if breakpoint occurs on an exon.
|
896
927
|
# If not, determine the adjacent exon given the selected transcript
|
897
928
|
if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
|
929
|
+
is_exonic = False
|
898
930
|
exon_num = self._get_adjacent_exon(
|
899
931
|
tx_exons_genomic_coords=tx_exons,
|
900
932
|
strand=strand,
|
@@ -902,6 +934,7 @@ class ExonGenomicCoordsMapper:
|
|
902
934
|
end=genomic_pos if not is_seg_start else None,
|
903
935
|
)
|
904
936
|
else:
|
937
|
+
is_exonic = True
|
905
938
|
exon_data = await self.uta_db.get_tx_exon_aln_v_data(
|
906
939
|
transcript,
|
907
940
|
genomic_pos,
|
@@ -920,7 +953,7 @@ class ExonGenomicCoordsMapper:
|
|
920
953
|
)
|
921
954
|
|
922
955
|
genomic_location, err_msg = self._get_vrs_seq_loc(
|
923
|
-
genomic_ac, genomic_pos, is_seg_start, strand
|
956
|
+
genomic_ac, genomic_pos, is_seg_start, strand, is_exonic
|
924
957
|
)
|
925
958
|
if err_msg:
|
926
959
|
return GenomicTxSeg(errors=[err_msg])
|
@@ -3,6 +3,7 @@
|
|
3
3
|
import logging
|
4
4
|
from collections import namedtuple
|
5
5
|
from pathlib import Path
|
6
|
+
from urllib.parse import urlparse
|
6
7
|
|
7
8
|
from agct._core import ChainfileError
|
8
9
|
from asyncpg import InvalidCatalogNameError, UndefinedTableError
|
@@ -11,7 +12,7 @@ from biocommons.seqrepo import SeqRepo
|
|
11
12
|
from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
|
12
13
|
from cool_seq_tool.mappers.liftover import LiftOver
|
13
14
|
from cool_seq_tool.resources.data_files import DataFile, get_data_file
|
14
|
-
from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
|
15
|
+
from cool_seq_tool.sources.uta_database import UTA_DB_URL, ParseResult, UtaDatabase
|
15
16
|
|
16
17
|
_logger = logging.getLogger(__name__)
|
17
18
|
|
@@ -119,14 +120,20 @@ async def check_status(
|
|
119
120
|
else:
|
120
121
|
status["liftover"] = True
|
121
122
|
|
123
|
+
parsed_result = ParseResult(urlparse(db_url))
|
124
|
+
sanitized_url = parsed_result.sanitized_url
|
122
125
|
try:
|
123
126
|
await UtaDatabase.create(db_url)
|
127
|
+
except ValueError:
|
128
|
+
_logger.exception("Database URL is not valid")
|
124
129
|
except (OSError, InvalidCatalogNameError, UndefinedTableError):
|
125
|
-
_logger.exception(
|
130
|
+
_logger.exception(
|
131
|
+
"Encountered error instantiating UTA at URI %s", sanitized_url
|
132
|
+
)
|
126
133
|
except Exception as e:
|
127
134
|
_logger.critical(
|
128
135
|
"Encountered unexpected error instantiating UTA from URI %s: %s",
|
129
|
-
|
136
|
+
sanitized_url,
|
130
137
|
e,
|
131
138
|
)
|
132
139
|
else:
|
@@ -5,7 +5,7 @@ import logging
|
|
5
5
|
from os import environ
|
6
6
|
from typing import Any, Literal, TypeVar
|
7
7
|
from urllib.parse import ParseResult as UrlLibParseResult
|
8
|
-
from urllib.parse import
|
8
|
+
from urllib.parse import unquote, urlparse, urlunparse
|
9
9
|
|
10
10
|
import asyncpg
|
11
11
|
import boto3
|
@@ -101,8 +101,7 @@ class UtaDatabase:
|
|
101
101
|
"""
|
102
102
|
self.schema = None
|
103
103
|
self._connection_pool = None
|
104
|
-
|
105
|
-
self.db_url = db_url.replace(original_pwd, quote(original_pwd))
|
104
|
+
self.db_url = db_url
|
106
105
|
self.args = self._get_conn_args()
|
107
106
|
|
108
107
|
def _get_conn_args(self) -> DbConnectionArgs:
|
@@ -954,3 +953,28 @@ class ParseResult(UrlLibParseResult):
|
|
954
953
|
"""Create schema property."""
|
955
954
|
path_elems = self.path.split("/")
|
956
955
|
return path_elems[2] if len(path_elems) > 2 else None
|
956
|
+
|
957
|
+
@property
|
958
|
+
def sanitized_url(self) -> str:
|
959
|
+
"""Sanitized DB URL with the password masked"""
|
960
|
+
netloc = ""
|
961
|
+
if self.username:
|
962
|
+
netloc += self.username
|
963
|
+
if self.password is not None and self.password != "":
|
964
|
+
netloc += ":***"
|
965
|
+
netloc += "@"
|
966
|
+
if self.hostname:
|
967
|
+
netloc += f"{self.hostname}"
|
968
|
+
if self.port:
|
969
|
+
netloc += f":{self.port}"
|
970
|
+
|
971
|
+
return urlunparse(
|
972
|
+
(
|
973
|
+
self.scheme,
|
974
|
+
netloc,
|
975
|
+
self.path,
|
976
|
+
self.params,
|
977
|
+
self.query,
|
978
|
+
self.fragment,
|
979
|
+
)
|
980
|
+
)
|
@@ -6,20 +6,20 @@ cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2p
|
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
|
7
7
|
cool_seq_tool/mappers/__init__.py,sha256=tavpwkNogg_nF1J_kb6Q9jk7ezqdRz063v7BMZ4koLM,390
|
8
8
|
cool_seq_tool/mappers/alignment.py,sha256=kWgYssM8YL-Z13H9GdpL77P7simNcbxltAs9YDXHE54,9640
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=N6Wi7D8hs6gZi-BC4ICuWQEGeqUbBysqxG18EtIIgSk,46187
|
10
10
|
cool_seq_tool/mappers/feature_overlap.py,sha256=X5UFClaH6ixRsO2fDLxqjywp-Z0bvNx4uzgBICy394U,9758
|
11
11
|
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
12
12
|
cool_seq_tool/mappers/mane_transcript.py,sha256=IluiLBxPQoY-CxkpqpjEBcMlHvrNLa34wdKdQxtKgDY,54613
|
13
13
|
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
14
14
|
cool_seq_tool/resources/data_files.py,sha256=6d1M5WjeFHdTQpzxqjQ78auQRZvIBVqH8QNCrmRRDXw,4205
|
15
|
-
cool_seq_tool/resources/status.py,sha256=
|
15
|
+
cool_seq_tool/resources/status.py,sha256=iP-4NiSmqV-D--gypZyrSqVbOWQvyBZICKQb-VinTik,6241
|
16
16
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
17
17
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
18
18
|
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
|
19
19
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
20
|
-
cool_seq_tool/sources/uta_database.py,sha256=
|
21
|
-
cool_seq_tool-0.14.
|
22
|
-
cool_seq_tool-0.14.
|
23
|
-
cool_seq_tool-0.14.
|
24
|
-
cool_seq_tool-0.14.
|
25
|
-
cool_seq_tool-0.14.
|
20
|
+
cool_seq_tool/sources/uta_database.py,sha256=38CQ0QHHh0kA87tdgsJHJiHdJHQc06ylBYfemGFUlZc,36759
|
21
|
+
cool_seq_tool-0.14.5.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
22
|
+
cool_seq_tool-0.14.5.dist-info/METADATA,sha256=tgWwlBS_2Z71zUJpmO7MjDE3cNFy6GYH7W8YyJrTo0I,6535
|
23
|
+
cool_seq_tool-0.14.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
24
|
+
cool_seq_tool-0.14.5.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
25
|
+
cool_seq_tool-0.14.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|