cool-seq-tool 0.14.4__tar.gz → 0.15.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool-0.15.0/.github/CODEOWNERS +1 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/PKG-INFO +2 -2
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/pyproject.toml +1 -1
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/app.py +1 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/exon_genomic_coords.py +42 -15
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/status.py +10 -3
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/mane_transcript_mappings.py +17 -1
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/uta_database.py +27 -3
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/PKG-INFO +2 -2
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/SOURCES.txt +1 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/requires.txt +1 -1
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_exon_genomic_coords.py +58 -14
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/sources/test_mane_transcript_mappings.py +10 -1
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/sources/test_uta_database.py +98 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.coveragerc +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/ISSUE_TEMPLATE/bug-report.yaml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/ISSUE_TEMPLATE/feature-request.yaml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/checks.yaml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/pr-priority-label.yaml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/release.yml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.github/workflows/stale.yaml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.gitignore +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.pre-commit-config.yaml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/.readthedocs.yaml +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/CITATION.cff +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/LICENSE +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/README.md +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/Makefile +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/make.bat +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/_static/img/biomart.png +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/_templates/module_summary.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/changelog.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/conf.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/contributing.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/index.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/install.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/license.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/reference/index.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/transcript_selection.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/docs/source/usage.rst +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/setup.cfg +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/__init__.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/handlers/__init__.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/handlers/seqrepo_access.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/__init__.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/alignment.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/feature_overlap.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/liftover.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/mane_transcript.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/__init__.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/data_files.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/transcript_mapping.tsv +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/schemas.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/__init__.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/transcript_mappings.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/utils.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/dependency_links.txt +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/top_level.txt +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/conftest.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/handlers/test_feature_overlap.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/handlers/test_seqrepo_access.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_alignment.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_liftover.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/mappers/test_mane_transcript.py +0 -0
- {cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/test_utils.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
* @GenomicMedLab/cool-seq-tool-maintainers
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.14.4
|
3
|
+
Version: 0.15.0
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -50,7 +50,7 @@ Requires-Dist: agct>=0.1.0-dev1
|
|
50
50
|
Requires-Dist: polars~=1.0
|
51
51
|
Requires-Dist: biocommons.seqrepo
|
52
52
|
Requires-Dist: pydantic<3.0,>=2.0
|
53
|
-
Requires-Dist: ga4gh.vrs<3.0,>=2.1.
|
53
|
+
Requires-Dist: ga4gh.vrs<3.0,>=2.1.4
|
54
54
|
Requires-Dist: wags-tails~=0.4.0
|
55
55
|
Requires-Dist: bioutils
|
56
56
|
Provides-Extra: dev
|
{cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/mappers/exon_genomic_coords.py
RENAMED
@@ -2,17 +2,21 @@
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
|
5
|
+
from ga4gh.core.models import Extension
|
5
6
|
from ga4gh.vrs.models import SequenceLocation, SequenceReference
|
6
7
|
from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
|
7
8
|
|
8
9
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
9
10
|
from cool_seq_tool.mappers.liftover import LiftOver
|
11
|
+
from cool_seq_tool.mappers.mane_transcript import ManeTranscript
|
10
12
|
from cool_seq_tool.schemas import (
|
13
|
+
AnnotationLayer,
|
11
14
|
Assembly,
|
12
15
|
BaseModelForbidExtra,
|
13
16
|
CoordinateType,
|
14
17
|
ServiceMeta,
|
15
18
|
Strand,
|
19
|
+
TranscriptPriority,
|
16
20
|
)
|
17
21
|
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
18
22
|
from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
|
@@ -65,9 +69,6 @@ class TxSegment(BaseModelForbidExtra):
|
|
65
69
|
genomic_location: SequenceLocation = Field(
|
66
70
|
..., description="The genomic position of a transcript segment."
|
67
71
|
)
|
68
|
-
is_exonic: bool = Field(
|
69
|
-
default=True, description="If the position occurs on an exon"
|
70
|
-
)
|
71
72
|
|
72
73
|
@model_validator(mode="before")
|
73
74
|
def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
|
@@ -99,8 +100,8 @@ class TxSegment(BaseModelForbidExtra):
|
|
99
100
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
100
101
|
},
|
101
102
|
"end": 154192135,
|
103
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
102
104
|
},
|
103
|
-
"is_exonic": True,
|
104
105
|
}
|
105
106
|
}
|
106
107
|
)
|
@@ -115,6 +116,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
115
116
|
)
|
116
117
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
117
118
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
119
|
+
tx_status: TranscriptPriority | None = Field(
|
120
|
+
None, description="Transcript priority for RefSeq transcript accession"
|
121
|
+
)
|
118
122
|
strand: Strand | None = Field(
|
119
123
|
None, description="The strand that the transcript accession exists on."
|
120
124
|
)
|
@@ -146,6 +150,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
146
150
|
"gene": "TPM3",
|
147
151
|
"genomic_ac": "NC_000001.11",
|
148
152
|
"tx_ac": "NM_152263.3",
|
153
|
+
"tx_status": "longest_compatible_remaining",
|
149
154
|
"strand": -1,
|
150
155
|
"seg": {
|
151
156
|
"exon_ord": 0,
|
@@ -157,8 +162,8 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
157
162
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
158
163
|
},
|
159
164
|
"end": 154192135,
|
165
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
160
166
|
},
|
161
|
-
"is_exonic": True,
|
162
167
|
},
|
163
168
|
"errors": [],
|
164
169
|
}
|
@@ -174,6 +179,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
174
179
|
)
|
175
180
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
176
181
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
182
|
+
tx_status: TranscriptPriority | None = Field(
|
183
|
+
None, description="Transcript priority for RefSeq transcript accession"
|
184
|
+
)
|
177
185
|
strand: Strand | None = Field(
|
178
186
|
None, description="The strand that the transcript exists on."
|
179
187
|
)
|
@@ -213,6 +221,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
213
221
|
"gene": "TPM3",
|
214
222
|
"genomic_ac": "NC_000001.11",
|
215
223
|
"tx_ac": "NM_152263.3",
|
224
|
+
"tx_status": "longest_compatible_remaining",
|
216
225
|
"strand": -1,
|
217
226
|
"seg_start": {
|
218
227
|
"exon_ord": 0,
|
@@ -224,8 +233,8 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
224
233
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
225
234
|
},
|
226
235
|
"end": 154192135,
|
236
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
227
237
|
},
|
228
|
-
"is_exonic": True,
|
229
238
|
},
|
230
239
|
"seg_end": {
|
231
240
|
"exon_ord": 7,
|
@@ -237,8 +246,8 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
237
246
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
238
247
|
},
|
239
248
|
"start": 154170399,
|
249
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
240
250
|
},
|
241
|
-
"is_exonic": True,
|
242
251
|
},
|
243
252
|
}
|
244
253
|
}
|
@@ -266,6 +275,7 @@ class ExonGenomicCoordsMapper:
|
|
266
275
|
self,
|
267
276
|
seqrepo_access: SeqRepoAccess,
|
268
277
|
uta_db: UtaDatabase,
|
278
|
+
mane_transcript: ManeTranscript,
|
269
279
|
mane_transcript_mappings: ManeTranscriptMappings,
|
270
280
|
liftover: LiftOver,
|
271
281
|
) -> None:
|
@@ -290,11 +300,13 @@ class ExonGenomicCoordsMapper:
|
|
290
300
|
|
291
301
|
:param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
|
292
302
|
:param uta_db: UtaDatabase instance to give access to query UTA database
|
303
|
+
:param mane_transcript: ManeTranscript instance to give access to ManeTranscript class
|
293
304
|
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
|
294
305
|
:param liftover: Instance to provide mapping between human genome assemblies
|
295
306
|
"""
|
296
307
|
self.seqrepo_access = seqrepo_access
|
297
308
|
self.uta_db = uta_db
|
309
|
+
self.mane_transcript = mane_transcript
|
298
310
|
self.mane_transcript_mappings = mane_transcript_mappings
|
299
311
|
self.liftover = liftover
|
300
312
|
|
@@ -433,6 +445,7 @@ class ExonGenomicCoordsMapper:
|
|
433
445
|
gene=gene,
|
434
446
|
genomic_ac=genomic_ac,
|
435
447
|
tx_ac=transcript,
|
448
|
+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
|
436
449
|
strand=strand,
|
437
450
|
seg_start=seg_start,
|
438
451
|
seg_end=seg_end,
|
@@ -524,6 +537,7 @@ class ExonGenomicCoordsMapper:
|
|
524
537
|
params["gene"] = start_tx_seg_data.gene
|
525
538
|
params["genomic_ac"] = start_tx_seg_data.genomic_ac
|
526
539
|
params["tx_ac"] = start_tx_seg_data.tx_ac
|
540
|
+
params["tx_status"] = start_tx_seg_data.tx_status
|
527
541
|
params["strand"] = start_tx_seg_data.strand
|
528
542
|
params["seg_start"] = start_tx_seg_data.seg
|
529
543
|
else:
|
@@ -559,6 +573,7 @@ class ExonGenomicCoordsMapper:
|
|
559
573
|
params["gene"] = end_tx_seg_data.gene
|
560
574
|
params["genomic_ac"] = end_tx_seg_data.genomic_ac
|
561
575
|
params["tx_ac"] = end_tx_seg_data.tx_ac
|
576
|
+
params["tx_status"] = end_tx_seg_data.tx_status
|
562
577
|
params["strand"] = end_tx_seg_data.strand
|
563
578
|
|
564
579
|
params["seg_end"] = end_tx_seg_data.seg
|
@@ -730,7 +745,12 @@ class ExonGenomicCoordsMapper:
|
|
730
745
|
), None
|
731
746
|
|
732
747
|
def _get_vrs_seq_loc(
|
733
|
-
self,
|
748
|
+
self,
|
749
|
+
genomic_ac: str,
|
750
|
+
genomic_pos: int,
|
751
|
+
is_seg_start: bool,
|
752
|
+
strand: Strand,
|
753
|
+
is_exonic: bool = True,
|
734
754
|
) -> tuple[SequenceLocation | None, str | None]:
|
735
755
|
"""Create VRS Sequence Location for genomic position where transcript segment
|
736
756
|
occurs
|
@@ -740,6 +760,8 @@ class ExonGenomicCoordsMapper:
|
|
740
760
|
:param is_seg_start: ``True`` if ``genomic_pos`` is where the transcript segment
|
741
761
|
starts. ``False`` if ``genomic_pos`` is where the transcript segment ends.
|
742
762
|
:param strand: Strand
|
763
|
+
:param is_exonic: A boolean indicating if the genomic breakpoint occurs
|
764
|
+
on an exon. By default, this is set to ``True``.
|
743
765
|
:return: Tuple containing VRS location (if successful) and error message (if
|
744
766
|
unable to get GA4GH identifier for ``genomic_ac``).
|
745
767
|
"""
|
@@ -759,6 +781,7 @@ class ExonGenomicCoordsMapper:
|
|
759
781
|
),
|
760
782
|
start=genomic_pos if use_start else None,
|
761
783
|
end=genomic_pos if not use_start else None,
|
784
|
+
extensions=[Extension(name="is_exonic", value=is_exonic)],
|
762
785
|
), None
|
763
786
|
|
764
787
|
async def _genomic_to_tx_segment(
|
@@ -852,14 +875,18 @@ class ExonGenomicCoordsMapper:
|
|
852
875
|
if mane_transcripts:
|
853
876
|
transcript = mane_transcripts[0]["RefSeq_nuc"]
|
854
877
|
else:
|
855
|
-
# Attempt to find
|
878
|
+
# Attempt to find longest compatible transcript if a MANE transcript
|
856
879
|
# cannot be found
|
857
|
-
results = await self.
|
858
|
-
|
880
|
+
results = await self.mane_transcript.get_longest_compatible_transcript(
|
881
|
+
start_pos=genomic_pos,
|
882
|
+
end_pos=genomic_pos,
|
883
|
+
gene=gene,
|
884
|
+
alt_ac=genomic_ac,
|
885
|
+
start_annotation_layer=AnnotationLayer.GENOMIC,
|
859
886
|
)
|
860
887
|
|
861
|
-
if
|
862
|
-
transcript = results
|
888
|
+
if results:
|
889
|
+
transcript = results.refseq
|
863
890
|
else:
|
864
891
|
# Run if gene is for a noncoding transcript
|
865
892
|
query = f"""
|
@@ -947,7 +974,7 @@ class ExonGenomicCoordsMapper:
|
|
947
974
|
)
|
948
975
|
|
949
976
|
genomic_location, err_msg = self._get_vrs_seq_loc(
|
950
|
-
genomic_ac, genomic_pos, is_seg_start, strand
|
977
|
+
genomic_ac, genomic_pos, is_seg_start, strand, is_exonic
|
951
978
|
)
|
952
979
|
if err_msg:
|
953
980
|
return GenomicTxSeg(errors=[err_msg])
|
@@ -956,12 +983,12 @@ class ExonGenomicCoordsMapper:
|
|
956
983
|
gene=gene,
|
957
984
|
genomic_ac=genomic_ac,
|
958
985
|
tx_ac=transcript,
|
986
|
+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
|
959
987
|
strand=strand,
|
960
988
|
seg=TxSegment(
|
961
989
|
exon_ord=exon_num,
|
962
990
|
offset=offset,
|
963
991
|
genomic_location=genomic_location,
|
964
|
-
is_exonic=is_exonic,
|
965
992
|
),
|
966
993
|
)
|
967
994
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
import logging
|
4
4
|
from collections import namedtuple
|
5
5
|
from pathlib import Path
|
6
|
+
from urllib.parse import urlparse
|
6
7
|
|
7
8
|
from agct._core import ChainfileError
|
8
9
|
from asyncpg import InvalidCatalogNameError, UndefinedTableError
|
@@ -11,7 +12,7 @@ from biocommons.seqrepo import SeqRepo
|
|
11
12
|
from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
|
12
13
|
from cool_seq_tool.mappers.liftover import LiftOver
|
13
14
|
from cool_seq_tool.resources.data_files import DataFile, get_data_file
|
14
|
-
from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
|
15
|
+
from cool_seq_tool.sources.uta_database import UTA_DB_URL, ParseResult, UtaDatabase
|
15
16
|
|
16
17
|
_logger = logging.getLogger(__name__)
|
17
18
|
|
@@ -119,14 +120,20 @@ async def check_status(
|
|
119
120
|
else:
|
120
121
|
status["liftover"] = True
|
121
122
|
|
123
|
+
parsed_result = ParseResult(urlparse(db_url))
|
124
|
+
sanitized_url = parsed_result.sanitized_url
|
122
125
|
try:
|
123
126
|
await UtaDatabase.create(db_url)
|
127
|
+
except ValueError:
|
128
|
+
_logger.exception("Database URL is not valid")
|
124
129
|
except (OSError, InvalidCatalogNameError, UndefinedTableError):
|
125
|
-
_logger.exception(
|
130
|
+
_logger.exception(
|
131
|
+
"Encountered error instantiating UTA at URI %s", sanitized_url
|
132
|
+
)
|
126
133
|
except Exception as e:
|
127
134
|
_logger.critical(
|
128
135
|
"Encountered unexpected error instantiating UTA from URI %s: %s",
|
129
|
-
|
136
|
+
sanitized_url,
|
130
137
|
e,
|
131
138
|
)
|
132
139
|
else:
|
{cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/mane_transcript_mappings.py
RENAMED
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
8
8
|
import polars as pl
|
9
9
|
|
10
10
|
from cool_seq_tool.resources.data_files import DataFile, get_data_file
|
11
|
-
from cool_seq_tool.schemas import ManeGeneData
|
11
|
+
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
|
12
12
|
|
13
13
|
_logger = logging.getLogger(__name__)
|
14
14
|
|
@@ -85,6 +85,22 @@ class ManeTranscriptMappings:
|
|
85
85
|
return []
|
86
86
|
return mane_rows.to_dicts()
|
87
87
|
|
88
|
+
def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
|
89
|
+
"""Get MANE status for a transcript
|
90
|
+
|
91
|
+
:param tx_ac: A RefSeq transcript accession
|
92
|
+
:return: A TranscriptPriority object
|
93
|
+
"""
|
94
|
+
mane_info = self.get_mane_from_transcripts([tx_ac])
|
95
|
+
if not mane_info:
|
96
|
+
return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING
|
97
|
+
mane_info = mane_info[0]["MANE_status"]
|
98
|
+
return (
|
99
|
+
TranscriptPriority.MANE_SELECT
|
100
|
+
if mane_info == "MANE Select"
|
101
|
+
else TranscriptPriority.MANE_PLUS_CLINICAL
|
102
|
+
)
|
103
|
+
|
88
104
|
def get_mane_data_from_chr_pos(
|
89
105
|
self, alt_ac: str, start: int, end: int
|
90
106
|
) -> list[dict]:
|
@@ -5,7 +5,7 @@ import logging
|
|
5
5
|
from os import environ
|
6
6
|
from typing import Any, Literal, TypeVar
|
7
7
|
from urllib.parse import ParseResult as UrlLibParseResult
|
8
|
-
from urllib.parse import
|
8
|
+
from urllib.parse import unquote, urlparse, urlunparse
|
9
9
|
|
10
10
|
import asyncpg
|
11
11
|
import boto3
|
@@ -101,8 +101,7 @@ class UtaDatabase:
|
|
101
101
|
"""
|
102
102
|
self.schema = None
|
103
103
|
self._connection_pool = None
|
104
|
-
|
105
|
-
self.db_url = db_url.replace(original_pwd, quote(original_pwd))
|
104
|
+
self.db_url = db_url
|
106
105
|
self.args = self._get_conn_args()
|
107
106
|
|
108
107
|
def _get_conn_args(self) -> DbConnectionArgs:
|
@@ -954,3 +953,28 @@ class ParseResult(UrlLibParseResult):
|
|
954
953
|
"""Create schema property."""
|
955
954
|
path_elems = self.path.split("/")
|
956
955
|
return path_elems[2] if len(path_elems) > 2 else None
|
956
|
+
|
957
|
+
@property
|
958
|
+
def sanitized_url(self) -> str:
|
959
|
+
"""Sanitized DB URL with the password masked"""
|
960
|
+
netloc = ""
|
961
|
+
if self.username:
|
962
|
+
netloc += self.username
|
963
|
+
if self.password is not None and self.password != "":
|
964
|
+
netloc += ":***"
|
965
|
+
netloc += "@"
|
966
|
+
if self.hostname:
|
967
|
+
netloc += f"{self.hostname}"
|
968
|
+
if self.port:
|
969
|
+
netloc += f":{self.port}"
|
970
|
+
|
971
|
+
return urlunparse(
|
972
|
+
(
|
973
|
+
self.scheme,
|
974
|
+
netloc,
|
975
|
+
self.path,
|
976
|
+
self.params,
|
977
|
+
self.query,
|
978
|
+
self.fragment,
|
979
|
+
)
|
980
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.14.4
|
3
|
+
Version: 0.15.0
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -50,7 +50,7 @@ Requires-Dist: agct>=0.1.0-dev1
|
|
50
50
|
Requires-Dist: polars~=1.0
|
51
51
|
Requires-Dist: biocommons.seqrepo
|
52
52
|
Requires-Dist: pydantic<3.0,>=2.0
|
53
|
-
Requires-Dist: ga4gh.vrs<3.0,>=2.1.
|
53
|
+
Requires-Dist: ga4gh.vrs<3.0,>=2.1.4
|
54
54
|
Requires-Dist: wags-tails~=0.4.0
|
55
55
|
Requires-Dist: bioutils
|
56
56
|
Provides-Extra: dev
|
@@ -172,6 +172,7 @@ def tpm3_exon1():
|
|
172
172
|
"gene": "TPM3",
|
173
173
|
"genomic_ac": "NC_000001.11",
|
174
174
|
"tx_ac": "NM_152263.3",
|
175
|
+
"tx_status": "longest_compatible_remaining",
|
175
176
|
"strand": -1,
|
176
177
|
"seg": {
|
177
178
|
"exon_ord": 0,
|
@@ -183,8 +184,8 @@ def tpm3_exon1():
|
|
183
184
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
184
185
|
},
|
185
186
|
"end": 154192135,
|
187
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
186
188
|
},
|
187
|
-
"is_exonic": True,
|
188
189
|
},
|
189
190
|
}
|
190
191
|
return GenomicTxSeg(**params)
|
@@ -197,6 +198,7 @@ def tpm3_exon8():
|
|
197
198
|
"gene": "TPM3",
|
198
199
|
"genomic_ac": "NC_000001.11",
|
199
200
|
"tx_ac": "NM_152263.3",
|
201
|
+
"tx_status": "longest_compatible_remaining",
|
200
202
|
"strand": -1,
|
201
203
|
"seg": {
|
202
204
|
"exon_ord": 7,
|
@@ -208,8 +210,8 @@ def tpm3_exon8():
|
|
208
210
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
209
211
|
},
|
210
212
|
"start": 154170399,
|
213
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
211
214
|
},
|
212
|
-
"is_exonic": True,
|
213
215
|
},
|
214
216
|
}
|
215
217
|
return GenomicTxSeg(**params)
|
@@ -222,6 +224,7 @@ def tpm3_exon1_g(tpm3_exon1):
|
|
222
224
|
"gene": tpm3_exon1.gene,
|
223
225
|
"genomic_ac": tpm3_exon1.genomic_ac,
|
224
226
|
"tx_ac": tpm3_exon1.tx_ac,
|
227
|
+
"tx_status": tpm3_exon1.tx_status,
|
225
228
|
"strand": tpm3_exon1.strand,
|
226
229
|
"seg_start": tpm3_exon1.seg,
|
227
230
|
}
|
@@ -235,6 +238,7 @@ def tpm3_exon8_g(tpm3_exon8):
|
|
235
238
|
"gene": tpm3_exon8.gene,
|
236
239
|
"genomic_ac": tpm3_exon8.genomic_ac,
|
237
240
|
"tx_ac": tpm3_exon8.tx_ac,
|
241
|
+
"tx_status": tpm3_exon8.tx_status,
|
238
242
|
"strand": tpm3_exon8.strand,
|
239
243
|
"seg_end": tpm3_exon8.seg,
|
240
244
|
}
|
@@ -248,6 +252,7 @@ def tpm3_exon1_exon8(tpm3_exon1, tpm3_exon8):
|
|
248
252
|
"gene": tpm3_exon8.gene,
|
249
253
|
"genomic_ac": tpm3_exon8.genomic_ac,
|
250
254
|
"tx_ac": tpm3_exon8.tx_ac,
|
255
|
+
"tx_status": tpm3_exon8.tx_status,
|
251
256
|
"strand": tpm3_exon8.strand,
|
252
257
|
"seg_start": tpm3_exon1.seg,
|
253
258
|
"seg_end": tpm3_exon8.seg,
|
@@ -269,6 +274,7 @@ def tpm3_exon1_exon8_offset(tpm3_exon1, tpm3_exon8):
|
|
269
274
|
"gene": "TPM3",
|
270
275
|
"genomic_ac": "NC_000001.11",
|
271
276
|
"tx_ac": "NM_152263.3",
|
277
|
+
"tx_status": "longest_compatible_remaining",
|
272
278
|
"strand": -1,
|
273
279
|
"seg_start": tpm3_exon1_cpy.seg,
|
274
280
|
"seg_end": tpm3_exon8_cpy.seg,
|
@@ -283,6 +289,7 @@ def mane_braf():
|
|
283
289
|
"gene": "BRAF",
|
284
290
|
"genomic_ac": "NC_000007.14",
|
285
291
|
"tx_ac": "NM_004333.6",
|
292
|
+
"tx_status": "mane_select",
|
286
293
|
"strand": -1,
|
287
294
|
"seg_start": {
|
288
295
|
"exon_ord": 5,
|
@@ -294,8 +301,8 @@ def mane_braf():
|
|
294
301
|
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
|
295
302
|
},
|
296
303
|
"end": 140801559,
|
304
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
297
305
|
},
|
298
|
-
"is_exonic": True,
|
299
306
|
},
|
300
307
|
"seg_end": {
|
301
308
|
"exon_ord": 14,
|
@@ -307,8 +314,8 @@ def mane_braf():
|
|
307
314
|
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
|
308
315
|
},
|
309
316
|
"start": 140753336,
|
317
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
310
318
|
},
|
311
|
-
"is_exonic": True,
|
312
319
|
},
|
313
320
|
}
|
314
321
|
return GenomicTxSegService(**params)
|
@@ -321,6 +328,7 @@ def wee1_exon2_exon11():
|
|
321
328
|
"gene": "WEE1",
|
322
329
|
"genomic_ac": "NC_000011.10",
|
323
330
|
"tx_ac": "NM_003390.3",
|
331
|
+
"tx_status": "longest_compatible_remaining",
|
324
332
|
"strand": 1,
|
325
333
|
"seg_start": {
|
326
334
|
"exon_ord": 1,
|
@@ -332,6 +340,7 @@ def wee1_exon2_exon11():
|
|
332
340
|
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
|
333
341
|
},
|
334
342
|
"start": 9576092,
|
343
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
335
344
|
},
|
336
345
|
},
|
337
346
|
"seg_end": {
|
@@ -344,6 +353,7 @@ def wee1_exon2_exon11():
|
|
344
353
|
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
|
345
354
|
},
|
346
355
|
"end": 9588449,
|
356
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
347
357
|
},
|
348
358
|
},
|
349
359
|
}
|
@@ -357,6 +367,7 @@ def mane_wee1_exon2_exon11():
|
|
357
367
|
"gene": "WEE1",
|
358
368
|
"genomic_ac": "NC_000011.10",
|
359
369
|
"tx_ac": "NM_003390.4",
|
370
|
+
"tx_status": "mane_select",
|
360
371
|
"strand": 1,
|
361
372
|
"seg_start": {
|
362
373
|
"exon_ord": 1,
|
@@ -368,6 +379,7 @@ def mane_wee1_exon2_exon11():
|
|
368
379
|
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
|
369
380
|
},
|
370
381
|
"start": 9576092,
|
382
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
371
383
|
},
|
372
384
|
},
|
373
385
|
"seg_end": {
|
@@ -380,6 +392,7 @@ def mane_wee1_exon2_exon11():
|
|
380
392
|
"refgetAccession": "SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1",
|
381
393
|
},
|
382
394
|
"end": 9588449,
|
395
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
383
396
|
},
|
384
397
|
},
|
385
398
|
}
|
@@ -393,6 +406,7 @@ def ntrk1_exon10_exon17():
|
|
393
406
|
"gene": "NTRK1",
|
394
407
|
"genomic_ac": "NC_000001.11",
|
395
408
|
"tx_ac": "NM_002529.3",
|
409
|
+
"tx_status": "longest_compatible_remaining",
|
396
410
|
"strand": 1,
|
397
411
|
"seg_start": {
|
398
412
|
"exon_ord": 9,
|
@@ -404,6 +418,7 @@ def ntrk1_exon10_exon17():
|
|
404
418
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
405
419
|
},
|
406
420
|
"start": 156874570,
|
421
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
407
422
|
},
|
408
423
|
},
|
409
424
|
"seg_end": {
|
@@ -416,6 +431,7 @@ def ntrk1_exon10_exon17():
|
|
416
431
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
417
432
|
},
|
418
433
|
"end": 156881850,
|
434
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
419
435
|
},
|
420
436
|
},
|
421
437
|
}
|
@@ -429,6 +445,7 @@ def zbtb10_exon3_end():
|
|
429
445
|
"gene": "ZBTB10",
|
430
446
|
"genomic_ac": "NC_000008.11",
|
431
447
|
"tx_ac": "NM_001105539.3",
|
448
|
+
"tx_status": "mane_select",
|
432
449
|
"strand": 1,
|
433
450
|
"seg_start": None,
|
434
451
|
"seg_end": {
|
@@ -441,8 +458,8 @@ def zbtb10_exon3_end():
|
|
441
458
|
"refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
|
442
459
|
},
|
443
460
|
"end": 80514010,
|
461
|
+
"extensions": [{"name": "is_exonic", "value": False}],
|
444
462
|
},
|
445
|
-
"is_exonic": False,
|
446
463
|
},
|
447
464
|
}
|
448
465
|
return GenomicTxSegService(**params)
|
@@ -455,6 +472,7 @@ def zbtb10_exon5_start():
|
|
455
472
|
"gene": "ZBTB10",
|
456
473
|
"genomic_ac": "NC_000008.11",
|
457
474
|
"tx_ac": "NM_001105539.3",
|
475
|
+
"tx_status": "mane_select",
|
458
476
|
"strand": 1,
|
459
477
|
"seg_start": {
|
460
478
|
"exon_ord": 4,
|
@@ -466,8 +484,8 @@ def zbtb10_exon5_start():
|
|
466
484
|
"refgetAccession": "SQ.209Z7zJ-mFypBEWLk4rNC6S_OxY5p7bs",
|
467
485
|
},
|
468
486
|
"start": 80518580,
|
487
|
+
"extensions": [{"name": "is_exonic", "value": False}],
|
469
488
|
},
|
470
|
-
"is_exonic": False,
|
471
489
|
},
|
472
490
|
"seg_end": None,
|
473
491
|
}
|
@@ -481,6 +499,7 @@ def tpm3_exon6_end():
|
|
481
499
|
"gene": "TPM3",
|
482
500
|
"genomic_ac": "NC_000001.11",
|
483
501
|
"tx_ac": "NM_152263.4",
|
502
|
+
"tx_status": "mane_select",
|
484
503
|
"strand": -1,
|
485
504
|
"seg_start": None,
|
486
505
|
"seg_end": {
|
@@ -493,8 +512,8 @@ def tpm3_exon6_end():
|
|
493
512
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
494
513
|
},
|
495
514
|
"start": 154171410,
|
515
|
+
"extensions": [{"name": "is_exonic", "value": False}],
|
496
516
|
},
|
497
|
-
"is_exonic": False,
|
498
517
|
},
|
499
518
|
}
|
500
519
|
return GenomicTxSegService(**params)
|
@@ -507,6 +526,7 @@ def tpm3_exon5_start():
|
|
507
526
|
"gene": "TPM3",
|
508
527
|
"genomic_ac": "NC_000001.11",
|
509
528
|
"tx_ac": "NM_152263.4",
|
529
|
+
"tx_status": "mane_select",
|
510
530
|
"strand": -1,
|
511
531
|
"seg_start": {
|
512
532
|
"exon_ord": 4,
|
@@ -518,8 +538,8 @@ def tpm3_exon5_start():
|
|
518
538
|
"refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
|
519
539
|
},
|
520
540
|
"end": 154173080,
|
541
|
+
"extensions": [{"name": "is_exonic", "value": False}],
|
521
542
|
},
|
522
|
-
"is_exonic": False,
|
523
543
|
},
|
524
544
|
"seg_end": None,
|
525
545
|
}
|
@@ -533,6 +553,7 @@ def gusbp3_exon2_end():
|
|
533
553
|
"gene": "GUSBP3",
|
534
554
|
"genomic_ac": "NC_000005.10",
|
535
555
|
"tx_ac": "NR_027386.2",
|
556
|
+
"tx_status": "longest_compatible_remaining",
|
536
557
|
"strand": -1,
|
537
558
|
"seg_start": None,
|
538
559
|
"seg_end": {
|
@@ -545,8 +566,8 @@ def gusbp3_exon2_end():
|
|
545
566
|
"refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
|
546
567
|
},
|
547
568
|
"start": 69680764,
|
569
|
+
"extensions": [{"name": "is_exonic", "value": False}],
|
548
570
|
},
|
549
|
-
"is_exonic": False,
|
550
571
|
},
|
551
572
|
}
|
552
573
|
return GenomicTxSegService(**params)
|
@@ -559,6 +580,7 @@ def eln_grch38_intronic():
|
|
559
580
|
"gene": "ELN",
|
560
581
|
"genomic_ac": "NC_000007.14",
|
561
582
|
"tx_ac": "NM_000501.4",
|
583
|
+
"tx_status": "mane_select",
|
562
584
|
"strand": 1,
|
563
585
|
"seg_start": {
|
564
586
|
"exon_ord": 0,
|
@@ -570,8 +592,8 @@ def eln_grch38_intronic():
|
|
570
592
|
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
|
571
593
|
},
|
572
594
|
"start": 74028173,
|
595
|
+
"extensions": [{"name": "is_exonic", "value": True}],
|
573
596
|
},
|
574
|
-
"is_exonic": True,
|
575
597
|
},
|
576
598
|
"seg_end": {
|
577
599
|
"exon_ord": 7,
|
@@ -583,8 +605,8 @@ def eln_grch38_intronic():
|
|
583
605
|
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
|
584
606
|
},
|
585
607
|
"end": 74043599,
|
608
|
+
"extensions": [{"name": "is_exonic", "value": False}],
|
586
609
|
},
|
587
|
-
"is_exonic": False,
|
588
610
|
},
|
589
611
|
}
|
590
612
|
return GenomicTxSegService(**params)
|
@@ -597,6 +619,7 @@ def gusbp3_exon5_start():
|
|
597
619
|
"gene": "GUSBP3",
|
598
620
|
"genomic_ac": "NC_000005.10",
|
599
621
|
"tx_ac": "NR_027386.2",
|
622
|
+
"tx_status": "longest_compatible_remaining",
|
600
623
|
"strand": -1,
|
601
624
|
"seg_start": {
|
602
625
|
"exon_ord": 4,
|
@@ -608,8 +631,8 @@ def gusbp3_exon5_start():
|
|
608
631
|
"refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
|
609
632
|
},
|
610
633
|
"end": 69645878,
|
634
|
+
"extensions": [{"name": "is_exonic", "value": False}],
|
611
635
|
},
|
612
|
-
"is_exonic": False,
|
613
636
|
},
|
614
637
|
"seg_end": None,
|
615
638
|
}
|
@@ -639,6 +662,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
|
|
639
662
|
assert actual.gene == expected.gene
|
640
663
|
assert actual.genomic_ac == expected.genomic_ac
|
641
664
|
assert actual.tx_ac == expected.tx_ac
|
665
|
+
assert actual.tx_status == expected.tx_status
|
642
666
|
assert actual.strand == expected.strand
|
643
667
|
|
644
668
|
for seg_attr in ["seg_start", "seg_end"]:
|
@@ -660,13 +684,17 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
|
|
660
684
|
assert (
|
661
685
|
actual_seg.genomic_location.end == expected_seg.genomic_location.end
|
662
686
|
)
|
663
|
-
assert
|
687
|
+
assert (
|
688
|
+
actual_seg.genomic_location.extensions
|
689
|
+
== expected_seg.genomic_location.extensions
|
690
|
+
)
|
664
691
|
|
665
692
|
assert actual.errors == expected.errors
|
666
693
|
else:
|
667
694
|
assert actual.gene is None
|
668
695
|
assert actual.genomic_ac is None
|
669
696
|
assert actual.tx_ac is None
|
697
|
+
assert actual.tx_status is None
|
670
698
|
assert actual.strand is None
|
671
699
|
assert actual.seg_start is None
|
672
700
|
assert actual.seg_end is None
|
@@ -711,6 +739,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
|
|
711
739
|
assert actual.gene == expected.gene
|
712
740
|
assert actual.genomic_ac == expected.genomic_ac
|
713
741
|
assert actual.tx_ac == expected.tx_ac
|
742
|
+
assert actual.tx_status == expected.tx_status
|
714
743
|
assert actual.strand == expected.strand
|
715
744
|
|
716
745
|
expected_seg = expected.seg
|
@@ -728,13 +757,17 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
|
|
728
757
|
actual_seg.genomic_location.start == expected_seg.genomic_location.start
|
729
758
|
)
|
730
759
|
assert actual_seg.genomic_location.end == expected_seg.genomic_location.end
|
731
|
-
assert
|
760
|
+
assert (
|
761
|
+
actual_seg.genomic_location.extensions
|
762
|
+
== expected_seg.genomic_location.extensions
|
763
|
+
)
|
732
764
|
|
733
765
|
assert actual.errors == expected.errors
|
734
766
|
else:
|
735
767
|
assert actual.gene is None
|
736
768
|
assert actual.genomic_ac is None
|
737
769
|
assert actual.tx_ac is None
|
770
|
+
assert actual.tx_status is None
|
738
771
|
assert actual.strand is None
|
739
772
|
assert actual.seg is None
|
740
773
|
assert len(actual.errors) > 0
|
@@ -1038,6 +1071,17 @@ async def test_genomic_to_transcript_fusion_context(
|
|
1038
1071
|
resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
|
1039
1072
|
genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
|
1040
1073
|
|
1074
|
+
# Test case where gene does not have a MANE transcript. We are looking
|
1075
|
+
# to check that the same transcript accession is returned across runs
|
1076
|
+
inputs = {
|
1077
|
+
"genomic_ac": "NC_000001.11",
|
1078
|
+
"seg_end_genomic": 156421555,
|
1079
|
+
"gene": "MIR9-1HG",
|
1080
|
+
}
|
1081
|
+
resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
|
1082
|
+
assert resp.tx_ac == "NM_001320454.2"
|
1083
|
+
assert resp.tx_status == "longest_compatible_remaining"
|
1084
|
+
|
1041
1085
|
|
1042
1086
|
@pytest.mark.asyncio
|
1043
1087
|
async def test_get_alt_ac_start_and_end(
|
{cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/tests/sources/test_mane_transcript_mappings.py
RENAMED
@@ -5,7 +5,7 @@ from unittest.mock import patch
|
|
5
5
|
import polars as pl
|
6
6
|
import pytest
|
7
7
|
|
8
|
-
from cool_seq_tool.schemas import ManeGeneData
|
8
|
+
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
|
9
9
|
|
10
10
|
|
11
11
|
@pytest.fixture(scope="module")
|
@@ -168,6 +168,15 @@ def test_get_mane_from_transcripts(
|
|
168
168
|
assert resp == []
|
169
169
|
|
170
170
|
|
171
|
+
def test_get_transcript_status(test_mane_transcript_mappings):
|
172
|
+
"""Test that get_transcript_status works correctly"""
|
173
|
+
actual = test_mane_transcript_mappings.get_transcript_status("NM_152263.4")
|
174
|
+
assert actual == TranscriptPriority.MANE_SELECT
|
175
|
+
|
176
|
+
actual = test_mane_transcript_mappings.get_transcript_status("NM_152263.3")
|
177
|
+
assert actual == TranscriptPriority.LONGEST_COMPATIBLE_REMAINING
|
178
|
+
|
179
|
+
|
171
180
|
def test_get_mane_data_from_chr_pos(
|
172
181
|
test_mane_transcript_mappings, braf_select, braf_plus_clinical
|
173
182
|
):
|
@@ -1,11 +1,14 @@
|
|
1
1
|
"""Test UTA data source."""
|
2
2
|
|
3
|
+
from urllib.parse import urlparse
|
4
|
+
|
3
5
|
import pytest
|
4
6
|
|
5
7
|
from cool_seq_tool.schemas import Strand
|
6
8
|
from cool_seq_tool.sources.uta_database import (
|
7
9
|
GenomicTxData,
|
8
10
|
GenomicTxMetadata,
|
11
|
+
ParseResult,
|
9
12
|
TxExonAlnData,
|
10
13
|
)
|
11
14
|
|
@@ -360,3 +363,98 @@ async def test_get_mane_transcripts_from_genomic_pos(test_db):
|
|
360
363
|
# invalid ac
|
361
364
|
resp = await test_db.get_transcripts_from_genomic_pos("NC_000007.14232", 140753336)
|
362
365
|
assert resp == []
|
366
|
+
|
367
|
+
|
368
|
+
@pytest.mark.parametrize(
|
369
|
+
("raw_url", "expected"),
|
370
|
+
[
|
371
|
+
# Username + password
|
372
|
+
(
|
373
|
+
"postgresql://user:pass@localhost:5432/dbname",
|
374
|
+
{
|
375
|
+
"scheme": "postgresql",
|
376
|
+
"username": "user",
|
377
|
+
"password": "pass",
|
378
|
+
"hostname": "localhost",
|
379
|
+
"port": 5432,
|
380
|
+
"database": "dbname",
|
381
|
+
"sanitized_url": "postgresql://user:***@localhost:5432/dbname",
|
382
|
+
},
|
383
|
+
),
|
384
|
+
# Username with null password
|
385
|
+
(
|
386
|
+
"postgresql://user@localhost/dbname",
|
387
|
+
{
|
388
|
+
"scheme": "postgresql",
|
389
|
+
"username": "user",
|
390
|
+
"password": None,
|
391
|
+
"hostname": "localhost",
|
392
|
+
"port": None,
|
393
|
+
"database": "dbname",
|
394
|
+
"sanitized_url": "postgresql://user@localhost/dbname",
|
395
|
+
},
|
396
|
+
),
|
397
|
+
# Password is "0"
|
398
|
+
(
|
399
|
+
"postgresql://user:0@localhost/dbname",
|
400
|
+
{
|
401
|
+
"scheme": "postgresql",
|
402
|
+
"username": "user",
|
403
|
+
"password": "0",
|
404
|
+
"hostname": "localhost",
|
405
|
+
"port": None,
|
406
|
+
"database": "dbname",
|
407
|
+
"sanitized_url": "postgresql://user:***@localhost/dbname",
|
408
|
+
},
|
409
|
+
),
|
410
|
+
# Empty password
|
411
|
+
(
|
412
|
+
"postgresql://user:@localhost/dbname",
|
413
|
+
{
|
414
|
+
"scheme": "postgresql",
|
415
|
+
"username": "user",
|
416
|
+
"password": "",
|
417
|
+
"hostname": "localhost",
|
418
|
+
"port": None,
|
419
|
+
"database": "dbname",
|
420
|
+
"sanitized_url": "postgresql://user@localhost/dbname",
|
421
|
+
},
|
422
|
+
),
|
423
|
+
# No username
|
424
|
+
(
|
425
|
+
"postgresql://localhost:5432/dbname",
|
426
|
+
{
|
427
|
+
"scheme": "postgresql",
|
428
|
+
"username": None,
|
429
|
+
"password": None,
|
430
|
+
"hostname": "localhost",
|
431
|
+
"port": 5432,
|
432
|
+
"database": "dbname",
|
433
|
+
"sanitized_url": "postgresql://localhost:5432/dbname",
|
434
|
+
},
|
435
|
+
),
|
436
|
+
# With query params
|
437
|
+
(
|
438
|
+
"postgresql://user:secret@localhost/dbname?query#fragment",
|
439
|
+
{
|
440
|
+
"scheme": "postgresql",
|
441
|
+
"username": "user",
|
442
|
+
"password": "secret",
|
443
|
+
"hostname": "localhost",
|
444
|
+
"port": None,
|
445
|
+
"database": "dbname",
|
446
|
+
"sanitized_url": "postgresql://user:***@localhost/dbname?query#fragment",
|
447
|
+
},
|
448
|
+
),
|
449
|
+
],
|
450
|
+
)
|
451
|
+
async def test_parsed_url(raw_url, expected):
|
452
|
+
parsed_result = ParseResult(urlparse(raw_url))
|
453
|
+
|
454
|
+
assert parsed_result.scheme == expected["scheme"]
|
455
|
+
assert parsed_result.username == expected["username"]
|
456
|
+
assert parsed_result.password == expected["password"]
|
457
|
+
assert parsed_result.hostname == expected["hostname"]
|
458
|
+
assert parsed_result.port == expected["port"]
|
459
|
+
assert parsed_result.database == expected["database"]
|
460
|
+
assert parsed_result.sanitized_url == expected["sanitized_url"]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/resources/transcript_mapping.tsv
RENAMED
File without changes
|
File without changes
|
File without changes
|
{cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool/sources/transcript_mappings.py
RENAMED
File without changes
|
File without changes
|
{cool_seq_tool-0.14.4 → cool_seq_tool-0.15.0}/src/cool_seq_tool.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|