cool-seq-tool 0.14.5__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/app.py +1 -0
- cool_seq_tool/mappers/exon_genomic_coords.py +27 -5
- cool_seq_tool/sources/mane_transcript_mappings.py +17 -1
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.0.dist-info}/METADATA +2 -2
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.0.dist-info}/RECORD +8 -8
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.0.dist-info}/WHEEL +0 -0
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.0.dist-info}/top_level.txt +0 -0
cool_seq_tool/app.py
CHANGED
@@ -8,12 +8,15 @@ from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
|
|
8
8
|
|
9
9
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
10
10
|
from cool_seq_tool.mappers.liftover import LiftOver
|
11
|
+
from cool_seq_tool.mappers.mane_transcript import ManeTranscript
|
11
12
|
from cool_seq_tool.schemas import (
|
13
|
+
AnnotationLayer,
|
12
14
|
Assembly,
|
13
15
|
BaseModelForbidExtra,
|
14
16
|
CoordinateType,
|
15
17
|
ServiceMeta,
|
16
18
|
Strand,
|
19
|
+
TranscriptPriority,
|
17
20
|
)
|
18
21
|
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
19
22
|
from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
|
@@ -113,6 +116,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
113
116
|
)
|
114
117
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
115
118
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
119
|
+
tx_status: TranscriptPriority | None = Field(
|
120
|
+
None, description="Transcript priority for RefSeq transcript accession"
|
121
|
+
)
|
116
122
|
strand: Strand | None = Field(
|
117
123
|
None, description="The strand that the transcript accession exists on."
|
118
124
|
)
|
@@ -144,6 +150,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
144
150
|
"gene": "TPM3",
|
145
151
|
"genomic_ac": "NC_000001.11",
|
146
152
|
"tx_ac": "NM_152263.3",
|
153
|
+
"tx_status": "longest_compatible_remaining",
|
147
154
|
"strand": -1,
|
148
155
|
"seg": {
|
149
156
|
"exon_ord": 0,
|
@@ -172,6 +179,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
172
179
|
)
|
173
180
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
174
181
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
182
|
+
tx_status: TranscriptPriority | None = Field(
|
183
|
+
None, description="Transcript priority for RefSeq transcript accession"
|
184
|
+
)
|
175
185
|
strand: Strand | None = Field(
|
176
186
|
None, description="The strand that the transcript exists on."
|
177
187
|
)
|
@@ -211,6 +221,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
211
221
|
"gene": "TPM3",
|
212
222
|
"genomic_ac": "NC_000001.11",
|
213
223
|
"tx_ac": "NM_152263.3",
|
224
|
+
"tx_status": "longest_compatible_remaining",
|
214
225
|
"strand": -1,
|
215
226
|
"seg_start": {
|
216
227
|
"exon_ord": 0,
|
@@ -264,6 +275,7 @@ class ExonGenomicCoordsMapper:
|
|
264
275
|
self,
|
265
276
|
seqrepo_access: SeqRepoAccess,
|
266
277
|
uta_db: UtaDatabase,
|
278
|
+
mane_transcript: ManeTranscript,
|
267
279
|
mane_transcript_mappings: ManeTranscriptMappings,
|
268
280
|
liftover: LiftOver,
|
269
281
|
) -> None:
|
@@ -288,11 +300,13 @@ class ExonGenomicCoordsMapper:
|
|
288
300
|
|
289
301
|
:param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
|
290
302
|
:param uta_db: UtaDatabase instance to give access to query UTA database
|
303
|
+
:param mane_transcript: ManeTranscript instance to give access to ManeTranscript class
|
291
304
|
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
|
292
305
|
:param liftover: Instance to provide mapping between human genome assemblies
|
293
306
|
"""
|
294
307
|
self.seqrepo_access = seqrepo_access
|
295
308
|
self.uta_db = uta_db
|
309
|
+
self.mane_transcript = mane_transcript
|
296
310
|
self.mane_transcript_mappings = mane_transcript_mappings
|
297
311
|
self.liftover = liftover
|
298
312
|
|
@@ -431,6 +445,7 @@ class ExonGenomicCoordsMapper:
|
|
431
445
|
gene=gene,
|
432
446
|
genomic_ac=genomic_ac,
|
433
447
|
tx_ac=transcript,
|
448
|
+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
|
434
449
|
strand=strand,
|
435
450
|
seg_start=seg_start,
|
436
451
|
seg_end=seg_end,
|
@@ -522,6 +537,7 @@ class ExonGenomicCoordsMapper:
|
|
522
537
|
params["gene"] = start_tx_seg_data.gene
|
523
538
|
params["genomic_ac"] = start_tx_seg_data.genomic_ac
|
524
539
|
params["tx_ac"] = start_tx_seg_data.tx_ac
|
540
|
+
params["tx_status"] = start_tx_seg_data.tx_status
|
525
541
|
params["strand"] = start_tx_seg_data.strand
|
526
542
|
params["seg_start"] = start_tx_seg_data.seg
|
527
543
|
else:
|
@@ -557,6 +573,7 @@ class ExonGenomicCoordsMapper:
|
|
557
573
|
params["gene"] = end_tx_seg_data.gene
|
558
574
|
params["genomic_ac"] = end_tx_seg_data.genomic_ac
|
559
575
|
params["tx_ac"] = end_tx_seg_data.tx_ac
|
576
|
+
params["tx_status"] = end_tx_seg_data.tx_status
|
560
577
|
params["strand"] = end_tx_seg_data.strand
|
561
578
|
|
562
579
|
params["seg_end"] = end_tx_seg_data.seg
|
@@ -858,14 +875,18 @@ class ExonGenomicCoordsMapper:
|
|
858
875
|
if mane_transcripts:
|
859
876
|
transcript = mane_transcripts[0]["RefSeq_nuc"]
|
860
877
|
else:
|
861
|
-
# Attempt to find
|
878
|
+
# Attempt to find longest compatible transcript if a MANE transcript
|
862
879
|
# cannot be found
|
863
|
-
results = await self.
|
864
|
-
|
880
|
+
results = await self.mane_transcript.get_longest_compatible_transcript(
|
881
|
+
start_pos=genomic_pos,
|
882
|
+
end_pos=genomic_pos,
|
883
|
+
gene=gene,
|
884
|
+
alt_ac=genomic_ac,
|
885
|
+
start_annotation_layer=AnnotationLayer.GENOMIC,
|
865
886
|
)
|
866
887
|
|
867
|
-
if
|
868
|
-
transcript = results
|
888
|
+
if results:
|
889
|
+
transcript = results.refseq
|
869
890
|
else:
|
870
891
|
# Run if gene is for a noncoding transcript
|
871
892
|
query = f"""
|
@@ -962,6 +983,7 @@ class ExonGenomicCoordsMapper:
|
|
962
983
|
gene=gene,
|
963
984
|
genomic_ac=genomic_ac,
|
964
985
|
tx_ac=transcript,
|
986
|
+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
|
965
987
|
strand=strand,
|
966
988
|
seg=TxSegment(
|
967
989
|
exon_ord=exon_num,
|
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
8
8
|
import polars as pl
|
9
9
|
|
10
10
|
from cool_seq_tool.resources.data_files import DataFile, get_data_file
|
11
|
-
from cool_seq_tool.schemas import ManeGeneData
|
11
|
+
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
|
12
12
|
|
13
13
|
_logger = logging.getLogger(__name__)
|
14
14
|
|
@@ -85,6 +85,22 @@ class ManeTranscriptMappings:
|
|
85
85
|
return []
|
86
86
|
return mane_rows.to_dicts()
|
87
87
|
|
88
|
+
def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
|
89
|
+
"""Get MANE status for a transcript
|
90
|
+
|
91
|
+
:param tx_ac: A RefSeq transcript accession
|
92
|
+
:return: A TranscriptPriority object
|
93
|
+
"""
|
94
|
+
mane_info = self.get_mane_from_transcripts([tx_ac])
|
95
|
+
if not mane_info:
|
96
|
+
return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING
|
97
|
+
mane_info = mane_info[0]["MANE_status"]
|
98
|
+
return (
|
99
|
+
TranscriptPriority.MANE_SELECT
|
100
|
+
if mane_info == "MANE Select"
|
101
|
+
else TranscriptPriority.MANE_PLUS_CLINICAL
|
102
|
+
)
|
103
|
+
|
88
104
|
def get_mane_data_from_chr_pos(
|
89
105
|
self, alt_ac: str, start: int, end: int
|
90
106
|
) -> list[dict]:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.15.0
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -50,7 +50,7 @@ Requires-Dist: agct>=0.1.0-dev1
|
|
50
50
|
Requires-Dist: polars~=1.0
|
51
51
|
Requires-Dist: biocommons.seqrepo
|
52
52
|
Requires-Dist: pydantic<3.0,>=2.0
|
53
|
-
Requires-Dist: ga4gh.vrs<3.0,>=2.1.
|
53
|
+
Requires-Dist: ga4gh.vrs<3.0,>=2.1.4
|
54
54
|
Requires-Dist: wags-tails~=0.4.0
|
55
55
|
Requires-Dist: bioutils
|
56
56
|
Provides-Extra: dev
|
@@ -1,12 +1,12 @@
|
|
1
1
|
cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
|
2
|
-
cool_seq_tool/app.py,sha256=
|
2
|
+
cool_seq_tool/app.py,sha256=ThdumeXtLNrrGkQW9wwLT3Zm_Fc1wzg88ZxLIwMzcJU,4978
|
3
3
|
cool_seq_tool/schemas.py,sha256=6c87iuA6v7BX7a8nkWEqFbJTksFysuuIeuYxkNCrAsI,5356
|
4
4
|
cool_seq_tool/utils.py,sha256=jra2ZHS7HUqXqabSvyqd5imf6kkhYL8nQd20BWNLpb8,2950
|
5
5
|
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
|
7
7
|
cool_seq_tool/mappers/__init__.py,sha256=tavpwkNogg_nF1J_kb6Q9jk7ezqdRz063v7BMZ4koLM,390
|
8
8
|
cool_seq_tool/mappers/alignment.py,sha256=kWgYssM8YL-Z13H9GdpL77P7simNcbxltAs9YDXHE54,9640
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=I59vvswLbXk1fOWLNyNd8NTVT39f5yxTCv20utlTCpo,47361
|
10
10
|
cool_seq_tool/mappers/feature_overlap.py,sha256=X5UFClaH6ixRsO2fDLxqjywp-Z0bvNx4uzgBICy394U,9758
|
11
11
|
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
12
12
|
cool_seq_tool/mappers/mane_transcript.py,sha256=IluiLBxPQoY-CxkpqpjEBcMlHvrNLa34wdKdQxtKgDY,54613
|
@@ -15,11 +15,11 @@ cool_seq_tool/resources/data_files.py,sha256=6d1M5WjeFHdTQpzxqjQ78auQRZvIBVqH8QN
|
|
15
15
|
cool_seq_tool/resources/status.py,sha256=iP-4NiSmqV-D--gypZyrSqVbOWQvyBZICKQb-VinTik,6241
|
16
16
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
17
17
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
18
|
-
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=
|
18
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=PLJymduwvG1pt9mravE58BfJsGXdAYXcZYZdHPy12z0,7211
|
19
19
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
20
20
|
cool_seq_tool/sources/uta_database.py,sha256=38CQ0QHHh0kA87tdgsJHJiHdJHQc06ylBYfemGFUlZc,36759
|
21
|
-
cool_seq_tool-0.
|
22
|
-
cool_seq_tool-0.
|
23
|
-
cool_seq_tool-0.
|
24
|
-
cool_seq_tool-0.
|
25
|
-
cool_seq_tool-0.
|
21
|
+
cool_seq_tool-0.15.0.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
22
|
+
cool_seq_tool-0.15.0.dist-info/METADATA,sha256=MEcG0vc2k8F8lQljcFgkIrabFHJec_pE7Ib6DzLZR2M,6535
|
23
|
+
cool_seq_tool-0.15.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
24
|
+
cool_seq_tool-0.15.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
25
|
+
cool_seq_tool-0.15.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|