cool-seq-tool 0.14.5__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/app.py +1 -0
- cool_seq_tool/mappers/exon_genomic_coords.py +27 -5
- cool_seq_tool/mappers/liftover.py +8 -7
- cool_seq_tool/resources/status.py +2 -2
- cool_seq_tool/sources/mane_transcript_mappings.py +17 -1
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.1.dist-info}/METADATA +3 -3
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.1.dist-info}/RECORD +10 -10
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.1.dist-info}/WHEEL +0 -0
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.1.dist-info}/licenses/LICENSE +0 -0
- {cool_seq_tool-0.14.5.dist-info → cool_seq_tool-0.15.1.dist-info}/top_level.txt +0 -0
cool_seq_tool/app.py
CHANGED
(hunk not captured in this extract)
cool_seq_tool/mappers/exon_genomic_coords.py
CHANGED
|
@@ -8,12 +8,15 @@ from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
|
|
|
8
8
|
|
|
9
9
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
|
10
10
|
from cool_seq_tool.mappers.liftover import LiftOver
|
|
11
|
+
from cool_seq_tool.mappers.mane_transcript import ManeTranscript
|
|
11
12
|
from cool_seq_tool.schemas import (
|
|
13
|
+
AnnotationLayer,
|
|
12
14
|
Assembly,
|
|
13
15
|
BaseModelForbidExtra,
|
|
14
16
|
CoordinateType,
|
|
15
17
|
ServiceMeta,
|
|
16
18
|
Strand,
|
|
19
|
+
TranscriptPriority,
|
|
17
20
|
)
|
|
18
21
|
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
|
19
22
|
from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
|
|
@@ -113,6 +116,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
|
113
116
|
)
|
|
114
117
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
|
115
118
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
|
119
|
+
tx_status: TranscriptPriority | None = Field(
|
|
120
|
+
None, description="Transcript priority for RefSeq transcript accession"
|
|
121
|
+
)
|
|
116
122
|
strand: Strand | None = Field(
|
|
117
123
|
None, description="The strand that the transcript accession exists on."
|
|
118
124
|
)
|
|
@@ -144,6 +150,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
|
|
|
144
150
|
"gene": "TPM3",
|
|
145
151
|
"genomic_ac": "NC_000001.11",
|
|
146
152
|
"tx_ac": "NM_152263.3",
|
|
153
|
+
"tx_status": "longest_compatible_remaining",
|
|
147
154
|
"strand": -1,
|
|
148
155
|
"seg": {
|
|
149
156
|
"exon_ord": 0,
|
|
@@ -172,6 +179,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
|
172
179
|
)
|
|
173
180
|
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
|
|
174
181
|
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
|
|
182
|
+
tx_status: TranscriptPriority | None = Field(
|
|
183
|
+
None, description="Transcript priority for RefSeq transcript accession"
|
|
184
|
+
)
|
|
175
185
|
strand: Strand | None = Field(
|
|
176
186
|
None, description="The strand that the transcript exists on."
|
|
177
187
|
)
|
|
@@ -211,6 +221,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
|
|
|
211
221
|
"gene": "TPM3",
|
|
212
222
|
"genomic_ac": "NC_000001.11",
|
|
213
223
|
"tx_ac": "NM_152263.3",
|
|
224
|
+
"tx_status": "longest_compatible_remaining",
|
|
214
225
|
"strand": -1,
|
|
215
226
|
"seg_start": {
|
|
216
227
|
"exon_ord": 0,
|
|
@@ -264,6 +275,7 @@ class ExonGenomicCoordsMapper:
|
|
|
264
275
|
self,
|
|
265
276
|
seqrepo_access: SeqRepoAccess,
|
|
266
277
|
uta_db: UtaDatabase,
|
|
278
|
+
mane_transcript: ManeTranscript,
|
|
267
279
|
mane_transcript_mappings: ManeTranscriptMappings,
|
|
268
280
|
liftover: LiftOver,
|
|
269
281
|
) -> None:
|
|
@@ -288,11 +300,13 @@ class ExonGenomicCoordsMapper:
|
|
|
288
300
|
|
|
289
301
|
:param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
|
|
290
302
|
:param uta_db: UtaDatabase instance to give access to query UTA database
|
|
303
|
+
:param mane_transcript: ManeTranscript instance to give access to ManeTranscript class
|
|
291
304
|
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
|
|
292
305
|
:param liftover: Instance to provide mapping between human genome assemblies
|
|
293
306
|
"""
|
|
294
307
|
self.seqrepo_access = seqrepo_access
|
|
295
308
|
self.uta_db = uta_db
|
|
309
|
+
self.mane_transcript = mane_transcript
|
|
296
310
|
self.mane_transcript_mappings = mane_transcript_mappings
|
|
297
311
|
self.liftover = liftover
|
|
298
312
|
|
|
@@ -431,6 +445,7 @@ class ExonGenomicCoordsMapper:
|
|
|
431
445
|
gene=gene,
|
|
432
446
|
genomic_ac=genomic_ac,
|
|
433
447
|
tx_ac=transcript,
|
|
448
|
+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
|
|
434
449
|
strand=strand,
|
|
435
450
|
seg_start=seg_start,
|
|
436
451
|
seg_end=seg_end,
|
|
@@ -522,6 +537,7 @@ class ExonGenomicCoordsMapper:
|
|
|
522
537
|
params["gene"] = start_tx_seg_data.gene
|
|
523
538
|
params["genomic_ac"] = start_tx_seg_data.genomic_ac
|
|
524
539
|
params["tx_ac"] = start_tx_seg_data.tx_ac
|
|
540
|
+
params["tx_status"] = start_tx_seg_data.tx_status
|
|
525
541
|
params["strand"] = start_tx_seg_data.strand
|
|
526
542
|
params["seg_start"] = start_tx_seg_data.seg
|
|
527
543
|
else:
|
|
@@ -557,6 +573,7 @@ class ExonGenomicCoordsMapper:
|
|
|
557
573
|
params["gene"] = end_tx_seg_data.gene
|
|
558
574
|
params["genomic_ac"] = end_tx_seg_data.genomic_ac
|
|
559
575
|
params["tx_ac"] = end_tx_seg_data.tx_ac
|
|
576
|
+
params["tx_status"] = end_tx_seg_data.tx_status
|
|
560
577
|
params["strand"] = end_tx_seg_data.strand
|
|
561
578
|
|
|
562
579
|
params["seg_end"] = end_tx_seg_data.seg
|
|
@@ -858,14 +875,18 @@ class ExonGenomicCoordsMapper:
|
|
|
858
875
|
if mane_transcripts:
|
|
859
876
|
transcript = mane_transcripts[0]["RefSeq_nuc"]
|
|
860
877
|
else:
|
|
861
|
-
# Attempt to find
|
|
878
|
+
# Attempt to find longest compatible transcript if a MANE transcript
|
|
862
879
|
# cannot be found
|
|
863
|
-
results = await self.
|
|
864
|
-
|
|
880
|
+
results = await self.mane_transcript.get_longest_compatible_transcript(
|
|
881
|
+
start_pos=genomic_pos,
|
|
882
|
+
end_pos=genomic_pos,
|
|
883
|
+
gene=gene,
|
|
884
|
+
alt_ac=genomic_ac,
|
|
885
|
+
start_annotation_layer=AnnotationLayer.GENOMIC,
|
|
865
886
|
)
|
|
866
887
|
|
|
867
|
-
if
|
|
868
|
-
transcript = results
|
|
888
|
+
if results:
|
|
889
|
+
transcript = results.refseq
|
|
869
890
|
else:
|
|
870
891
|
# Run if gene is for a noncoding transcript
|
|
871
892
|
query = f"""
|
|
@@ -962,6 +983,7 @@ class ExonGenomicCoordsMapper:
|
|
|
962
983
|
gene=gene,
|
|
963
984
|
genomic_ac=genomic_ac,
|
|
964
985
|
tx_ac=transcript,
|
|
986
|
+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
|
|
965
987
|
strand=strand,
|
|
966
988
|
seg=TxSegment(
|
|
967
989
|
exon_ord=exon_num,
|
|
@@ -6,7 +6,8 @@ Currently only supports GRCh37 <-> GRCh38
|
|
|
6
6
|
import logging
|
|
7
7
|
from os import environ
|
|
8
8
|
|
|
9
|
-
from agct import
|
|
9
|
+
from agct import Assembly as AgctAssembly
|
|
10
|
+
from agct import Converter
|
|
10
11
|
|
|
11
12
|
from cool_seq_tool.schemas import Assembly
|
|
12
13
|
from cool_seq_tool.utils import process_chromosome_input
|
|
@@ -43,13 +44,13 @@ class LiftOver:
|
|
|
43
44
|
"""
|
|
44
45
|
self.from_37_to_38 = Converter(
|
|
45
46
|
chainfile=chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38,
|
|
46
|
-
|
|
47
|
-
|
|
47
|
+
from_assembly=AgctAssembly.HG19,
|
|
48
|
+
to_assembly=AgctAssembly.HG38,
|
|
48
49
|
)
|
|
49
50
|
self.from_38_to_37 = Converter(
|
|
50
51
|
chainfile=chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37,
|
|
51
|
-
|
|
52
|
-
|
|
52
|
+
from_assembly=AgctAssembly.HG38,
|
|
53
|
+
to_assembly=AgctAssembly.HG19,
|
|
53
54
|
)
|
|
54
55
|
|
|
55
56
|
def get_liftover(
|
|
@@ -77,9 +78,9 @@ class LiftOver:
|
|
|
77
78
|
"""
|
|
78
79
|
chromosome = process_chromosome_input(chromosome, "LiftOver.get_liftover()")
|
|
79
80
|
if liftover_to_assembly == Assembly.GRCH38:
|
|
80
|
-
liftover = self.from_37_to_38.convert_coordinate(chromosome, pos)
|
|
81
|
+
liftover = self.from_37_to_38.convert_coordinate(chromosome, pos, pos)
|
|
81
82
|
elif liftover_to_assembly == Assembly.GRCH37:
|
|
82
|
-
liftover = self.from_38_to_37.convert_coordinate(chromosome, pos)
|
|
83
|
+
liftover = self.from_38_to_37.convert_coordinate(chromosome, pos, pos)
|
|
83
84
|
else:
|
|
84
85
|
_logger.warning("%s assembly not supported", liftover_to_assembly)
|
|
85
86
|
liftover = None
|
|
@@ -114,9 +114,9 @@ async def check_status(
|
|
|
114
114
|
chain_file_38_to_37=chain_file_38_to_37,
|
|
115
115
|
)
|
|
116
116
|
except (FileNotFoundError, ChainfileError):
|
|
117
|
-
_logger.exception("agct converter setup failed")
|
|
117
|
+
_logger.exception("`agct` converter setup failed")
|
|
118
118
|
except Exception as e:
|
|
119
|
-
_logger.critical("Encountered unexpected error setting up agct: %s", e)
|
|
119
|
+
_logger.critical("Encountered unexpected error setting up `agct`: %s", e)
|
|
120
120
|
else:
|
|
121
121
|
status["liftover"] = True
|
|
122
122
|
|
|
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
import polars as pl
|
|
9
9
|
|
|
10
10
|
from cool_seq_tool.resources.data_files import DataFile, get_data_file
|
|
11
|
-
from cool_seq_tool.schemas import ManeGeneData
|
|
11
|
+
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
|
|
12
12
|
|
|
13
13
|
_logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -85,6 +85,22 @@ class ManeTranscriptMappings:
|
|
|
85
85
|
return []
|
|
86
86
|
return mane_rows.to_dicts()
|
|
87
87
|
|
|
88
|
+
def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
|
|
89
|
+
"""Get MANE status for a transcript
|
|
90
|
+
|
|
91
|
+
:param tx_ac: A RefSeq transcript accession
|
|
92
|
+
:return: A TranscriptPriority object
|
|
93
|
+
"""
|
|
94
|
+
mane_info = self.get_mane_from_transcripts([tx_ac])
|
|
95
|
+
if not mane_info:
|
|
96
|
+
return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING
|
|
97
|
+
mane_info = mane_info[0]["MANE_status"]
|
|
98
|
+
return (
|
|
99
|
+
TranscriptPriority.MANE_SELECT
|
|
100
|
+
if mane_info == "MANE Select"
|
|
101
|
+
else TranscriptPriority.MANE_PLUS_CLINICAL
|
|
102
|
+
)
|
|
103
|
+
|
|
88
104
|
def get_mane_data_from_chr_pos(
|
|
89
105
|
self, alt_ac: str, start: int, end: int
|
|
90
106
|
) -> list[dict]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cool_seq_tool
|
|
3
|
-
Version: 0.14.5
|
|
3
|
+
Version: 0.15.1
|
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
|
6
6
|
License: MIT License
|
|
@@ -46,11 +46,11 @@ Description-Content-Type: text/markdown
|
|
|
46
46
|
License-File: LICENSE
|
|
47
47
|
Requires-Dist: asyncpg
|
|
48
48
|
Requires-Dist: boto3
|
|
49
|
-
Requires-Dist: agct>=0.
|
|
49
|
+
Requires-Dist: agct>=0.2.0rc1
|
|
50
50
|
Requires-Dist: polars~=1.0
|
|
51
51
|
Requires-Dist: biocommons.seqrepo
|
|
52
52
|
Requires-Dist: pydantic<3.0,>=2.0
|
|
53
|
-
Requires-Dist: ga4gh.vrs<3.0,>=2.1.
|
|
53
|
+
Requires-Dist: ga4gh.vrs<3.0,>=2.1.4
|
|
54
54
|
Requires-Dist: wags-tails~=0.4.0
|
|
55
55
|
Requires-Dist: bioutils
|
|
56
56
|
Provides-Extra: dev
|
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
|
|
2
|
-
cool_seq_tool/app.py,sha256=
|
|
2
|
+
cool_seq_tool/app.py,sha256=ThdumeXtLNrrGkQW9wwLT3Zm_Fc1wzg88ZxLIwMzcJU,4978
|
|
3
3
|
cool_seq_tool/schemas.py,sha256=6c87iuA6v7BX7a8nkWEqFbJTksFysuuIeuYxkNCrAsI,5356
|
|
4
4
|
cool_seq_tool/utils.py,sha256=jra2ZHS7HUqXqabSvyqd5imf6kkhYL8nQd20BWNLpb8,2950
|
|
5
5
|
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
|
|
7
7
|
cool_seq_tool/mappers/__init__.py,sha256=tavpwkNogg_nF1J_kb6Q9jk7ezqdRz063v7BMZ4koLM,390
|
|
8
8
|
cool_seq_tool/mappers/alignment.py,sha256=kWgYssM8YL-Z13H9GdpL77P7simNcbxltAs9YDXHE54,9640
|
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=I59vvswLbXk1fOWLNyNd8NTVT39f5yxTCv20utlTCpo,47361
|
|
10
10
|
cool_seq_tool/mappers/feature_overlap.py,sha256=X5UFClaH6ixRsO2fDLxqjywp-Z0bvNx4uzgBICy394U,9758
|
|
11
|
-
cool_seq_tool/mappers/liftover.py,sha256=
|
|
11
|
+
cool_seq_tool/mappers/liftover.py,sha256=vQ2BcSE43ZbY4eCJJqDL6dPPE0UclK7f7NoP2WtEHuA,3459
|
|
12
12
|
cool_seq_tool/mappers/mane_transcript.py,sha256=IluiLBxPQoY-CxkpqpjEBcMlHvrNLa34wdKdQxtKgDY,54613
|
|
13
13
|
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
|
14
14
|
cool_seq_tool/resources/data_files.py,sha256=6d1M5WjeFHdTQpzxqjQ78auQRZvIBVqH8QNCrmRRDXw,4205
|
|
15
|
-
cool_seq_tool/resources/status.py,sha256=
|
|
15
|
+
cool_seq_tool/resources/status.py,sha256=W1iINKqHi_YWSbJi1c-vUIVCAnQCzNgO8A34bQcRmz0,6245
|
|
16
16
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
|
17
17
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
|
18
|
-
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=
|
|
18
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=PLJymduwvG1pt9mravE58BfJsGXdAYXcZYZdHPy12z0,7211
|
|
19
19
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
|
20
20
|
cool_seq_tool/sources/uta_database.py,sha256=38CQ0QHHh0kA87tdgsJHJiHdJHQc06ylBYfemGFUlZc,36759
|
|
21
|
-
cool_seq_tool-0.
|
|
22
|
-
cool_seq_tool-0.
|
|
23
|
-
cool_seq_tool-0.
|
|
24
|
-
cool_seq_tool-0.
|
|
25
|
-
cool_seq_tool-0.
|
|
21
|
+
cool_seq_tool-0.15.1.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
|
22
|
+
cool_seq_tool-0.15.1.dist-info/METADATA,sha256=QEHrmJL33DUq1R71GYN5mbBPOdMsCfJImYlvG3TC4U8,6533
|
|
23
|
+
cool_seq_tool-0.15.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
24
|
+
cool_seq_tool-0.15.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
|
25
|
+
cool_seq_tool-0.15.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|