cool-seq-tool 0.14.5__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cool_seq_tool/app.py CHANGED
@@ -107,6 +107,7 @@ class CoolSeqTool:
107
107
  self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
108
108
  self.seqrepo_access,
109
109
  self.uta_db,
110
+ self.mane_transcript,
110
111
  self.mane_transcript_mappings,
111
112
  self.liftover,
112
113
  )
@@ -8,12 +8,15 @@ from pydantic import ConfigDict, Field, StrictInt, StrictStr, model_validator
8
8
 
9
9
  from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
10
10
  from cool_seq_tool.mappers.liftover import LiftOver
11
+ from cool_seq_tool.mappers.mane_transcript import ManeTranscript
11
12
  from cool_seq_tool.schemas import (
13
+ AnnotationLayer,
12
14
  Assembly,
13
15
  BaseModelForbidExtra,
14
16
  CoordinateType,
15
17
  ServiceMeta,
16
18
  Strand,
19
+ TranscriptPriority,
17
20
  )
18
21
  from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
19
22
  from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
@@ -113,6 +116,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
113
116
  )
114
117
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
115
118
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
119
+ tx_status: TranscriptPriority | None = Field(
120
+ None, description="Transcript priority for RefSeq transcript accession"
121
+ )
116
122
  strand: Strand | None = Field(
117
123
  None, description="The strand that the transcript accession exists on."
118
124
  )
@@ -144,6 +150,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
144
150
  "gene": "TPM3",
145
151
  "genomic_ac": "NC_000001.11",
146
152
  "tx_ac": "NM_152263.3",
153
+ "tx_status": "longest_compatible_remaining",
147
154
  "strand": -1,
148
155
  "seg": {
149
156
  "exon_ord": 0,
@@ -172,6 +179,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
172
179
  )
173
180
  genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
174
181
  tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
182
+ tx_status: TranscriptPriority | None = Field(
183
+ None, description="Transcript priority for RefSeq transcript accession"
184
+ )
175
185
  strand: Strand | None = Field(
176
186
  None, description="The strand that the transcript exists on."
177
187
  )
@@ -211,6 +221,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
211
221
  "gene": "TPM3",
212
222
  "genomic_ac": "NC_000001.11",
213
223
  "tx_ac": "NM_152263.3",
224
+ "tx_status": "longest_compatible_remaining",
214
225
  "strand": -1,
215
226
  "seg_start": {
216
227
  "exon_ord": 0,
@@ -264,6 +275,7 @@ class ExonGenomicCoordsMapper:
264
275
  self,
265
276
  seqrepo_access: SeqRepoAccess,
266
277
  uta_db: UtaDatabase,
278
+ mane_transcript: ManeTranscript,
267
279
  mane_transcript_mappings: ManeTranscriptMappings,
268
280
  liftover: LiftOver,
269
281
  ) -> None:
@@ -288,11 +300,13 @@ class ExonGenomicCoordsMapper:
288
300
 
289
301
  :param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
290
302
  :param uta_db: UtaDatabase instance to give access to query UTA database
303
+ :param mane_transcript: ManeTranscript instance to give access to ManeTranscript class
291
304
  :param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
292
305
  :param liftover: Instance to provide mapping between human genome assemblies
293
306
  """
294
307
  self.seqrepo_access = seqrepo_access
295
308
  self.uta_db = uta_db
309
+ self.mane_transcript = mane_transcript
296
310
  self.mane_transcript_mappings = mane_transcript_mappings
297
311
  self.liftover = liftover
298
312
 
@@ -431,6 +445,7 @@ class ExonGenomicCoordsMapper:
431
445
  gene=gene,
432
446
  genomic_ac=genomic_ac,
433
447
  tx_ac=transcript,
448
+ tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
434
449
  strand=strand,
435
450
  seg_start=seg_start,
436
451
  seg_end=seg_end,
@@ -522,6 +537,7 @@ class ExonGenomicCoordsMapper:
522
537
  params["gene"] = start_tx_seg_data.gene
523
538
  params["genomic_ac"] = start_tx_seg_data.genomic_ac
524
539
  params["tx_ac"] = start_tx_seg_data.tx_ac
540
+ params["tx_status"] = start_tx_seg_data.tx_status
525
541
  params["strand"] = start_tx_seg_data.strand
526
542
  params["seg_start"] = start_tx_seg_data.seg
527
543
  else:
@@ -557,6 +573,7 @@ class ExonGenomicCoordsMapper:
557
573
  params["gene"] = end_tx_seg_data.gene
558
574
  params["genomic_ac"] = end_tx_seg_data.genomic_ac
559
575
  params["tx_ac"] = end_tx_seg_data.tx_ac
576
+ params["tx_status"] = end_tx_seg_data.tx_status
560
577
  params["strand"] = end_tx_seg_data.strand
561
578
 
562
579
  params["seg_end"] = end_tx_seg_data.seg
@@ -858,14 +875,18 @@ class ExonGenomicCoordsMapper:
858
875
  if mane_transcripts:
859
876
  transcript = mane_transcripts[0]["RefSeq_nuc"]
860
877
  else:
861
- # Attempt to find a coding transcript if a MANE transcript
878
+ # Attempt to find longest compatible transcript if a MANE transcript
862
879
  # cannot be found
863
- results = await self.uta_db.get_transcripts(
864
- gene=gene, alt_ac=genomic_ac
880
+ results = await self.mane_transcript.get_longest_compatible_transcript(
881
+ start_pos=genomic_pos,
882
+ end_pos=genomic_pos,
883
+ gene=gene,
884
+ alt_ac=genomic_ac,
885
+ start_annotation_layer=AnnotationLayer.GENOMIC,
865
886
  )
866
887
 
867
- if not results.is_empty():
868
- transcript = results[0]["tx_ac"][0]
888
+ if results:
889
+ transcript = results.refseq
869
890
  else:
870
891
  # Run if gene is for a noncoding transcript
871
892
  query = f"""
@@ -962,6 +983,7 @@ class ExonGenomicCoordsMapper:
962
983
  gene=gene,
963
984
  genomic_ac=genomic_ac,
964
985
  tx_ac=transcript,
986
+ tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
965
987
  strand=strand,
966
988
  seg=TxSegment(
967
989
  exon_ord=exon_num,
@@ -8,7 +8,7 @@ from pathlib import Path
8
8
  import polars as pl
9
9
 
10
10
  from cool_seq_tool.resources.data_files import DataFile, get_data_file
11
- from cool_seq_tool.schemas import ManeGeneData
11
+ from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
12
12
 
13
13
  _logger = logging.getLogger(__name__)
14
14
 
@@ -85,6 +85,22 @@ class ManeTranscriptMappings:
85
85
  return []
86
86
  return mane_rows.to_dicts()
87
87
 
88
+ def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
89
+ """Get MANE status for a transcript
90
+
91
+ :param tx_ac: A RefSeq transcript accession
92
+ :return: A TranscriptPriority object
93
+ """
94
+ mane_info = self.get_mane_from_transcripts([tx_ac])
95
+ if not mane_info:
96
+ return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING
97
+ mane_info = mane_info[0]["MANE_status"]
98
+ return (
99
+ TranscriptPriority.MANE_SELECT
100
+ if mane_info == "MANE Select"
101
+ else TranscriptPriority.MANE_PLUS_CLINICAL
102
+ )
103
+
88
104
  def get_mane_data_from_chr_pos(
89
105
  self, alt_ac: str, start: int, end: int
90
106
  ) -> list[dict]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cool_seq_tool
3
- Version: 0.14.5
3
+ Version: 0.15.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -50,7 +50,7 @@ Requires-Dist: agct>=0.1.0-dev1
50
50
  Requires-Dist: polars~=1.0
51
51
  Requires-Dist: biocommons.seqrepo
52
52
  Requires-Dist: pydantic<3.0,>=2.0
53
- Requires-Dist: ga4gh.vrs<3.0,>=2.1.3
53
+ Requires-Dist: ga4gh.vrs<3.0,>=2.1.4
54
54
  Requires-Dist: wags-tails~=0.4.0
55
55
  Requires-Dist: bioutils
56
56
  Provides-Extra: dev
@@ -1,12 +1,12 @@
1
1
  cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
2
- cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
2
+ cool_seq_tool/app.py,sha256=ThdumeXtLNrrGkQW9wwLT3Zm_Fc1wzg88ZxLIwMzcJU,4978
3
3
  cool_seq_tool/schemas.py,sha256=6c87iuA6v7BX7a8nkWEqFbJTksFysuuIeuYxkNCrAsI,5356
4
4
  cool_seq_tool/utils.py,sha256=jra2ZHS7HUqXqabSvyqd5imf6kkhYL8nQd20BWNLpb8,2950
5
5
  cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
7
7
  cool_seq_tool/mappers/__init__.py,sha256=tavpwkNogg_nF1J_kb6Q9jk7ezqdRz063v7BMZ4koLM,390
8
8
  cool_seq_tool/mappers/alignment.py,sha256=kWgYssM8YL-Z13H9GdpL77P7simNcbxltAs9YDXHE54,9640
9
- cool_seq_tool/mappers/exon_genomic_coords.py,sha256=N6Wi7D8hs6gZi-BC4ICuWQEGeqUbBysqxG18EtIIgSk,46187
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=I59vvswLbXk1fOWLNyNd8NTVT39f5yxTCv20utlTCpo,47361
10
10
  cool_seq_tool/mappers/feature_overlap.py,sha256=X5UFClaH6ixRsO2fDLxqjywp-Z0bvNx4uzgBICy394U,9758
11
11
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
12
12
  cool_seq_tool/mappers/mane_transcript.py,sha256=IluiLBxPQoY-CxkpqpjEBcMlHvrNLa34wdKdQxtKgDY,54613
@@ -15,11 +15,11 @@ cool_seq_tool/resources/data_files.py,sha256=6d1M5WjeFHdTQpzxqjQ78auQRZvIBVqH8QN
15
15
  cool_seq_tool/resources/status.py,sha256=iP-4NiSmqV-D--gypZyrSqVbOWQvyBZICKQb-VinTik,6241
16
16
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
17
17
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
18
- cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
18
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=PLJymduwvG1pt9mravE58BfJsGXdAYXcZYZdHPy12z0,7211
19
19
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
20
20
  cool_seq_tool/sources/uta_database.py,sha256=38CQ0QHHh0kA87tdgsJHJiHdJHQc06ylBYfemGFUlZc,36759
21
- cool_seq_tool-0.14.5.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
22
- cool_seq_tool-0.14.5.dist-info/METADATA,sha256=tgWwlBS_2Z71zUJpmO7MjDE3cNFy6GYH7W8YyJrTo0I,6535
23
- cool_seq_tool-0.14.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- cool_seq_tool-0.14.5.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
25
- cool_seq_tool-0.14.5.dist-info/RECORD,,
21
+ cool_seq_tool-0.15.0.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
22
+ cool_seq_tool-0.15.0.dist-info/METADATA,sha256=MEcG0vc2k8F8lQljcFgkIrabFHJec_pE7Ib6DzLZR2M,6535
23
+ cool_seq_tool-0.15.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ cool_seq_tool-0.15.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
25
+ cool_seq_tool-0.15.0.dist-info/RECORD,,