cool-seq-tool 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,7 +106,7 @@ class AlignmentMapper:
106
106
  c_end_pos: int,
107
107
  cds_start: int | None = None,
108
108
  coordinate_type: CoordinateType = CoordinateType.RESIDUE,
109
- target_genome_assembly: bool = Assembly.GRCH38,
109
+ target_genome_assembly: Assembly = Assembly.GRCH38,
110
110
  ) -> tuple[dict | None, str | None]:
111
111
  """Translate cDNA representation to genomic representation
112
112
 
@@ -65,6 +65,27 @@ class TxSegment(BaseModelForbidExtra):
65
65
  genomic_location: SequenceLocation = Field(
66
66
  ..., description="The genomic position of a transcript segment."
67
67
  )
68
+ is_exonic: bool = Field(
69
+ default=True, description="If the position occurs on an exon"
70
+ )
71
+
72
+ @model_validator(mode="before")
73
+ def check_seg_pos(cls, values: dict) -> dict: # noqa: N805
74
+ """Ensure that only one of `start` or `end` is set in the
75
+ genomic_location field
76
+
77
+ :param values: The values in the TxSegment class
78
+ :raises ValueError: If `start` and `end` are both set in
79
+ `genomic_location`
80
+ :return: Values in model
81
+ """
82
+ loc = values.get("genomic_location")
83
+ start = getattr(loc, "start", None)
84
+ end = getattr(loc, "end", None)
85
+ if start and end:
86
+ err_msg = "Only one of `start` or `end` may be set as this describes the start or end of a transcript segment"
87
+ raise ValueError(err_msg)
88
+ return values
68
89
 
69
90
  model_config = ConfigDict(
70
91
  json_schema_extra={
@@ -79,6 +100,7 @@ class TxSegment(BaseModelForbidExtra):
79
100
  },
80
101
  "end": 154192135,
81
102
  },
103
+ "is_exonic": True,
82
104
  }
83
105
  }
84
106
  )
@@ -136,6 +158,7 @@ class GenomicTxSeg(BaseModelForbidExtra):
136
158
  },
137
159
  "end": 154192135,
138
160
  },
161
+ "is_exonic": True,
139
162
  },
140
163
  "errors": [],
141
164
  }
@@ -202,6 +225,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
202
225
  },
203
226
  "end": 154192135,
204
227
  },
228
+ "is_exonic": True,
205
229
  },
206
230
  "seg_end": {
207
231
  "exon_ord": 7,
@@ -214,6 +238,7 @@ class GenomicTxSegService(BaseModelForbidExtra):
214
238
  },
215
239
  "start": 154170399,
216
240
  },
241
+ "is_exonic": True,
217
242
  },
218
243
  }
219
244
  }
@@ -895,6 +920,7 @@ class ExonGenomicCoordsMapper:
895
920
  # Check if breakpoint occurs on an exon.
896
921
  # If not, determine the adjacent exon given the selected transcript
897
922
  if not self._is_exonic_breakpoint(genomic_pos, tx_exons):
923
+ is_exonic = False
898
924
  exon_num = self._get_adjacent_exon(
899
925
  tx_exons_genomic_coords=tx_exons,
900
926
  strand=strand,
@@ -902,6 +928,7 @@ class ExonGenomicCoordsMapper:
902
928
  end=genomic_pos if not is_seg_start else None,
903
929
  )
904
930
  else:
931
+ is_exonic = True
905
932
  exon_data = await self.uta_db.get_tx_exon_aln_v_data(
906
933
  transcript,
907
934
  genomic_pos,
@@ -934,6 +961,7 @@ class ExonGenomicCoordsMapper:
934
961
  exon_ord=exon_num,
935
962
  offset=offset,
936
963
  genomic_location=genomic_location,
964
+ is_exonic=is_exonic,
937
965
  ),
938
966
  )
939
967
 
@@ -212,14 +212,15 @@ class FeatureOverlap:
212
212
 
213
213
  ga4gh_seq_id = ga4gh_aliases[0]
214
214
 
215
- def _get_seq_loc(start_pos: int, stop_pos: int, refget_ac: str) -> dict:
216
- """Get VRS Sequence Location represented as a dict
215
+ def _get_seq_loc(
216
+ start_pos: int, stop_pos: int, refget_ac: str
217
+ ) -> SequenceLocation:
218
+ """Get VRS Sequence Location
217
219
 
218
220
  :param start_pos: Start position
219
221
  :param stop_pos: Stop position
220
222
  :param refget_ac: Refget Accession (SQ.)
221
- :return: VRS Sequence Location represented as dictionary with the ga4gh ID
222
- included
223
+ :return: VRS Sequence Location
223
224
  """
224
225
  _sl = SequenceLocation(
225
226
  sequenceReference=SequenceReference(
@@ -229,7 +230,7 @@ class FeatureOverlap:
229
230
  end=stop_pos,
230
231
  )
231
232
  ga4gh_identify(_sl)
232
- return _sl.model_dump(exclude_none=True)
233
+ return _sl
233
234
 
234
235
  resp = {}
235
236
  refget_ac = ga4gh_seq_id.split("ga4gh:")[-1]
@@ -55,7 +55,7 @@ class DataRepresentation(BaseModel):
55
55
  """Define object model for final output representation"""
56
56
 
57
57
  gene: str | None = None
58
- refseq: str
58
+ refseq: str | None = None
59
59
  ensembl: str | None = None
60
60
  pos: tuple[int, int]
61
61
  strand: Strand
@@ -447,7 +447,7 @@ class ManeTranscript:
447
447
 
448
448
  async def _g_to_c(
449
449
  self,
450
- g: dict,
450
+ g: GenomicTxMetadata,
451
451
  refseq_c_ac: str,
452
452
  status: TranscriptPriority,
453
453
  ensembl_c_ac: str | None = None,
@@ -590,16 +590,23 @@ class ManeTranscript:
590
590
  if mane_transcript:
591
591
  mane_start_pos = mane_transcript.pos[0]
592
592
  mane_end_pos = mane_transcript.pos[1]
593
- if anno == AnnotationLayer.CDNA:
593
+ if anno == AnnotationLayer.CDNA and isinstance(
594
+ mane_transcript, CdnaRepresentation
595
+ ):
594
596
  mane_cds = mane_transcript.coding_start_site
595
597
  mane_start_pos += mane_cds
596
598
  mane_end_pos += mane_cds
597
- mane_ref, _ = self.seqrepo_access.get_reference_sequence(
598
- mane_transcript.refseq,
599
- start=mane_start_pos,
600
- end=mane_end_pos if mane_start_pos != mane_end_pos else None,
601
- coordinate_type=coordinate_type,
602
- )
599
+
600
+ if mane_transcript.refseq:
601
+ mane_ref, _ = self.seqrepo_access.get_reference_sequence(
602
+ mane_transcript.refseq,
603
+ start=mane_start_pos,
604
+ end=mane_end_pos if mane_start_pos != mane_end_pos else None,
605
+ coordinate_type=coordinate_type,
606
+ )
607
+ else:
608
+ mane_ref = None
609
+
603
610
  if not mane_ref:
604
611
  _logger.info("Unable to validate reference for MANE Transcript")
605
612
 
@@ -1330,7 +1337,7 @@ class ManeTranscript:
1330
1337
  gene: str | None = None,
1331
1338
  coordinate_type: CoordinateType = CoordinateType.RESIDUE,
1332
1339
  try_longest_compatible: bool = False,
1333
- ) -> dict | None:
1340
+ ) -> ProteinAndCdnaRepresentation | None:
1334
1341
  """Given GRCh38 genomic representation, return protein representation.
1335
1342
 
1336
1343
  Will try MANE Select and then MANE Plus Clinical. If neither is found and
@@ -24,6 +24,7 @@ ResourceStatus = namedtuple(
24
24
  DataFile.TRANSCRIPT_MAPPINGS.lower(),
25
25
  DataFile.MANE_SUMMARY.lower(),
26
26
  DataFile.LRG_REFSEQGENE.lower(),
27
+ DataFile.MANE_REFSEQ_GENOMIC.lower(),
27
28
  "liftover",
28
29
  ),
29
30
  )
@@ -37,6 +38,7 @@ async def check_status(
37
38
  sr: SeqRepo | None = None,
38
39
  chain_file_37_to_38: str | None = None,
39
40
  chain_file_38_to_37: str | None = None,
41
+ mane_refseq_genomic_path: str | None = None,
40
42
  ) -> ResourceStatus:
41
43
  """Perform basic status checks on availability of required data resources.
42
44
 
@@ -62,6 +64,7 @@ async def check_status(
62
64
  is used for ``agct``. If this is not provided, will check to see if
63
65
  ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will allow
64
66
  ``agct`` to download a chain file from UCSC
67
+ :param mane_refseq_genomic_path: Optional path to MANE RefSeq Genomic GFF data
65
68
  :return: boolean description of availability of each resource, given current
66
69
  environment configurations
67
70
  """
@@ -69,19 +72,21 @@ async def check_status(
69
72
  DataFile.TRANSCRIPT_MAPPINGS.lower(): transcript_file_path,
70
73
  DataFile.LRG_REFSEQGENE.lower(): lrg_refseqgene_path,
71
74
  DataFile.MANE_SUMMARY.lower(): mane_data_path,
75
+ DataFile.MANE_REFSEQ_GENOMIC.lower(): mane_refseq_genomic_path,
72
76
  }
73
77
 
74
78
  status = {
75
79
  DataFile.TRANSCRIPT_MAPPINGS.lower(): False,
76
80
  DataFile.LRG_REFSEQGENE.lower(): False,
77
81
  DataFile.MANE_SUMMARY.lower(): False,
82
+ DataFile.MANE_REFSEQ_GENOMIC.lower(): False,
78
83
  "liftover": False,
79
84
  "uta": False,
80
85
  "seqrepo": False,
81
86
  }
82
87
  for r in list(DataFile):
83
88
  name_lower = r.lower()
84
- declared_path = file_path_params[name_lower]
89
+ declared_path = file_path_params.get(name_lower)
85
90
  if declared_path and declared_path.exists() and declared_path.is_file():
86
91
  status[name_lower] = True
87
92
  continue
cool_seq_tool/schemas.py CHANGED
@@ -14,7 +14,7 @@ from pydantic import (
14
14
 
15
15
  from cool_seq_tool import __version__
16
16
 
17
- _now = str(datetime.datetime.now(tz=datetime.timezone.utc))
17
+ _now = str(datetime.datetime.now(tz=datetime.UTC))
18
18
 
19
19
 
20
20
  class AnnotationLayer(str, Enum):
cool_seq_tool/utils.py CHANGED
@@ -47,7 +47,7 @@ def service_meta() -> ServiceMeta:
47
47
  """
48
48
  return ServiceMeta(
49
49
  version=__version__,
50
- response_datetime=datetime.datetime.now(tz=datetime.timezone.utc),
50
+ response_datetime=datetime.datetime.now(tz=datetime.UTC),
51
51
  )
52
52
 
53
53
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cool_seq_tool
3
- Version: 0.14.2
3
+ Version: 0.14.4
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -38,10 +38,10 @@ Classifier: Intended Audience :: Developers
38
38
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
39
39
  Classifier: License :: OSI Approved :: MIT License
40
40
  Classifier: Programming Language :: Python :: 3
41
- Classifier: Programming Language :: Python :: 3.10
42
41
  Classifier: Programming Language :: Python :: 3.11
43
42
  Classifier: Programming Language :: Python :: 3.12
44
- Requires-Python: >=3.10
43
+ Classifier: Programming Language :: Python :: 3.13
44
+ Requires-Python: >=3.11
45
45
  Description-Content-Type: text/markdown
46
46
  License-File: LICENSE
47
47
  Requires-Dist: asyncpg
@@ -1,25 +1,25 @@
1
1
  cool_seq_tool/__init__.py,sha256=pJyVj7Z275BBAwpeFMm-WEn_tp-y1_ihRl1sLc4FFZY,400
2
2
  cool_seq_tool/app.py,sha256=vyqlQRffC8sWZXMm-f_f-8WuTTWo3oRNfPUa_qdPV2M,4944
3
- cool_seq_tool/schemas.py,sha256=ueEq7cVKyGWVDvsV9JAzg7Cp2Nni3F9musCkWEtW26g,5365
4
- cool_seq_tool/utils.py,sha256=kesu7UnOplDzvNBg_G-_m1xMM22979nmsi4yWtweetU,2959
3
+ cool_seq_tool/schemas.py,sha256=6c87iuA6v7BX7a8nkWEqFbJTksFysuuIeuYxkNCrAsI,5356
4
+ cool_seq_tool/utils.py,sha256=jra2ZHS7HUqXqabSvyqd5imf6kkhYL8nQd20BWNLpb8,2950
5
5
  cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=lRzPc8V0eZJTlefbHuVKeZTEC8-KcyPzpqX7vx3amu8,9118
7
7
  cool_seq_tool/mappers/__init__.py,sha256=tavpwkNogg_nF1J_kb6Q9jk7ezqdRz063v7BMZ4koLM,390
8
- cool_seq_tool/mappers/alignment.py,sha256=nV6PS3mhkQ2MD1GcpNBujBOqd3AKxYSYA9BCusFOa1o,9636
9
- cool_seq_tool/mappers/exon_genomic_coords.py,sha256=t36NhWo2Rl84dgZY6qO7XFmGpfisjAqC-1ZOTRZxWvg,44757
10
- cool_seq_tool/mappers/feature_overlap.py,sha256=_MMEuZh48dHDPKliQV2C14bwziScE46uWl9utnAHOZo,9845
8
+ cool_seq_tool/mappers/alignment.py,sha256=kWgYssM8YL-Z13H9GdpL77P7simNcbxltAs9YDXHE54,9640
9
+ cool_seq_tool/mappers/exon_genomic_coords.py,sha256=fV4LyrpHPLRrx6AtV15g93q5XCH3i-y3Wj9tl-Cg8mM,45845
10
+ cool_seq_tool/mappers/feature_overlap.py,sha256=X5UFClaH6ixRsO2fDLxqjywp-Z0bvNx4uzgBICy394U,9758
11
11
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
12
- cool_seq_tool/mappers/mane_transcript.py,sha256=2cAYi0Y_gGdPI40weH9Ud1uVBCTKuwMT0M7KFUyjzU0,54365
12
+ cool_seq_tool/mappers/mane_transcript.py,sha256=IluiLBxPQoY-CxkpqpjEBcMlHvrNLa34wdKdQxtKgDY,54613
13
13
  cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
14
14
  cool_seq_tool/resources/data_files.py,sha256=6d1M5WjeFHdTQpzxqjQ78auQRZvIBVqH8QNCrmRRDXw,4205
15
- cool_seq_tool/resources/status.py,sha256=9LYSO2mOzVmoSQwllzq1mGChjtDA6j3I0S372N89clA,5683
15
+ cool_seq_tool/resources/status.py,sha256=5UKx5FIQuyIY7FU4kSinDIM4MhLpr9_MiQDDBNt9kRo,5990
16
16
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
17
17
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
18
18
  cool_seq_tool/sources/mane_transcript_mappings.py,sha256=C5puIA1xuEzBaSvs8VtSxVb2OIDGUg5no8v6Ma2QSdw,6597
19
19
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
20
20
  cool_seq_tool/sources/uta_database.py,sha256=zzRzmYuybqzEg7zeuQjhK46SPK5GfbiWWNRGNJju8AI,36197
21
- cool_seq_tool-0.14.2.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
22
- cool_seq_tool-0.14.2.dist-info/METADATA,sha256=qtjqeUzTDsOnTlil7dVW7Uzg50mQ_-Lu8sKwR95ExGs,6535
23
- cool_seq_tool-0.14.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- cool_seq_tool-0.14.2.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
25
- cool_seq_tool-0.14.2.dist-info/RECORD,,
21
+ cool_seq_tool-0.14.4.dist-info/licenses/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
22
+ cool_seq_tool-0.14.4.dist-info/METADATA,sha256=gPz48irXCwNUecTcgpKrdrHiBhi8R_Is55S2UYs9Qtk,6535
23
+ cool_seq_tool-0.14.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ cool_seq_tool-0.14.4.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
25
+ cool_seq_tool-0.14.4.dist-info/RECORD,,