cool-seq-tool 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,7 @@ from cool_seq_tool.mappers.liftover import LiftOver
25
25
  from cool_seq_tool.schemas import (
26
26
  AnnotationLayer,
27
27
  Assembly,
28
+ ManeGeneData,
28
29
  ResidueMode,
29
30
  Strand,
30
31
  TranscriptPriority,
@@ -71,10 +72,10 @@ class CdnaRepresentation(DataRepresentation):
71
72
  class GenomicRepresentation(BaseModel):
72
73
  """Define object model for genomic representation"""
73
74
 
74
- refseq: str
75
75
  pos: tuple[int, int]
76
- status: TranscriptPriority
77
- alt_ac: str
76
+ mane_genes: list[ManeGeneData] = []
77
+ status: Literal["grch38"] = TranscriptPriority.GRCH38.value
78
+ ac: str
78
79
 
79
80
 
80
81
  class ProteinAndCdnaRepresentation(BaseModel):
@@ -108,7 +109,7 @@ class ManeTranscript:
108
109
 
109
110
  >>> import asyncio
110
111
  >>> result = asyncio.run(mane_mapper.g_to_grch38("NC_000001.11", 100, 200))
111
- >>> result["ac"]
112
+ >>> result.ac
112
113
  'NC_000001.11'
113
114
 
114
115
  See the :ref:`Usage section <async_note>` for more information.
@@ -128,7 +129,7 @@ class ManeTranscript:
128
129
  self.liftover = liftover
129
130
 
130
131
  @staticmethod
131
- def _get_reading_frame(pos: int) -> int:
132
+ def get_reading_frame(pos: int) -> int:
132
133
  """Return reading frame number. Only used on c. coordinate.
133
134
 
134
135
  :param pos: cDNA position
@@ -531,8 +532,8 @@ class ManeTranscript:
531
532
  """
532
533
  for pos, pos_index in [(start_pos, 0), (end_pos, 1)]:
533
534
  if pos is not None:
534
- og_rf = self._get_reading_frame(pos)
535
- new_rf = self._get_reading_frame(transcript_data.pos[pos_index])
535
+ og_rf = self.get_reading_frame(pos)
536
+ new_rf = self.get_reading_frame(transcript_data.pos[pos_index])
536
537
 
537
538
  if og_rf != new_rf:
538
539
  _logger.warning(
@@ -618,7 +619,7 @@ class ManeTranscript:
618
619
 
619
620
  return True
620
621
 
621
- def _validate_index(
622
+ def validate_index(
622
623
  self, ac: str, pos: tuple[int, int], coding_start_site: int
623
624
  ) -> bool:
624
625
  """Validate that positions actually exist on accession
@@ -910,7 +911,7 @@ class ManeTranscript:
910
911
  ac = lcr_result.refseq or lcr_result.ensembl
911
912
  pos = lcr_result.pos
912
913
 
913
- if not self._validate_index(ac, pos, coding_start_site):
914
+ if not self.validate_index(ac, pos, coding_start_site):
914
915
  _logger.warning(
915
916
  "%s are not valid positions on %s with coding start site %s",
916
917
  pos,
@@ -936,7 +937,7 @@ class ManeTranscript:
936
937
  cds = lcr_result_dict[k].get("coding_start_site", 0)
937
938
  ac = lcr_result_dict[k]["refseq"] or lcr_result_dict[k]["ensembl"]
938
939
  pos = lcr_result_dict[k]["pos"]
939
- if not self._validate_index(ac, pos, cds):
940
+ if not self.validate_index(ac, pos, cds):
940
941
  valid = False
941
942
  _logger.warning(
942
943
  "%s are not valid positions on %s with coding start site %s",
@@ -962,7 +963,16 @@ class ManeTranscript:
962
963
  residue_mode: Literal[ResidueMode.RESIDUE]
963
964
  | Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.RESIDUE,
964
965
  ) -> DataRepresentation | CdnaRepresentation | None:
965
- """Return MANE transcript.
966
+ """Return MANE representation
967
+
968
+ If ``start_annotation_layer`` is ``AnnotationLayer.PROTEIN``, will return
969
+ ``AnnotationLayer.PROTEIN`` representation.
970
+ If ``start_annotation_layer`` is ``AnnotationLayer.CDNA``, will return
971
+ ``AnnotationLayer.CDNA`` representation.
972
+ If ``start_annotation_layer`` is ``AnnotationLayer.GENOMIC`` will return
973
+ ``AnnotationLayer.CDNA`` representation if ``gene`` is provided and
974
+ ``AnnotationLayer.GENOMIC`` GRCh38 representation if ``gene`` is NOT
975
+ provided.
966
976
 
967
977
  >>> from cool_seq_tool.app import CoolSeqTool
968
978
  >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
@@ -983,7 +993,11 @@ class ManeTranscript:
983
993
  :param start_pos: Start position change
984
994
  :param end_pos: End position change
985
995
  :param start_annotation_layer: Starting annotation layer.
986
- :param gene: HGNC gene symbol
996
+ :param gene: HGNC gene symbol.
997
+ If ``gene`` is not provided and ``start_annotation_layer`` is
998
+ ``AnnotationLayer.GENOMIC``, will return GRCh38 representation.
999
+ If ``gene`` is provided and ``start_annotation_layer`` is
1000
+ ``AnnotationLayer.GENOMIC``, will return cDNA representation.
987
1001
  :param ref: Reference at position given during input
988
1002
  :param try_longest_compatible: ``True`` if should try longest compatible remaining
989
1003
  if mane transcript was not compatible. ``False`` otherwise.
@@ -1093,29 +1107,56 @@ class ManeTranscript:
1093
1107
  )
1094
1108
  return None
1095
1109
  if start_annotation_layer == AnnotationLayer.GENOMIC:
1110
+ if not gene:
1111
+ return await self.g_to_grch38(
1112
+ ac,
1113
+ start_pos,
1114
+ end_pos,
1115
+ get_mane_genes=True,
1116
+ residue_mode=residue_mode,
1117
+ )
1118
+
1096
1119
  return await self.g_to_mane_c(
1097
- ac, start_pos, end_pos, gene=gene, residue_mode=residue_mode
1120
+ ac, start_pos, end_pos, gene, residue_mode=residue_mode
1098
1121
  )
1099
1122
  _logger.warning("Annotation layer not supported: %s", start_annotation_layer)
1100
1123
  return None
1101
1124
 
1102
- async def g_to_grch38(self, ac: str, start_pos: int, end_pos: int) -> dict | None:
1125
+ async def g_to_grch38(
1126
+ self,
1127
+ ac: str,
1128
+ start_pos: int,
1129
+ end_pos: int,
1130
+ get_mane_genes: bool = False,
1131
+ residue_mode: ResidueMode = ResidueMode.RESIDUE,
1132
+ ) -> GenomicRepresentation | None:
1103
1133
  """Return genomic coordinate on GRCh38 when not given gene context.
1104
1134
 
1105
1135
  :param ac: Genomic accession
1106
1136
  :param start_pos: Genomic start position
1107
1137
  :param end_pos: Genomic end position
1108
- :return: NC accession, start and end pos on GRCh38 assembly
1138
+ :param get_mane_genes: ``True`` if mane genes for genomic position should be
1139
+ included in response. ``False``, otherwise.
1140
+ :param residue_mode: Residue mode for ``start_pos`` and ``end_pos``
1141
+ :return: GRCh38 genomic representation (accession and start/end inter-residue
1142
+ position)
1109
1143
  """
1110
- if end_pos is None:
1111
- end_pos = start_pos
1144
+ start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
1112
1145
 
1113
1146
  # Checking to see what chromosome and assembly we're on
1114
1147
  descr = await self.uta_db.get_chr_assembly(ac)
1115
1148
  if not descr:
1116
1149
  # Already GRCh38 assembly
1117
- if self._validate_index(ac, (start_pos, end_pos), 0):
1118
- return {"ac": ac, "pos": (start_pos, end_pos)}
1150
+ if self.validate_index(ac, (start_pos, end_pos), 0):
1151
+ return GenomicRepresentation(
1152
+ ac=ac,
1153
+ pos=(start_pos, end_pos),
1154
+ mane_genes=self.mane_transcript_mappings.get_genomic_mane_genes(
1155
+ ac, start_pos + 1, end_pos
1156
+ )
1157
+ if get_mane_genes
1158
+ else [],
1159
+ )
1119
1160
  return None
1120
1161
  chromosome, assembly = descr
1121
1162
  is_same_pos = start_pos == end_pos
@@ -1145,8 +1186,16 @@ class ManeTranscript:
1145
1186
  newest_ac = await self.uta_db.get_newest_assembly_ac(ac)
1146
1187
  if newest_ac:
1147
1188
  ac = newest_ac[0]
1148
- if self._validate_index(ac, (start_pos, end_pos), 0):
1149
- return {"ac": ac, "pos": (start_pos, end_pos)}
1189
+ if self.validate_index(ac, (start_pos, end_pos), 0):
1190
+ return GenomicRepresentation(
1191
+ ac=ac,
1192
+ pos=(start_pos, end_pos),
1193
+ mane_genes=self.mane_transcript_mappings.get_genomic_mane_genes(
1194
+ ac, start_pos + 1, end_pos
1195
+ )
1196
+ if get_mane_genes
1197
+ else [],
1198
+ )
1150
1199
  return None
1151
1200
 
1152
1201
  @staticmethod
@@ -1176,14 +1225,11 @@ class ManeTranscript:
1176
1225
  ac: str,
1177
1226
  start_pos: int,
1178
1227
  end_pos: int,
1179
- gene: str | None = None,
1228
+ gene: str,
1180
1229
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
1181
- ) -> GenomicRepresentation | CdnaRepresentation | None:
1230
+ ) -> CdnaRepresentation | None:
1182
1231
  """Return MANE Transcript on the c. coordinate.
1183
1232
 
1184
- If an arg for ``gene`` is provided, lifts to GRCh38, then gets MANE cDNA
1185
- representation.
1186
-
1187
1233
  >>> import asyncio
1188
1234
  >>> from cool_seq_tool.app import CoolSeqTool
1189
1235
  >>> cst = CoolSeqTool()
@@ -1198,34 +1244,17 @@ class ManeTranscript:
1198
1244
  <TranscriptPriority.MANE_SELECT: 'mane_select'>
1199
1245
  >>> del cst
1200
1246
 
1201
- Locating a MANE transcript requires a ``gene`` symbol argument -- if none is
1202
- given, this method will only lift over to genomic coordinates on GRCh38.
1203
-
1204
1247
  :param ac: Transcript accession on g. coordinate
1205
1248
  :param start_pos: genomic start position
1206
1249
  :param end_pos: genomic end position
1207
1250
  :param gene: HGNC gene symbol
1208
1251
  :param residue_mode: Starting residue mode for ``start_pos`` and ``end_pos``.
1209
1252
  Will always return coordinates in inter-residue.
1210
- :return: MANE Transcripts with cDNA change on c. coordinate if gene
1211
- is provided. Else, GRCh38 data
1253
+ :return: MANE Transcripts with cDNA change on c. coordinate
1212
1254
  """
1213
1255
  start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
1214
1256
  residue_mode = ResidueMode.INTER_RESIDUE
1215
1257
 
1216
- # If gene not provided, return GRCh38
1217
- if not gene:
1218
- grch38 = await self.g_to_grch38(ac, start_pos, end_pos)
1219
- if not grch38:
1220
- return None
1221
-
1222
- return GenomicRepresentation(
1223
- refseq=grch38["ac"],
1224
- pos=grch38["pos"],
1225
- status=TranscriptPriority.GRCH38,
1226
- alt_ac=grch38["ac"],
1227
- )
1228
-
1229
1258
  if not await self.uta_db.validate_genomic_ac(ac):
1230
1259
  _logger.warning("Genomic accession does not exist: %s", ac)
1231
1260
  return None
@@ -1238,12 +1267,14 @@ class ManeTranscript:
1238
1267
  mane_c_ac = current_mane_data["RefSeq_nuc"]
1239
1268
 
1240
1269
  # Liftover to GRCh38
1241
- grch38 = await self.g_to_grch38(ac, start_pos, end_pos)
1270
+ grch38 = await self.g_to_grch38(
1271
+ ac, start_pos, end_pos, get_mane_genes=False, residue_mode=residue_mode
1272
+ )
1242
1273
  mane_tx_genomic_data = None
1243
1274
  if grch38:
1244
1275
  # GRCh38 -> MANE C
1245
1276
  mane_tx_genomic_data = await self.uta_db.get_mane_c_genomic_data(
1246
- mane_c_ac, grch38["ac"], grch38["pos"][0], grch38["pos"][1]
1277
+ mane_c_ac, grch38.ac, grch38.pos[0], grch38.pos[1]
1247
1278
  )
1248
1279
 
1249
1280
  if not grch38 or not mane_tx_genomic_data:
@@ -1261,9 +1292,7 @@ class ManeTranscript:
1261
1292
  mane_tx_genomic_data, coding_start_site
1262
1293
  )
1263
1294
 
1264
- if not self._validate_index(
1265
- mane_c_ac, mane_c_pos_change, coding_start_site
1266
- ):
1295
+ if not self.validate_index(mane_c_ac, mane_c_pos_change, coding_start_site):
1267
1296
  _logger.warning(
1268
1297
  "%s are not valid positions on %s with coding start site %s",
1269
1298
  mane_c_pos_change,
@@ -1284,7 +1313,7 @@ class ManeTranscript:
1284
1313
  ),
1285
1314
  refseq_c_ac=current_mane_data["RefSeq_nuc"],
1286
1315
  ensembl_c_ac=current_mane_data["Ensembl_nuc"],
1287
- alt_ac=grch38["ac"] if grch38 else None,
1316
+ alt_ac=grch38.ac if grch38 else None,
1288
1317
  )
1289
1318
  return None
1290
1319
 
@@ -1351,9 +1380,7 @@ class ManeTranscript:
1351
1380
  )
1352
1381
 
1353
1382
  # Validate MANE C positions
1354
- if not self._validate_index(
1355
- mane_c_ac, mane_c_pos_change, coding_start_site
1356
- ):
1383
+ if not self.validate_index(mane_c_ac, mane_c_pos_change, coding_start_site):
1357
1384
  _logger.warning(
1358
1385
  "%s are not valid positions on %s with coding start site %s",
1359
1386
  mane_c_pos_change,
cool_seq_tool/schemas.py CHANGED
@@ -116,72 +116,12 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
116
116
  """Base Pydantic model class with extra values forbidden."""
117
117
 
118
118
 
119
- class GenomicRequestBody(BaseModelForbidExtra):
120
- """Define constraints for genomic to transcript exon coordinates request body"""
119
+ class ManeGeneData(BaseModel, extra="forbid"):
120
+ """Define minimal object model for representing a MANE gene"""
121
121
 
122
- chromosome: StrictStr | StrictInt
123
- start: StrictInt | None = None
124
- end: StrictInt | None = None
125
- strand: Strand | None = None
126
- transcript: StrictStr | None = None
127
- gene: StrictStr | None = None
128
- residue_mode: ResidueMode = ResidueMode.RESIDUE
129
-
130
- @model_validator(mode="after")
131
- def check_start_and_end(cls, values):
132
- """Check that at least one of {``start``, ``end``} is set"""
133
- start, end = values.start, values.end
134
- if not start or end:
135
- msg = "Must provide either `start` or `end`"
136
- raise ValueError(msg)
137
- return values
138
-
139
- model_config = ConfigDict(
140
- json_schema_extra={
141
- "example": {
142
- "chromosome": "NC_000001.11",
143
- "start": 154192135,
144
- "end": None,
145
- "strand": Strand.NEGATIVE,
146
- "transcript": "NM_152263.3",
147
- "gene": "TPM3",
148
- "residue_mode": "residue",
149
- }
150
- }
151
- )
152
-
153
-
154
- class TranscriptRequestBody(BaseModelForbidExtra):
155
- """Define constraints for transcript exon to genomic coordinates request body"""
156
-
157
- transcript: StrictStr
158
- gene: StrictStr | None = None
159
- exon_start: StrictInt | None = None
160
- exon_start_offset: StrictInt | None = 0
161
- exon_end: StrictInt | None = None
162
- exon_end_offset: StrictInt | None = 0
163
-
164
- @model_validator(mode="after")
165
- def check_exon_start_and_exon_end(cls, values):
166
- """Check that at least one of {``exon_start``, ``exon_end``} is set"""
167
- exon_start, exon_end = values.exon_start, values.exon_end
168
- if not exon_start or exon_end:
169
- msg = "Must provide either `exon_start` or `exon_end`"
170
- raise ValueError(msg)
171
- return values
172
-
173
- model_config = ConfigDict(
174
- json_schema_extra={
175
- "example": {
176
- "gene": "TPM3",
177
- "transcript": "NM_152263.3",
178
- "exon_start": 1,
179
- "exon_start_offset": 1,
180
- "exon_end": None,
181
- "exon_end_offset": None,
182
- }
183
- }
184
- )
122
+ ncbi_gene_id: StrictInt
123
+ hgnc_id: StrictInt | None
124
+ symbol: StrictStr
185
125
 
186
126
 
187
127
  class TranscriptExonData(BaseModelForbidExtra):
@@ -354,216 +294,3 @@ class GenomicDataResponse(BaseModelForbidExtra):
354
294
  }
355
295
  }
356
296
  )
357
-
358
-
359
- class MappedManeData(BaseModel):
360
- """Define mapped mane data fields"""
361
-
362
- gene: StrictStr
363
- refseq: StrictStr
364
- ensembl: StrictStr | None = None
365
- strand: Strand
366
- status: TranscriptPriority
367
- alt_ac: StrictStr
368
- assembly: Assembly
369
-
370
- model_config = ConfigDict(
371
- json_schema_extra={
372
- "example": {
373
- "gene": "BRAF",
374
- "refseq": "NM_001374258.1",
375
- "ensembl": "ENST00000644969.2",
376
- "strand": Strand.NEGATIVE,
377
- "status": TranscriptPriority.MANE_PLUS_CLINICAL,
378
- "alt_ac": "NC_000007.13",
379
- "assembly": Assembly.GRCH37,
380
- }
381
- }
382
- )
383
-
384
-
385
- class MappedManeDataService(BaseModelForbidExtra):
386
- """Service model response for mapped mane data"""
387
-
388
- mapped_mane_data: MappedManeData | None = None
389
- warnings: list[StrictStr] = []
390
- service_meta: ServiceMeta
391
-
392
- model_config = ConfigDict(
393
- json_schema_extra={
394
- "example": {
395
- "mapped_mane_data": {
396
- "gene": "BRAF",
397
- "refseq": "NM_001374258.1",
398
- "ensembl": "ENST00000644969.2",
399
- "strand": Strand.NEGATIVE,
400
- "status": TranscriptPriority.MANE_PLUS_CLINICAL,
401
- "alt_ac": "NC_000007.13",
402
- "assembly": Assembly.GRCH37,
403
- },
404
- "warnings": [],
405
- "service_meta": {
406
- "name": "cool_seq_tool",
407
- "version": __version__,
408
- "response_datetime": _now,
409
- "url": "https://github.com/GenomicMedLab/cool-seq-tool",
410
- },
411
- }
412
- }
413
- )
414
-
415
-
416
- class ManeData(BaseModel):
417
- """Define mane data fields"""
418
-
419
- gene: StrictStr | None = None
420
- refseq: StrictStr | None = None
421
- ensembl: StrictStr | None = None
422
- pos: tuple[int, int]
423
- strand: Strand
424
- status: TranscriptPriority
425
-
426
- model_config = ConfigDict(
427
- json_schema_extra={
428
- "example": {
429
- "gene": "BRAF",
430
- "refseq": "NP_004324.2",
431
- "ensembl": "ENSP00000493543.1",
432
- "pos": (598, 598),
433
- "strand": Strand.NEGATIVE,
434
- "status": TranscriptPriority.MANE_SELECT,
435
- }
436
- }
437
- )
438
-
439
-
440
- class ManeDataService(BaseModelForbidExtra):
441
- """Service model response for getting mane data"""
442
-
443
- mane_data: ManeData | None = None
444
- warnings: list[StrictStr] = []
445
- service_meta: ServiceMeta
446
-
447
- model_config = ConfigDict(
448
- json_schema_extra={
449
- "example": {
450
- "mane_data": {
451
- "gene": "BRAF",
452
- "refseq": "NP_004324.2",
453
- "ensembl": "ENSP00000493543.1",
454
- "pos": (598, 598),
455
- "strand": Strand.NEGATIVE,
456
- "status": TranscriptPriority.MANE_SELECT,
457
- },
458
- "warnings": [],
459
- "service_meta": {
460
- "name": "cool_seq_tool",
461
- "version": __version__,
462
- "response_datetime": _now,
463
- "url": "https://github.com/GenomicMedLab/cool-seq-tool",
464
- },
465
- }
466
- }
467
- )
468
-
469
-
470
- # ALIGNMENT MAPPER SERVICE SCHEMAS
471
-
472
-
473
- class CdnaRepresentation(BaseModelForbidExtra):
474
- """Model response for cDNA representation"""
475
-
476
- c_ac: StrictStr
477
- c_start_pos: StrictInt
478
- c_end_pos: StrictInt
479
- cds_start: StrictInt
480
- residue_mode: Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.INTER_RESIDUE.value
481
-
482
- model_config = ConfigDict(
483
- json_schema_extra={
484
- "example": {
485
- "c_ac": "NM_004333.6",
486
- "c_start_pos": 1797,
487
- "c_end_pos": 1800,
488
- "cds_start": 226,
489
- "residue_mode": ResidueMode.INTER_RESIDUE,
490
- }
491
- }
492
- )
493
-
494
-
495
- class ToCdnaService(BaseModelForbidExtra):
496
- """Service model response for protein -> cDNA"""
497
-
498
- c_data: CdnaRepresentation | None = None
499
- warnings: list[StrictStr] = []
500
- service_meta: ServiceMeta
501
-
502
- model_config = ConfigDict(
503
- json_schema_extra={
504
- "example": {
505
- "c_data": {
506
- "c_ac": "NM_004333.6",
507
- "c_start_pos": 1797,
508
- "c_end_pos": 1800,
509
- "cds_start": 226,
510
- "residue_mode": ResidueMode.INTER_RESIDUE,
511
- },
512
- "warnings": [],
513
- "service_meta": {
514
- "name": "cool_seq_tool",
515
- "version": __version__,
516
- "response_datetime": _now,
517
- "url": "https://github.com/GenomicMedLab/cool-seq-tool",
518
- },
519
- }
520
- }
521
- )
522
-
523
-
524
- class GenomicRepresentation(BaseModelForbidExtra):
525
- """Model response for genomic representation"""
526
-
527
- g_ac: str
528
- g_start_pos: int
529
- g_end_pos: int
530
- residue_mode: Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.INTER_RESIDUE.value
531
-
532
- model_config = ConfigDict(
533
- json_schema_extra={
534
- "example": {
535
- "g_ac": "NC_000007.13",
536
- "g_start_pos": 140453134,
537
- "g_end_pos": 140453137,
538
- "residue_mode": ResidueMode.INTER_RESIDUE,
539
- }
540
- }
541
- )
542
-
543
-
544
- class ToGenomicService(BaseModelForbidExtra):
545
- """Service model response for cDNA -> genomic"""
546
-
547
- g_data: GenomicRepresentation | None = None
548
- warnings: list[StrictStr] = []
549
- service_meta: ServiceMeta
550
-
551
- model_config = ConfigDict(
552
- json_schema_extra={
553
- "example": {
554
- "g_data": {
555
- "g_ac": "NC_000007.13",
556
- "g_start_pos": 140453134,
557
- "g_end_pos": 140453137,
558
- "residue_mode": ResidueMode.INTER_RESIDUE,
559
- },
560
- "warnings": [],
561
- "service_meta": {
562
- "name": "cool_seq_tool",
563
- "version": __version__,
564
- "response_datetime": _now,
565
- "url": "https://github.com/GenomicMedLab/cool-seq-tool",
566
- },
567
- }
568
- }
569
- )
@@ -8,6 +8,7 @@ from pathlib import Path
8
8
  import polars as pl
9
9
 
10
10
  from cool_seq_tool.resources.data_files import DataFile, get_data_file
11
+ from cool_seq_tool.schemas import ManeGeneData
11
12
 
12
13
  _logger = logging.getLogger(__name__)
13
14
 
@@ -103,3 +104,37 @@ class ManeTranscriptMappings:
103
104
 
104
105
  mane_rows = mane_rows.sort(by="MANE_status", descending=True)
105
106
  return mane_rows.to_dicts()
107
+
108
+ def get_genomic_mane_genes(
109
+ self, ac: str, start: int, end: int
110
+ ) -> list[ManeGeneData]:
111
+ """Get MANE gene(s) for genomic location
112
+
113
+ :param ac: RefSeq genomic accession
114
+ :param start: Genomic start position. Assumes residue coordinates.
115
+ :param end: Genomic end position. Assumes residue coordinates.
116
+ :return: Unique MANE gene(s) found for a genomic location
117
+ """
118
+ mane_rows = self.df.filter(
119
+ (start >= pl.col("chr_start"))
120
+ & (end <= pl.col("chr_end"))
121
+ & (pl.col("GRCh38_chr") == ac)
122
+ ).unique(subset=["#NCBI_GeneID"])
123
+
124
+ if len(mane_rows) == 0:
125
+ return []
126
+
127
+ mane_rows = mane_rows.with_columns(
128
+ pl.col("#NCBI_GeneID")
129
+ .str.split_exact(":", 1)
130
+ .struct.field("field_1")
131
+ .cast(pl.Int32)
132
+ .alias("ncbi_gene_id"),
133
+ pl.col("HGNC_ID")
134
+ .str.split_exact(":", 1)
135
+ .struct.field("field_1")
136
+ .cast(pl.Int32)
137
+ .alias("hgnc_id"),
138
+ )
139
+ mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
140
+ return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cool_seq_tool
3
- Version: 0.5.1
3
+ Version: 0.6.0
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
@@ -1,6 +1,6 @@
1
1
  cool_seq_tool/__init__.py,sha256=fJmjglvv3Ylm0khQSD-XTqdyUA5YzEiS3iB8FGTOhIs,247
2
2
  cool_seq_tool/app.py,sha256=DJFcPVHQ5Ar9xdmHwrFKFMqbjDtx3L9gn84_wP63ARY,4982
3
- cool_seq_tool/schemas.py,sha256=hZ4pStUHgCarXPFLkuGU26znC0dooVDvixO_7eO5eUQ,16301
3
+ cool_seq_tool/schemas.py,sha256=OfRoEEB-bJPvPtSh8GKDBMs_wdGljrSCkg9vPVqFeIw,8033
4
4
  cool_seq_tool/utils.py,sha256=mq_eGgqiILDcrtb1trMwRdsTERixuj8kDxHfgwsWsko,2914
5
5
  cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
6
6
  cool_seq_tool/handlers/seqrepo_access.py,sha256=jKUn9mdyK0rHJk9I274N9H_B-M1m4r-hmOX7VwfjRC0,9135
@@ -8,17 +8,17 @@ cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzR
8
8
  cool_seq_tool/mappers/alignment.py,sha256=6Vk4XEar54ivuH8N7oBqa9gUa8E5GjWCI9hC1HCkM18,9552
9
9
  cool_seq_tool/mappers/exon_genomic_coords.py,sha256=McLXZcnDLdLSKR3eHnY4xJ0iLfCmSwAwK_RQXBV1AYQ,39160
10
10
  cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
11
- cool_seq_tool/mappers/mane_transcript.py,sha256=iNkK8mtzXPmD1BROHzJ4vipr6oBbQv_BdUmvuOGFIMA,52823
11
+ cool_seq_tool/mappers/mane_transcript.py,sha256=Iv6J2Tjwt9cYAqoiEQ-XNEc8iRI3tXOONA6YjOv2huU,54241
12
12
  cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
13
13
  cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
14
14
  cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
15
15
  cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
16
16
  cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
17
- cool_seq_tool/sources/mane_transcript_mappings.py,sha256=IQtaRWrIi3f1k0WiDtlmlfOlQQB6bTKSEAh2PHk-Lsw,4079
17
+ cool_seq_tool/sources/mane_transcript_mappings.py,sha256=E_pj7FEBcB6HUR8yhSVibB0beMMlKJ62pK0qvl4y5nw,5358
18
18
  cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
19
19
  cool_seq_tool/sources/uta_database.py,sha256=TKMx_yoqWe5QVnqkZe_10x-Lp4PtKvArbMg5ufba0_Q,38353
20
- cool_seq_tool-0.5.1.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
- cool_seq_tool-0.5.1.dist-info/METADATA,sha256=9GLDkcYGYGfUmhlkJ8S1bfjgbzPE2adEKy4iEwsyRnU,6210
22
- cool_seq_tool-0.5.1.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
23
- cool_seq_tool-0.5.1.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
- cool_seq_tool-0.5.1.dist-info/RECORD,,
20
+ cool_seq_tool-0.6.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
21
+ cool_seq_tool-0.6.0.dist-info/METADATA,sha256=9q0VK-zTlDxBI5jOG3d4w02n9SevWbGdHSO5HP0-U8M,6210
22
+ cool_seq_tool-0.6.0.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
23
+ cool_seq_tool-0.6.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
24
+ cool_seq_tool-0.6.0.dist-info/RECORD,,