cool-seq-tool 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/mappers/exon_genomic_coords.py +1 -1
- cool_seq_tool/mappers/mane_transcript.py +80 -53
- cool_seq_tool/schemas.py +5 -278
- cool_seq_tool/sources/mane_transcript_mappings.py +35 -0
- {cool_seq_tool-0.5.0.dist-info → cool_seq_tool-0.6.0.dist-info}/METADATA +7 -7
- {cool_seq_tool-0.5.0.dist-info → cool_seq_tool-0.6.0.dist-info}/RECORD +9 -9
- {cool_seq_tool-0.5.0.dist-info → cool_seq_tool-0.6.0.dist-info}/WHEEL +1 -1
- {cool_seq_tool-0.5.0.dist-info → cool_seq_tool-0.6.0.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.5.0.dist-info → cool_seq_tool-0.6.0.dist-info}/top_level.txt +0 -0
@@ -962,7 +962,7 @@ class ExonGenomicCoordsMapper:
|
|
962
962
|
exon = tx_exons_genomic_coords[i]
|
963
963
|
next_exon = tx_exons_genomic_coords[i + 1]
|
964
964
|
bp = start if start else end
|
965
|
-
if strand ==
|
965
|
+
if strand == Strand.POSITIVE:
|
966
966
|
lte_exon = exon
|
967
967
|
gte_exon = next_exon
|
968
968
|
else:
|
@@ -25,6 +25,7 @@ from cool_seq_tool.mappers.liftover import LiftOver
|
|
25
25
|
from cool_seq_tool.schemas import (
|
26
26
|
AnnotationLayer,
|
27
27
|
Assembly,
|
28
|
+
ManeGeneData,
|
28
29
|
ResidueMode,
|
29
30
|
Strand,
|
30
31
|
TranscriptPriority,
|
@@ -71,10 +72,10 @@ class CdnaRepresentation(DataRepresentation):
|
|
71
72
|
class GenomicRepresentation(BaseModel):
|
72
73
|
"""Define object model for genomic representation"""
|
73
74
|
|
74
|
-
refseq: str
|
75
75
|
pos: tuple[int, int]
|
76
|
-
|
77
|
-
|
76
|
+
mane_genes: list[ManeGeneData] = []
|
77
|
+
status: Literal["grch38"] = TranscriptPriority.GRCH38.value
|
78
|
+
ac: str
|
78
79
|
|
79
80
|
|
80
81
|
class ProteinAndCdnaRepresentation(BaseModel):
|
@@ -108,7 +109,7 @@ class ManeTranscript:
|
|
108
109
|
|
109
110
|
>>> import asyncio
|
110
111
|
>>> result = asyncio.run(mane_mapper.g_to_grch38("NC_000001.11", 100, 200))
|
111
|
-
>>> result
|
112
|
+
>>> result.ac
|
112
113
|
'NC_000001.11'
|
113
114
|
|
114
115
|
See the :ref:`Usage section <async_note>` for more information.
|
@@ -128,7 +129,7 @@ class ManeTranscript:
|
|
128
129
|
self.liftover = liftover
|
129
130
|
|
130
131
|
@staticmethod
|
131
|
-
def
|
132
|
+
def get_reading_frame(pos: int) -> int:
|
132
133
|
"""Return reading frame number. Only used on c. coordinate.
|
133
134
|
|
134
135
|
:param pos: cDNA position
|
@@ -531,8 +532,8 @@ class ManeTranscript:
|
|
531
532
|
"""
|
532
533
|
for pos, pos_index in [(start_pos, 0), (end_pos, 1)]:
|
533
534
|
if pos is not None:
|
534
|
-
og_rf = self.
|
535
|
-
new_rf = self.
|
535
|
+
og_rf = self.get_reading_frame(pos)
|
536
|
+
new_rf = self.get_reading_frame(transcript_data.pos[pos_index])
|
536
537
|
|
537
538
|
if og_rf != new_rf:
|
538
539
|
_logger.warning(
|
@@ -618,7 +619,7 @@ class ManeTranscript:
|
|
618
619
|
|
619
620
|
return True
|
620
621
|
|
621
|
-
def
|
622
|
+
def validate_index(
|
622
623
|
self, ac: str, pos: tuple[int, int], coding_start_site: int
|
623
624
|
) -> bool:
|
624
625
|
"""Validate that positions actually exist on accession
|
@@ -910,7 +911,7 @@ class ManeTranscript:
|
|
910
911
|
ac = lcr_result.refseq or lcr_result.ensembl
|
911
912
|
pos = lcr_result.pos
|
912
913
|
|
913
|
-
if not self.
|
914
|
+
if not self.validate_index(ac, pos, coding_start_site):
|
914
915
|
_logger.warning(
|
915
916
|
"%s are not valid positions on %s with coding start site %s",
|
916
917
|
pos,
|
@@ -936,7 +937,7 @@ class ManeTranscript:
|
|
936
937
|
cds = lcr_result_dict[k].get("coding_start_site", 0)
|
937
938
|
ac = lcr_result_dict[k]["refseq"] or lcr_result_dict[k]["ensembl"]
|
938
939
|
pos = lcr_result_dict[k]["pos"]
|
939
|
-
if not self.
|
940
|
+
if not self.validate_index(ac, pos, cds):
|
940
941
|
valid = False
|
941
942
|
_logger.warning(
|
942
943
|
"%s are not valid positions on %s with coding start site %s",
|
@@ -962,7 +963,16 @@ class ManeTranscript:
|
|
962
963
|
residue_mode: Literal[ResidueMode.RESIDUE]
|
963
964
|
| Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.RESIDUE,
|
964
965
|
) -> DataRepresentation | CdnaRepresentation | None:
|
965
|
-
"""Return MANE
|
966
|
+
"""Return MANE representation
|
967
|
+
|
968
|
+
If ``start_annotation_layer`` is ``AnnotationLayer.PROTEIN``, will return
|
969
|
+
``AnnotationLayer.PROTEIN`` representation.
|
970
|
+
If ``start_annotation_layer`` is ``AnnotationLayer.CDNA``, will return
|
971
|
+
``AnnotationLayer.CDNA`` representation.
|
972
|
+
If ``start_annotation_layer`` is ``AnnotationLayer.GENOMIC`` will return
|
973
|
+
``AnnotationLayer.CDNA`` representation if ``gene`` is provided and
|
974
|
+
``AnnotationLayer.GENOMIC`` GRCh38 representation if ``gene`` is NOT
|
975
|
+
provided.
|
966
976
|
|
967
977
|
>>> from cool_seq_tool.app import CoolSeqTool
|
968
978
|
>>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
|
@@ -983,7 +993,11 @@ class ManeTranscript:
|
|
983
993
|
:param start_pos: Start position change
|
984
994
|
:param end_pos: End position change
|
985
995
|
:param start_annotation_layer: Starting annotation layer.
|
986
|
-
:param gene: HGNC gene symbol
|
996
|
+
:param gene: HGNC gene symbol.
|
997
|
+
If ``gene`` is not provided and ``start_annotation_layer`` is
|
998
|
+
``AnnotationLayer.GENOMIC``, will return GRCh38 representation.
|
999
|
+
If ``gene`` is provided and ``start_annotation_layer`` is
|
1000
|
+
``AnnotationLayer.GENOMIC``, will return cDNA representation.
|
987
1001
|
:param ref: Reference at position given during input
|
988
1002
|
:param try_longest_compatible: ``True`` if should try longest compatible remaining
|
989
1003
|
if mane transcript was not compatible. ``False`` otherwise.
|
@@ -1093,29 +1107,56 @@ class ManeTranscript:
|
|
1093
1107
|
)
|
1094
1108
|
return None
|
1095
1109
|
if start_annotation_layer == AnnotationLayer.GENOMIC:
|
1110
|
+
if not gene:
|
1111
|
+
return await self.g_to_grch38(
|
1112
|
+
ac,
|
1113
|
+
start_pos,
|
1114
|
+
end_pos,
|
1115
|
+
get_mane_genes=True,
|
1116
|
+
residue_mode=residue_mode,
|
1117
|
+
)
|
1118
|
+
|
1096
1119
|
return await self.g_to_mane_c(
|
1097
|
-
ac, start_pos, end_pos, gene
|
1120
|
+
ac, start_pos, end_pos, gene, residue_mode=residue_mode
|
1098
1121
|
)
|
1099
1122
|
_logger.warning("Annotation layer not supported: %s", start_annotation_layer)
|
1100
1123
|
return None
|
1101
1124
|
|
1102
|
-
async def g_to_grch38(
|
1125
|
+
async def g_to_grch38(
|
1126
|
+
self,
|
1127
|
+
ac: str,
|
1128
|
+
start_pos: int,
|
1129
|
+
end_pos: int,
|
1130
|
+
get_mane_genes: bool = False,
|
1131
|
+
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
1132
|
+
) -> GenomicRepresentation | None:
|
1103
1133
|
"""Return genomic coordinate on GRCh38 when not given gene context.
|
1104
1134
|
|
1105
1135
|
:param ac: Genomic accession
|
1106
1136
|
:param start_pos: Genomic start position
|
1107
1137
|
:param end_pos: Genomic end position
|
1108
|
-
:
|
1138
|
+
:param get_mane_genes: ``True`` if mane genes for genomic position should be
|
1139
|
+
included in response. ``False``, otherwise.
|
1140
|
+
:param residue_mode: Residue mode for ``start_pos`` and ``end_pos``
|
1141
|
+
:return: GRCh38 genomic representation (accession and start/end inter-residue
|
1142
|
+
position)
|
1109
1143
|
"""
|
1110
|
-
|
1111
|
-
end_pos = start_pos
|
1144
|
+
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
|
1112
1145
|
|
1113
1146
|
# Checking to see what chromosome and assembly we're on
|
1114
1147
|
descr = await self.uta_db.get_chr_assembly(ac)
|
1115
1148
|
if not descr:
|
1116
1149
|
# Already GRCh38 assembly
|
1117
|
-
if self.
|
1118
|
-
return
|
1150
|
+
if self.validate_index(ac, (start_pos, end_pos), 0):
|
1151
|
+
return GenomicRepresentation(
|
1152
|
+
ac=ac,
|
1153
|
+
pos=(start_pos, end_pos),
|
1154
|
+
mane_genes=self.mane_transcript_mappings.get_genomic_mane_genes(
|
1155
|
+
ac, start_pos + 1, end_pos
|
1156
|
+
)
|
1157
|
+
if get_mane_genes
|
1158
|
+
else [],
|
1159
|
+
)
|
1119
1160
|
return None
|
1120
1161
|
chromosome, assembly = descr
|
1121
1162
|
is_same_pos = start_pos == end_pos
|
@@ -1145,8 +1186,16 @@ class ManeTranscript:
|
|
1145
1186
|
newest_ac = await self.uta_db.get_newest_assembly_ac(ac)
|
1146
1187
|
if newest_ac:
|
1147
1188
|
ac = newest_ac[0]
|
1148
|
-
if self.
|
1149
|
-
return
|
1189
|
+
if self.validate_index(ac, (start_pos, end_pos), 0):
|
1190
|
+
return GenomicRepresentation(
|
1191
|
+
ac=ac,
|
1192
|
+
pos=(start_pos, end_pos),
|
1193
|
+
mane_genes=self.mane_transcript_mappings.get_genomic_mane_genes(
|
1194
|
+
ac, start_pos + 1, end_pos
|
1195
|
+
)
|
1196
|
+
if get_mane_genes
|
1197
|
+
else [],
|
1198
|
+
)
|
1150
1199
|
return None
|
1151
1200
|
|
1152
1201
|
@staticmethod
|
@@ -1176,14 +1225,11 @@ class ManeTranscript:
|
|
1176
1225
|
ac: str,
|
1177
1226
|
start_pos: int,
|
1178
1227
|
end_pos: int,
|
1179
|
-
gene: str
|
1228
|
+
gene: str,
|
1180
1229
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
1181
|
-
) ->
|
1230
|
+
) -> CdnaRepresentation | None:
|
1182
1231
|
"""Return MANE Transcript on the c. coordinate.
|
1183
1232
|
|
1184
|
-
If an arg for ``gene`` is provided, lifts to GRCh38, then gets MANE cDNA
|
1185
|
-
representation.
|
1186
|
-
|
1187
1233
|
>>> import asyncio
|
1188
1234
|
>>> from cool_seq_tool.app import CoolSeqTool
|
1189
1235
|
>>> cst = CoolSeqTool()
|
@@ -1198,34 +1244,17 @@ class ManeTranscript:
|
|
1198
1244
|
<TranscriptPriority.MANE_SELECT: 'mane_select'>
|
1199
1245
|
>>> del cst
|
1200
1246
|
|
1201
|
-
Locating a MANE transcript requires a ``gene`` symbol argument -- if none is
|
1202
|
-
given, this method will only lift over to genomic coordinates on GRCh38.
|
1203
|
-
|
1204
1247
|
:param ac: Transcript accession on g. coordinate
|
1205
1248
|
:param start_pos: genomic start position
|
1206
1249
|
:param end_pos: genomic end position
|
1207
1250
|
:param gene: HGNC gene symbol
|
1208
1251
|
:param residue_mode: Starting residue mode for ``start_pos`` and ``end_pos``.
|
1209
1252
|
Will always return coordinates in inter-residue.
|
1210
|
-
:return: MANE Transcripts with cDNA change on c. coordinate
|
1211
|
-
is provided. Else, GRCh38 data
|
1253
|
+
:return: MANE Transcripts with cDNA change on c. coordinate
|
1212
1254
|
"""
|
1213
1255
|
start_pos, end_pos = get_inter_residue_pos(start_pos, end_pos, residue_mode)
|
1214
1256
|
residue_mode = ResidueMode.INTER_RESIDUE
|
1215
1257
|
|
1216
|
-
# If gene not provided, return GRCh38
|
1217
|
-
if not gene:
|
1218
|
-
grch38 = await self.g_to_grch38(ac, start_pos, end_pos)
|
1219
|
-
if not grch38:
|
1220
|
-
return None
|
1221
|
-
|
1222
|
-
return GenomicRepresentation(
|
1223
|
-
refseq=grch38["ac"],
|
1224
|
-
pos=grch38["pos"],
|
1225
|
-
status=TranscriptPriority.GRCH38,
|
1226
|
-
alt_ac=grch38["ac"],
|
1227
|
-
)
|
1228
|
-
|
1229
1258
|
if not await self.uta_db.validate_genomic_ac(ac):
|
1230
1259
|
_logger.warning("Genomic accession does not exist: %s", ac)
|
1231
1260
|
return None
|
@@ -1238,12 +1267,14 @@ class ManeTranscript:
|
|
1238
1267
|
mane_c_ac = current_mane_data["RefSeq_nuc"]
|
1239
1268
|
|
1240
1269
|
# Liftover to GRCh38
|
1241
|
-
grch38 = await self.g_to_grch38(
|
1270
|
+
grch38 = await self.g_to_grch38(
|
1271
|
+
ac, start_pos, end_pos, get_mane_genes=False, residue_mode=residue_mode
|
1272
|
+
)
|
1242
1273
|
mane_tx_genomic_data = None
|
1243
1274
|
if grch38:
|
1244
1275
|
# GRCh38 -> MANE C
|
1245
1276
|
mane_tx_genomic_data = await self.uta_db.get_mane_c_genomic_data(
|
1246
|
-
mane_c_ac, grch38
|
1277
|
+
mane_c_ac, grch38.ac, grch38.pos[0], grch38.pos[1]
|
1247
1278
|
)
|
1248
1279
|
|
1249
1280
|
if not grch38 or not mane_tx_genomic_data:
|
@@ -1261,9 +1292,7 @@ class ManeTranscript:
|
|
1261
1292
|
mane_tx_genomic_data, coding_start_site
|
1262
1293
|
)
|
1263
1294
|
|
1264
|
-
if not self.
|
1265
|
-
mane_c_ac, mane_c_pos_change, coding_start_site
|
1266
|
-
):
|
1295
|
+
if not self.validate_index(mane_c_ac, mane_c_pos_change, coding_start_site):
|
1267
1296
|
_logger.warning(
|
1268
1297
|
"%s are not valid positions on %s with coding start site %s",
|
1269
1298
|
mane_c_pos_change,
|
@@ -1284,7 +1313,7 @@ class ManeTranscript:
|
|
1284
1313
|
),
|
1285
1314
|
refseq_c_ac=current_mane_data["RefSeq_nuc"],
|
1286
1315
|
ensembl_c_ac=current_mane_data["Ensembl_nuc"],
|
1287
|
-
alt_ac=grch38
|
1316
|
+
alt_ac=grch38.ac if grch38 else None,
|
1288
1317
|
)
|
1289
1318
|
return None
|
1290
1319
|
|
@@ -1351,9 +1380,7 @@ class ManeTranscript:
|
|
1351
1380
|
)
|
1352
1381
|
|
1353
1382
|
# Validate MANE C positions
|
1354
|
-
if not self.
|
1355
|
-
mane_c_ac, mane_c_pos_change, coding_start_site
|
1356
|
-
):
|
1383
|
+
if not self.validate_index(mane_c_ac, mane_c_pos_change, coding_start_site):
|
1357
1384
|
_logger.warning(
|
1358
1385
|
"%s are not valid positions on %s with coding start site %s",
|
1359
1386
|
mane_c_pos_change,
|
cool_seq_tool/schemas.py
CHANGED
@@ -116,72 +116,12 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
|
|
116
116
|
"""Base Pydantic model class with extra values forbidden."""
|
117
117
|
|
118
118
|
|
119
|
-
class
|
120
|
-
"""Define
|
119
|
+
class ManeGeneData(BaseModel, extra="forbid"):
|
120
|
+
"""Define minimal object model for representing a MANE gene"""
|
121
121
|
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
strand: Strand | None = None
|
126
|
-
transcript: StrictStr | None = None
|
127
|
-
gene: StrictStr | None = None
|
128
|
-
residue_mode: ResidueMode = ResidueMode.RESIDUE
|
129
|
-
|
130
|
-
@model_validator(mode="after")
|
131
|
-
def check_start_and_end(cls, values):
|
132
|
-
"""Check that at least one of {``start``, ``end``} is set"""
|
133
|
-
start, end = values.start, values.end
|
134
|
-
if not start or end:
|
135
|
-
msg = "Must provide either `start` or `end`"
|
136
|
-
raise ValueError(msg)
|
137
|
-
return values
|
138
|
-
|
139
|
-
model_config = ConfigDict(
|
140
|
-
json_schema_extra={
|
141
|
-
"example": {
|
142
|
-
"chromosome": "NC_000001.11",
|
143
|
-
"start": 154192135,
|
144
|
-
"end": None,
|
145
|
-
"strand": Strand.NEGATIVE,
|
146
|
-
"transcript": "NM_152263.3",
|
147
|
-
"gene": "TPM3",
|
148
|
-
"residue_mode": "residue",
|
149
|
-
}
|
150
|
-
}
|
151
|
-
)
|
152
|
-
|
153
|
-
|
154
|
-
class TranscriptRequestBody(BaseModelForbidExtra):
|
155
|
-
"""Define constraints for transcript exon to genomic coordinates request body"""
|
156
|
-
|
157
|
-
transcript: StrictStr
|
158
|
-
gene: StrictStr | None = None
|
159
|
-
exon_start: StrictInt | None = None
|
160
|
-
exon_start_offset: StrictInt | None = 0
|
161
|
-
exon_end: StrictInt | None = None
|
162
|
-
exon_end_offset: StrictInt | None = 0
|
163
|
-
|
164
|
-
@model_validator(mode="after")
|
165
|
-
def check_exon_start_and_exon_end(cls, values):
|
166
|
-
"""Check that at least one of {``exon_start``, ``exon_end``} is set"""
|
167
|
-
exon_start, exon_end = values.exon_start, values.exon_end
|
168
|
-
if not exon_start or exon_end:
|
169
|
-
msg = "Must provide either `exon_start` or `exon_end`"
|
170
|
-
raise ValueError(msg)
|
171
|
-
return values
|
172
|
-
|
173
|
-
model_config = ConfigDict(
|
174
|
-
json_schema_extra={
|
175
|
-
"example": {
|
176
|
-
"gene": "TPM3",
|
177
|
-
"transcript": "NM_152263.3",
|
178
|
-
"exon_start": 1,
|
179
|
-
"exon_start_offset": 1,
|
180
|
-
"exon_end": None,
|
181
|
-
"exon_end_offset": None,
|
182
|
-
}
|
183
|
-
}
|
184
|
-
)
|
122
|
+
ncbi_gene_id: StrictInt
|
123
|
+
hgnc_id: StrictInt | None
|
124
|
+
symbol: StrictStr
|
185
125
|
|
186
126
|
|
187
127
|
class TranscriptExonData(BaseModelForbidExtra):
|
@@ -354,216 +294,3 @@ class GenomicDataResponse(BaseModelForbidExtra):
|
|
354
294
|
}
|
355
295
|
}
|
356
296
|
)
|
357
|
-
|
358
|
-
|
359
|
-
class MappedManeData(BaseModel):
|
360
|
-
"""Define mapped mane data fields"""
|
361
|
-
|
362
|
-
gene: StrictStr
|
363
|
-
refseq: StrictStr
|
364
|
-
ensembl: StrictStr | None = None
|
365
|
-
strand: Strand
|
366
|
-
status: TranscriptPriority
|
367
|
-
alt_ac: StrictStr
|
368
|
-
assembly: Assembly
|
369
|
-
|
370
|
-
model_config = ConfigDict(
|
371
|
-
json_schema_extra={
|
372
|
-
"example": {
|
373
|
-
"gene": "BRAF",
|
374
|
-
"refseq": "NM_001374258.1",
|
375
|
-
"ensembl": "ENST00000644969.2",
|
376
|
-
"strand": Strand.NEGATIVE,
|
377
|
-
"status": TranscriptPriority.MANE_PLUS_CLINICAL,
|
378
|
-
"alt_ac": "NC_000007.13",
|
379
|
-
"assembly": Assembly.GRCH37,
|
380
|
-
}
|
381
|
-
}
|
382
|
-
)
|
383
|
-
|
384
|
-
|
385
|
-
class MappedManeDataService(BaseModelForbidExtra):
|
386
|
-
"""Service model response for mapped mane data"""
|
387
|
-
|
388
|
-
mapped_mane_data: MappedManeData | None = None
|
389
|
-
warnings: list[StrictStr] = []
|
390
|
-
service_meta: ServiceMeta
|
391
|
-
|
392
|
-
model_config = ConfigDict(
|
393
|
-
json_schema_extra={
|
394
|
-
"example": {
|
395
|
-
"mapped_mane_data": {
|
396
|
-
"gene": "BRAF",
|
397
|
-
"refseq": "NM_001374258.1",
|
398
|
-
"ensembl": "ENST00000644969.2",
|
399
|
-
"strand": Strand.NEGATIVE,
|
400
|
-
"status": TranscriptPriority.MANE_PLUS_CLINICAL,
|
401
|
-
"alt_ac": "NC_000007.13",
|
402
|
-
"assembly": Assembly.GRCH37,
|
403
|
-
},
|
404
|
-
"warnings": [],
|
405
|
-
"service_meta": {
|
406
|
-
"name": "cool_seq_tool",
|
407
|
-
"version": __version__,
|
408
|
-
"response_datetime": _now,
|
409
|
-
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
410
|
-
},
|
411
|
-
}
|
412
|
-
}
|
413
|
-
)
|
414
|
-
|
415
|
-
|
416
|
-
class ManeData(BaseModel):
|
417
|
-
"""Define mane data fields"""
|
418
|
-
|
419
|
-
gene: StrictStr | None = None
|
420
|
-
refseq: StrictStr | None = None
|
421
|
-
ensembl: StrictStr | None = None
|
422
|
-
pos: tuple[int, int]
|
423
|
-
strand: Strand
|
424
|
-
status: TranscriptPriority
|
425
|
-
|
426
|
-
model_config = ConfigDict(
|
427
|
-
json_schema_extra={
|
428
|
-
"example": {
|
429
|
-
"gene": "BRAF",
|
430
|
-
"refseq": "NP_004324.2",
|
431
|
-
"ensembl": "ENSP00000493543.1",
|
432
|
-
"pos": (598, 598),
|
433
|
-
"strand": Strand.NEGATIVE,
|
434
|
-
"status": TranscriptPriority.MANE_SELECT,
|
435
|
-
}
|
436
|
-
}
|
437
|
-
)
|
438
|
-
|
439
|
-
|
440
|
-
class ManeDataService(BaseModelForbidExtra):
|
441
|
-
"""Service model response for getting mane data"""
|
442
|
-
|
443
|
-
mane_data: ManeData | None = None
|
444
|
-
warnings: list[StrictStr] = []
|
445
|
-
service_meta: ServiceMeta
|
446
|
-
|
447
|
-
model_config = ConfigDict(
|
448
|
-
json_schema_extra={
|
449
|
-
"example": {
|
450
|
-
"mane_data": {
|
451
|
-
"gene": "BRAF",
|
452
|
-
"refseq": "NP_004324.2",
|
453
|
-
"ensembl": "ENSP00000493543.1",
|
454
|
-
"pos": (598, 598),
|
455
|
-
"strand": Strand.NEGATIVE,
|
456
|
-
"status": TranscriptPriority.MANE_SELECT,
|
457
|
-
},
|
458
|
-
"warnings": [],
|
459
|
-
"service_meta": {
|
460
|
-
"name": "cool_seq_tool",
|
461
|
-
"version": __version__,
|
462
|
-
"response_datetime": _now,
|
463
|
-
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
464
|
-
},
|
465
|
-
}
|
466
|
-
}
|
467
|
-
)
|
468
|
-
|
469
|
-
|
470
|
-
# ALIGNMENT MAPPER SERVICE SCHEMAS
|
471
|
-
|
472
|
-
|
473
|
-
class CdnaRepresentation(BaseModelForbidExtra):
|
474
|
-
"""Model response for cDNA representation"""
|
475
|
-
|
476
|
-
c_ac: StrictStr
|
477
|
-
c_start_pos: StrictInt
|
478
|
-
c_end_pos: StrictInt
|
479
|
-
cds_start: StrictInt
|
480
|
-
residue_mode: Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.INTER_RESIDUE.value
|
481
|
-
|
482
|
-
model_config = ConfigDict(
|
483
|
-
json_schema_extra={
|
484
|
-
"example": {
|
485
|
-
"c_ac": "NM_004333.6",
|
486
|
-
"c_start_pos": 1797,
|
487
|
-
"c_end_pos": 1800,
|
488
|
-
"cds_start": 226,
|
489
|
-
"residue_mode": ResidueMode.INTER_RESIDUE,
|
490
|
-
}
|
491
|
-
}
|
492
|
-
)
|
493
|
-
|
494
|
-
|
495
|
-
class ToCdnaService(BaseModelForbidExtra):
|
496
|
-
"""Service model response for protein -> cDNA"""
|
497
|
-
|
498
|
-
c_data: CdnaRepresentation | None = None
|
499
|
-
warnings: list[StrictStr] = []
|
500
|
-
service_meta: ServiceMeta
|
501
|
-
|
502
|
-
model_config = ConfigDict(
|
503
|
-
json_schema_extra={
|
504
|
-
"example": {
|
505
|
-
"c_data": {
|
506
|
-
"c_ac": "NM_004333.6",
|
507
|
-
"c_start_pos": 1797,
|
508
|
-
"c_end_pos": 1800,
|
509
|
-
"cds_start": 226,
|
510
|
-
"residue_mode": ResidueMode.INTER_RESIDUE,
|
511
|
-
},
|
512
|
-
"warnings": [],
|
513
|
-
"service_meta": {
|
514
|
-
"name": "cool_seq_tool",
|
515
|
-
"version": __version__,
|
516
|
-
"response_datetime": _now,
|
517
|
-
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
518
|
-
},
|
519
|
-
}
|
520
|
-
}
|
521
|
-
)
|
522
|
-
|
523
|
-
|
524
|
-
class GenomicRepresentation(BaseModelForbidExtra):
|
525
|
-
"""Model response for genomic representation"""
|
526
|
-
|
527
|
-
g_ac: str
|
528
|
-
g_start_pos: int
|
529
|
-
g_end_pos: int
|
530
|
-
residue_mode: Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.INTER_RESIDUE.value
|
531
|
-
|
532
|
-
model_config = ConfigDict(
|
533
|
-
json_schema_extra={
|
534
|
-
"example": {
|
535
|
-
"g_ac": "NC_000007.13",
|
536
|
-
"g_start_pos": 140453134,
|
537
|
-
"g_end_pos": 140453137,
|
538
|
-
"residue_mode": ResidueMode.INTER_RESIDUE,
|
539
|
-
}
|
540
|
-
}
|
541
|
-
)
|
542
|
-
|
543
|
-
|
544
|
-
class ToGenomicService(BaseModelForbidExtra):
|
545
|
-
"""Service model response for cDNA -> genomic"""
|
546
|
-
|
547
|
-
g_data: GenomicRepresentation | None = None
|
548
|
-
warnings: list[StrictStr] = []
|
549
|
-
service_meta: ServiceMeta
|
550
|
-
|
551
|
-
model_config = ConfigDict(
|
552
|
-
json_schema_extra={
|
553
|
-
"example": {
|
554
|
-
"g_data": {
|
555
|
-
"g_ac": "NC_000007.13",
|
556
|
-
"g_start_pos": 140453134,
|
557
|
-
"g_end_pos": 140453137,
|
558
|
-
"residue_mode": ResidueMode.INTER_RESIDUE,
|
559
|
-
},
|
560
|
-
"warnings": [],
|
561
|
-
"service_meta": {
|
562
|
-
"name": "cool_seq_tool",
|
563
|
-
"version": __version__,
|
564
|
-
"response_datetime": _now,
|
565
|
-
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
566
|
-
},
|
567
|
-
}
|
568
|
-
}
|
569
|
-
)
|
@@ -8,6 +8,7 @@ from pathlib import Path
|
|
8
8
|
import polars as pl
|
9
9
|
|
10
10
|
from cool_seq_tool.resources.data_files import DataFile, get_data_file
|
11
|
+
from cool_seq_tool.schemas import ManeGeneData
|
11
12
|
|
12
13
|
_logger = logging.getLogger(__name__)
|
13
14
|
|
@@ -103,3 +104,37 @@ class ManeTranscriptMappings:
|
|
103
104
|
|
104
105
|
mane_rows = mane_rows.sort(by="MANE_status", descending=True)
|
105
106
|
return mane_rows.to_dicts()
|
107
|
+
|
108
|
+
def get_genomic_mane_genes(
|
109
|
+
self, ac: str, start: int, end: int
|
110
|
+
) -> list[ManeGeneData]:
|
111
|
+
"""Get MANE gene(s) for genomic location
|
112
|
+
|
113
|
+
:param ac: RefSeq genomic accession
|
114
|
+
:param start: Genomic start position. Assumes residue coordinates.
|
115
|
+
:param end: Genomic end position. Assumes residue coordinates.
|
116
|
+
:return: Unique MANE gene(s) found for a genomic location
|
117
|
+
"""
|
118
|
+
mane_rows = self.df.filter(
|
119
|
+
(start >= pl.col("chr_start"))
|
120
|
+
& (end <= pl.col("chr_end"))
|
121
|
+
& (pl.col("GRCh38_chr") == ac)
|
122
|
+
).unique(subset=["#NCBI_GeneID"])
|
123
|
+
|
124
|
+
if len(mane_rows) == 0:
|
125
|
+
return []
|
126
|
+
|
127
|
+
mane_rows = mane_rows.with_columns(
|
128
|
+
pl.col("#NCBI_GeneID")
|
129
|
+
.str.split_exact(":", 1)
|
130
|
+
.struct.field("field_1")
|
131
|
+
.cast(pl.Int32)
|
132
|
+
.alias("ncbi_gene_id"),
|
133
|
+
pl.col("HGNC_ID")
|
134
|
+
.str.split_exact(":", 1)
|
135
|
+
.struct.field("field_1")
|
136
|
+
.cast(pl.Int32)
|
137
|
+
.alias("hgnc_id"),
|
138
|
+
)
|
139
|
+
mane_rows = mane_rows.select(["ncbi_gene_id", "hgnc_id", "symbol"])
|
140
|
+
return [ManeGeneData(**mane_gene) for mane_gene in mane_rows.to_dicts()]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
@@ -56,7 +56,7 @@ Requires-Dist: ga4gh.vrs
|
|
56
56
|
Requires-Dist: wags-tails ~=0.1.3
|
57
57
|
Requires-Dist: bioutils
|
58
58
|
Provides-Extra: dev
|
59
|
-
Requires-Dist: pre-commit ; extra == 'dev'
|
59
|
+
Requires-Dist: pre-commit >=3.7.1 ; extra == 'dev'
|
60
60
|
Requires-Dist: ipython ; extra == 'dev'
|
61
61
|
Requires-Dist: ipykernel ; extra == 'dev'
|
62
62
|
Requires-Dist: psycopg2-binary ; extra == 'dev'
|
@@ -69,11 +69,11 @@ Requires-Dist: sphinx-copybutton ==0.5.2 ; extra == 'docs'
|
|
69
69
|
Requires-Dist: sphinxext-opengraph ==0.8.2 ; extra == 'docs'
|
70
70
|
Requires-Dist: furo ==2023.3.27 ; extra == 'docs'
|
71
71
|
Requires-Dist: sphinx-github-changelog ==1.2.1 ; extra == 'docs'
|
72
|
-
Provides-Extra:
|
73
|
-
Requires-Dist: pytest ; extra == '
|
74
|
-
Requires-Dist: pytest-cov ; extra == '
|
75
|
-
Requires-Dist: pytest-asyncio ==0.18.3 ; extra == '
|
76
|
-
Requires-Dist: mock ; extra == '
|
72
|
+
Provides-Extra: test
|
73
|
+
Requires-Dist: pytest ; extra == 'test'
|
74
|
+
Requires-Dist: pytest-cov ; extra == 'test'
|
75
|
+
Requires-Dist: pytest-asyncio ==0.18.3 ; extra == 'test'
|
76
|
+
Requires-Dist: mock ; extra == 'test'
|
77
77
|
|
78
78
|
<h1 align="center">
|
79
79
|
CoolSeqTool
|
@@ -1,24 +1,24 @@
|
|
1
1
|
cool_seq_tool/__init__.py,sha256=fJmjglvv3Ylm0khQSD-XTqdyUA5YzEiS3iB8FGTOhIs,247
|
2
2
|
cool_seq_tool/app.py,sha256=DJFcPVHQ5Ar9xdmHwrFKFMqbjDtx3L9gn84_wP63ARY,4982
|
3
|
-
cool_seq_tool/schemas.py,sha256=
|
3
|
+
cool_seq_tool/schemas.py,sha256=OfRoEEB-bJPvPtSh8GKDBMs_wdGljrSCkg9vPVqFeIw,8033
|
4
4
|
cool_seq_tool/utils.py,sha256=mq_eGgqiILDcrtb1trMwRdsTERixuj8kDxHfgwsWsko,2914
|
5
5
|
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
6
|
cool_seq_tool/handlers/seqrepo_access.py,sha256=jKUn9mdyK0rHJk9I274N9H_B-M1m4r-hmOX7VwfjRC0,9135
|
7
7
|
cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
|
8
8
|
cool_seq_tool/mappers/alignment.py,sha256=6Vk4XEar54ivuH8N7oBqa9gUa8E5GjWCI9hC1HCkM18,9552
|
9
|
-
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=McLXZcnDLdLSKR3eHnY4xJ0iLfCmSwAwK_RQXBV1AYQ,39160
|
10
10
|
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
11
|
-
cool_seq_tool/mappers/mane_transcript.py,sha256=
|
11
|
+
cool_seq_tool/mappers/mane_transcript.py,sha256=Iv6J2Tjwt9cYAqoiEQ-XNEc8iRI3tXOONA6YjOv2huU,54241
|
12
12
|
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
13
13
|
cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
|
14
14
|
cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
|
15
15
|
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
16
16
|
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
17
|
-
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=
|
17
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=E_pj7FEBcB6HUR8yhSVibB0beMMlKJ62pK0qvl4y5nw,5358
|
18
18
|
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
19
19
|
cool_seq_tool/sources/uta_database.py,sha256=TKMx_yoqWe5QVnqkZe_10x-Lp4PtKvArbMg5ufba0_Q,38353
|
20
|
-
cool_seq_tool-0.
|
21
|
-
cool_seq_tool-0.
|
22
|
-
cool_seq_tool-0.
|
23
|
-
cool_seq_tool-0.
|
24
|
-
cool_seq_tool-0.
|
20
|
+
cool_seq_tool-0.6.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
+
cool_seq_tool-0.6.0.dist-info/METADATA,sha256=9q0VK-zTlDxBI5jOG3d4w02n9SevWbGdHSO5HP0-U8M,6210
|
22
|
+
cool_seq_tool-0.6.0.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
23
|
+
cool_seq_tool-0.6.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
+
cool_seq_tool-0.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|