cool-seq-tool 0.4.0.dev1__py3-none-any.whl → 0.4.0.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/data/__init__.py +1 -1
- cool_seq_tool/data/data_downloads.py +19 -13
- cool_seq_tool/handlers/seqrepo_access.py +2 -4
- cool_seq_tool/mappers/exon_genomic_coords.py +25 -26
- cool_seq_tool/mappers/mane_transcript.py +112 -106
- cool_seq_tool/routers/default.py +7 -9
- cool_seq_tool/routers/mane.py +2 -2
- cool_seq_tool/schemas.py +30 -21
- cool_seq_tool/sources/mane_transcript_mappings.py +1 -1
- cool_seq_tool/sources/transcript_mappings.py +13 -16
- cool_seq_tool/sources/uta_database.py +134 -153
- cool_seq_tool/utils.py +5 -2
- cool_seq_tool/version.py +1 -1
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/METADATA +7 -6
- cool_seq_tool-0.4.0.dev2.dist-info/RECORD +29 -0
- cool_seq_tool-0.4.0.dev1.dist-info/RECORD +0 -29
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/WHEEL +0 -0
- {cool_seq_tool-0.4.0.dev1.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/top_level.txt +0 -0
cool_seq_tool/routers/default.py
CHANGED
@@ -45,7 +45,7 @@ async def genomic_to_transcript_exon_coordinates(
|
|
45
45
|
request_body = request_body.model_dump()
|
46
46
|
|
47
47
|
response = GenomicDataResponse(
|
48
|
-
genomic_data=None, warnings=
|
48
|
+
genomic_data=None, warnings=[], service_meta=service_meta()
|
49
49
|
)
|
50
50
|
|
51
51
|
try:
|
@@ -53,9 +53,7 @@ async def genomic_to_transcript_exon_coordinates(
|
|
53
53
|
**request_body
|
54
54
|
)
|
55
55
|
except Exception as e:
|
56
|
-
logger.error(
|
57
|
-
f"genomic_to_transcript_exon_coordinates unhandled exception {str(e)}"
|
58
|
-
)
|
56
|
+
logger.error("genomic_to_transcript_exon_coordinates unhandled exception %s", e)
|
59
57
|
response.warnings.append(UNHANDLED_EXCEPTION_MSG)
|
60
58
|
|
61
59
|
return response
|
@@ -80,7 +78,7 @@ async def transcript_to_genomic_coordinates(
|
|
80
78
|
request_body = request_body.model_dump()
|
81
79
|
|
82
80
|
response = GenomicDataResponse(
|
83
|
-
genomic_data=None, warnings=
|
81
|
+
genomic_data=None, warnings=[], service_meta=service_meta()
|
84
82
|
)
|
85
83
|
|
86
84
|
try:
|
@@ -90,7 +88,7 @@ async def transcript_to_genomic_coordinates(
|
|
90
88
|
)
|
91
89
|
)
|
92
90
|
except Exception as e:
|
93
|
-
logger.error(
|
91
|
+
logger.error("transcript_to_genomic_coordinates unhandled exception %s", e)
|
94
92
|
response.warnings.append(UNHANDLED_EXCEPTION_MSG)
|
95
93
|
|
96
94
|
return response
|
@@ -119,9 +117,9 @@ async def get_sequence(
|
|
119
117
|
_, path = tempfile.mkstemp(suffix=".fasta")
|
120
118
|
try:
|
121
119
|
cool_seq_tool.seqrepo_access.get_fasta_file(sequence_id, Path(path))
|
122
|
-
except KeyError:
|
120
|
+
except KeyError as e:
|
123
121
|
raise HTTPException(
|
124
122
|
status_code=404, detail="No sequence available for requested identifier"
|
125
|
-
)
|
126
|
-
background_tasks.add_task(lambda p: os.unlink(p), path)
|
123
|
+
) from e
|
124
|
+
background_tasks.add_task(lambda p: os.unlink(p), path) # noqa: PTH108
|
127
125
|
return FileResponse(path)
|
cool_seq_tool/routers/mane.py
CHANGED
@@ -73,7 +73,7 @@ async def get_mane_data(
|
|
73
73
|
:param ResidueMode residue_mode: Starting residue mode for `start_pos`
|
74
74
|
and `end_pos`. Will always return coordinates in inter-residue
|
75
75
|
"""
|
76
|
-
warnings =
|
76
|
+
warnings = []
|
77
77
|
mane_data = None
|
78
78
|
try:
|
79
79
|
mane_data = await cool_seq_tool.mane_transcript.get_mane_transcript(
|
@@ -90,7 +90,7 @@ async def get_mane_data(
|
|
90
90
|
if not mane_data:
|
91
91
|
warnings.append("Unable to retrieve MANE data")
|
92
92
|
except Exception as e:
|
93
|
-
logger.exception(
|
93
|
+
logger.exception("get_mane_data unhandled exception %s", e)
|
94
94
|
warnings.append(UNHANDLED_EXCEPTION_MSG)
|
95
95
|
|
96
96
|
return ManeDataService(
|
cool_seq_tool/schemas.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
"""Defines attribute constants, useful object structures, and API response schemas."""
|
2
|
+
import datetime
|
2
3
|
import re
|
3
|
-
from datetime import datetime
|
4
4
|
from enum import Enum, IntEnum
|
5
5
|
from typing import List, Literal, Optional, Tuple, Union
|
6
6
|
|
@@ -15,6 +15,8 @@ from pydantic import (
|
|
15
15
|
|
16
16
|
from cool_seq_tool.version import __version__
|
17
17
|
|
18
|
+
_now = str(datetime.datetime.now(tz=datetime.timezone.utc))
|
19
|
+
|
18
20
|
|
19
21
|
class AnnotationLayer(str, Enum):
|
20
22
|
"""Create enum for supported annotation layers"""
|
@@ -79,9 +81,10 @@ class GenomicRequestBody(BaseModelForbidExtra):
|
|
79
81
|
@model_validator(mode="after")
|
80
82
|
def check_start_and_end(cls, values):
|
81
83
|
"""Check that at least one of {``start``, ``end``} is set"""
|
82
|
-
msg = "Must provide either `start` or `end`"
|
83
84
|
start, end = values.start, values.end
|
84
|
-
|
85
|
+
if not start or end:
|
86
|
+
msg = "Must provide either `start` or `end`"
|
87
|
+
raise ValueError(msg)
|
85
88
|
return values
|
86
89
|
|
87
90
|
model_config = ConfigDict(
|
@@ -112,9 +115,10 @@ class TranscriptRequestBody(BaseModelForbidExtra):
|
|
112
115
|
@model_validator(mode="after")
|
113
116
|
def check_exon_start_and_exon_end(cls, values):
|
114
117
|
"""Check that at least one of {``exon_start``, ``exon_end``} is set"""
|
115
|
-
msg = "Must provide either `exon_start` or `exon_end`"
|
116
118
|
exon_start, exon_end = values.exon_start, values.exon_end
|
117
|
-
|
119
|
+
if not exon_start or exon_end:
|
120
|
+
msg = "Must provide either `exon_start` or `exon_end`"
|
121
|
+
raise ValueError(msg)
|
118
122
|
return values
|
119
123
|
|
120
124
|
model_config = ConfigDict(
|
@@ -177,20 +181,23 @@ class GenomicData(BaseModelForbidExtra):
|
|
177
181
|
Check that at least one of {``exon_start``, ``exon_end``} is set.
|
178
182
|
If not set, set corresponding offset to ``None``
|
179
183
|
"""
|
180
|
-
msg = "Missing values for `start` or `end`"
|
181
184
|
start = values.start
|
182
185
|
end = values.end
|
183
|
-
|
186
|
+
if not start and not end:
|
187
|
+
msg = "Missing values for `start` or `end`"
|
188
|
+
raise ValueError(msg)
|
184
189
|
|
185
190
|
if start:
|
186
|
-
|
187
|
-
|
191
|
+
if not values.exon_start:
|
192
|
+
msg = "Missing value `exon_start`"
|
193
|
+
raise ValueError(msg)
|
188
194
|
else:
|
189
195
|
values.exon_start_offset = None
|
190
196
|
|
191
197
|
if end:
|
192
|
-
|
193
|
-
|
198
|
+
if not values.exon_end:
|
199
|
+
msg = "Missing value `exon_end`"
|
200
|
+
raise ValueError(msg)
|
194
201
|
else:
|
195
202
|
values.exon_end_offset = None
|
196
203
|
return values
|
@@ -218,7 +225,7 @@ class ServiceMeta(BaseModelForbidExtra):
|
|
218
225
|
|
219
226
|
name: Literal["cool_seq_tool"] = "cool_seq_tool"
|
220
227
|
version: StrictStr
|
221
|
-
response_datetime: datetime
|
228
|
+
response_datetime: datetime.datetime
|
222
229
|
url: Literal[
|
223
230
|
"https://github.com/GenomicMedLab/cool-seq-tool"
|
224
231
|
] = "https://github.com/GenomicMedLab/cool-seq-tool"
|
@@ -228,8 +235,10 @@ class ServiceMeta(BaseModelForbidExtra):
|
|
228
235
|
"""Check version matches semantic versioning regex pattern.
|
229
236
|
https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
|
230
237
|
"""
|
231
|
-
version_regex = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
|
232
|
-
|
238
|
+
version_regex = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
|
239
|
+
if not re.match(version_regex, v):
|
240
|
+
msg = f"Invalid version {v}"
|
241
|
+
raise ValueError(msg)
|
233
242
|
return v
|
234
243
|
|
235
244
|
model_config = ConfigDict(
|
@@ -237,7 +246,7 @@ class ServiceMeta(BaseModelForbidExtra):
|
|
237
246
|
"example": {
|
238
247
|
"name": "cool_seq_tool",
|
239
248
|
"version": __version__,
|
240
|
-
"response_datetime":
|
249
|
+
"response_datetime": _now,
|
241
250
|
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
242
251
|
}
|
243
252
|
}
|
@@ -267,7 +276,7 @@ class TranscriptExonDataResponse(BaseModelForbidExtra):
|
|
267
276
|
"service_meta": {
|
268
277
|
"name": "cool_seq_tool",
|
269
278
|
"version": __version__,
|
270
|
-
"response_datetime":
|
279
|
+
"response_datetime": _now,
|
271
280
|
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
272
281
|
},
|
273
282
|
}
|
@@ -301,7 +310,7 @@ class GenomicDataResponse(BaseModelForbidExtra):
|
|
301
310
|
"service_meta": {
|
302
311
|
"name": "cool_seq_tool",
|
303
312
|
"version": __version__,
|
304
|
-
"response_datetime":
|
313
|
+
"response_datetime": _now,
|
305
314
|
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
306
315
|
},
|
307
316
|
}
|
@@ -358,7 +367,7 @@ class MappedManeDataService(BaseModelForbidExtra):
|
|
358
367
|
"service_meta": {
|
359
368
|
"name": "cool_seq_tool",
|
360
369
|
"version": __version__,
|
361
|
-
"response_datetime":
|
370
|
+
"response_datetime": _now,
|
362
371
|
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
363
372
|
},
|
364
373
|
}
|
@@ -412,7 +421,7 @@ class ManeDataService(BaseModelForbidExtra):
|
|
412
421
|
"service_meta": {
|
413
422
|
"name": "cool_seq_tool",
|
414
423
|
"version": __version__,
|
415
|
-
"response_datetime":
|
424
|
+
"response_datetime": _now,
|
416
425
|
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
417
426
|
},
|
418
427
|
}
|
@@ -466,7 +475,7 @@ class ToCdnaService(BaseModelForbidExtra):
|
|
466
475
|
"service_meta": {
|
467
476
|
"name": "cool_seq_tool",
|
468
477
|
"version": __version__,
|
469
|
-
"response_datetime":
|
478
|
+
"response_datetime": _now,
|
470
479
|
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
471
480
|
},
|
472
481
|
}
|
@@ -514,7 +523,7 @@ class ToGenomicService(BaseModelForbidExtra):
|
|
514
523
|
"service_meta": {
|
515
524
|
"name": "cool_seq_tool",
|
516
525
|
"version": __version__,
|
517
|
-
"response_datetime":
|
526
|
+
"response_datetime": _now,
|
518
527
|
"url": "https://github.com/GenomicMedLab/cool-seq-tool",
|
519
528
|
},
|
520
529
|
}
|
cool_seq_tool/sources/transcript_mappings.py
CHANGED
@@ -65,7 +65,7 @@ class TranscriptMappings:
|
|
65
65
|
|
66
66
|
:param transcript_file_path: Path to transcript mappings file
|
67
67
|
"""
|
68
|
-
with open(
|
68
|
+
with transcript_file_path.open() as file:
|
69
69
|
reader = csv.DictReader(file, delimiter="\t")
|
70
70
|
for row in reader:
|
71
71
|
gene = row["Gene name"]
|
@@ -108,7 +108,7 @@ class TranscriptMappings:
|
|
108
108
|
|
109
109
|
:param Path lrg_refseqgene_path: Path to LRG RefSeqGene file
|
110
110
|
"""
|
111
|
-
with open(
|
111
|
+
with lrg_refseqgene_path.open() as file:
|
112
112
|
reader = csv.DictReader(file, delimiter="\t")
|
113
113
|
for row in reader:
|
114
114
|
gene = row["Symbol"]
|
@@ -146,7 +146,7 @@ class TranscriptMappings:
|
|
146
146
|
:param identifier: Gene identifier to get protein transcripts for
|
147
147
|
:return: Protein transcripts for a gene symbol
|
148
148
|
"""
|
149
|
-
protein_transcripts =
|
149
|
+
protein_transcripts = []
|
150
150
|
protein_transcripts += self.ensembl_protein_version_for_gene_symbol.get(
|
151
151
|
identifier, ""
|
152
152
|
)
|
@@ -160,7 +160,7 @@ class TranscriptMappings:
|
|
160
160
|
:param identifier: Gene identifier to find transcripts for
|
161
161
|
:return: cDNA transcripts for a gene symbol
|
162
162
|
"""
|
163
|
-
genomic_transcripts =
|
163
|
+
genomic_transcripts = []
|
164
164
|
genomic_transcripts += self.ensembl_transcript_version_for_gene_symbol.get(
|
165
165
|
identifier, ""
|
166
166
|
)
|
@@ -179,10 +179,9 @@ class TranscriptMappings:
|
|
179
179
|
:return: Gene symbol
|
180
180
|
"""
|
181
181
|
gene_symbol = self.ensembl_protein_version_to_gene_symbol.get(q)
|
182
|
-
if not gene_symbol:
|
183
|
-
|
184
|
-
|
185
|
-
gene_symbol = self.ensembl_protein_to_gene_symbol.get(q)
|
182
|
+
if not gene_symbol and "." in q:
|
183
|
+
q = q.split(".")[0]
|
184
|
+
gene_symbol = self.ensembl_protein_to_gene_symbol.get(q)
|
186
185
|
return gene_symbol
|
187
186
|
|
188
187
|
def get_gene_symbol_from_refeq_protein(self, q: str) -> Optional[str]:
|
@@ -200,10 +199,9 @@ class TranscriptMappings:
|
|
200
199
|
:return: Gene symbol
|
201
200
|
"""
|
202
201
|
gene_symbol = self.refseq_rna_version_to_gene_symbol.get(q)
|
203
|
-
if not gene_symbol:
|
204
|
-
|
205
|
-
|
206
|
-
gene_symbol = self.refseq_rna_to_gene_symbol.get(q)
|
202
|
+
if not gene_symbol and "." in q:
|
203
|
+
q = q.split(".")[0]
|
204
|
+
gene_symbol = self.refseq_rna_to_gene_symbol.get(q)
|
207
205
|
return gene_symbol
|
208
206
|
|
209
207
|
def get_gene_symbol_from_ensembl_transcript(self, q: str) -> Optional[str]:
|
@@ -213,8 +211,7 @@ class TranscriptMappings:
|
|
213
211
|
:return: Gene symbol
|
214
212
|
"""
|
215
213
|
gene_symbol = self.ensembl_transcript_version_to_gene_symbol.get(q)
|
216
|
-
if not gene_symbol:
|
217
|
-
|
218
|
-
|
219
|
-
gene_symbol = self.ensembl_transcript_to_gene_symbol.get(q)
|
214
|
+
if not gene_symbol and "." in q:
|
215
|
+
q = q.split(".")[0]
|
216
|
+
gene_symbol = self.ensembl_transcript_to_gene_symbol.get(q)
|
220
217
|
return gene_symbol
|