cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +7 -11
- cool_seq_tool/app.py +44 -24
- cool_seq_tool/handlers/__init__.py +1 -0
- cool_seq_tool/handlers/seqrepo_access.py +27 -25
- cool_seq_tool/mappers/__init__.py +3 -1
- cool_seq_tool/mappers/alignment.py +5 -6
- cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
- cool_seq_tool/mappers/liftover.py +90 -0
- cool_seq_tool/mappers/mane_transcript.py +208 -113
- cool_seq_tool/resources/__init__.py +1 -0
- cool_seq_tool/resources/data_files.py +93 -0
- cool_seq_tool/resources/status.py +153 -0
- cool_seq_tool/schemas.py +92 -54
- cool_seq_tool/sources/__init__.py +1 -0
- cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
- cool_seq_tool/sources/transcript_mappings.py +41 -32
- cool_seq_tool/sources/uta_database.py +96 -249
- cool_seq_tool/utils.py +44 -4
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
- cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
- cool_seq_tool/api.py +0 -42
- cool_seq_tool/data/__init__.py +0 -2
- cool_seq_tool/data/data_downloads.py +0 -89
- cool_seq_tool/paths.py +0 -28
- cool_seq_tool/routers/__init__.py +0 -16
- cool_seq_tool/routers/default.py +0 -125
- cool_seq_tool/routers/mane.py +0 -98
- cool_seq_tool/routers/mappings.py +0 -155
- cool_seq_tool/version.py +0 -2
- cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
- /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
cool_seq_tool/utils.py
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
"""Provide a small set of general helper functions."""
|
2
|
+
|
2
3
|
import datetime
|
3
4
|
import logging
|
4
|
-
from typing import Tuple
|
5
5
|
|
6
|
+
from bioutils.accessions import chr22XY
|
7
|
+
|
8
|
+
from cool_seq_tool import __version__
|
6
9
|
from cool_seq_tool.schemas import ResidueMode, ServiceMeta
|
7
|
-
from cool_seq_tool.version import __version__
|
8
10
|
|
9
|
-
|
11
|
+
_logger = logging.getLogger(__name__)
|
10
12
|
|
11
13
|
|
12
14
|
def get_inter_residue_pos(
|
13
15
|
start_pos: int, end_pos: int, residue_mode: ResidueMode
|
14
|
-
) -> Tuple[int, int]:
|
16
|
+
) -> tuple[int, int]:
|
15
17
|
"""Return equivalent inter-residue position.
|
16
18
|
|
17
19
|
Generally, we prefer to work with inter-residue coordinates where possible. Our
|
@@ -47,3 +49,41 @@ def service_meta() -> ServiceMeta:
|
|
47
49
|
version=__version__,
|
48
50
|
response_datetime=datetime.datetime.now(tz=datetime.timezone.utc),
|
49
51
|
)
|
52
|
+
|
53
|
+
|
54
|
+
def process_chromosome_input(chromosome: str, context: str = "") -> str:
|
55
|
+
"""Perform processing on a chromosome arg.
|
56
|
+
|
57
|
+
E.g.
|
58
|
+
|
59
|
+
>>> from cool_seq_tool.utils import process_chromosome_input
|
60
|
+
>>> process_chromosome_input("7")
|
61
|
+
'chr7'
|
62
|
+
>>> process_chromosome_input("x")
|
63
|
+
'chrX'
|
64
|
+
>>> process_chromosome_input("chr7")
|
65
|
+
'chr7'
|
66
|
+
|
67
|
+
In the future, we could also use this method to be more opinionated about legal
|
68
|
+
chromosome values, or throw exceptions in the event of invalid or unrecognized
|
69
|
+
terms.
|
70
|
+
|
71
|
+
:param chromosome: user-provided chromosome input
|
72
|
+
:param context: calling context to provide in log
|
73
|
+
:return: processed chromosome value. Idempotent -- returns original value if no
|
74
|
+
changes needed.
|
75
|
+
"""
|
76
|
+
original_chromosome_value = chromosome
|
77
|
+
if chromosome.lower().startswith("chr"):
|
78
|
+
chromosome = f"chr{chromosome[3:].upper()}"
|
79
|
+
else:
|
80
|
+
chromosome = chromosome.upper()
|
81
|
+
chromosome = chr22XY(chromosome)
|
82
|
+
if original_chromosome_value != chromosome:
|
83
|
+
_logger.warning(
|
84
|
+
"Transformed provided chromosome value from `%s` to `%s` in `%s`",
|
85
|
+
original_chromosome_value,
|
86
|
+
chromosome,
|
87
|
+
context if context else "cool_seq_tool",
|
88
|
+
)
|
89
|
+
return chromosome
|
{cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.4.0.dev3
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
7
7
|
|
8
|
-
Copyright (c) 2021-2023 Wagner Lab
|
8
|
+
Copyright (c) 2021-2024 Wagner Lab
|
9
9
|
|
10
10
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
11
11
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -26,12 +26,11 @@ License: MIT License
|
|
26
26
|
SOFTWARE.
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/genomicmedlab/cool-seq-tool
|
29
|
-
Project-URL: Documentation, https://coolseqtool.readthedocs.io/
|
29
|
+
Project-URL: Documentation, https://coolseqtool.readthedocs.io/
|
30
30
|
Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
|
31
31
|
Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
|
32
32
|
Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
|
33
33
|
Classifier: Development Status :: 3 - Alpha
|
34
|
-
Classifier: Framework :: FastAPI
|
35
34
|
Classifier: Framework :: Pydantic
|
36
35
|
Classifier: Framework :: Pydantic :: 2
|
37
36
|
Classifier: Intended Audience :: Science/Research
|
@@ -39,30 +38,29 @@ Classifier: Intended Audience :: Developers
|
|
39
38
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
40
39
|
Classifier: License :: OSI Approved :: MIT License
|
41
40
|
Classifier: Programming Language :: Python :: 3
|
42
|
-
Classifier: Programming Language :: Python :: 3.8
|
43
|
-
Classifier: Programming Language :: Python :: 3.9
|
44
41
|
Classifier: Programming Language :: Python :: 3.10
|
45
42
|
Classifier: Programming Language :: Python :: 3.11
|
46
|
-
|
43
|
+
Classifier: Programming Language :: Python :: 3.12
|
44
|
+
Requires-Python: >=3.10
|
47
45
|
Description-Content-Type: text/markdown
|
48
46
|
License-File: LICENSE
|
49
47
|
Requires-Dist: asyncpg
|
50
48
|
Requires-Dist: aiofiles
|
51
49
|
Requires-Dist: boto3
|
52
50
|
Requires-Dist: agct >=0.1.0-dev1
|
53
|
-
Requires-Dist: polars
|
51
|
+
Requires-Dist: polars ~=1.0
|
54
52
|
Requires-Dist: hgvs
|
55
53
|
Requires-Dist: biocommons.seqrepo
|
56
54
|
Requires-Dist: pydantic ==2.*
|
57
|
-
Requires-Dist: uvicorn
|
58
|
-
Requires-Dist: fastapi
|
59
55
|
Requires-Dist: ga4gh.vrs
|
56
|
+
Requires-Dist: wags-tails ~=0.1.3
|
57
|
+
Requires-Dist: bioutils
|
60
58
|
Provides-Extra: dev
|
61
59
|
Requires-Dist: pre-commit ; extra == 'dev'
|
62
60
|
Requires-Dist: ipython ; extra == 'dev'
|
63
61
|
Requires-Dist: ipykernel ; extra == 'dev'
|
64
62
|
Requires-Dist: psycopg2-binary ; extra == 'dev'
|
65
|
-
Requires-Dist: ruff ==0.
|
63
|
+
Requires-Dist: ruff ==0.5.0 ; extra == 'dev'
|
66
64
|
Provides-Extra: docs
|
67
65
|
Requires-Dist: sphinx ==6.1.3 ; extra == 'docs'
|
68
66
|
Requires-Dist: sphinx-autodoc-typehints ==1.22.0 ; extra == 'docs'
|
@@ -81,8 +79,14 @@ Requires-Dist: mock ; extra == 'tests'
|
|
81
79
|
CoolSeqTool
|
82
80
|
</h1>
|
83
81
|
|
82
|
+
[](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
|
83
|
+
|
84
|
+
---
|
85
|
+
|
84
86
|
**[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
|
85
87
|
|
88
|
+
---
|
89
|
+
|
86
90
|
## Overview
|
87
91
|
|
88
92
|
<!-- description -->
|
@@ -113,6 +117,7 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
|
|
113
117
|
|
114
118
|
```pycon
|
115
119
|
>>> from cool_seq_tool.app import CoolSeqTool
|
120
|
+
>>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
|
116
121
|
>>> cst = CoolSeqTool()
|
117
122
|
>>> result = await cst.mane_transcript.get_mane_transcript(
|
118
123
|
... "NP_004324.2",
|
cool_seq_tool-0.5.0.dist-info/RECORD
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
cool_seq_tool/__init__.py,sha256=fJmjglvv3Ylm0khQSD-XTqdyUA5YzEiS3iB8FGTOhIs,247
|
2
|
+
cool_seq_tool/app.py,sha256=DJFcPVHQ5Ar9xdmHwrFKFMqbjDtx3L9gn84_wP63ARY,4982
|
3
|
+
cool_seq_tool/schemas.py,sha256=hZ4pStUHgCarXPFLkuGU26znC0dooVDvixO_7eO5eUQ,16301
|
4
|
+
cool_seq_tool/utils.py,sha256=mq_eGgqiILDcrtb1trMwRdsTERixuj8kDxHfgwsWsko,2914
|
5
|
+
cool_seq_tool/handlers/__init__.py,sha256=KalQ46vX1MO4SJz2SlspKoIRy1n3c3Vp1t4Y2pIfqow,78
|
6
|
+
cool_seq_tool/handlers/seqrepo_access.py,sha256=jKUn9mdyK0rHJk9I274N9H_B-M1m4r-hmOX7VwfjRC0,9135
|
7
|
+
cool_seq_tool/mappers/__init__.py,sha256=O0JRxNFk8nWxD4v5ij47xelhvfVLdEXS43l2tzRuiUE,305
|
8
|
+
cool_seq_tool/mappers/alignment.py,sha256=6Vk4XEar54ivuH8N7oBqa9gUa8E5GjWCI9hC1HCkM18,9552
|
9
|
+
cool_seq_tool/mappers/exon_genomic_coords.py,sha256=Tvy0IQA9oAmq3dSYGnFHP-DlwFb1vwqehJ19NGg6Mpk,39160
|
10
|
+
cool_seq_tool/mappers/liftover.py,sha256=lltx9zxfkrb5PHtJlKp3a39JCwPP4e0Zft-mQc1jXL8,3367
|
11
|
+
cool_seq_tool/mappers/mane_transcript.py,sha256=iNkK8mtzXPmD1BROHzJ4vipr6oBbQv_BdUmvuOGFIMA,52823
|
12
|
+
cool_seq_tool/resources/__init__.py,sha256=VwUC8YaucTS6SmRirToulZTF6CuvuLQRSxFfSfAovCc,77
|
13
|
+
cool_seq_tool/resources/data_files.py,sha256=3lhu28tzlSoTs4vHZNu-hhoAWRrPGuZj_oIjqk2sYQM,3837
|
14
|
+
cool_seq_tool/resources/status.py,sha256=L0KM-VG3N4Yuaqh3AKZd_2KPDLR0Y7rvW_OD6x8mF7A,5717
|
15
|
+
cool_seq_tool/resources/transcript_mapping.tsv,sha256=AO3luYQAbFiCoRgiiPXotakb5pAwx1jDCeXpvGdIuac,24138769
|
16
|
+
cool_seq_tool/sources/__init__.py,sha256=51QiymeptF7AeVGgV-tW_9f4pIUr0xtYbyzpvHOCneM,304
|
17
|
+
cool_seq_tool/sources/mane_transcript_mappings.py,sha256=IQtaRWrIi3f1k0WiDtlmlfOlQQB6bTKSEAh2PHk-Lsw,4079
|
18
|
+
cool_seq_tool/sources/transcript_mappings.py,sha256=903RKTMBO2rbKh6iTQ1BEWnY4C7saBFMPw2_4ATuudg,10054
|
19
|
+
cool_seq_tool/sources/uta_database.py,sha256=TKMx_yoqWe5QVnqkZe_10x-Lp4PtKvArbMg5ufba0_Q,38353
|
20
|
+
cool_seq_tool-0.5.0.dist-info/LICENSE,sha256=IpqC9A-tZW7XXXvCS8c4AVINqkmpxiVA-34Qe3CZSjo,1072
|
21
|
+
cool_seq_tool-0.5.0.dist-info/METADATA,sha256=Ajjbvx2EZZUem5SVHY3qQHPRPx74c-VeHCMQdYcHB-o,6207
|
22
|
+
cool_seq_tool-0.5.0.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
|
23
|
+
cool_seq_tool-0.5.0.dist-info/top_level.txt,sha256=cGuxdN6p3y16jQf6hCwWhE4OptwUeZPm_PNJlPb3b0k,14
|
24
|
+
cool_seq_tool-0.5.0.dist-info/RECORD,,
|
cool_seq_tool/api.py
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
"""Main application for FastAPI"""
|
2
|
-
from typing import Dict
|
3
|
-
|
4
|
-
from fastapi import FastAPI
|
5
|
-
from fastapi.openapi.utils import get_openapi
|
6
|
-
|
7
|
-
from cool_seq_tool.routers import SERVICE_NAME, default, mane, mappings
|
8
|
-
from cool_seq_tool.version import __version__
|
9
|
-
|
10
|
-
app = FastAPI(
|
11
|
-
docs_url=f"/{SERVICE_NAME}",
|
12
|
-
openapi_url=f"/{SERVICE_NAME}/openapi.json",
|
13
|
-
swagger_ui_parameters={"tryItOutEnabled": True},
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
app.include_router(default.router)
|
18
|
-
app.include_router(mane.router)
|
19
|
-
app.include_router(mappings.router)
|
20
|
-
|
21
|
-
|
22
|
-
def custom_openapi() -> Dict:
|
23
|
-
"""Generate custom fields for OpenAPI response."""
|
24
|
-
if app.openapi_schema:
|
25
|
-
return app.openapi_schema
|
26
|
-
openapi_schema = get_openapi(
|
27
|
-
title="The GenomicMedLab Cool-Seq-Tool",
|
28
|
-
version=__version__,
|
29
|
-
description="Common Operations On Lots of Sequences Tool.",
|
30
|
-
routes=app.routes,
|
31
|
-
)
|
32
|
-
|
33
|
-
openapi_schema["info"]["contact"] = {
|
34
|
-
"name": "Alex H. Wagner",
|
35
|
-
"email": "Alex.Wagner@nationwidechildrens.org",
|
36
|
-
"url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",
|
37
|
-
}
|
38
|
-
app.openapi_schema = openapi_schema
|
39
|
-
return app.openapi_schema
|
40
|
-
|
41
|
-
|
42
|
-
app.openapi = custom_openapi
|
cool_seq_tool/data/data_downloads.py
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
"""Handle acquisition of external data."""
|
2
|
-
import datetime
|
3
|
-
import gzip
|
4
|
-
import logging
|
5
|
-
import shutil
|
6
|
-
from ftplib import FTP
|
7
|
-
from pathlib import Path
|
8
|
-
|
9
|
-
from dateutil import parser
|
10
|
-
|
11
|
-
from cool_seq_tool import APP_ROOT
|
12
|
-
|
13
|
-
logger = logging.getLogger("cool_seq_tool")
|
14
|
-
|
15
|
-
|
16
|
-
class DataDownload:
|
17
|
-
"""Manage downloadable data files. Responsible for checking if files are available
|
18
|
-
under expected locations, and fetching them if not.
|
19
|
-
|
20
|
-
Relevant methods are called automatically by data classes; users should not have
|
21
|
-
to interact with this class under normal circumstances.
|
22
|
-
"""
|
23
|
-
|
24
|
-
def __init__(self) -> None:
|
25
|
-
"""Initialize downloadable data locations."""
|
26
|
-
self._data_dir = APP_ROOT / "data"
|
27
|
-
|
28
|
-
def get_mane_summary(self) -> Path:
|
29
|
-
"""Identify latest MANE summary data. If unavailable locally, download from
|
30
|
-
`NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/current/>`_.
|
31
|
-
|
32
|
-
:return: path to MANE summary file
|
33
|
-
"""
|
34
|
-
with FTP("ftp.ncbi.nlm.nih.gov") as ftp:
|
35
|
-
ftp.login()
|
36
|
-
ftp.cwd("/refseq/MANE/MANE_human/current")
|
37
|
-
files = ftp.nlst()
|
38
|
-
mane_summary_file = [f for f in files if f.endswith(".summary.txt.gz")]
|
39
|
-
if not mane_summary_file:
|
40
|
-
msg = "Unable to download MANE summary data"
|
41
|
-
raise Exception(msg)
|
42
|
-
mane_summary_file = mane_summary_file[0]
|
43
|
-
self._mane_summary_path = self._data_dir / mane_summary_file[:-3]
|
44
|
-
mane_data_path = self._data_dir / mane_summary_file
|
45
|
-
if not self._mane_summary_path.exists():
|
46
|
-
logger.info("Downloading MANE summary file from NCBI.")
|
47
|
-
with mane_data_path.open("wb") as fp:
|
48
|
-
ftp.retrbinary(f"RETR {mane_summary_file}", fp.write)
|
49
|
-
with gzip.open(
|
50
|
-
mane_data_path, "rb"
|
51
|
-
) as f_in, self._mane_summary_path.open("wb") as f_out:
|
52
|
-
shutil.copyfileobj(f_in, f_out)
|
53
|
-
mane_data_path.unlink()
|
54
|
-
logger.info("MANE summary file download complete.")
|
55
|
-
return self._mane_summary_path
|
56
|
-
|
57
|
-
def get_lrg_refseq_gene_data(self) -> Path:
|
58
|
-
"""Identify latest LRG RefSeq Gene file. If unavailable locally, download from
|
59
|
-
`NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/>`_.
|
60
|
-
|
61
|
-
:return: path to acquired LRG RefSeq Gene data file
|
62
|
-
"""
|
63
|
-
with FTP("ftp.ncbi.nlm.nih.gov") as ftp:
|
64
|
-
ftp.login()
|
65
|
-
lrg_refseqgene_file = "LRG_RefSeqGene"
|
66
|
-
ftp_dir_path = "/refseq/H_sapiens/RefSeqGene/"
|
67
|
-
ftp_file_path = f"{ftp_dir_path}{lrg_refseqgene_file}"
|
68
|
-
timestamp = ftp.voidcmd(f"MDTM {ftp_file_path}")[4:].strip()
|
69
|
-
date = str(parser.parse(timestamp)).split()[0]
|
70
|
-
version = (
|
71
|
-
datetime.datetime.strptime(date, "%Y-%m-%d")
|
72
|
-
.astimezone(tz=datetime.timezone.utc)
|
73
|
-
.strftime("%Y%m%d")
|
74
|
-
)
|
75
|
-
fn_versioned = f"{lrg_refseqgene_file}_{version}"
|
76
|
-
lrg_refseqgene_path = self._data_dir / lrg_refseqgene_file
|
77
|
-
self._lrg_refseqgene_path = self._data_dir / fn_versioned
|
78
|
-
if not self._lrg_refseqgene_path.exists():
|
79
|
-
logger.info("Downloading LRG RefSeq data from NCBI.")
|
80
|
-
ftp.cwd(ftp_dir_path)
|
81
|
-
with lrg_refseqgene_path.open("wb") as fp:
|
82
|
-
ftp.retrbinary(f"RETR {lrg_refseqgene_file}", fp.write)
|
83
|
-
with lrg_refseqgene_path.open(
|
84
|
-
"rb"
|
85
|
-
) as f_in, self._lrg_refseqgene_path.open("wb") as f_out:
|
86
|
-
shutil.copyfileobj(f_in, f_out)
|
87
|
-
lrg_refseqgene_path.unlink()
|
88
|
-
logger.info("LRG RefSeq data download complete.")
|
89
|
-
return self._lrg_refseqgene_path
|
cool_seq_tool/paths.py
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
"""Provide paths to shared files, and trigger data acquisition if unavailable."""
|
2
|
-
from os import environ
|
3
|
-
from pathlib import Path
|
4
|
-
|
5
|
-
from cool_seq_tool.data.data_downloads import DataDownload
|
6
|
-
|
7
|
-
APP_ROOT = Path(__file__).resolve().parents[0]
|
8
|
-
|
9
|
-
TRANSCRIPT_MAPPINGS_PATH = Path(
|
10
|
-
environ.get("TRANSCRIPT_MAPPINGS_PATH", f"{APP_ROOT}/data/transcript_mapping.tsv")
|
11
|
-
)
|
12
|
-
|
13
|
-
d = DataDownload()
|
14
|
-
|
15
|
-
provided_mane_summary_path = environ.get("MANE_SUMMARY_PATH", "")
|
16
|
-
if provided_mane_summary_path:
|
17
|
-
MANE_SUMMARY_PATH = Path(provided_mane_summary_path)
|
18
|
-
else:
|
19
|
-
MANE_SUMMARY_PATH = d.get_mane_summary()
|
20
|
-
|
21
|
-
provided_lrg_refseq_path = environ.get("LRG_REFSEQGENE_PATH", "")
|
22
|
-
if provided_lrg_refseq_path:
|
23
|
-
LRG_REFSEQGENE_PATH = Path(provided_lrg_refseq_path)
|
24
|
-
else:
|
25
|
-
LRG_REFSEQGENE_PATH = d.get_lrg_refseq_gene_data()
|
26
|
-
|
27
|
-
|
28
|
-
SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
|
cool_seq_tool/routers/__init__.py
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
"""Module for routers"""
|
2
|
-
from enum import Enum
|
3
|
-
|
4
|
-
from cool_seq_tool.app import CoolSeqTool
|
5
|
-
|
6
|
-
cool_seq_tool = CoolSeqTool()
|
7
|
-
SERVICE_NAME = "cool_seq_tool"
|
8
|
-
RESP_DESCR = "A response to a validly-formed query."
|
9
|
-
UNHANDLED_EXCEPTION_MSG = "Unhandled exception occurred. Check logs for more details."
|
10
|
-
|
11
|
-
|
12
|
-
class Tags(str, Enum):
|
13
|
-
"""Define tags for endpoints"""
|
14
|
-
|
15
|
-
MANE_TRANSCRIPT = "MANE Transcript"
|
16
|
-
ALIGNMENT_MAPPER = "Alignment Mapper"
|
cool_seq_tool/routers/default.py
DELETED
@@ -1,125 +0,0 @@
|
|
1
|
-
"""Module containing default routes"""
|
2
|
-
import logging
|
3
|
-
import os
|
4
|
-
import tempfile
|
5
|
-
from pathlib import Path
|
6
|
-
|
7
|
-
from fastapi import APIRouter, HTTPException, Query
|
8
|
-
from fastapi.responses import FileResponse
|
9
|
-
from starlette.background import BackgroundTasks
|
10
|
-
|
11
|
-
from cool_seq_tool.routers import (
|
12
|
-
RESP_DESCR,
|
13
|
-
SERVICE_NAME,
|
14
|
-
UNHANDLED_EXCEPTION_MSG,
|
15
|
-
cool_seq_tool,
|
16
|
-
)
|
17
|
-
from cool_seq_tool.schemas import (
|
18
|
-
GenomicDataResponse,
|
19
|
-
GenomicRequestBody,
|
20
|
-
TranscriptRequestBody,
|
21
|
-
)
|
22
|
-
from cool_seq_tool.utils import service_meta
|
23
|
-
|
24
|
-
logger = logging.getLogger("cool_seq_tool")
|
25
|
-
|
26
|
-
router = APIRouter(prefix=f"/{SERVICE_NAME}")
|
27
|
-
|
28
|
-
|
29
|
-
@router.post(
|
30
|
-
"/genomic_to_transcript_exon_coordinates",
|
31
|
-
summary="Get transcript exon data given genomic coordinate data",
|
32
|
-
response_description=RESP_DESCR,
|
33
|
-
description="Return transcript exon data",
|
34
|
-
response_model=GenomicDataResponse,
|
35
|
-
)
|
36
|
-
async def genomic_to_transcript_exon_coordinates(
|
37
|
-
request_body: GenomicRequestBody,
|
38
|
-
) -> GenomicDataResponse:
|
39
|
-
"""Get transcript exon data given genomic coordinate data
|
40
|
-
|
41
|
-
:param GenomicRequestBody request_body: Request body
|
42
|
-
|
43
|
-
Returns: GenomicDataResponse with data and warnings
|
44
|
-
"""
|
45
|
-
request_body = request_body.model_dump()
|
46
|
-
|
47
|
-
response = GenomicDataResponse(
|
48
|
-
genomic_data=None, warnings=[], service_meta=service_meta()
|
49
|
-
)
|
50
|
-
|
51
|
-
try:
|
52
|
-
response = await cool_seq_tool.ex_g_coords_mapper.genomic_to_transcript_exon_coordinates(
|
53
|
-
**request_body
|
54
|
-
)
|
55
|
-
except Exception as e:
|
56
|
-
logger.error("genomic_to_transcript_exon_coordinates unhandled exception %s", e)
|
57
|
-
response.warnings.append(UNHANDLED_EXCEPTION_MSG)
|
58
|
-
|
59
|
-
return response
|
60
|
-
|
61
|
-
|
62
|
-
@router.post(
|
63
|
-
"/transcript_to_genomic_coordinates",
|
64
|
-
summary="Get genomic coordinate data given transcript exon data",
|
65
|
-
response_description=RESP_DESCR,
|
66
|
-
description="Return genomic coordinate data",
|
67
|
-
response_model=GenomicDataResponse,
|
68
|
-
)
|
69
|
-
async def transcript_to_genomic_coordinates(
|
70
|
-
request_body: TranscriptRequestBody,
|
71
|
-
) -> GenomicDataResponse:
|
72
|
-
"""Get transcript exon data given genomic coordinate data
|
73
|
-
|
74
|
-
:param TranscriptRequestBody request_body: Request body
|
75
|
-
|
76
|
-
Returns: GenomicDataResponse with data and warnings
|
77
|
-
"""
|
78
|
-
request_body = request_body.model_dump()
|
79
|
-
|
80
|
-
response = GenomicDataResponse(
|
81
|
-
genomic_data=None, warnings=[], service_meta=service_meta()
|
82
|
-
)
|
83
|
-
|
84
|
-
try:
|
85
|
-
response = (
|
86
|
-
await cool_seq_tool.ex_g_coords_mapper.transcript_to_genomic_coordinates(
|
87
|
-
**request_body
|
88
|
-
)
|
89
|
-
)
|
90
|
-
except Exception as e:
|
91
|
-
logger.error("transcript_to_genomic_coordinates unhandled exception %s", e)
|
92
|
-
response.warnings.append(UNHANDLED_EXCEPTION_MSG)
|
93
|
-
|
94
|
-
return response
|
95
|
-
|
96
|
-
|
97
|
-
@router.get(
|
98
|
-
"/download_sequence",
|
99
|
-
summary="Get sequence for ID",
|
100
|
-
response_description=RESP_DESCR,
|
101
|
-
description="Given a known accession identifier, retrieve sequence data and return"
|
102
|
-
"as a FASTA file",
|
103
|
-
response_class=FileResponse,
|
104
|
-
)
|
105
|
-
async def get_sequence(
|
106
|
-
background_tasks: BackgroundTasks,
|
107
|
-
sequence_id: str = Query(
|
108
|
-
..., description="ID of sequence to retrieve, sans namespace"
|
109
|
-
),
|
110
|
-
) -> FileResponse:
|
111
|
-
"""Get sequence for requested sequence ID.
|
112
|
-
:param sequence_id: accession ID, sans namespace, eg `NM_152263.3`
|
113
|
-
:param background_tasks: Starlette background tasks object. Use to clean up
|
114
|
-
tempfile after get method returns.
|
115
|
-
:return: FASTA file if successful, or 404 if unable to find matching resource
|
116
|
-
"""
|
117
|
-
_, path = tempfile.mkstemp(suffix=".fasta")
|
118
|
-
try:
|
119
|
-
cool_seq_tool.seqrepo_access.get_fasta_file(sequence_id, Path(path))
|
120
|
-
except KeyError as e:
|
121
|
-
raise HTTPException(
|
122
|
-
status_code=404, detail="No sequence available for requested identifier"
|
123
|
-
) from e
|
124
|
-
background_tasks.add_task(lambda p: os.unlink(p), path) # noqa: PTH108
|
125
|
-
return FileResponse(path)
|
cool_seq_tool/routers/mane.py
DELETED
@@ -1,98 +0,0 @@
|
|
1
|
-
"""Module containing routes related to MANE data"""
|
2
|
-
import logging
|
3
|
-
from typing import Optional
|
4
|
-
|
5
|
-
from fastapi import APIRouter, Query
|
6
|
-
|
7
|
-
from cool_seq_tool.routers import (
|
8
|
-
RESP_DESCR,
|
9
|
-
SERVICE_NAME,
|
10
|
-
UNHANDLED_EXCEPTION_MSG,
|
11
|
-
Tags,
|
12
|
-
cool_seq_tool,
|
13
|
-
)
|
14
|
-
from cool_seq_tool.schemas import AnnotationLayer, ManeDataService, ResidueMode
|
15
|
-
from cool_seq_tool.utils import service_meta
|
16
|
-
|
17
|
-
logger = logging.getLogger("cool_seq_tool")
|
18
|
-
|
19
|
-
router = APIRouter(prefix=f"/{SERVICE_NAME}/mane")
|
20
|
-
|
21
|
-
|
22
|
-
ref_descr = (
|
23
|
-
"Reference at position given during input. When this is set, it will "
|
24
|
-
"ensure that the reference sequences match for the final result."
|
25
|
-
)
|
26
|
-
try_longest_compatible_descr = (
|
27
|
-
"`True` if should try longest compatible remaining if"
|
28
|
-
" mane transcript was not compatible. `False` otherwise."
|
29
|
-
)
|
30
|
-
|
31
|
-
|
32
|
-
@router.get(
|
33
|
-
"/get_mane_data",
|
34
|
-
summary="Retrieve MANE data in inter-residue coordinates",
|
35
|
-
response_description=RESP_DESCR,
|
36
|
-
description="Return MANE Select, MANE Plus Clinical, or Longest Remaining "
|
37
|
-
"Transcript data in inter-residue coordinates. See our docs for "
|
38
|
-
"more information on transcript priority.",
|
39
|
-
response_model=ManeDataService,
|
40
|
-
tags=[Tags.MANE_TRANSCRIPT],
|
41
|
-
)
|
42
|
-
async def get_mane_data(
|
43
|
-
ac: str = Query(..., description="Accession"),
|
44
|
-
start_pos: int = Query(..., description="Start position"),
|
45
|
-
start_annotation_layer: AnnotationLayer = Query(
|
46
|
-
..., description="Starting annotation layer for query"
|
47
|
-
),
|
48
|
-
end_pos: Optional[int] = Query(
|
49
|
-
None, description="End position. If not set, will set to `start_pos`."
|
50
|
-
),
|
51
|
-
gene: Optional[str] = Query(None, description="HGNC gene symbol"),
|
52
|
-
ref: Optional[str] = Query(None, description=ref_descr),
|
53
|
-
try_longest_compatible: bool = Query(
|
54
|
-
True, description=try_longest_compatible_descr
|
55
|
-
),
|
56
|
-
residue_mode: ResidueMode = Query(
|
57
|
-
ResidueMode.RESIDUE, description="Residue mode for position(s)"
|
58
|
-
),
|
59
|
-
) -> ManeDataService:
|
60
|
-
"""Return MANE or Longest Compatible Remaining Transcript data on inter-residue
|
61
|
-
coordinates
|
62
|
-
|
63
|
-
:param str ac: Accession
|
64
|
-
:param int start_pos: Start position
|
65
|
-
:param AnnotationLayer start_annotation_layer: Starting annotation layer for query
|
66
|
-
:param Optional[int] end_pos: End position. If `None` assumes
|
67
|
-
both `start_pos` and `end_pos` have same values.
|
68
|
-
:param Optional[str] gene: Gene symbol
|
69
|
-
:param Optional[str] ref: Reference at position given during input
|
70
|
-
:param bool try_longest_compatible: `True` if should try longest
|
71
|
-
compatible remaining if mane transcript was not compatible.
|
72
|
-
`False` otherwise.
|
73
|
-
:param ResidueMode residue_mode: Starting residue mode for `start_pos`
|
74
|
-
and `end_pos`. Will always return coordinates in inter-residue
|
75
|
-
"""
|
76
|
-
warnings = []
|
77
|
-
mane_data = None
|
78
|
-
try:
|
79
|
-
mane_data = await cool_seq_tool.mane_transcript.get_mane_transcript(
|
80
|
-
ac=ac,
|
81
|
-
start_pos=start_pos,
|
82
|
-
end_pos=end_pos,
|
83
|
-
start_annotation_layer=start_annotation_layer,
|
84
|
-
gene=gene,
|
85
|
-
ref=ref,
|
86
|
-
try_longest_compatible=try_longest_compatible,
|
87
|
-
residue_mode=residue_mode,
|
88
|
-
)
|
89
|
-
|
90
|
-
if not mane_data:
|
91
|
-
warnings.append("Unable to retrieve MANE data")
|
92
|
-
except Exception as e:
|
93
|
-
logger.exception("get_mane_data unhandled exception %s", e)
|
94
|
-
warnings.append(UNHANDLED_EXCEPTION_MSG)
|
95
|
-
|
96
|
-
return ManeDataService(
|
97
|
-
mane_data=mane_data, warnings=warnings, service_meta=service_meta()
|
98
|
-
)
|