cool-seq-tool 0.3.0.dev0__tar.gz → 0.4.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0}/LICENSE +1 -1
- cool_seq_tool-0.4.0.dev0/PKG-INFO +130 -0
- cool_seq_tool-0.4.0.dev0/README.md +52 -0
- cool_seq_tool-0.4.0.dev0/pyproject.toml +122 -0
- cool_seq_tool-0.4.0.dev0/setup.cfg +4 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/api.py +3 -3
- cool_seq_tool-0.4.0.dev0/src/cool_seq_tool/app.py +90 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/data/data_downloads.py +8 -5
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/handlers/seqrepo_access.py +55 -27
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/__init__.py +4 -1
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/alignment.py +40 -37
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/exon_genomic_coords.py +329 -138
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/mane_transcript.py +402 -227
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/mappings.py +1 -1
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/schemas.py +31 -24
- cool_seq_tool-0.4.0.dev0/src/cool_seq_tool/sources/__init__.py +6 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/sources/mane_transcript_mappings.py +28 -7
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/sources/transcript_mappings.py +27 -11
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/sources/uta_database.py +179 -232
- cool_seq_tool-0.4.0.dev0/src/cool_seq_tool/utils.py +46 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/version.py +1 -1
- cool_seq_tool-0.4.0.dev0/src/cool_seq_tool.egg-info/PKG-INFO +130 -0
- cool_seq_tool-0.4.0.dev0/src/cool_seq_tool.egg-info/SOURCES.txt +32 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool.egg-info/requires.txt +9 -2
- cool_seq_tool-0.4.0.dev0/tests/test_utils.py +26 -0
- cool_seq_tool-0.3.0.dev0/PKG-INFO +0 -187
- cool_seq_tool-0.3.0.dev0/README.md +0 -153
- cool_seq_tool-0.3.0.dev0/cool_seq_tool/app.py +0 -69
- cool_seq_tool-0.3.0.dev0/cool_seq_tool/data/transcript_mapping.tsv +0 -256226
- cool_seq_tool-0.3.0.dev0/cool_seq_tool/sources/__init__.py +0 -4
- cool_seq_tool-0.3.0.dev0/cool_seq_tool/utils.py +0 -48
- cool_seq_tool-0.3.0.dev0/cool_seq_tool.egg-info/PKG-INFO +0 -187
- cool_seq_tool-0.3.0.dev0/cool_seq_tool.egg-info/SOURCES.txt +0 -36
- cool_seq_tool-0.3.0.dev0/cool_seq_tool.egg-info/not-zip-safe +0 -1
- cool_seq_tool-0.3.0.dev0/pyproject.toml +0 -41
- cool_seq_tool-0.3.0.dev0/setup.cfg +0 -52
- cool_seq_tool-0.3.0.dev0/setup.py +0 -5
- cool_seq_tool-0.3.0.dev0/tests/test_utils.py +0 -25
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/__init__.py +0 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/data/__init__.py +0 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/handlers/__init__.py +0 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/paths.py +0 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/__init__.py +0 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/default.py +0 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/mane.py +1 -1
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool.egg-info/dependency_links.txt +0 -0
- {cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool.egg-info/top_level.txt +0 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: cool_seq_tool
|
3
|
+
Version: 0.4.0.dev0
|
4
|
+
Summary: Common Operations on Lots of Sequences Tool
|
5
|
+
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
|
+
License: MIT License
|
7
|
+
|
8
|
+
Copyright (c) 2021-2023 Wagner Lab
|
9
|
+
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
12
|
+
in the Software without restriction, including without limitation the rights
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
15
|
+
furnished to do so, subject to the following conditions:
|
16
|
+
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
18
|
+
copies or substantial portions of the Software.
|
19
|
+
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
|
+
SOFTWARE.
|
27
|
+
|
28
|
+
Project-URL: Homepage, https://github.com/genomicmedlab/cool-seq-tool
|
29
|
+
Project-URL: Documentation, https://coolseqtool.readthedocs.io/en/latest/index.html
|
30
|
+
Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
|
31
|
+
Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
|
32
|
+
Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
|
33
|
+
Classifier: Development Status :: 3 - Alpha
|
34
|
+
Classifier: Framework :: FastAPI
|
35
|
+
Classifier: Framework :: Pydantic
|
36
|
+
Classifier: Framework :: Pydantic :: 2
|
37
|
+
Classifier: Intended Audience :: Science/Research
|
38
|
+
Classifier: Intended Audience :: Developers
|
39
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
40
|
+
Classifier: License :: OSI Approved :: MIT License
|
41
|
+
Classifier: Programming Language :: Python :: 3
|
42
|
+
Classifier: Programming Language :: Python :: 3.8
|
43
|
+
Classifier: Programming Language :: Python :: 3.9
|
44
|
+
Classifier: Programming Language :: Python :: 3.10
|
45
|
+
Classifier: Programming Language :: Python :: 3.11
|
46
|
+
Requires-Python: >=3.8
|
47
|
+
Description-Content-Type: text/markdown
|
48
|
+
License-File: LICENSE
|
49
|
+
Requires-Dist: asyncpg
|
50
|
+
Requires-Dist: aiofiles
|
51
|
+
Requires-Dist: boto3
|
52
|
+
Requires-Dist: pyliftover
|
53
|
+
Requires-Dist: polars
|
54
|
+
Requires-Dist: hgvs
|
55
|
+
Requires-Dist: biocommons.seqrepo
|
56
|
+
Requires-Dist: pydantic==2.*
|
57
|
+
Requires-Dist: uvicorn
|
58
|
+
Requires-Dist: fastapi
|
59
|
+
Requires-Dist: ga4gh.vrs
|
60
|
+
Provides-Extra: dev
|
61
|
+
Requires-Dist: pre-commit; extra == "dev"
|
62
|
+
Requires-Dist: ipython; extra == "dev"
|
63
|
+
Requires-Dist: ipykernel; extra == "dev"
|
64
|
+
Requires-Dist: psycopg2-binary; extra == "dev"
|
65
|
+
Requires-Dist: ruff; extra == "dev"
|
66
|
+
Provides-Extra: tests
|
67
|
+
Requires-Dist: pytest; extra == "tests"
|
68
|
+
Requires-Dist: pytest-cov; extra == "tests"
|
69
|
+
Requires-Dist: pytest-asyncio==0.18.3; extra == "tests"
|
70
|
+
Requires-Dist: mock; extra == "tests"
|
71
|
+
Provides-Extra: docs
|
72
|
+
Requires-Dist: sphinx==6.1.3; extra == "docs"
|
73
|
+
Requires-Dist: sphinx-autodoc-typehints==1.22.0; extra == "docs"
|
74
|
+
Requires-Dist: sphinx-autobuild==2021.3.14; extra == "docs"
|
75
|
+
Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
|
76
|
+
Requires-Dist: sphinxext-opengraph==0.8.2; extra == "docs"
|
77
|
+
Requires-Dist: furo==2023.3.27; extra == "docs"
|
78
|
+
|
79
|
+
<h1 align="center">
|
80
|
+
CoolSeqTool
|
81
|
+
</h1>
|
82
|
+
|
83
|
+
**[Documentation](https://coolseqtool.readthedocs.io/en/latest/)** · [Installation](https://coolseqtool.readthedocs.io/en/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/en/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/en/latest/reference/index.html)
|
84
|
+
|
85
|
+
## Overview
|
86
|
+
|
87
|
+
<!-- description -->
|
88
|
+
The **CoolSeqTool** provides:
|
89
|
+
|
90
|
+
- A Pythonic API on top of sequence data of interest to tertiary analysis tools, including mappings between gene names and transcripts, [MANE transcript](https://www.ncbi.nlm.nih.gov/refseq/MANE/) descriptions, and the [Universal Transcript Archive](https://github.com/biocommons/uta)
|
91
|
+
- Augmented access to the [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) database, including multiple additional methods and tools
|
92
|
+
- Mapping tools that combine the above to support translation between reference sequences, annotation layers, and MANE transcripts
|
93
|
+
<!-- /description -->
|
94
|
+
|
95
|
+
---
|
96
|
+
|
97
|
+
## Install
|
98
|
+
|
99
|
+
CoolSeqTool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
|
100
|
+
|
101
|
+
```shell
|
102
|
+
python3 -m pip install cool-seq-tool
|
103
|
+
```
|
104
|
+
|
105
|
+
See the [installation instructions](https://coolseqtool.readthedocs.io/en/latest/install.html) in the documentation for a description of dependency setup requirements.
|
106
|
+
|
107
|
+
---
|
108
|
+
|
109
|
+
## Usage
|
110
|
+
|
111
|
+
All CoolSeqTool resources can be initialized by way of a top-level class instance:
|
112
|
+
|
113
|
+
```pycon
|
114
|
+
>>> from cool_seq_tool.app import CoolSeqTool
|
115
|
+
>>> cst = CoolSeqTool()
|
116
|
+
>>> result = await cst.mane_transcript.get_mane_transcript(
|
117
|
+
... "NP_004324.2",
|
118
|
+
... 599,
|
119
|
+
... AnnotationLayer.PROTEIN,
|
120
|
+
... residue_mode=ResidueMode.INTER_RESIDUE,
|
121
|
+
... )
|
122
|
+
>>> result.gene, result.refseq, result.status
|
123
|
+
('EGFR', 'NM_005228.5', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
|
124
|
+
```
|
125
|
+
|
126
|
+
---
|
127
|
+
|
128
|
+
## Feedback and contributing
|
129
|
+
|
130
|
+
We welcome bug reports, feature requests, and code contributions from users and interested collaborators. The [documentation](https://coolseqtool.readthedocs.io/en/latest/contributing.html) contains guidance for submitting feedback and contributing new code.
|
@@ -0,0 +1,52 @@
|
|
1
|
+
<h1 align="center">
|
2
|
+
CoolSeqTool
|
3
|
+
</h1>
|
4
|
+
|
5
|
+
**[Documentation](https://coolseqtool.readthedocs.io/en/latest/)** · [Installation](https://coolseqtool.readthedocs.io/en/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/en/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/en/latest/reference/index.html)
|
6
|
+
|
7
|
+
## Overview
|
8
|
+
|
9
|
+
<!-- description -->
|
10
|
+
The **CoolSeqTool** provides:
|
11
|
+
|
12
|
+
- A Pythonic API on top of sequence data of interest to tertiary analysis tools, including mappings between gene names and transcripts, [MANE transcript](https://www.ncbi.nlm.nih.gov/refseq/MANE/) descriptions, and the [Universal Transcript Archive](https://github.com/biocommons/uta)
|
13
|
+
- Augmented access to the [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) database, including multiple additional methods and tools
|
14
|
+
- Mapping tools that combine the above to support translation between reference sequences, annotation layers, and MANE transcripts
|
15
|
+
<!-- /description -->
|
16
|
+
|
17
|
+
---
|
18
|
+
|
19
|
+
## Install
|
20
|
+
|
21
|
+
CoolSeqTool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
|
22
|
+
|
23
|
+
```shell
|
24
|
+
python3 -m pip install cool-seq-tool
|
25
|
+
```
|
26
|
+
|
27
|
+
See the [installation instructions](https://coolseqtool.readthedocs.io/en/latest/install.html) in the documentation for a description of dependency setup requirements.
|
28
|
+
|
29
|
+
---
|
30
|
+
|
31
|
+
## Usage
|
32
|
+
|
33
|
+
All CoolSeqTool resources can be initialized by way of a top-level class instance:
|
34
|
+
|
35
|
+
```pycon
|
36
|
+
>>> from cool_seq_tool.app import CoolSeqTool
|
37
|
+
>>> cst = CoolSeqTool()
|
38
|
+
>>> result = await cst.mane_transcript.get_mane_transcript(
|
39
|
+
... "NP_004324.2",
|
40
|
+
... 599,
|
41
|
+
... AnnotationLayer.PROTEIN,
|
42
|
+
... residue_mode=ResidueMode.INTER_RESIDUE,
|
43
|
+
... )
|
44
|
+
>>> result.gene, result.refseq, result.status
|
45
|
+
('EGFR', 'NM_005228.5', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
|
46
|
+
```
|
47
|
+
|
48
|
+
---
|
49
|
+
|
50
|
+
## Feedback and contributing
|
51
|
+
|
52
|
+
We welcome bug reports, feature requests, and code contributions from users and interested collaborators. The [documentation](https://coolseqtool.readthedocs.io/en/latest/contributing.html) contains guidance for submitting feedback and contributing new code.
|
@@ -0,0 +1,122 @@
|
|
1
|
+
[project]
|
2
|
+
name = "cool_seq_tool"
|
3
|
+
authors = [
|
4
|
+
{name = "Kori Kuzma"},
|
5
|
+
{name = "James Stevenson"},
|
6
|
+
{name = "Katie Stahl"},
|
7
|
+
{name = "Alex Wagner"},
|
8
|
+
]
|
9
|
+
readme = "README.md"
|
10
|
+
classifiers = [
|
11
|
+
"Development Status :: 3 - Alpha",
|
12
|
+
"Framework :: FastAPI",
|
13
|
+
"Framework :: Pydantic",
|
14
|
+
"Framework :: Pydantic :: 2",
|
15
|
+
"Intended Audience :: Science/Research",
|
16
|
+
"Intended Audience :: Developers",
|
17
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
18
|
+
"License :: OSI Approved :: MIT License",
|
19
|
+
"Programming Language :: Python :: 3",
|
20
|
+
"Programming Language :: Python :: 3.8",
|
21
|
+
"Programming Language :: Python :: 3.9",
|
22
|
+
"Programming Language :: Python :: 3.10",
|
23
|
+
"Programming Language :: Python :: 3.11",
|
24
|
+
]
|
25
|
+
requires-python = ">=3.8"
|
26
|
+
description = "Common Operations on Lots of Sequences Tool"
|
27
|
+
license = {file = "LICENSE"}
|
28
|
+
dependencies = [
|
29
|
+
"asyncpg",
|
30
|
+
"aiofiles",
|
31
|
+
"boto3",
|
32
|
+
"pyliftover",
|
33
|
+
"polars",
|
34
|
+
"hgvs",
|
35
|
+
"biocommons.seqrepo",
|
36
|
+
"pydantic == 2.*",
|
37
|
+
"uvicorn",
|
38
|
+
"fastapi",
|
39
|
+
"ga4gh.vrs",
|
40
|
+
]
|
41
|
+
dynamic = ["version"]
|
42
|
+
|
43
|
+
[project.optional-dependencies]
|
44
|
+
dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff"]
|
45
|
+
tests = ["pytest", "pytest-cov", "pytest-asyncio==0.18.3", "mock"]
|
46
|
+
docs = [
|
47
|
+
"sphinx==6.1.3",
|
48
|
+
"sphinx-autodoc-typehints==1.22.0",
|
49
|
+
"sphinx-autobuild==2021.3.14",
|
50
|
+
"sphinx-copybutton==0.5.2",
|
51
|
+
"sphinxext-opengraph==0.8.2",
|
52
|
+
"furo==2023.3.27",
|
53
|
+
]
|
54
|
+
|
55
|
+
[project.urls]
|
56
|
+
Homepage = "https://github.com/genomicmedlab/cool-seq-tool"
|
57
|
+
Documentation = "https://coolseqtool.readthedocs.io/en/latest/index.html"
|
58
|
+
Changelog = "https://github.com/genomicmedlab/cool-seq-tool/releases"
|
59
|
+
Source = "https://github.com/genomicmedlab/cool-seq-tool"
|
60
|
+
"Bug Tracker" = "https://github.com/genomicmedlab/cool-seq-tool/issues"
|
61
|
+
|
62
|
+
[build-system]
|
63
|
+
requires = ["setuptools>=61.0"]
|
64
|
+
build-backend = "setuptools.build_meta"
|
65
|
+
|
66
|
+
[tool.setuptools.dynamic]
|
67
|
+
version = {attr = "cool_seq_tool.version.__version__"}
|
68
|
+
|
69
|
+
[tool.setuptools.packages.find]
|
70
|
+
where = ["src"]
|
71
|
+
|
72
|
+
[tool.pytest.ini_options]
|
73
|
+
addopts = "--cov=src --cov-report term-missing"
|
74
|
+
testpaths = ["tests"]
|
75
|
+
|
76
|
+
[tool.coverage.run]
|
77
|
+
branch = true
|
78
|
+
|
79
|
+
[tool.ruff]
|
80
|
+
src = ["src"]
|
81
|
+
exclude = ["docs/source/conf.py"]
|
82
|
+
# pycodestyle (E, W)
|
83
|
+
# Pyflakes (F)
|
84
|
+
# flake8-annotations (ANN)
|
85
|
+
# pydocstyle (D)
|
86
|
+
# pep8-naming (N)
|
87
|
+
# isort (I)
|
88
|
+
select = ["E", "W", "F", "ANN", "D", "N", "I"]
|
89
|
+
fixable = ["I", "F401"]
|
90
|
+
|
91
|
+
# ANN101 - missing-type-self
|
92
|
+
# ANN003 - missing-type-kwargs
|
93
|
+
# D203 - one-blank-line-before-class
|
94
|
+
# D205 - blank-line-after-summary
|
95
|
+
# D206 - indent-with-spaces*
|
96
|
+
# D213 - multi-line-summary-second-line
|
97
|
+
# D300 - triple-single-quotes*
|
98
|
+
# D400 - ends-in-period
|
99
|
+
# D415 - ends-in-punctuation
|
100
|
+
# E111 - indentation-with-invalid-multiple*
|
101
|
+
# E114 - indentation-with-invalid-multiple-comment*
|
102
|
+
# E117 - over-indented*
|
103
|
+
# E501 - line-too-long*
|
104
|
+
# W191 - tab-indentation*
|
105
|
+
# *ignored for compatibility with formatter
|
106
|
+
ignore = [
|
107
|
+
"ANN101", "ANN003",
|
108
|
+
"D203", "D205", "D206", "D213", "D300", "D400", "D415",
|
109
|
+
"E111", "E114", "E117", "E501",
|
110
|
+
"W191"
|
111
|
+
]
|
112
|
+
|
113
|
+
[tool.ruff.per-file-ignores]
|
114
|
+
# ANN001 - missing-type-function-argument
|
115
|
+
# ANN2 - missing-return-type
|
116
|
+
# ANN102 - missing-type-cls
|
117
|
+
# N805 - invalid-first-argument-name-for-method
|
118
|
+
# F821 - undefined-name
|
119
|
+
# F401 - unused-import
|
120
|
+
"tests/*" = ["ANN001", "ANN2", "ANN102"]
|
121
|
+
"*__init__.py" = ["F401"]
|
122
|
+
"src/cool_seq_tool/schemas.py" = ["ANN201", "N805", "ANN001"]
|
@@ -24,16 +24,16 @@ def custom_openapi() -> Dict:
|
|
24
24
|
if app.openapi_schema:
|
25
25
|
return app.openapi_schema
|
26
26
|
openapi_schema = get_openapi(
|
27
|
-
title="The GenomicMedLab Cool
|
27
|
+
title="The GenomicMedLab Cool-Seq-Tool",
|
28
28
|
version=__version__,
|
29
|
-
description="Common Operations On Lots
|
29
|
+
description="Common Operations On Lots of Sequences Tool.",
|
30
30
|
routes=app.routes,
|
31
31
|
)
|
32
32
|
|
33
33
|
openapi_schema["info"]["contact"] = {
|
34
34
|
"name": "Alex H. Wagner",
|
35
35
|
"email": "Alex.Wagner@nationwidechildrens.org",
|
36
|
-
"url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",
|
36
|
+
"url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",
|
37
37
|
}
|
38
38
|
app.openapi_schema = openapi_schema
|
39
39
|
return app.openapi_schema
|
@@ -0,0 +1,90 @@
|
|
1
|
+
"""Provides core CoolSeqTool class, which non-redundantly initializes all Cool-Seq-Tool
|
2
|
+
data handler and mapping resources for straightforward access.
|
3
|
+
"""
|
4
|
+
import logging
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
from biocommons.seqrepo import SeqRepo
|
9
|
+
|
10
|
+
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
11
|
+
from cool_seq_tool.mappers import (
|
12
|
+
AlignmentMapper,
|
13
|
+
ExonGenomicCoordsMapper,
|
14
|
+
ManeTranscript,
|
15
|
+
)
|
16
|
+
from cool_seq_tool.paths import (
|
17
|
+
LRG_REFSEQGENE_PATH,
|
18
|
+
MANE_SUMMARY_PATH,
|
19
|
+
SEQREPO_ROOT_DIR,
|
20
|
+
TRANSCRIPT_MAPPINGS_PATH,
|
21
|
+
)
|
22
|
+
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
23
|
+
from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
|
24
|
+
from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
|
25
|
+
|
26
|
+
logger = logging.getLogger(__name__)
|
27
|
+
|
28
|
+
|
29
|
+
class CoolSeqTool:
|
30
|
+
"""Non-redundantly initialize all Cool-Seq-Tool data resources, available under the
|
31
|
+
following attribute names:
|
32
|
+
|
33
|
+
* ``self.seqrepo_access``: :py:class:`SeqRepoAccess <cool_seq_tool.handlers.seqrepo_access.SeqRepoAccess>`
|
34
|
+
* ``self.transcript_mappings``: :py:class:`TranscriptMappings <cool_seq_tool.sources.transcript_mappings.TranscriptMappings>`
|
35
|
+
* ``self.mane_transcript_mappings``: :py:class:`ManeTranscriptMappings <cool_seq_tool.sources.mane_transcript_mappings.ManeTranscriptMappings>`
|
36
|
+
* ``self.uta_db``: :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>`
|
37
|
+
* ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
|
38
|
+
* ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
|
39
|
+
* ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
|
40
|
+
|
41
|
+
Initialization with default resource locations is straightforward:
|
42
|
+
|
43
|
+
.. code-block:: pycon
|
44
|
+
|
45
|
+
>>> from cool_seq_tool.app import CoolSeqTool
|
46
|
+
>>> cst = CoolSeqTool()
|
47
|
+
|
48
|
+
See the :ref:`configuration <configuration>` section for more information.
|
49
|
+
"""
|
50
|
+
|
51
|
+
def __init__(
|
52
|
+
self,
|
53
|
+
transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH,
|
54
|
+
lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
|
55
|
+
mane_data_path: Path = MANE_SUMMARY_PATH,
|
56
|
+
db_url: str = UTA_DB_URL,
|
57
|
+
sr: Optional[SeqRepo] = None,
|
58
|
+
) -> None:
|
59
|
+
"""Initialize CoolSeqTool class
|
60
|
+
|
61
|
+
:param transcript_file_path: The path to ``transcript_mapping.tsv``
|
62
|
+
:param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
|
63
|
+
:param mane_data_path: Path to RefSeq MANE summary data
|
64
|
+
:param db_url: PostgreSQL connection URL
|
65
|
+
Format: ``driver://user:password@host/database/schema``
|
66
|
+
:param sr: SeqRepo instance. If this is not provided, will create a new instance
|
67
|
+
"""
|
68
|
+
if not sr:
|
69
|
+
sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
|
70
|
+
self.seqrepo_access = SeqRepoAccess(sr)
|
71
|
+
self.transcript_mappings = TranscriptMappings(
|
72
|
+
transcript_file_path=transcript_file_path,
|
73
|
+
lrg_refseqgene_path=lrg_refseqgene_path,
|
74
|
+
)
|
75
|
+
self.mane_transcript_mappings = ManeTranscriptMappings(
|
76
|
+
mane_data_path=mane_data_path
|
77
|
+
)
|
78
|
+
self.uta_db = UtaDatabase(db_url=db_url)
|
79
|
+
self.alignment_mapper = AlignmentMapper(
|
80
|
+
self.seqrepo_access, self.transcript_mappings, self.uta_db
|
81
|
+
)
|
82
|
+
self.mane_transcript = ManeTranscript(
|
83
|
+
self.seqrepo_access,
|
84
|
+
self.transcript_mappings,
|
85
|
+
self.mane_transcript_mappings,
|
86
|
+
self.uta_db,
|
87
|
+
)
|
88
|
+
self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
|
89
|
+
self.uta_db, self.mane_transcript
|
90
|
+
)
|
{cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/data/data_downloads.py
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""
|
1
|
+
"""Handle acquisition of external data."""
|
2
2
|
import datetime
|
3
3
|
import gzip
|
4
4
|
import logging
|
@@ -15,8 +15,11 @@ logger = logging.getLogger("cool_seq_tool")
|
|
15
15
|
|
16
16
|
|
17
17
|
class DataDownload:
|
18
|
-
"""
|
19
|
-
|
18
|
+
"""Manage downloadable data files. Responsible for checking if files are available
|
19
|
+
under expected locations, and fetching them if not.
|
20
|
+
|
21
|
+
Relevant methods are called automatically by data classes; users should not have
|
22
|
+
to interact with this class under normal circumstances.
|
20
23
|
"""
|
21
24
|
|
22
25
|
def __init__(self) -> None:
|
@@ -25,7 +28,7 @@ class DataDownload:
|
|
25
28
|
|
26
29
|
def get_mane_summary(self) -> Path:
|
27
30
|
"""Identify latest MANE summary data. If unavailable locally, download from
|
28
|
-
|
31
|
+
`NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/current/>`_.
|
29
32
|
|
30
33
|
:return: path to MANE summary file
|
31
34
|
"""
|
@@ -52,7 +55,7 @@ class DataDownload:
|
|
52
55
|
|
53
56
|
def get_lrg_refseq_gene_data(self) -> Path:
|
54
57
|
"""Identify latest LRG RefSeq Gene file. If unavailable locally, download from
|
55
|
-
|
58
|
+
`NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/>`_.
|
56
59
|
|
57
60
|
:return: path to acquired LRG RefSeq Gene data file
|
58
61
|
"""
|
{cool_seq_tool-0.3.0.dev0 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/handlers/seqrepo_access.py
RENAMED
@@ -1,4 +1,6 @@
|
|
1
|
-
"""
|
1
|
+
"""Wrap SeqRepo to provide additional lookup and identification methods on top of basic
|
2
|
+
dereferencing functions.
|
3
|
+
"""
|
2
4
|
import logging
|
3
5
|
from os import environ
|
4
6
|
from pathlib import Path
|
@@ -13,7 +15,9 @@ logger = logging.getLogger(__name__)
|
|
13
15
|
|
14
16
|
|
15
17
|
class SeqRepoAccess(SeqRepoDataProxy):
|
16
|
-
"""
|
18
|
+
"""Provide a wrapper around the base SeqRepoDataProxy class from ``VRS-Python`` to
|
19
|
+
provide additional lookup and identification methods.
|
20
|
+
"""
|
17
21
|
|
18
22
|
environ["SEQREPO_LRU_CACHE_MAXSIZE"] = "none"
|
19
23
|
|
@@ -24,25 +28,37 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
24
28
|
end: Optional[int] = None,
|
25
29
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
26
30
|
) -> Tuple[str, Optional[str]]:
|
27
|
-
"""Get reference sequence for an accession given a start and end position.
|
28
|
-
|
31
|
+
"""Get reference sequence for an accession given a start and end position. If
|
32
|
+
``start`` and ``end`` are not given, returns the entire reference sequence.
|
33
|
+
|
34
|
+
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
35
|
+
>>> from biocommons.seqrepo import SeqRepo
|
36
|
+
>>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
|
37
|
+
>>> sr.get_reference_sequence("NM_002529.3", 1, 10)[0]
|
38
|
+
'TGCAGCTGG'
|
39
|
+
>>> sr.get_reference_sequence("NP_001341538.1", 1, 10)[0]
|
40
|
+
'MAALSGGGG'
|
29
41
|
|
30
42
|
:param ac: Accession
|
31
43
|
:param start: Start pos change
|
32
|
-
:param end: End pos change. If
|
33
|
-
values, if
|
34
|
-
:param residue_mode: Residue mode for
|
44
|
+
:param end: End pos change. If ``None`` assumes both ``start`` and ``end`` have
|
45
|
+
same values, if ``start`` exists.
|
46
|
+
:param residue_mode: Residue mode for ``start`` and ``end``
|
35
47
|
:return: Sequence at position (if accession and positions actually
|
36
48
|
exist, else return empty string), warning if any
|
37
49
|
"""
|
38
|
-
if start
|
39
|
-
|
40
|
-
|
41
|
-
return "",
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
50
|
+
if start and end:
|
51
|
+
if start > end:
|
52
|
+
msg = f"start ({start}) cannot be greater than end ({end})"
|
53
|
+
return "", msg
|
54
|
+
|
55
|
+
start, end = get_inter_residue_pos(start, end, residue_mode)
|
56
|
+
if start == end:
|
57
|
+
end += 1
|
58
|
+
else:
|
59
|
+
if start is not None and residue_mode == ResidueMode.RESIDUE:
|
60
|
+
start -= 1
|
61
|
+
|
46
62
|
try:
|
47
63
|
sequence = self.sr.fetch(ac, start=start, end=end)
|
48
64
|
except KeyError:
|
@@ -53,18 +69,12 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
53
69
|
error = str(e)
|
54
70
|
if error.startswith("start out of range"):
|
55
71
|
msg = (
|
56
|
-
f"Start inter-residue coordinate ({start}) is out of "
|
57
|
-
f"index on {ac}"
|
72
|
+
f"Start inter-residue coordinate ({start}) is out of index on {ac}"
|
58
73
|
)
|
59
74
|
elif error.startswith("stop out of range"):
|
60
75
|
msg = (
|
61
76
|
f"End inter-residue coordinate ({end}) is out of " f"index on {ac}"
|
62
77
|
)
|
63
|
-
elif error.startswith("invalid coordinates") and ">" in error:
|
64
|
-
msg = (
|
65
|
-
f"Invalid inter-residue coordinates: start ({start}) "
|
66
|
-
f"cannot be greater than end ({end})"
|
67
|
-
)
|
68
78
|
else:
|
69
79
|
msg = f"{e}"
|
70
80
|
logger.warning(msg)
|
@@ -78,8 +88,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
78
88
|
if len(sequence) != expected_len_of_seq:
|
79
89
|
return (
|
80
90
|
"",
|
81
|
-
f"End inter-residue coordinate ({end})"
|
82
|
-
f" is out of index on {ac}",
|
91
|
+
f"End inter-residue coordinate ({end}) is out of index on {ac}",
|
83
92
|
)
|
84
93
|
return sequence, None
|
85
94
|
|
@@ -88,6 +97,14 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
88
97
|
) -> Tuple[List[str], Optional[str]]:
|
89
98
|
"""Return list of identifiers for accession.
|
90
99
|
|
100
|
+
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
101
|
+
>>> from biocommons.seqrepo import SeqRepo
|
102
|
+
>>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
|
103
|
+
>>> sr.translate_identifier("NM_002529.3")[0]
|
104
|
+
['MD5:18f0a6e3af9e1bbd8fef1948c7156012', 'NCBI:NM_002529.3', 'refseq:NM_002529.3', 'SEGUID:dEJQBkga9d9VeBHTyTbg6JEtTGQ', 'SHA1:74425006481af5df557811d3c936e0e8912d4c64', 'VMC:GS_RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA', 'sha512t24u:RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA', 'ga4gh:SQ.RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA']
|
105
|
+
>>> sr.translate_identifier("NM_002529.3", "ga4gh")[0]
|
106
|
+
['ga4gh:SQ.RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA']
|
107
|
+
|
91
108
|
:param ac: Identifier accession
|
92
109
|
:param target_namespace: The namespace(s) of identifier to return
|
93
110
|
:return: List of identifiers, warning
|
@@ -123,7 +140,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
123
140
|
) -> Tuple[Optional[List[str]], Optional[str]]:
|
124
141
|
"""Get accessions for a chromosome
|
125
142
|
|
126
|
-
:param
|
143
|
+
:param chromosome: Chromosome number. Must be either 1-22, X, or Y
|
127
144
|
:return: Accessions for chromosome (ordered by latest assembly)
|
128
145
|
"""
|
129
146
|
acs = []
|
@@ -160,9 +177,20 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
160
177
|
|
161
178
|
def get_fasta_file(self, sequence_id: str, outfile_path: Path) -> None:
|
162
179
|
"""Retrieve FASTA file containing sequence for requested sequence ID.
|
163
|
-
|
180
|
+
|
181
|
+
>>> from pathlib import Path
|
182
|
+
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
183
|
+
>>> from biocommons.seqrepo import SeqRepo
|
184
|
+
>>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
|
185
|
+
>>> # write to local file tpm3.fasta:
|
186
|
+
>>> sr.get_fasta_file("NM_002529.3", Path("tpm3.fasta"))
|
187
|
+
|
188
|
+
FASTA file headers will include GA4GH sequence digest, Ensembl accession ID,
|
189
|
+
and RefSeq accession ID.
|
190
|
+
|
191
|
+
:param sequence_id: accession ID, sans namespace, e.g. ``NM_152263.3``
|
164
192
|
:param outfile_path: path to save file to
|
165
|
-
:return: None, but saves sequence data to
|
193
|
+
:return: None, but saves sequence data to ``outfile_path`` if successful
|
166
194
|
:raise: KeyError if SeqRepo doesn't have sequence data for the given ID
|
167
195
|
"""
|
168
196
|
sequence = self.get_reference_sequence(sequence_id)[0]
|
@@ -1,4 +1,7 @@
|
|
1
1
|
"""Module for mapping data"""
|
2
2
|
from .alignment import AlignmentMapper # noqa: I001
|
3
|
-
from .mane_transcript import
|
3
|
+
from .mane_transcript import ManeTranscript
|
4
4
|
from .exon_genomic_coords import ExonGenomicCoordsMapper
|
5
|
+
|
6
|
+
|
7
|
+
__all__ = ["AlignmentMapper", "ManeTranscript", "ExonGenomicCoordsMapper"]
|