cool-seq-tool 0.4.0.dev3__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/LICENSE +1 -1
- {cool_seq_tool-0.4.0.dev3/src/cool_seq_tool.egg-info → cool_seq_tool-0.4.1}/PKG-INFO +15 -8
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/README.md +7 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/pyproject.toml +27 -11
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/__init__.py +1 -3
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/api.py +1 -2
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/app.py +38 -23
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/handlers/__init__.py +1 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/handlers/seqrepo_access.py +13 -15
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/__init__.py +1 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/alignment.py +5 -6
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/exon_genomic_coords.py +75 -73
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/mane_transcript.py +84 -86
- cool_seq_tool-0.4.1/src/cool_seq_tool/resources/__init__.py +1 -0
- cool_seq_tool-0.4.1/src/cool_seq_tool/resources/data_files.py +93 -0
- cool_seq_tool-0.4.1/src/cool_seq_tool/resources/status.py +151 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/__init__.py +1 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/default.py +1 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/mane.py +4 -4
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/mappings.py +2 -2
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/schemas.py +83 -37
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/__init__.py +1 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/transcript_mappings.py +41 -32
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/uta_database.py +91 -70
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/utils.py +2 -2
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/version.py +2 -1
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1/src/cool_seq_tool.egg-info}/PKG-INFO +15 -8
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/SOURCES.txt +4 -4
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/requires.txt +3 -2
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/tests/test_utils.py +1 -0
- cool_seq_tool-0.4.0.dev3/src/cool_seq_tool/data/__init__.py +0 -2
- cool_seq_tool-0.4.0.dev3/src/cool_seq_tool/data/data_downloads.py +0 -89
- cool_seq_tool-0.4.0.dev3/src/cool_seq_tool/paths.py +0 -28
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/setup.cfg +0 -0
- {cool_seq_tool-0.4.0.dev3/src/cool_seq_tool/data → cool_seq_tool-0.4.1/src/cool_seq_tool/resources}/transcript_mapping.tsv +0 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/dependency_links.txt +0 -0
- {cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/top_level.txt +0 -0
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cool_seq_tool
|
3
|
-
Version: 0.4.0.dev3
|
3
|
+
Version: 0.4.1
|
4
4
|
Summary: Common Operation on Lots of Sequences Tool
|
5
5
|
Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
|
6
6
|
License: MIT License
|
7
7
|
|
8
|
-
Copyright (c) 2021-
|
8
|
+
Copyright (c) 2021-2024 Wagner Lab
|
9
9
|
|
10
10
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
11
11
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -26,7 +26,7 @@ License: MIT License
|
|
26
26
|
SOFTWARE.
|
27
27
|
|
28
28
|
Project-URL: Homepage, https://github.com/genomicmedlab/cool-seq-tool
|
29
|
-
Project-URL: Documentation, https://coolseqtool.readthedocs.io/
|
29
|
+
Project-URL: Documentation, https://coolseqtool.readthedocs.io/
|
30
30
|
Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
|
31
31
|
Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
|
32
32
|
Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
|
@@ -39,30 +39,30 @@ Classifier: Intended Audience :: Developers
|
|
39
39
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
40
40
|
Classifier: License :: OSI Approved :: MIT License
|
41
41
|
Classifier: Programming Language :: Python :: 3
|
42
|
-
Classifier: Programming Language :: Python :: 3.8
|
43
|
-
Classifier: Programming Language :: Python :: 3.9
|
44
42
|
Classifier: Programming Language :: Python :: 3.10
|
45
43
|
Classifier: Programming Language :: Python :: 3.11
|
46
|
-
|
44
|
+
Classifier: Programming Language :: Python :: 3.12
|
45
|
+
Requires-Python: >=3.10
|
47
46
|
Description-Content-Type: text/markdown
|
48
47
|
License-File: LICENSE
|
49
48
|
Requires-Dist: asyncpg
|
50
49
|
Requires-Dist: aiofiles
|
51
50
|
Requires-Dist: boto3
|
52
51
|
Requires-Dist: agct>=0.1.0-dev1
|
53
|
-
Requires-Dist: polars
|
52
|
+
Requires-Dist: polars~=1.0
|
54
53
|
Requires-Dist: hgvs
|
55
54
|
Requires-Dist: biocommons.seqrepo
|
56
55
|
Requires-Dist: pydantic==2.*
|
57
56
|
Requires-Dist: uvicorn
|
58
57
|
Requires-Dist: fastapi
|
59
58
|
Requires-Dist: ga4gh.vrs
|
59
|
+
Requires-Dist: wags-tails~=0.1.3
|
60
60
|
Provides-Extra: dev
|
61
61
|
Requires-Dist: pre-commit; extra == "dev"
|
62
62
|
Requires-Dist: ipython; extra == "dev"
|
63
63
|
Requires-Dist: ipykernel; extra == "dev"
|
64
64
|
Requires-Dist: psycopg2-binary; extra == "dev"
|
65
|
-
Requires-Dist: ruff==0.
|
65
|
+
Requires-Dist: ruff==0.5.0; extra == "dev"
|
66
66
|
Provides-Extra: tests
|
67
67
|
Requires-Dist: pytest; extra == "tests"
|
68
68
|
Requires-Dist: pytest-cov; extra == "tests"
|
@@ -81,8 +81,14 @@ Requires-Dist: sphinx-github-changelog==1.2.1; extra == "docs"
|
|
81
81
|
CoolSeqTool
|
82
82
|
</h1>
|
83
83
|
|
84
|
+
[](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
|
85
|
+
|
86
|
+
---
|
87
|
+
|
84
88
|
**[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
|
85
89
|
|
90
|
+
---
|
91
|
+
|
86
92
|
## Overview
|
87
93
|
|
88
94
|
<!-- description -->
|
@@ -113,6 +119,7 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
|
|
113
119
|
|
114
120
|
```pycon
|
115
121
|
>>> from cool_seq_tool.app import CoolSeqTool
|
122
|
+
>>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
|
116
123
|
>>> cst = CoolSeqTool()
|
117
124
|
>>> result = await cst.mane_transcript.get_mane_transcript(
|
118
125
|
... "NP_004324.2",
|
@@ -2,8 +2,14 @@
|
|
2
2
|
CoolSeqTool
|
3
3
|
</h1>
|
4
4
|
|
5
|
+
[](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://pypi.python.org/pypi/cool-seq-tool) [](https://github.com/genomicmedlab/cool-seq-tool/actions/checks.yaml)
|
6
|
+
|
7
|
+
---
|
8
|
+
|
5
9
|
**[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
|
6
10
|
|
11
|
+
---
|
12
|
+
|
7
13
|
## Overview
|
8
14
|
|
9
15
|
<!-- description -->
|
@@ -34,6 +40,7 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
|
|
34
40
|
|
35
41
|
```pycon
|
36
42
|
>>> from cool_seq_tool.app import CoolSeqTool
|
43
|
+
>>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
|
37
44
|
>>> cst = CoolSeqTool()
|
38
45
|
>>> result = await cst.mane_transcript.get_mane_transcript(
|
39
46
|
... "NP_004324.2",
|
@@ -17,12 +17,11 @@ classifiers = [
|
|
17
17
|
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
18
18
|
"License :: OSI Approved :: MIT License",
|
19
19
|
"Programming Language :: Python :: 3",
|
20
|
-
"Programming Language :: Python :: 3.8",
|
21
|
-
"Programming Language :: Python :: 3.9",
|
22
20
|
"Programming Language :: Python :: 3.10",
|
23
21
|
"Programming Language :: Python :: 3.11",
|
22
|
+
"Programming Language :: Python :: 3.12",
|
24
23
|
]
|
25
|
-
requires-python = ">=3.8"
|
24
|
+
requires-python = ">=3.10"
|
26
25
|
description = "Common Operation on Lots of Sequences Tool"
|
27
26
|
license = {file = "LICENSE"}
|
28
27
|
dependencies = [
|
@@ -30,18 +29,19 @@ dependencies = [
|
|
30
29
|
"aiofiles",
|
31
30
|
"boto3",
|
32
31
|
"agct >= 0.1.0-dev1",
|
33
|
-
"polars",
|
32
|
+
"polars ~= 1.0",
|
34
33
|
"hgvs",
|
35
34
|
"biocommons.seqrepo",
|
36
35
|
"pydantic == 2.*",
|
37
36
|
"uvicorn",
|
38
37
|
"fastapi",
|
39
38
|
"ga4gh.vrs",
|
39
|
+
"wags-tails ~= 0.1.3"
|
40
40
|
]
|
41
41
|
dynamic = ["version"]
|
42
42
|
|
43
43
|
[project.optional-dependencies]
|
44
|
-
dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff==0.
|
44
|
+
dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff==0.5.0"]
|
45
45
|
tests = ["pytest", "pytest-cov", "pytest-asyncio==0.18.3", "mock"]
|
46
46
|
docs = [
|
47
47
|
"sphinx==6.1.3",
|
@@ -55,13 +55,13 @@ docs = [
|
|
55
55
|
|
56
56
|
[project.urls]
|
57
57
|
Homepage = "https://github.com/genomicmedlab/cool-seq-tool"
|
58
|
-
Documentation = "https://coolseqtool.readthedocs.io/
|
58
|
+
Documentation = "https://coolseqtool.readthedocs.io/"
|
59
59
|
Changelog = "https://github.com/genomicmedlab/cool-seq-tool/releases"
|
60
60
|
Source = "https://github.com/genomicmedlab/cool-seq-tool"
|
61
61
|
"Bug Tracker" = "https://github.com/genomicmedlab/cool-seq-tool/issues"
|
62
62
|
|
63
63
|
[build-system]
|
64
|
-
requires = ["setuptools>=
|
64
|
+
requires = ["setuptools>=64"]
|
65
65
|
build-backend = "setuptools.build_meta"
|
66
66
|
|
67
67
|
[tool.setuptools.dynamic]
|
@@ -75,7 +75,7 @@ version = {attr = "cool_seq_tool.version.__version__"}
|
|
75
75
|
# where = ["src"]
|
76
76
|
|
77
77
|
[tool.setuptools.package-data]
|
78
|
-
"cool_seq_tool.
|
78
|
+
"cool_seq_tool.resources" = ["transcript_mapping.tsv"]
|
79
79
|
|
80
80
|
[tool.pytest.ini_options]
|
81
81
|
addopts = "--cov=src --cov-report term-missing"
|
@@ -103,16 +103,22 @@ lint.select = [
|
|
103
103
|
"DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
|
104
104
|
"T10", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
|
105
105
|
"EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
|
106
|
+
"LOG", # https://docs.astral.sh/ruff/rules/#flake8-logging-log
|
106
107
|
"G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
|
108
|
+
"INP", # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp
|
107
109
|
"PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie
|
108
110
|
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
|
109
111
|
"PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt
|
110
112
|
"Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q
|
111
113
|
"RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
|
112
114
|
"RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret
|
115
|
+
"SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf
|
113
116
|
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
|
117
|
+
"ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg
|
114
118
|
"PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
|
115
119
|
"PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
|
120
|
+
"PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf
|
121
|
+
"FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb
|
116
122
|
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
|
117
123
|
]
|
118
124
|
lint.fixable = [
|
@@ -123,16 +129,20 @@ lint.fixable = [
|
|
123
129
|
"ANN",
|
124
130
|
"B",
|
125
131
|
"C4",
|
132
|
+
"LOG",
|
126
133
|
"G",
|
127
134
|
"PIE",
|
128
135
|
"PT",
|
129
136
|
"RSE",
|
130
137
|
"SIM",
|
138
|
+
"PERF",
|
139
|
+
"FURB",
|
131
140
|
"RUF"
|
132
141
|
]
|
133
142
|
|
134
|
-
# ANN101 - missing-type-self
|
135
143
|
# ANN003 - missing-type-kwargs
|
144
|
+
# ANN101 - missing-type-self
|
145
|
+
# ANN102 - missing-type-cls
|
136
146
|
# D203 - one-blank-line-before-class
|
137
147
|
# D205 - blank-line-after-summary
|
138
148
|
# D206 - indent-with-spaces*
|
@@ -148,7 +158,7 @@ lint.fixable = [
|
|
148
158
|
# S321 - suspicious-ftp-lib-usage
|
149
159
|
# *ignored for compatibility with formatter
|
150
160
|
lint.ignore = [
|
151
|
-
"ANN101", "
|
161
|
+
"ANN003", "ANN101", "ANN102",
|
152
162
|
"D203", "D205", "D206", "D213", "D300", "D400", "D415",
|
153
163
|
"E111", "E114", "E117", "E501",
|
154
164
|
"W191",
|
@@ -162,9 +172,15 @@ lint.ignore = [
|
|
162
172
|
# N805 - invalid-first-argument-name-for-method
|
163
173
|
# F821 - undefined-name
|
164
174
|
# F401 - unused-import
|
165
|
-
|
175
|
+
# INP001 - implicit-namespace-package
|
176
|
+
# SLF001 - private-member-access
|
177
|
+
# ARG001 - unused-function-argument
|
178
|
+
"tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "INP001", "SLF001", "ARG001"]
|
166
179
|
"*__init__.py" = ["F401"]
|
167
180
|
"src/cool_seq_tool/schemas.py" = ["ANN201", "N805", "ANN001"]
|
168
181
|
|
169
182
|
[tool.ruff.lint.flake8-bugbear]
|
170
183
|
extend-immutable-calls = ["fastapi.Query"]
|
184
|
+
|
185
|
+
[tool.ruff.format]
|
186
|
+
docstring-code-format = true
|
@@ -1,5 +1,4 @@
|
|
1
1
|
"""Main application for FastAPI"""
|
2
|
-
from typing import Dict
|
3
2
|
|
4
3
|
from fastapi import FastAPI
|
5
4
|
from fastapi.openapi.utils import get_openapi
|
@@ -19,7 +18,7 @@ app.include_router(mane.router)
|
|
19
18
|
app.include_router(mappings.router)
|
20
19
|
|
21
20
|
|
22
|
-
def custom_openapi() -> Dict:
|
21
|
+
def custom_openapi() -> dict:
|
23
22
|
"""Generate custom fields for OpenAPI response."""
|
24
23
|
if app.openapi_schema:
|
25
24
|
return app.openapi_schema
|
@@ -1,24 +1,18 @@
|
|
1
1
|
"""Provides core CoolSeqTool class, which non-redundantly initializes all Cool-Seq-Tool
|
2
2
|
data handler and mapping resources for straightforward access.
|
3
3
|
"""
|
4
|
+
|
4
5
|
import logging
|
5
6
|
from pathlib import Path
|
6
|
-
from typing import Optional
|
7
7
|
|
8
8
|
from biocommons.seqrepo import SeqRepo
|
9
9
|
|
10
|
-
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
10
|
+
from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
|
11
11
|
from cool_seq_tool.mappers import (
|
12
12
|
AlignmentMapper,
|
13
13
|
ExonGenomicCoordsMapper,
|
14
14
|
ManeTranscript,
|
15
15
|
)
|
16
|
-
from cool_seq_tool.paths import (
|
17
|
-
LRG_REFSEQGENE_PATH,
|
18
|
-
MANE_SUMMARY_PATH,
|
19
|
-
SEQREPO_ROOT_DIR,
|
20
|
-
TRANSCRIPT_MAPPINGS_PATH,
|
21
|
-
)
|
22
16
|
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
|
23
17
|
from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
|
24
18
|
from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
|
@@ -37,26 +31,44 @@ class CoolSeqTool:
|
|
37
31
|
* ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
|
38
32
|
* ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
|
39
33
|
* ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
|
40
|
-
|
41
|
-
Initialization with default resource locations is straightforward:
|
42
|
-
|
43
|
-
.. code-block:: pycon
|
44
|
-
|
45
|
-
>>> from cool_seq_tool.app import CoolSeqTool
|
46
|
-
>>> cst = CoolSeqTool()
|
47
|
-
|
48
|
-
See the :ref:`configuration <configuration>` section for more information.
|
49
34
|
"""
|
50
35
|
|
51
36
|
def __init__(
|
52
37
|
self,
|
53
|
-
transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH,
|
54
|
-
lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
|
55
|
-
mane_data_path: Path = MANE_SUMMARY_PATH,
|
38
|
+
transcript_file_path: Path | None = None,
|
39
|
+
lrg_refseqgene_path: Path | None = None,
|
40
|
+
mane_data_path: Path | None = None,
|
56
41
|
db_url: str = UTA_DB_URL,
|
57
|
-
sr: Optional[SeqRepo] = None,
|
42
|
+
sr: SeqRepo | None = None,
|
43
|
+
force_local_files: bool = False,
|
58
44
|
) -> None:
|
59
|
-
"""Initialize CoolSeqTool class
|
45
|
+
"""Initialize CoolSeqTool class.
|
46
|
+
|
47
|
+
Initialization with default resource locations is straightforward:
|
48
|
+
|
49
|
+
>>> from cool_seq_tool.app import CoolSeqTool
|
50
|
+
>>> cst = CoolSeqTool()
|
51
|
+
|
52
|
+
By default, this will attempt to fetch the latest versions of static resources,
|
53
|
+
which means brief FTP and HTTPS requests to NCBI servers upon initialization.
|
54
|
+
To suppress this check and simply rely on the most recent locally-available
|
55
|
+
data:
|
56
|
+
|
57
|
+
>>> cst = CoolSeqTool(force_local_files=True)
|
58
|
+
|
59
|
+
Note that this will raise a FileNotFoundError if no locally-available data exists.
|
60
|
+
|
61
|
+
Paths to those files can also be explicitly passed to avoid checks as well:
|
62
|
+
|
63
|
+
>>> from pathlib import Path
|
64
|
+
>>> cst = CoolSeqTool(
|
65
|
+
... lrg_refseqgene_path=Path("lrg_refseqgene_20240625.tsv"),
|
66
|
+
... mane_data_path=Path("ncbi_mane_summary_1.3.txt"),
|
67
|
+
... )
|
68
|
+
|
69
|
+
If not passed explicit arguments, these locations can also be set via
|
70
|
+
environment variables. See the :ref:`configuration <configuration>` section of
|
71
|
+
the docs for more information.
|
60
72
|
|
61
73
|
:param transcript_file_path: The path to ``transcript_mapping.tsv``
|
62
74
|
:param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
|
@@ -64,6 +76,8 @@ class CoolSeqTool:
|
|
64
76
|
:param db_url: PostgreSQL connection URL
|
65
77
|
Format: ``driver://user:password@host/database/schema``
|
66
78
|
:param sr: SeqRepo instance. If this is not provided, will create a new instance
|
79
|
+
:param force_local_files: if ``True``, don't check for or try to acquire latest
|
80
|
+
versions of static data files -- just use most recently available, if any
|
67
81
|
"""
|
68
82
|
if not sr:
|
69
83
|
sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
|
@@ -71,9 +85,10 @@ class CoolSeqTool:
|
|
71
85
|
self.transcript_mappings = TranscriptMappings(
|
72
86
|
transcript_file_path=transcript_file_path,
|
73
87
|
lrg_refseqgene_path=lrg_refseqgene_path,
|
88
|
+
from_local=force_local_files,
|
74
89
|
)
|
75
90
|
self.mane_transcript_mappings = ManeTranscriptMappings(
|
76
|
-
mane_data_path=mane_data_path
|
91
|
+
mane_data_path=mane_data_path, from_local=force_local_files
|
77
92
|
)
|
78
93
|
self.uta_db = UtaDatabase(db_url=db_url)
|
79
94
|
self.alignment_mapper = AlignmentMapper(
|
{cool_seq_tool-0.4.0.dev3 → cool_seq_tool-0.4.1}/src/cool_seq_tool/handlers/seqrepo_access.py
RENAMED
@@ -1,10 +1,10 @@
|
|
1
1
|
"""Wrap SeqRepo to provide additional lookup and identification methods on top of basic
|
2
2
|
dereferencing functions.
|
3
3
|
"""
|
4
|
+
|
4
5
|
import logging
|
5
6
|
from os import environ
|
6
7
|
from pathlib import Path
|
7
|
-
from typing import List, Optional, Tuple, Union
|
8
8
|
|
9
9
|
from ga4gh.vrs.dataproxy import SeqRepoDataProxy
|
10
10
|
|
@@ -14,6 +14,9 @@ from cool_seq_tool.utils import get_inter_residue_pos
|
|
14
14
|
logger = logging.getLogger(__name__)
|
15
15
|
|
16
16
|
|
17
|
+
SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
|
18
|
+
|
19
|
+
|
17
20
|
class SeqRepoAccess(SeqRepoDataProxy):
|
18
21
|
"""Provide a wrapper around the base SeqRepoDataProxy class from ``VRS-Python`` to
|
19
22
|
provide additional lookup and identification methods.
|
@@ -24,10 +27,10 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
24
27
|
def get_reference_sequence(
|
25
28
|
self,
|
26
29
|
ac: str,
|
27
|
-
start: Optional[int] = None,
|
28
|
-
end: Optional[int] = None,
|
30
|
+
start: int | None = None,
|
31
|
+
end: int | None = None,
|
29
32
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
30
|
-
) -> Tuple[str, Optional[str]]:
|
33
|
+
) -> tuple[str, str | None]:
|
31
34
|
"""Get reference sequence for an accession given a start and end position. If
|
32
35
|
``start`` and ``end`` are not given, returns the entire reference sequence.
|
33
36
|
|
@@ -93,8 +96,8 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
93
96
|
return sequence, None
|
94
97
|
|
95
98
|
def translate_identifier(
|
96
|
-
self, ac: str, target_namespaces:
|
97
|
-
) ->
|
99
|
+
self, ac: str, target_namespaces: str | list[str] | None = None
|
100
|
+
) -> tuple[list[str], str | None]:
|
98
101
|
"""Return list of identifiers for accession.
|
99
102
|
|
100
103
|
>>> from cool_seq_tool.handlers import SeqRepoAccess
|
@@ -120,9 +123,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
120
123
|
else:
|
121
124
|
return ga4gh_identifiers, None
|
122
125
|
|
123
|
-
def translate_alias(
|
124
|
-
self, input_str: str
|
125
|
-
) -> Tuple[List[Optional[str]], Optional[str]]:
|
126
|
+
def translate_alias(self, input_str: str) -> tuple[list[str | None], str | None]:
|
126
127
|
"""Get aliases for a given input.
|
127
128
|
|
128
129
|
:param str input_str: Input to get aliases for
|
@@ -135,9 +136,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
135
136
|
logger.warning(msg)
|
136
137
|
return [], msg
|
137
138
|
|
138
|
-
def chromosome_to_acs(
|
139
|
-
self, chromosome: str
|
140
|
-
) -> Tuple[Optional[List[str]], Optional[str]]:
|
139
|
+
def chromosome_to_acs(self, chromosome: str) -> tuple[list[str] | None, str | None]:
|
141
140
|
"""Get accessions for a chromosome
|
142
141
|
|
143
142
|
:param chromosome: Chromosome number. Must be either 1-22, X, or Y
|
@@ -148,13 +147,12 @@ class SeqRepoAccess(SeqRepoDataProxy):
|
|
148
147
|
tmp_acs, _ = self.translate_identifier(
|
149
148
|
f"{assembly}:chr{chromosome}", target_namespaces="refseq"
|
150
149
|
)
|
151
|
-
for ac in tmp_acs:
|
152
|
-
acs.append(ac.split("refseq:")[-1])
|
150
|
+
acs += [ac.split("refseq:")[-1] for ac in tmp_acs]
|
153
151
|
if acs:
|
154
152
|
return acs, None
|
155
153
|
return None, f"{chromosome} is not a valid chromosome"
|
156
154
|
|
157
|
-
def ac_to_chromosome(self, ac: str) -> Tuple[Optional[str], Optional[str]]:
|
155
|
+
def ac_to_chromosome(self, ac: str) -> tuple[str | None, str | None]:
|
158
156
|
"""Get chromosome for accession.
|
159
157
|
|
160
158
|
:param str ac: Accession
|
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Module containing alignment methods for translating to and from different
|
2
2
|
reference sequences.
|
3
3
|
"""
|
4
|
-
from typing import Dict, Optional, Tuple
|
5
4
|
|
6
5
|
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
|
7
6
|
from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode
|
@@ -34,7 +33,7 @@ class AlignmentMapper:
|
|
34
33
|
p_start_pos: int,
|
35
34
|
p_end_pos: int,
|
36
35
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
37
|
-
) ->
|
36
|
+
) -> tuple[dict | None, str | None]:
|
38
37
|
"""Translate protein representation to cDNA representation.
|
39
38
|
|
40
39
|
:param p_ac: Protein RefSeq accession
|
@@ -83,7 +82,7 @@ class AlignmentMapper:
|
|
83
82
|
"residue_mode": ResidueMode.INTER_RESIDUE.value,
|
84
83
|
}, None
|
85
84
|
|
86
|
-
async def _get_cds_start(self, c_ac: str) -> Tuple[Optional[int], Optional[str]]:
|
85
|
+
async def _get_cds_start(self, c_ac: str) -> tuple[int | None, str | None]:
|
87
86
|
"""Get CDS start for a given cDNA RefSeq accession
|
88
87
|
|
89
88
|
:param c_ac: cDNA RefSeq accession
|
@@ -105,10 +104,10 @@ class AlignmentMapper:
|
|
105
104
|
c_ac: str,
|
106
105
|
c_start_pos: int,
|
107
106
|
c_end_pos: int,
|
108
|
-
cds_start: Optional[int] = None,
|
107
|
+
cds_start: int | None = None,
|
109
108
|
residue_mode: ResidueMode = ResidueMode.RESIDUE,
|
110
109
|
target_genome_assembly: bool = Assembly.GRCH38,
|
111
|
-
) ->
|
110
|
+
) -> tuple[dict | None, str | None]:
|
112
111
|
"""Translate cDNA representation to genomic representation
|
113
112
|
|
114
113
|
:param c_ac: cDNA RefSeq accession
|
@@ -212,7 +211,7 @@ class AlignmentMapper:
|
|
212
211
|
p_end_pos: int,
|
213
212
|
residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE,
|
214
213
|
target_genome_assembly: Assembly = Assembly.GRCH38,
|
215
|
-
) ->
|
214
|
+
) -> tuple[dict | None, str | None]:
|
216
215
|
"""Translate protein representation to genomic representation, by way of
|
217
216
|
intermediary conversion into cDNA coordinates.
|
218
217
|
|