cool-seq-tool 0.3.0.dev1__tar.gz → 0.4.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0}/LICENSE +1 -1
  2. cool_seq_tool-0.4.0.dev0/PKG-INFO +130 -0
  3. cool_seq_tool-0.4.0.dev0/README.md +52 -0
  4. cool_seq_tool-0.4.0.dev0/pyproject.toml +122 -0
  5. cool_seq_tool-0.4.0.dev0/setup.cfg +4 -0
  6. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/api.py +3 -3
  7. cool_seq_tool-0.4.0.dev0/src/cool_seq_tool/app.py +90 -0
  8. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/data/data_downloads.py +8 -5
  9. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/handlers/seqrepo_access.py +55 -27
  10. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/__init__.py +4 -1
  11. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/alignment.py +40 -37
  12. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/exon_genomic_coords.py +329 -138
  13. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/mappers/mane_transcript.py +402 -227
  14. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/mappings.py +1 -1
  15. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/schemas.py +31 -24
  16. cool_seq_tool-0.4.0.dev0/src/cool_seq_tool/sources/__init__.py +6 -0
  17. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/sources/mane_transcript_mappings.py +28 -7
  18. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/sources/transcript_mappings.py +27 -11
  19. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/sources/uta_database.py +179 -232
  20. cool_seq_tool-0.4.0.dev0/src/cool_seq_tool/utils.py +46 -0
  21. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/version.py +1 -1
  22. cool_seq_tool-0.4.0.dev0/src/cool_seq_tool.egg-info/PKG-INFO +130 -0
  23. cool_seq_tool-0.4.0.dev0/src/cool_seq_tool.egg-info/SOURCES.txt +32 -0
  24. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool.egg-info/requires.txt +8 -1
  25. cool_seq_tool-0.4.0.dev0/tests/test_utils.py +26 -0
  26. cool_seq_tool-0.3.0.dev1/PKG-INFO +0 -187
  27. cool_seq_tool-0.3.0.dev1/README.md +0 -153
  28. cool_seq_tool-0.3.0.dev1/cool_seq_tool/app.py +0 -69
  29. cool_seq_tool-0.3.0.dev1/cool_seq_tool/data/transcript_mapping.tsv +0 -256226
  30. cool_seq_tool-0.3.0.dev1/cool_seq_tool/sources/__init__.py +0 -4
  31. cool_seq_tool-0.3.0.dev1/cool_seq_tool/utils.py +0 -48
  32. cool_seq_tool-0.3.0.dev1/cool_seq_tool.egg-info/PKG-INFO +0 -187
  33. cool_seq_tool-0.3.0.dev1/cool_seq_tool.egg-info/SOURCES.txt +0 -36
  34. cool_seq_tool-0.3.0.dev1/cool_seq_tool.egg-info/not-zip-safe +0 -1
  35. cool_seq_tool-0.3.0.dev1/pyproject.toml +0 -41
  36. cool_seq_tool-0.3.0.dev1/setup.cfg +0 -52
  37. cool_seq_tool-0.3.0.dev1/setup.py +0 -5
  38. cool_seq_tool-0.3.0.dev1/tests/test_utils.py +0 -25
  39. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/__init__.py +0 -0
  40. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/data/__init__.py +0 -0
  41. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/handlers/__init__.py +0 -0
  42. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/paths.py +0 -0
  43. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/__init__.py +0 -0
  44. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/default.py +0 -0
  45. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool/routers/mane.py +1 -1
  46. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool.egg-info/dependency_links.txt +0 -0
  47. {cool_seq_tool-0.3.0.dev1 → cool_seq_tool-0.4.0.dev0/src}/cool_seq_tool.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2021 VICC
3
+ Copyright (c) 2021-2023 Wagner Lab
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,130 @@
1
+ Metadata-Version: 2.1
2
+ Name: cool_seq_tool
3
+ Version: 0.4.0.dev0
4
+ Summary: Common Operations on Lots of Sequences Tool
5
+ Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
+ License: MIT License
7
+
8
+ Copyright (c) 2021-2023 Wagner Lab
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/genomicmedlab/cool-seq-tool
29
+ Project-URL: Documentation, https://coolseqtool.readthedocs.io/en/latest/index.html
30
+ Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
31
+ Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
32
+ Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Framework :: FastAPI
35
+ Classifier: Framework :: Pydantic
36
+ Classifier: Framework :: Pydantic :: 2
37
+ Classifier: Intended Audience :: Science/Research
38
+ Classifier: Intended Audience :: Developers
39
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
40
+ Classifier: License :: OSI Approved :: MIT License
41
+ Classifier: Programming Language :: Python :: 3
42
+ Classifier: Programming Language :: Python :: 3.8
43
+ Classifier: Programming Language :: Python :: 3.9
44
+ Classifier: Programming Language :: Python :: 3.10
45
+ Classifier: Programming Language :: Python :: 3.11
46
+ Requires-Python: >=3.8
47
+ Description-Content-Type: text/markdown
48
+ License-File: LICENSE
49
+ Requires-Dist: asyncpg
50
+ Requires-Dist: aiofiles
51
+ Requires-Dist: boto3
52
+ Requires-Dist: pyliftover
53
+ Requires-Dist: polars
54
+ Requires-Dist: hgvs
55
+ Requires-Dist: biocommons.seqrepo
56
+ Requires-Dist: pydantic==2.*
57
+ Requires-Dist: uvicorn
58
+ Requires-Dist: fastapi
59
+ Requires-Dist: ga4gh.vrs
60
+ Provides-Extra: dev
61
+ Requires-Dist: pre-commit; extra == "dev"
62
+ Requires-Dist: ipython; extra == "dev"
63
+ Requires-Dist: ipykernel; extra == "dev"
64
+ Requires-Dist: psycopg2-binary; extra == "dev"
65
+ Requires-Dist: ruff; extra == "dev"
66
+ Provides-Extra: tests
67
+ Requires-Dist: pytest; extra == "tests"
68
+ Requires-Dist: pytest-cov; extra == "tests"
69
+ Requires-Dist: pytest-asyncio==0.18.3; extra == "tests"
70
+ Requires-Dist: mock; extra == "tests"
71
+ Provides-Extra: docs
72
+ Requires-Dist: sphinx==6.1.3; extra == "docs"
73
+ Requires-Dist: sphinx-autodoc-typehints==1.22.0; extra == "docs"
74
+ Requires-Dist: sphinx-autobuild==2021.3.14; extra == "docs"
75
+ Requires-Dist: sphinx-copybutton==0.5.2; extra == "docs"
76
+ Requires-Dist: sphinxext-opengraph==0.8.2; extra == "docs"
77
+ Requires-Dist: furo==2023.3.27; extra == "docs"
78
+
79
+ <h1 align="center">
80
+ CoolSeqTool
81
+ </h1>
82
+
83
+ **[Documentation](https://coolseqtool.readthedocs.io/en/latest/)** · [Installation](https://coolseqtool.readthedocs.io/en/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/en/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/en/latest/reference/index.html)
84
+
85
+ ## Overview
86
+
87
+ <!-- description -->
88
+ The **CoolSeqTool** provides:
89
+
90
+ - A Pythonic API on top of sequence data of interest to tertiary analysis tools, including mappings between gene names and transcripts, [MANE transcript](https://www.ncbi.nlm.nih.gov/refseq/MANE/) descriptions, and the [Universal Transcript Archive](https://github.com/biocommons/uta)
91
+ - Augmented access to the [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) database, including multiple additional methods and tools
92
+ - Mapping tools that combine the above to support translation between reference sequences, annotation layers, and MANE transcripts
93
+ <!-- /description -->
94
+
95
+ ---
96
+
97
+ ## Install
98
+
99
+ CoolSeqTool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
100
+
101
+ ```shell
102
+ python3 -m pip install cool-seq-tool
103
+ ```
104
+
105
+ See the [installation instructions](https://coolseqtool.readthedocs.io/en/latest/install.html) in the documentation for a description of dependency setup requirements.
106
+
107
+ ---
108
+
109
+ ## Usage
110
+
111
+ All CoolSeqTool resources can be initialized by way of a top-level class instance:
112
+
113
+ ```pycon
114
+ >>> from cool_seq_tool.app import CoolSeqTool
115
+ >>> cst = CoolSeqTool()
116
+ >>> result = await cst.mane_transcript.get_mane_transcript(
117
+ ... "NP_004324.2",
118
+ ... 599,
119
+ ... AnnotationLayer.PROTEIN,
120
+ ... residue_mode=ResidueMode.INTER_RESIDUE,
121
+ ... )
122
+ >>> result.gene, result.refseq, result.status
123
+ ('BRAF', 'NP_004324.2', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
124
+ ```
125
+
126
+ ---
127
+
128
+ ## Feedback and contributing
129
+
130
+ We welcome bug reports, feature requests, and code contributions from users and interested collaborators. The [documentation](https://coolseqtool.readthedocs.io/en/latest/contributing.html) contains guidance for submitting feedback and contributing new code.
@@ -0,0 +1,52 @@
1
+ <h1 align="center">
2
+ CoolSeqTool
3
+ </h1>
4
+
5
+ **[Documentation](https://coolseqtool.readthedocs.io/en/latest/)** · [Installation](https://coolseqtool.readthedocs.io/en/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/en/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/en/latest/reference/index.html)
6
+
7
+ ## Overview
8
+
9
+ <!-- description -->
10
+ The **CoolSeqTool** provides:
11
+
12
+ - A Pythonic API on top of sequence data of interest to tertiary analysis tools, including mappings between gene names and transcripts, [MANE transcript](https://www.ncbi.nlm.nih.gov/refseq/MANE/) descriptions, and the [Universal Transcript Archive](https://github.com/biocommons/uta)
13
+ - Augmented access to the [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) database, including multiple additional methods and tools
14
+ - Mapping tools that combine the above to support translation between reference sequences, annotation layers, and MANE transcripts
15
+ <!-- /description -->
16
+
17
+ ---
18
+
19
+ ## Install
20
+
21
+ CoolSeqTool is available on [PyPI](https://pypi.org/project/cool-seq-tool)
22
+
23
+ ```shell
24
+ python3 -m pip install cool-seq-tool
25
+ ```
26
+
27
+ See the [installation instructions](https://coolseqtool.readthedocs.io/en/latest/install.html) in the documentation for a description of dependency setup requirements.
28
+
29
+ ---
30
+
31
+ ## Usage
32
+
33
+ All CoolSeqTool resources can be initialized by way of a top-level class instance:
34
+
35
+ ```pycon
36
+ >>> from cool_seq_tool.app import CoolSeqTool
37
+ >>> cst = CoolSeqTool()
38
+ >>> result = await cst.mane_transcript.get_mane_transcript(
39
+ ... "NP_004324.2",
40
+ ... 599,
41
+ ... AnnotationLayer.PROTEIN,
42
+ ... residue_mode=ResidueMode.INTER_RESIDUE,
43
+ ... )
44
+ >>> result.gene, result.refseq, result.status
45
+ ('BRAF', 'NP_004324.2', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
46
+ ```
47
+
48
+ ---
49
+
50
+ ## Feedback and contributing
51
+
52
+ We welcome bug reports, feature requests, and code contributions from users and interested collaborators. The [documentation](https://coolseqtool.readthedocs.io/en/latest/contributing.html) contains guidance for submitting feedback and contributing new code.
@@ -0,0 +1,122 @@
1
+ [project]
2
+ name = "cool_seq_tool"
3
+ authors = [
4
+ {name = "Kori Kuzma"},
5
+ {name = "James Stevenson"},
6
+ {name = "Katie Stahl"},
7
+ {name = "Alex Wagner"},
8
+ ]
9
+ readme = "README.md"
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Framework :: FastAPI",
13
+ "Framework :: Pydantic",
14
+ "Framework :: Pydantic :: 2",
15
+ "Intended Audience :: Science/Research",
16
+ "Intended Audience :: Developers",
17
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ ]
25
+ requires-python = ">=3.8"
26
+ description = "Common Operations on Lots of Sequences Tool"
27
+ license = {file = "LICENSE"}
28
+ dependencies = [
29
+ "asyncpg",
30
+ "aiofiles",
31
+ "boto3",
32
+ "pyliftover",
33
+ "polars",
34
+ "hgvs",
35
+ "biocommons.seqrepo",
36
+ "pydantic == 2.*",
37
+ "uvicorn",
38
+ "fastapi",
39
+ "ga4gh.vrs",
40
+ ]
41
+ dynamic = ["version"]
42
+
43
+ [project.optional-dependencies]
44
+ dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff"]
45
+ tests = ["pytest", "pytest-cov", "pytest-asyncio==0.18.3", "mock"]
46
+ docs = [
47
+ "sphinx==6.1.3",
48
+ "sphinx-autodoc-typehints==1.22.0",
49
+ "sphinx-autobuild==2021.3.14",
50
+ "sphinx-copybutton==0.5.2",
51
+ "sphinxext-opengraph==0.8.2",
52
+ "furo==2023.3.27",
53
+ ]
54
+
55
+ [project.urls]
56
+ Homepage = "https://github.com/genomicmedlab/cool-seq-tool"
57
+ Documentation = "https://coolseqtool.readthedocs.io/en/latest/index.html"
58
+ Changelog = "https://github.com/genomicmedlab/cool-seq-tool/releases"
59
+ Source = "https://github.com/genomicmedlab/cool-seq-tool"
60
+ "Bug Tracker" = "https://github.com/genomicmedlab/cool-seq-tool/issues"
61
+
62
+ [build-system]
63
+ requires = ["setuptools>=61.0"]
64
+ build-backend = "setuptools.build_meta"
65
+
66
+ [tool.setuptools.dynamic]
67
+ version = {attr = "cool_seq_tool.version.__version__"}
68
+
69
+ [tool.setuptools.packages.find]
70
+ where = ["src"]
71
+
72
+ [tool.pytest.ini_options]
73
+ addopts = "--cov=src --cov-report term-missing"
74
+ testpaths = ["tests"]
75
+
76
+ [tool.coverage.run]
77
+ branch = true
78
+
79
+ [tool.ruff]
80
+ src = ["src"]
81
+ exclude = ["docs/source/conf.py"]
82
+ # pycodestyle (E, W)
83
+ # Pyflakes (F)
84
+ # flake8-annotations (ANN)
85
+ # pydocstyle (D)
86
+ # pep8-naming (N)
87
+ # isort (I)
88
+ select = ["E", "W", "F", "ANN", "D", "N", "I"]
89
+ fixable = ["I", "F401"]
90
+
91
+ # ANN101 - missing-type-self
92
+ # ANN003 - missing-type-kwargs
93
+ # D203 - one-blank-line-before-class
94
+ # D205 - blank-line-after-summary
95
+ # D206 - indent-with-spaces*
96
+ # D213 - multi-line-summary-second-line
97
+ # D300 - triple-single-quotes*
98
+ # D400 - ends-in-period
99
+ # D415 - ends-in-punctuation
100
+ # E111 - indentation-with-invalid-multiple*
101
+ # E114 - indentation-with-invalid-multiple-comment*
102
+ # E117 - over-indented*
103
+ # E501 - line-too-long*
104
+ # W191 - tab-indentation*
105
+ # *ignored for compatibility with formatter
106
+ ignore = [
107
+ "ANN101", "ANN003",
108
+ "D203", "D205", "D206", "D213", "D300", "D400", "D415",
109
+ "E111", "E114", "E117", "E501",
110
+ "W191"
111
+ ]
112
+
113
+ [tool.ruff.per-file-ignores]
114
+ # ANN001 - missing-type-function-argument
115
+ # ANN2 - missing-return-type
116
+ # ANN102 - missing-type-cls
117
+ # N805 - invalid-first-argument-name-for-method
118
+ # F821 - undefined-name
119
+ # F401 - unused-import
120
+ "tests/*" = ["ANN001", "ANN2", "ANN102"]
121
+ "*__init__.py" = ["F401"]
122
+ "src/cool_seq_tool/schemas.py" = ["ANN201", "N805", "ANN001"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -24,16 +24,16 @@ def custom_openapi() -> Dict:
24
24
  if app.openapi_schema:
25
25
  return app.openapi_schema
26
26
  openapi_schema = get_openapi(
27
- title="The GenomicMedLab Cool Seq Tool",
27
+ title="The GenomicMedLab Cool-Seq-Tool",
28
28
  version=__version__,
29
- description="Common Operations On Lots-of Sequences Tool.",
29
+ description="Common Operations On Lots of Sequences Tool.",
30
30
  routes=app.routes,
31
31
  )
32
32
 
33
33
  openapi_schema["info"]["contact"] = {
34
34
  "name": "Alex H. Wagner",
35
35
  "email": "Alex.Wagner@nationwidechildrens.org",
36
- "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab", # noqa: E501
36
+ "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab",
37
37
  }
38
38
  app.openapi_schema = openapi_schema
39
39
  return app.openapi_schema
@@ -0,0 +1,90 @@
1
+ """Provides core CoolSeqTool class, which non-redundantly initializes all Cool-Seq-Tool
2
+ data handler and mapping resources for straightforward access.
3
+ """
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ from biocommons.seqrepo import SeqRepo
9
+
10
+ from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
11
+ from cool_seq_tool.mappers import (
12
+ AlignmentMapper,
13
+ ExonGenomicCoordsMapper,
14
+ ManeTranscript,
15
+ )
16
+ from cool_seq_tool.paths import (
17
+ LRG_REFSEQGENE_PATH,
18
+ MANE_SUMMARY_PATH,
19
+ SEQREPO_ROOT_DIR,
20
+ TRANSCRIPT_MAPPINGS_PATH,
21
+ )
22
+ from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
23
+ from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
24
+ from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ class CoolSeqTool:
30
+ """Non-redundantly initialize all Cool-Seq-Tool data resources, available under the
31
+ following attribute names:
32
+
33
+ * ``self.seqrepo_access``: :py:class:`SeqRepoAccess <cool_seq_tool.handlers.seqrepo_access.SeqRepoAccess>`
34
+ * ``self.transcript_mappings``: :py:class:`TranscriptMappings <cool_seq_tool.sources.transcript_mappings.TranscriptMappings>`
35
+ * ``self.mane_transcript_mappings``: :py:class:`ManeTranscriptMappings <cool_seq_tool.sources.mane_transcript_mappings.ManeTranscriptMappings>`
36
+ * ``self.uta_db``: :py:class:`UtaDatabase <cool_seq_tool.sources.uta_database.UtaDatabase>`
37
+ * ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
38
+ * ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
39
+ * ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
40
+
41
+ Initialization with default resource locations is straightforward:
42
+
43
+ .. code-block:: pycon
44
+
45
+ >>> from cool_seq_tool.app import CoolSeqTool
46
+ >>> cst = CoolSeqTool()
47
+
48
+ See the :ref:`configuration <configuration>` section for more information.
49
+ """
50
+
51
+ def __init__(
52
+ self,
53
+ transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH,
54
+ lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
55
+ mane_data_path: Path = MANE_SUMMARY_PATH,
56
+ db_url: str = UTA_DB_URL,
57
+ sr: Optional[SeqRepo] = None,
58
+ ) -> None:
59
+ """Initialize CoolSeqTool class
60
+
61
+ :param transcript_file_path: The path to ``transcript_mapping.tsv``
62
+ :param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
63
+ :param mane_data_path: Path to RefSeq MANE summary data
64
+ :param db_url: PostgreSQL connection URL
65
+ Format: ``driver://user:password@host/database/schema``
66
+ :param sr: SeqRepo instance. If this is not provided, will create a new instance
67
+ """
68
+ if not sr:
69
+ sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
70
+ self.seqrepo_access = SeqRepoAccess(sr)
71
+ self.transcript_mappings = TranscriptMappings(
72
+ transcript_file_path=transcript_file_path,
73
+ lrg_refseqgene_path=lrg_refseqgene_path,
74
+ )
75
+ self.mane_transcript_mappings = ManeTranscriptMappings(
76
+ mane_data_path=mane_data_path
77
+ )
78
+ self.uta_db = UtaDatabase(db_url=db_url)
79
+ self.alignment_mapper = AlignmentMapper(
80
+ self.seqrepo_access, self.transcript_mappings, self.uta_db
81
+ )
82
+ self.mane_transcript = ManeTranscript(
83
+ self.seqrepo_access,
84
+ self.transcript_mappings,
85
+ self.mane_transcript_mappings,
86
+ self.uta_db,
87
+ )
88
+ self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
89
+ self.uta_db, self.mane_transcript
90
+ )
@@ -1,4 +1,4 @@
1
- """Module for handling downloadable data files."""
1
+ """Handle acquisition of external data."""
2
2
  import datetime
3
3
  import gzip
4
4
  import logging
@@ -15,8 +15,11 @@ logger = logging.getLogger("cool_seq_tool")
15
15
 
16
16
 
17
17
  class DataDownload:
18
- """Class for managing downloadable data files. Responsible for checking if files
19
- are available under default locations, and fetching them if not.
18
+ """Manage downloadable data files. Responsible for checking if files are available
19
+ under expected locations, and fetching them if not.
20
+
21
+ Relevant methods are called automatically by data classes; users should not have
22
+ to interact with this class under normal circumstances.
20
23
  """
21
24
 
22
25
  def __init__(self) -> None:
@@ -25,7 +28,7 @@ class DataDownload:
25
28
 
26
29
  def get_mane_summary(self) -> Path:
27
30
  """Identify latest MANE summary data. If unavailable locally, download from
28
- source.
31
+ `NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/current/>`_.
29
32
 
30
33
  :return: path to MANE summary file
31
34
  """
@@ -52,7 +55,7 @@ class DataDownload:
52
55
 
53
56
  def get_lrg_refseq_gene_data(self) -> Path:
54
57
  """Identify latest LRG RefSeq Gene file. If unavailable locally, download from
55
- source.
58
+ `NCBI FTP server <https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/>`_.
56
59
 
57
60
  :return: path to acquired LRG RefSeq Gene data file
58
61
  """
@@ -1,4 +1,6 @@
1
- """A module for accessing SeqRepo."""
1
+ """Wrap SeqRepo to provide additional lookup and identification methods on top of basic
2
+ dereferencing functions.
3
+ """
2
4
  import logging
3
5
  from os import environ
4
6
  from pathlib import Path
@@ -13,7 +15,9 @@ logger = logging.getLogger(__name__)
13
15
 
14
16
 
15
17
  class SeqRepoAccess(SeqRepoDataProxy):
16
- """The SeqRepoAccess class."""
18
+ """Wrap the base SeqRepoDataProxy class from ``VRS-Python`` to
19
+ provide additional lookup and identification methods.
20
+ """
17
21
 
18
22
  environ["SEQREPO_LRU_CACHE_MAXSIZE"] = "none"
19
23
 
@@ -24,25 +28,37 @@ class SeqRepoAccess(SeqRepoDataProxy):
24
28
  end: Optional[int] = None,
25
29
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
26
30
  ) -> Tuple[str, Optional[str]]:
27
- """Get reference sequence for an accession given a start and end position.
28
- If `start` and `end` are not given, it will return the entire reference sequence
31
+ """Get reference sequence for an accession given a start and end position. If
32
+ ``start`` and ``end`` are not given, returns the entire reference sequence.
33
+
34
+ >>> from cool_seq_tool.handlers import SeqRepoAccess
35
+ >>> from biocommons.seqrepo import SeqRepo
36
+ >>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
37
+ >>> sr.get_reference_sequence("NM_002529.3", 1, 10)[0]
38
+ 'TGCAGCTGG'
39
+ >>> sr.get_reference_sequence("NP_001341538.1", 1, 10)[0]
40
+ 'MAALSGGGG'
29
41
 
30
42
  :param ac: Accession
31
43
  :param start: Start pos change
32
- :param end: End pos change. If `None` assumes both `start` and `end` have same
33
- values, if `start` exists.
34
- :param residue_mode: Residue mode for `start` and `end`
44
+ :param end: End pos change. If ``None`` assumes both ``start`` and ``end`` have
45
+ same values, if ``start`` exists.
46
+ :param residue_mode: Residue mode for ``start`` and ``end``
35
47
  :return: Sequence at position (if accession and positions actually
36
48
  exist, else return empty string), warning if any
37
49
  """
38
- if start or end:
39
- pos, warning = get_inter_residue_pos(start, residue_mode, end_pos=end)
40
- if pos is None:
41
- return "", warning
42
- else:
43
- start, end = pos
44
- if start == end:
45
- end += 1
50
+ if start and end:
51
+ if start > end:
52
+ msg = f"start ({start}) cannot be greater than end ({end})"
53
+ return "", msg
54
+
55
+ start, end = get_inter_residue_pos(start, end, residue_mode)
56
+ if start == end:
57
+ end += 1
58
+ else:
59
+ if start is not None and residue_mode == ResidueMode.RESIDUE:
60
+ start -= 1
61
+
46
62
  try:
47
63
  sequence = self.sr.fetch(ac, start=start, end=end)
48
64
  except KeyError:
@@ -53,18 +69,12 @@ class SeqRepoAccess(SeqRepoDataProxy):
53
69
  error = str(e)
54
70
  if error.startswith("start out of range"):
55
71
  msg = (
56
- f"Start inter-residue coordinate ({start}) is out of "
57
- f"index on {ac}"
72
+ f"Start inter-residue coordinate ({start}) is out of index on {ac}"
58
73
  )
59
74
  elif error.startswith("stop out of range"):
60
75
  msg = (
61
76
  f"End inter-residue coordinate ({end}) is out of " f"index on {ac}"
62
77
  )
63
- elif error.startswith("invalid coordinates") and ">" in error:
64
- msg = (
65
- f"Invalid inter-residue coordinates: start ({start}) "
66
- f"cannot be greater than end ({end})"
67
- )
68
78
  else:
69
79
  msg = f"{e}"
70
80
  logger.warning(msg)
@@ -78,8 +88,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
78
88
  if len(sequence) != expected_len_of_seq:
79
89
  return (
80
90
  "",
81
- f"End inter-residue coordinate ({end})"
82
- f" is out of index on {ac}",
91
+ f"End inter-residue coordinate ({end}) is out of index on {ac}",
83
92
  )
84
93
  return sequence, None
85
94
 
@@ -88,6 +97,14 @@ class SeqRepoAccess(SeqRepoDataProxy):
88
97
  ) -> Tuple[List[str], Optional[str]]:
89
98
  """Return list of identifiers for accession.
90
99
 
100
+ >>> from cool_seq_tool.handlers import SeqRepoAccess
101
+ >>> from biocommons.seqrepo import SeqRepo
102
+ >>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
103
+ >>> sr.translate_identifier("NM_002529.3")[0]
104
+ ['MD5:18f0a6e3af9e1bbd8fef1948c7156012', 'NCBI:NM_002529.3', 'refseq:NM_002529.3', 'SEGUID:dEJQBkga9d9VeBHTyTbg6JEtTGQ', 'SHA1:74425006481af5df557811d3c936e0e8912d4c64', 'VMC:GS_RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA', 'sha512t24u:RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA', 'ga4gh:SQ.RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA']
105
+ >>> sr.translate_identifier("NM_002529.3", "ga4gh")[0]
106
+ ['ga4gh:SQ.RSkww1aYmsMiWbNdNnOTnVDAM3ZWp1uA']
107
+
91
108
  :param ac: Identifier accession
92
109
  :param target_namespace: The namespace(s) of identifier to return
93
110
  :return: List of identifiers, warning
@@ -123,7 +140,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
123
140
  ) -> Tuple[Optional[List[str]], Optional[str]]:
124
141
  """Get accessions for a chromosome
125
142
 
126
- :param str chromosome: Chromosome number. Must be either 1-22, X, or Y
143
+ :param chromosome: Chromosome number. Must be either 1-22, X, or Y
127
144
  :return: Accessions for chromosome (ordered by latest assembly)
128
145
  """
129
146
  acs = []
@@ -160,9 +177,20 @@ class SeqRepoAccess(SeqRepoDataProxy):
160
177
 
161
178
  def get_fasta_file(self, sequence_id: str, outfile_path: Path) -> None:
162
179
  """Retrieve FASTA file containing sequence for requested sequence ID.
163
- :param sequence_id: accession ID, sans namespace, eg `NM_152263.3`
180
+
181
+ >>> from pathlib import Path
182
+ >>> from cool_seq_tool.handlers import SeqRepoAccess
183
+ >>> from biocommons.seqrepo import SeqRepo
184
+ >>> sr = SeqRepoAccess(SeqRepo("/usr/local/share/seqrepo/latest"))
185
+ >>> # write to local file tpm3.fasta:
186
+ >>> sr.get_fasta_file("NM_002529.3", Path("tpm3.fasta"))
187
+
188
+ FASTA file headers will include GA4GH sequence digest, Ensembl accession ID,
189
+ and RefSeq accession ID.
190
+
191
+ :param sequence_id: accession ID, sans namespace, eg ``NM_152263.3``
164
192
  :param outfile_path: path to save file to
165
- :return: None, but saves sequence data to `outfile_path` if successful
193
+ :return: None, but saves sequence data to ``outfile_path`` if successful
166
194
  :raise: KeyError if SeqRepo doesn't have sequence data for the given ID
167
195
  """
168
196
  sequence = self.get_reference_sequence(sequence_id)[0]
@@ -1,4 +1,7 @@
1
1
  """Module for mapping data"""
2
2
  from .alignment import AlignmentMapper # noqa: I001
3
- from .mane_transcript import MANETranscript
3
+ from .mane_transcript import ManeTranscript
4
4
  from .exon_genomic_coords import ExonGenomicCoordsMapper
5
+
6
+
7
+ __all__ = ["AlignmentMapper", "ManeTranscript", "ExonGenomicCoordsMapper"]