cool-seq-tool 0.4.0.dev2__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/LICENSE +1 -1
  2. {cool_seq_tool-0.4.0.dev2/src/cool_seq_tool.egg-info → cool_seq_tool-0.4.1}/PKG-INFO +15 -8
  3. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/README.md +7 -0
  4. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/pyproject.toml +31 -15
  5. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/__init__.py +1 -3
  6. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/api.py +1 -2
  7. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/app.py +42 -24
  8. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/handlers/__init__.py +1 -0
  9. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/handlers/seqrepo_access.py +13 -15
  10. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/__init__.py +1 -0
  11. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/alignment.py +5 -6
  12. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/exon_genomic_coords.py +232 -68
  13. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/mappers/mane_transcript.py +84 -86
  14. cool_seq_tool-0.4.1/src/cool_seq_tool/resources/__init__.py +1 -0
  15. cool_seq_tool-0.4.1/src/cool_seq_tool/resources/data_files.py +93 -0
  16. cool_seq_tool-0.4.1/src/cool_seq_tool/resources/status.py +151 -0
  17. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/__init__.py +1 -0
  18. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/default.py +1 -0
  19. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/mane.py +4 -4
  20. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/routers/mappings.py +2 -2
  21. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/schemas.py +83 -37
  22. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/__init__.py +1 -0
  23. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
  24. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/transcript_mappings.py +41 -32
  25. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/sources/uta_database.py +120 -69
  26. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/utils.py +2 -2
  27. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool/version.py +2 -1
  28. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1/src/cool_seq_tool.egg-info}/PKG-INFO +15 -8
  29. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/SOURCES.txt +4 -4
  30. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/requires.txt +3 -2
  31. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/tests/test_utils.py +1 -0
  32. cool_seq_tool-0.4.0.dev2/src/cool_seq_tool/data/__init__.py +0 -2
  33. cool_seq_tool-0.4.0.dev2/src/cool_seq_tool/data/data_downloads.py +0 -89
  34. cool_seq_tool-0.4.0.dev2/src/cool_seq_tool/paths.py +0 -28
  35. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/setup.cfg +0 -0
  36. {cool_seq_tool-0.4.0.dev2/src/cool_seq_tool/data → cool_seq_tool-0.4.1/src/cool_seq_tool/resources}/transcript_mapping.tsv +0 -0
  37. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/dependency_links.txt +0 -0
  38. {cool_seq_tool-0.4.0.dev2 → cool_seq_tool-0.4.1}/src/cool_seq_tool.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2021-2023 Wagner Lab
3
+ Copyright (c) 2021-2024 Wagner Lab
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cool_seq_tool
3
- Version: 0.4.0.dev2
3
+ Version: 0.4.1
4
4
  Summary: Common Operation on Lots of Sequences Tool
5
5
  Author: Kori Kuzma, James Stevenson, Katie Stahl, Alex Wagner
6
6
  License: MIT License
7
7
 
8
- Copyright (c) 2021-2023 Wagner Lab
8
+ Copyright (c) 2021-2024 Wagner Lab
9
9
 
10
10
  Permission is hereby granted, free of charge, to any person obtaining a copy
11
11
  of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@ License: MIT License
26
26
  SOFTWARE.
27
27
 
28
28
  Project-URL: Homepage, https://github.com/genomicmedlab/cool-seq-tool
29
- Project-URL: Documentation, https://coolseqtool.readthedocs.io/en/latest/index.html
29
+ Project-URL: Documentation, https://coolseqtool.readthedocs.io/
30
30
  Project-URL: Changelog, https://github.com/genomicmedlab/cool-seq-tool/releases
31
31
  Project-URL: Source, https://github.com/genomicmedlab/cool-seq-tool
32
32
  Project-URL: Bug Tracker, https://github.com/genomicmedlab/cool-seq-tool/issues
@@ -39,30 +39,30 @@ Classifier: Intended Audience :: Developers
39
39
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
40
40
  Classifier: License :: OSI Approved :: MIT License
41
41
  Classifier: Programming Language :: Python :: 3
42
- Classifier: Programming Language :: Python :: 3.8
43
- Classifier: Programming Language :: Python :: 3.9
44
42
  Classifier: Programming Language :: Python :: 3.10
45
43
  Classifier: Programming Language :: Python :: 3.11
46
- Requires-Python: >=3.8
44
+ Classifier: Programming Language :: Python :: 3.12
45
+ Requires-Python: >=3.10
47
46
  Description-Content-Type: text/markdown
48
47
  License-File: LICENSE
49
48
  Requires-Dist: asyncpg
50
49
  Requires-Dist: aiofiles
51
50
  Requires-Dist: boto3
52
51
  Requires-Dist: agct>=0.1.0-dev1
53
- Requires-Dist: polars
52
+ Requires-Dist: polars~=1.0
54
53
  Requires-Dist: hgvs
55
54
  Requires-Dist: biocommons.seqrepo
56
55
  Requires-Dist: pydantic==2.*
57
56
  Requires-Dist: uvicorn
58
57
  Requires-Dist: fastapi
59
58
  Requires-Dist: ga4gh.vrs
59
+ Requires-Dist: wags-tails~=0.1.3
60
60
  Provides-Extra: dev
61
61
  Requires-Dist: pre-commit; extra == "dev"
62
62
  Requires-Dist: ipython; extra == "dev"
63
63
  Requires-Dist: ipykernel; extra == "dev"
64
64
  Requires-Dist: psycopg2-binary; extra == "dev"
65
- Requires-Dist: ruff>=0.1.14; extra == "dev"
65
+ Requires-Dist: ruff==0.5.0; extra == "dev"
66
66
  Provides-Extra: tests
67
67
  Requires-Dist: pytest; extra == "tests"
68
68
  Requires-Dist: pytest-cov; extra == "tests"
@@ -81,8 +81,14 @@ Requires-Dist: sphinx-github-changelog==1.2.1; extra == "docs"
81
81
  CoolSeqTool
82
82
  </h1>
83
83
 
84
+ [![image](https://img.shields.io/pypi/v/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/l/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/pyversions/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![Actions status](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml/badge.svg)](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml)
85
+
86
+ ---
87
+
84
88
  **[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
85
89
 
90
+ ---
91
+
86
92
  ## Overview
87
93
 
88
94
  <!-- description -->
@@ -113,6 +119,7 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
113
119
 
114
120
  ```pycon
115
121
  >>> from cool_seq_tool.app import CoolSeqTool
122
+ >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
116
123
  >>> cst = CoolSeqTool()
117
124
  >>> result = await cst.mane_transcript.get_mane_transcript(
118
125
  ... "NP_004324.2",
@@ -2,8 +2,14 @@
2
2
  CoolSeqTool
3
3
  </h1>
4
4
 
5
+ [![image](https://img.shields.io/pypi/v/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/l/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![image](https://img.shields.io/pypi/pyversions/cool-seq-tool.svg)](https://pypi.python.org/pypi/cool-seq-tool) [![Actions status](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml/badge.svg)](https://github.com/genomicmedlab/cool-seq-tool/actions/workflows/checks.yaml)
6
+
7
+ ---
8
+
5
9
  **[Documentation](https://coolseqtool.readthedocs.io/latest/)** · [Installation](https://coolseqtool.readthedocs.io/latest/install.html) · [Usage](https://coolseqtool.readthedocs.io/latest/usage.html) · [API reference](https://coolseqtool.readthedocs.io/latest/reference/index.html)
6
10
 
11
+ ---
12
+
7
13
  ## Overview
8
14
 
9
15
  <!-- description -->
@@ -34,6 +40,7 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc
34
40
 
35
41
  ```pycon
36
42
  >>> from cool_seq_tool.app import CoolSeqTool
43
+ >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
37
44
  >>> cst = CoolSeqTool()
38
45
  >>> result = await cst.mane_transcript.get_mane_transcript(
39
46
  ... "NP_004324.2",
@@ -17,12 +17,11 @@ classifiers = [
17
17
  "Topic :: Scientific/Engineering :: Bio-Informatics",
18
18
  "License :: OSI Approved :: MIT License",
19
19
  "Programming Language :: Python :: 3",
20
- "Programming Language :: Python :: 3.8",
21
- "Programming Language :: Python :: 3.9",
22
20
  "Programming Language :: Python :: 3.10",
23
21
  "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
24
23
  ]
25
- requires-python = ">=3.8"
24
+ requires-python = ">=3.10"
26
25
  description = "Common Operation on Lots of Sequences Tool"
27
26
  license = {file = "LICENSE"}
28
27
  dependencies = [
@@ -30,18 +29,19 @@ dependencies = [
30
29
  "aiofiles",
31
30
  "boto3",
32
31
  "agct >= 0.1.0-dev1",
33
- "polars",
32
+ "polars ~= 1.0",
34
33
  "hgvs",
35
34
  "biocommons.seqrepo",
36
35
  "pydantic == 2.*",
37
36
  "uvicorn",
38
37
  "fastapi",
39
38
  "ga4gh.vrs",
39
+ "wags-tails ~= 0.1.3"
40
40
  ]
41
41
  dynamic = ["version"]
42
42
 
43
43
  [project.optional-dependencies]
44
- dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff>=0.1.14"]
44
+ dev = ["pre-commit", "ipython", "ipykernel", "psycopg2-binary", "ruff==0.5.0"]
45
45
  tests = ["pytest", "pytest-cov", "pytest-asyncio==0.18.3", "mock"]
46
46
  docs = [
47
47
  "sphinx==6.1.3",
@@ -55,13 +55,13 @@ docs = [
55
55
 
56
56
  [project.urls]
57
57
  Homepage = "https://github.com/genomicmedlab/cool-seq-tool"
58
- Documentation = "https://coolseqtool.readthedocs.io/en/latest/index.html"
58
+ Documentation = "https://coolseqtool.readthedocs.io/"
59
59
  Changelog = "https://github.com/genomicmedlab/cool-seq-tool/releases"
60
60
  Source = "https://github.com/genomicmedlab/cool-seq-tool"
61
61
  "Bug Tracker" = "https://github.com/genomicmedlab/cool-seq-tool/issues"
62
62
 
63
63
  [build-system]
64
- requires = ["setuptools>=61.0"]
64
+ requires = ["setuptools>=64"]
65
65
  build-backend = "setuptools.build_meta"
66
66
 
67
67
  [tool.setuptools.dynamic]
@@ -75,7 +75,7 @@ version = {attr = "cool_seq_tool.version.__version__"}
75
75
  # where = ["src"]
76
76
 
77
77
  [tool.setuptools.package-data]
78
- "cool_seq_tool.data" = ["transcript_mapping.tsv"]
78
+ "cool_seq_tool.resources" = ["transcript_mapping.tsv"]
79
79
 
80
80
  [tool.pytest.ini_options]
81
81
  addopts = "--cov=src --cov-report term-missing"
@@ -87,7 +87,7 @@ branch = true
87
87
  [tool.ruff]
88
88
  src = ["src"]
89
89
  exclude = ["docs/source/conf.py"]
90
- select = [
90
+ lint.select = [
91
91
  "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f
92
92
  "E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
93
93
  "I", # https://docs.astral.sh/ruff/rules/#isort-i
@@ -103,19 +103,25 @@ select = [
103
103
  "DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
104
104
  "T10", # https://docs.astral.sh/ruff/rules/#flake8-debugger-t10
105
105
  "EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
106
+ "LOG", # https://docs.astral.sh/ruff/rules/#flake8-logging-log
106
107
  "G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
108
+ "INP", # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp
107
109
  "PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie
108
110
  "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
109
111
  "PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt
110
112
  "Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q
111
113
  "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
112
114
  "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret
115
+ "SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf
113
116
  "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
117
+ "ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg
114
118
  "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
115
119
  "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
120
+ "PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf
121
+ "FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb
116
122
  "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
117
123
  ]
118
- fixable = [
124
+ lint.fixable = [
119
125
  "I",
120
126
  "F401",
121
127
  "D",
@@ -123,16 +129,20 @@ fixable = [
123
129
  "ANN",
124
130
  "B",
125
131
  "C4",
132
+ "LOG",
126
133
  "G",
127
134
  "PIE",
128
135
  "PT",
129
136
  "RSE",
130
137
  "SIM",
138
+ "PERF",
139
+ "FURB",
131
140
  "RUF"
132
141
  ]
133
142
 
134
- # ANN101 - missing-type-self
135
143
  # ANN003 - missing-type-kwargs
144
+ # ANN101 - missing-type-self
145
+ # ANN102 - missing-type-cls
136
146
  # D203 - one-blank-line-before-class
137
147
  # D205 - blank-line-after-summary
138
148
  # D206 - indent-with-spaces*
@@ -147,24 +157,30 @@ fixable = [
147
157
  # W191 - tab-indentation*
148
158
  # S321 - suspicious-ftp-lib-usage
149
159
  # *ignored for compatibility with formatter
150
- ignore = [
151
- "ANN101", "ANN003",
160
+ lint.ignore = [
161
+ "ANN003", "ANN101", "ANN102",
152
162
  "D203", "D205", "D206", "D213", "D300", "D400", "D415",
153
163
  "E111", "E114", "E117", "E501",
154
164
  "W191",
155
165
  "S321",
156
166
  ]
157
167
 
158
- [tool.ruff.per-file-ignores]
168
+ [tool.ruff.lint.per-file-ignores]
159
169
  # ANN001 - missing-type-function-argument
160
170
  # ANN2 - missing-return-type
161
171
  # ANN102 - missing-type-cls
162
172
  # N805 - invalid-first-argument-name-for-method
163
173
  # F821 - undefined-name
164
174
  # F401 - unused-import
165
- "tests/*" = ["ANN001", "ANN2", "ANN102", "S101"]
175
+ # INP001 - implicit-namespace-package
176
+ # SLF001 - private-member-access
177
+ # ARG001 - unused-function-argument
178
+ "tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "INP001", "SLF001", "ARG001"]
166
179
  "*__init__.py" = ["F401"]
167
180
  "src/cool_seq_tool/schemas.py" = ["ANN201", "N805", "ANN001"]
168
181
 
169
182
  [tool.ruff.lint.flake8-bugbear]
170
183
  extend-immutable-calls = ["fastapi.Query"]
184
+
185
+ [tool.ruff.format]
186
+ docstring-code-format = true
@@ -1,8 +1,6 @@
1
1
  """The cool_seq_tool package"""
2
- import logging
3
- from pathlib import Path
4
2
 
5
- APP_ROOT = Path(__file__).resolve().parents[0]
3
+ import logging
6
4
 
7
5
  logging.basicConfig(
8
6
  filename="cool_seq_tool.log",
@@ -1,5 +1,4 @@
1
1
  """Main application for FastAPI"""
2
- from typing import Dict
3
2
 
4
3
  from fastapi import FastAPI
5
4
  from fastapi.openapi.utils import get_openapi
@@ -19,7 +18,7 @@ app.include_router(mane.router)
19
18
  app.include_router(mappings.router)
20
19
 
21
20
 
22
- def custom_openapi() -> Dict:
21
+ def custom_openapi() -> dict:
23
22
  """Generate custom fields for OpenAPI response."""
24
23
  if app.openapi_schema:
25
24
  return app.openapi_schema
@@ -1,24 +1,18 @@
1
1
  """Provides core CoolSeqTool class, which non-redundantly initializes all Cool-Seq-Tool
2
2
  data handler and mapping resources for straightforward access.
3
3
  """
4
+
4
5
  import logging
5
6
  from pathlib import Path
6
- from typing import Optional
7
7
 
8
8
  from biocommons.seqrepo import SeqRepo
9
9
 
10
- from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
10
+ from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
11
11
  from cool_seq_tool.mappers import (
12
12
  AlignmentMapper,
13
13
  ExonGenomicCoordsMapper,
14
14
  ManeTranscript,
15
15
  )
16
- from cool_seq_tool.paths import (
17
- LRG_REFSEQGENE_PATH,
18
- MANE_SUMMARY_PATH,
19
- SEQREPO_ROOT_DIR,
20
- TRANSCRIPT_MAPPINGS_PATH,
21
- )
22
16
  from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
23
17
  from cool_seq_tool.sources.transcript_mappings import TranscriptMappings
24
18
  from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
@@ -37,26 +31,44 @@ class CoolSeqTool:
37
31
  * ``self.alignment_mapper``: :py:class:`AlignmentMapper <cool_seq_tool.mappers.alignment.AlignmentMapper>`
38
32
  * ``self.mane_transcript``: :py:class:`ManeTranscript <cool_seq_tool.mappers.mane_transcript.ManeTranscript>`
39
33
  * ``self.ex_g_coords_mapper``: :py:class:`ExonGenomicCoordsMapper <cool_seq_tool.mappers.exon_genomic_coords.ExonGenomicCoordsMapper>`
40
-
41
- Initialization with default resource locations is straightforward:
42
-
43
- .. code-block:: pycon
44
-
45
- >>> from cool_seq_tool.app import CoolSeqTool
46
- >>> cst = CoolSeqTool()
47
-
48
- See the :ref:`configuration <configuration>` section for more information.
49
34
  """
50
35
 
51
36
  def __init__(
52
37
  self,
53
- transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH,
54
- lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
55
- mane_data_path: Path = MANE_SUMMARY_PATH,
38
+ transcript_file_path: Path | None = None,
39
+ lrg_refseqgene_path: Path | None = None,
40
+ mane_data_path: Path | None = None,
56
41
  db_url: str = UTA_DB_URL,
57
- sr: Optional[SeqRepo] = None,
42
+ sr: SeqRepo | None = None,
43
+ force_local_files: bool = False,
58
44
  ) -> None:
59
- """Initialize CoolSeqTool class
45
+ """Initialize CoolSeqTool class.
46
+
47
+ Initialization with default resource locations is straightforward:
48
+
49
+ >>> from cool_seq_tool.app import CoolSeqTool
50
+ >>> cst = CoolSeqTool()
51
+
52
+ By default, this will attempt to fetch the latest versions of static resources,
53
+ which means brief FTP and HTTPS requests to NCBI servers upon initialization.
54
+ To suppress this check and simply rely on the most recent locally-available
55
+ data:
56
+
57
+ >>> cst = CoolSeqTool(force_local_files=True)
58
+
59
+ Note that this will raise a FileNotFoundError if no locally-available data exists.
60
+
61
+ Paths to those files can also be passed explicitly to skip these checks:
62
+
63
+ >>> from pathlib import Path
64
+ >>> cst = CoolSeqTool(
65
+ ... lrg_refseqgene_path=Path("lrg_refseqgene_20240625.tsv"),
66
+ ... mane_data_path=Path("ncbi_mane_summary_1.3.txt"),
67
+ ... )
68
+
69
+ If not passed explicit arguments, these locations can also be set via
70
+ environment variables. See the :ref:`configuration <configuration>` section of
71
+ the docs for more information.
60
72
 
61
73
  :param transcript_file_path: The path to ``transcript_mapping.tsv``
62
74
  :param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
@@ -64,6 +76,8 @@ class CoolSeqTool:
64
76
  :param db_url: PostgreSQL connection URL
65
77
  Format: ``driver://user:password@host/database/schema``
66
78
  :param sr: SeqRepo instance. If this is not provided, will create a new instance
79
+ :param force_local_files: if ``True``, don't check for or try to acquire latest
80
+ versions of static data files -- just use most recently available, if any
67
81
  """
68
82
  if not sr:
69
83
  sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
@@ -71,9 +85,10 @@ class CoolSeqTool:
71
85
  self.transcript_mappings = TranscriptMappings(
72
86
  transcript_file_path=transcript_file_path,
73
87
  lrg_refseqgene_path=lrg_refseqgene_path,
88
+ from_local=force_local_files,
74
89
  )
75
90
  self.mane_transcript_mappings = ManeTranscriptMappings(
76
- mane_data_path=mane_data_path
91
+ mane_data_path=mane_data_path, from_local=force_local_files
77
92
  )
78
93
  self.uta_db = UtaDatabase(db_url=db_url)
79
94
  self.alignment_mapper = AlignmentMapper(
@@ -86,5 +101,8 @@ class CoolSeqTool:
86
101
  self.uta_db,
87
102
  )
88
103
  self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
89
- self.uta_db, self.mane_transcript
104
+ self.seqrepo_access,
105
+ self.uta_db,
106
+ self.mane_transcript,
107
+ self.mane_transcript_mappings,
90
108
  )
@@ -1,2 +1,3 @@
1
1
  """Module for extending clients"""
2
+
2
3
  from .seqrepo_access import SeqRepoAccess
@@ -1,10 +1,10 @@
1
1
  """Wrap SeqRepo to provide additional lookup and identification methods on top of basic
2
2
  dereferencing functions.
3
3
  """
4
+
4
5
  import logging
5
6
  from os import environ
6
7
  from pathlib import Path
7
- from typing import List, Optional, Tuple, Union
8
8
 
9
9
  from ga4gh.vrs.dataproxy import SeqRepoDataProxy
10
10
 
@@ -14,6 +14,9 @@ from cool_seq_tool.utils import get_inter_residue_pos
14
14
  logger = logging.getLogger(__name__)
15
15
 
16
16
 
17
+ SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")
18
+
19
+
17
20
  class SeqRepoAccess(SeqRepoDataProxy):
18
21
  """Provide a wrapper around the base SeqRepoDataProxy class from ``VRS-Python`` to
19
22
  provide additional lookup and identification methods.
@@ -24,10 +27,10 @@ class SeqRepoAccess(SeqRepoDataProxy):
24
27
  def get_reference_sequence(
25
28
  self,
26
29
  ac: str,
27
- start: Optional[int] = None,
28
- end: Optional[int] = None,
30
+ start: int | None = None,
31
+ end: int | None = None,
29
32
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
30
- ) -> Tuple[str, Optional[str]]:
33
+ ) -> tuple[str, str | None]:
31
34
  """Get reference sequence for an accession given a start and end position. If
32
35
  ``start`` and ``end`` are not given, returns the entire reference sequence.
33
36
 
@@ -93,8 +96,8 @@ class SeqRepoAccess(SeqRepoDataProxy):
93
96
  return sequence, None
94
97
 
95
98
  def translate_identifier(
96
- self, ac: str, target_namespaces: Optional[Union[str, List[str]]] = None
97
- ) -> Tuple[List[str], Optional[str]]:
99
+ self, ac: str, target_namespaces: str | list[str] | None = None
100
+ ) -> tuple[list[str], str | None]:
98
101
  """Return list of identifiers for accession.
99
102
 
100
103
  >>> from cool_seq_tool.handlers import SeqRepoAccess
@@ -120,9 +123,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
120
123
  else:
121
124
  return ga4gh_identifiers, None
122
125
 
123
- def translate_alias(
124
- self, input_str: str
125
- ) -> Tuple[List[Optional[str]], Optional[str]]:
126
+ def translate_alias(self, input_str: str) -> tuple[list[str | None], str | None]:
126
127
  """Get aliases for a given input.
127
128
 
128
129
  :param str input_str: Input to get aliases for
@@ -135,9 +136,7 @@ class SeqRepoAccess(SeqRepoDataProxy):
135
136
  logger.warning(msg)
136
137
  return [], msg
137
138
 
138
- def chromosome_to_acs(
139
- self, chromosome: str
140
- ) -> Tuple[Optional[List[str]], Optional[str]]:
139
+ def chromosome_to_acs(self, chromosome: str) -> tuple[list[str] | None, str | None]:
141
140
  """Get accessions for a chromosome
142
141
 
143
142
  :param chromosome: Chromosome number. Must be either 1-22, X, or Y
@@ -148,13 +147,12 @@ class SeqRepoAccess(SeqRepoDataProxy):
148
147
  tmp_acs, _ = self.translate_identifier(
149
148
  f"{assembly}:chr{chromosome}", target_namespaces="refseq"
150
149
  )
151
- for ac in tmp_acs:
152
- acs.append(ac.split("refseq:")[-1])
150
+ acs += [ac.split("refseq:")[-1] for ac in tmp_acs]
153
151
  if acs:
154
152
  return acs, None
155
153
  return None, f"{chromosome} is not a valid chromosome"
156
154
 
157
- def ac_to_chromosome(self, ac: str) -> Tuple[Optional[str], Optional[str]]:
155
+ def ac_to_chromosome(self, ac: str) -> tuple[str | None, str | None]:
158
156
  """Get chromosome for accession.
159
157
 
160
158
  :param str ac: Accession
@@ -1,4 +1,5 @@
1
1
  """Module for mapping data"""
2
+
2
3
  from .alignment import AlignmentMapper # noqa: I001
3
4
  from .mane_transcript import ManeTranscript
4
5
  from .exon_genomic_coords import ExonGenomicCoordsMapper
@@ -1,7 +1,6 @@
1
1
  """Module containing alignment methods for translating to and from different
2
2
  reference sequences.
3
3
  """
4
- from typing import Dict, Optional, Tuple
5
4
 
6
5
  from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
7
6
  from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode
@@ -34,7 +33,7 @@ class AlignmentMapper:
34
33
  p_start_pos: int,
35
34
  p_end_pos: int,
36
35
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
37
- ) -> Tuple[Optional[Dict], Optional[str]]:
36
+ ) -> tuple[dict | None, str | None]:
38
37
  """Translate protein representation to cDNA representation.
39
38
 
40
39
  :param p_ac: Protein RefSeq accession
@@ -83,7 +82,7 @@ class AlignmentMapper:
83
82
  "residue_mode": ResidueMode.INTER_RESIDUE.value,
84
83
  }, None
85
84
 
86
- async def _get_cds_start(self, c_ac: str) -> Tuple[Optional[int], Optional[str]]:
85
+ async def _get_cds_start(self, c_ac: str) -> tuple[int | None, str | None]:
87
86
  """Get CDS start for a given cDNA RefSeq accession
88
87
 
89
88
  :param c_ac: cDNA RefSeq accession
@@ -105,10 +104,10 @@ class AlignmentMapper:
105
104
  c_ac: str,
106
105
  c_start_pos: int,
107
106
  c_end_pos: int,
108
- cds_start: Optional[int] = None,
107
+ cds_start: int | None = None,
109
108
  residue_mode: ResidueMode = ResidueMode.RESIDUE,
110
109
  target_genome_assembly: bool = Assembly.GRCH38,
111
- ) -> Tuple[Optional[Dict], Optional[str]]:
110
+ ) -> tuple[dict | None, str | None]:
112
111
  """Translate cDNA representation to genomic representation
113
112
 
114
113
  :param c_ac: cDNA RefSeq accession
@@ -212,7 +211,7 @@ class AlignmentMapper:
212
211
  p_end_pos: int,
213
212
  residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE,
214
213
  target_genome_assembly: Assembly = Assembly.GRCH38,
215
- ) -> Tuple[Optional[Dict], Optional[str]]:
214
+ ) -> tuple[dict | None, str | None]:
216
215
  """Translate protein representation to genomic representation, by way of
217
216
  intermediary conversion into cDNA coordinates.
218
217