cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. cool_seq_tool/__init__.py +7 -11
  2. cool_seq_tool/app.py +44 -24
  3. cool_seq_tool/handlers/__init__.py +1 -0
  4. cool_seq_tool/handlers/seqrepo_access.py +27 -25
  5. cool_seq_tool/mappers/__init__.py +3 -1
  6. cool_seq_tool/mappers/alignment.py +5 -6
  7. cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
  8. cool_seq_tool/mappers/liftover.py +90 -0
  9. cool_seq_tool/mappers/mane_transcript.py +208 -113
  10. cool_seq_tool/resources/__init__.py +1 -0
  11. cool_seq_tool/resources/data_files.py +93 -0
  12. cool_seq_tool/resources/status.py +153 -0
  13. cool_seq_tool/schemas.py +92 -54
  14. cool_seq_tool/sources/__init__.py +1 -0
  15. cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
  16. cool_seq_tool/sources/transcript_mappings.py +41 -32
  17. cool_seq_tool/sources/uta_database.py +96 -249
  18. cool_seq_tool/utils.py +44 -4
  19. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
  20. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
  21. cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
  22. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
  23. cool_seq_tool/api.py +0 -42
  24. cool_seq_tool/data/__init__.py +0 -2
  25. cool_seq_tool/data/data_downloads.py +0 -89
  26. cool_seq_tool/paths.py +0 -28
  27. cool_seq_tool/routers/__init__.py +0 -16
  28. cool_seq_tool/routers/default.py +0 -125
  29. cool_seq_tool/routers/mane.py +0 -98
  30. cool_seq_tool/routers/mappings.py +0 -155
  31. cool_seq_tool/version.py +0 -2
  32. cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
  33. /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
  34. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1 @@
1
+ """Provide tools for acquiring and managing Cool-Seq-Tool data resources."""
@@ -0,0 +1,93 @@
1
+ """Fetch data files regarding transcript mapping and annotation."""
2
+
3
+ import logging
4
+ from enum import Enum
5
+ from importlib import resources
6
+ from os import environ
7
+ from pathlib import Path
8
+
9
+ from wags_tails import NcbiLrgRefSeqGeneData, NcbiManeSummaryData
10
+
11
+ _logger = logging.getLogger(__name__)
12
+
13
+
14
class DataFile(str, Enum):
    """Enumerate the file resources accepted by :py:meth:`get_data_file() <cool_seq_tool.resources.data_files.get_data_file>`."""

    TRANSCRIPT_MAPPINGS = "transcript_mappings"
    MANE_SUMMARY = "mane_summary"
    LRG_REFSEQGENE = "lrg_refseqgene"

    def lower(self) -> str:
        """Provide the member's underlying value in lower case.

        :return: lower-cased copy of the member value
        """
        member_value = self.value
        return str.lower(member_value)
27
+
28
+
29
def _bundled_transcript_mappings(_: bool):
    """Locate the transcript mapping TSV shipped inside this package.

    The single argument is accepted only so every fetcher shares one call shape;
    it is ignored.
    """
    return resources.files(__package__) / "transcript_mapping.tsv"


def _latest_mane_summary(from_local: bool):
    """Fetch the MANE summary file through ``NcbiManeSummaryData``.

    :param from_local: forwarded unchanged to ``get_latest()``
    :return: first element of the ``get_latest()`` result
    """
    provider = NcbiManeSummaryData(silent=True)
    return provider.get_latest(from_local=from_local)[0]


def _latest_lrg_refseqgene(from_local: bool):
    """Fetch the LRG RefSeqGene file through ``NcbiLrgRefSeqGeneData``.

    :param from_local: forwarded unchanged to ``get_latest()``
    :return: first element of the ``get_latest()`` result
    """
    provider = NcbiLrgRefSeqGeneData(silent=True)
    return provider.get_latest(from_local=from_local)[0]


# Each resource maps to a pair: (name of the env var that can override the file
# location, fetcher called with ``from_local`` when that env var is not set).
_resource_acquisition_params = {
    DataFile.TRANSCRIPT_MAPPINGS: (
        "TRANSCRIPT_MAPPINGS_PATH",
        _bundled_transcript_mappings,
    ),
    DataFile.MANE_SUMMARY: (
        "MANE_SUMMARY_PATH",
        _latest_mane_summary,
    ),
    DataFile.LRG_REFSEQGENE: (
        "LRG_REFSEQGENE_PATH",
        _latest_lrg_refseqgene,
    ),
}
47
+
48
+
49
def get_data_file(resource: DataFile, from_local: bool = False) -> Path:
    """Acquire Cool-Seq-Tool file dependency.

    Each resource can be defined using an environment variable:

    * ``DataFile.TRANSCRIPT_MAPPINGS`` -> ``TRANSCRIPT_MAPPINGS_PATH``
    * ``DataFile.MANE_SUMMARY`` -> ``MANE_SUMMARY_PATH``
    * ``DataFile.LRG_REFSEQGENE`` -> ``LRG_REFSEQGENE_PATH``

    Otherwise, this function falls back on default expected locations:

    * ``transcript_mapping.tsv`` is bundled with this library.
    * LRG RefseqGene and MANE summary files are acquired from NCBI using the `wags-tails <https://wags-tails.readthedocs.io/stable/>`_ package if unavailable locally, or out of date.

    :param resource: resource to fetch
    :param from_local: if ``True``, don't check for or acquire latest version -- just
        provide most recent locally available file and raise FileNotFoundError otherwise
    :return: path to file. Consuming functions can assume that it exists and is a file.
    :raise FileNotFoundError: if file location configured by env var doesn't exist
    :raise ValueError: if file location configured by env var isn't a file
    """
    # Each entry is (override env var name, fetcher callable taking ``from_local``).
    env_var_name, fetch_resource = _resource_acquisition_params[resource]
    configured_path = environ.get(env_var_name)
    if not configured_path:
        _logger.debug("Acquiring %s from default location/method.", resource)
        # use `from_local` param to optionally avoid unnecessary fetches
        path = fetch_resource(from_local)
    else:
        _logger.debug(
            "Acquiring %s via env var %s:%s", resource, env_var_name, configured_path
        )
        path = Path(configured_path)
        # Only build the (long) error text when validation actually fails.
        if not path.is_file():
            resource_name = env_var_name.replace("_", " ").title()
            loc_descr = (
                "the default file bundled with Cool-Seq-Tool"
                if resource == DataFile.TRANSCRIPT_MAPPINGS
                else "the default file pattern and possibly acquire from source via the `wags-tails` package"
            )
            remedy = f'Either unset to use {loc_descr}, or ensure that it is available at this location. See the "Environment configuration" section under the Usage page within the documentation for more: https://coolseqtool.readthedocs.io/stable/usage.html#environment-configuration'
            if not path.exists():
                msg = f"No {resource_name} file exists at path {configured_path} defined under env var {env_var_name}. {remedy}"
                raise FileNotFoundError(msg)
            # The path exists but is something other than a regular file (e.g. a
            # directory), so say that instead of claiming nothing exists there.
            msg = f"Path {configured_path} defined under env var {env_var_name} for {resource_name} exists but is not a file. {remedy}"
            raise ValueError(msg)
    _logger.debug("Acquired %s at %s", resource, path)
    return path
@@ -0,0 +1,153 @@
1
+ """Enable quick status check of Cool-Seq-Tool resources."""
2
+
3
+ import logging
4
+ from collections import namedtuple
5
+ from pathlib import Path
6
+
7
+ from agct._core import ChainfileError
8
+ from asyncpg import InvalidCatalogNameError, UndefinedTableError
9
+ from biocommons.seqrepo import SeqRepo
10
+
11
+ from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
12
+ from cool_seq_tool.mappers.liftover import LiftOver
13
+ from cool_seq_tool.resources.data_files import DataFile, get_data_file
14
+ from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase
15
+
16
+ _logger = logging.getLogger(__name__)
17
+
18
+
19
# Per-resource availability flags. The three file-resource fields are named after
# the lower-cased ``DataFile`` values, so results can be built from a dict keyed by
# those same names.
ResourceStatus = namedtuple(
    typename="ResourceStatus",
    field_names=[
        "uta",
        "seqrepo",
        DataFile.TRANSCRIPT_MAPPINGS.lower(),
        DataFile.MANE_SUMMARY.lower(),
        DataFile.LRG_REFSEQGENE.lower(),
        "liftover",
    ],
)
30
+
31
+
32
async def check_status(
    transcript_file_path: Path | None = None,
    lrg_refseqgene_path: Path | None = None,
    mane_data_path: Path | None = None,
    db_url: str = UTA_DB_URL,
    sr: SeqRepo | None = None,
    chain_file_37_to_38: str | None = None,
    chain_file_38_to_37: str | None = None,
) -> ResourceStatus:
    """Perform basic status checks on availability of required data resources.

    Arguments are intended to mirror arguments to :py:meth:`cool_seq_tool.app.CoolSeqTool.__init__`.

    Additional arguments are available for testing paths to specific chainfiles (same
    signature as :py:meth:`cool_seq_tool.mappers.liftover.LiftOver.__init__`).

    >>> from cool_seq_tool.resources.status import check_status
    >>> await check_status()
    ResourceStatus(uta=True, seqrepo=True, transcript_mappings=True, mane_summary=True, lrg_refseqgene=True, liftover=True)

    :param transcript_file_path: The path to ``transcript_mapping.tsv``
    :param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
    :param mane_data_path: Path to RefSeq MANE summary data
    :param db_url: PostgreSQL connection URL
        Format: ``driver://user:password@host/database/schema``
    :param sr: SeqRepo instance to test. If this is not provided, one is created
        from ``SEQREPO_ROOT_DIR``
    :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly. This
        is used for ``agct``. If this is not provided, will check to see if
        ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will allow
        ``agct`` to download a chain file from UCSC
    :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly. This
        is used for ``agct``. If this is not provided, will check to see if
        ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will allow
        ``agct`` to download a chain file from UCSC
    :return: boolean description of availability of each resource, given current
        environment configurations
    """
    file_path_params = {
        DataFile.TRANSCRIPT_MAPPINGS.lower(): transcript_file_path,
        DataFile.LRG_REFSEQGENE.lower(): lrg_refseqgene_path,
        DataFile.MANE_SUMMARY.lower(): mane_data_path,
    }

    # Every resource starts out unavailable and is flipped only after its own
    # check succeeds.
    status = {
        DataFile.TRANSCRIPT_MAPPINGS.lower(): False,
        DataFile.LRG_REFSEQGENE.lower(): False,
        DataFile.MANE_SUMMARY.lower(): False,
        "liftover": False,
        "uta": False,
        "seqrepo": False,
    }
    for r in DataFile:
        name_lower = r.lower()
        declared_path = file_path_params[name_lower]
        # An explicitly supplied path that is a real file is sufficient by itself.
        if declared_path and declared_path.is_file():
            status[name_lower] = True
            continue
        # Otherwise fall back to env var / default acquisition. That lookup never
        # consults ``declared_path``, so failures report the exception itself.
        try:
            get_data_file(r)
        except FileNotFoundError as e:
            _logger.error(
                "%s does not exist at configured location: %s", name_lower, e
            )
        except ValueError as e:
            _logger.error(
                "%s configured location is not a valid file: %s", name_lower, e
            )
        except Exception as e:
            _logger.critical(
                "Encountered unexpected error fetching %s: %s", name_lower, e
            )
        else:
            status[name_lower] = True

    try:
        LiftOver(
            chain_file_37_to_38=chain_file_37_to_38,
            chain_file_38_to_37=chain_file_38_to_37,
        )
    except (FileNotFoundError, ChainfileError) as e:
        _logger.error("agct converter setup failed: %s", e)
    except Exception as e:
        _logger.critical("Encountered unexpected error setting up agct: %s", e)
    else:
        status["liftover"] = True

    try:
        await UtaDatabase.create(db_url)
    except (OSError, InvalidCatalogNameError, UndefinedTableError) as e:
        # Report the URL that was actually tried, not the module-level default.
        _logger.error("Encountered error instantiating UTA at URI %s: %s", db_url, e)
    except Exception as e:
        _logger.critical(
            "Encountered unexpected error instantiating UTA from URI %s: %s",
            db_url,
            e,
        )
    else:
        status["uta"] = True

    try:
        if not sr:
            sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
        sra = SeqRepoAccess(sr)
        # Fetch one base as a smoke test; the except clauses below treat a
        # KeyError here as the sign of an unpopulated repository.
        sra.sr["NC_000001.11"][1000:1001]
    except OSError as e:
        _logger.error("Encountered error while instantiating SeqRepo: %s", e)
    except KeyError:
        _logger.error("SeqRepo data fetch test failed -- is it populated?")
    except Exception as e:
        _logger.critical("Encountered unexpected error setting up SeqRepo: %s", e)
    else:
        status["seqrepo"] = True

    structured_status = ResourceStatus(**status)
    if all(status.values()):
        _logger.info("Cool-Seq-Tool resource status passed")
    else:
        _logger.error(
            "Cool-Seq-Tool resource check failed. Result: %s", structured_status
        )
    return structured_status
cool_seq_tool/schemas.py CHANGED
@@ -1,19 +1,18 @@
1
1
  """Defines attribute constants, useful object structures, and API response schemas."""
2
+
2
3
  import datetime
3
- import re
4
4
  from enum import Enum, IntEnum
5
- from typing import List, Literal, Optional, Tuple, Union
5
+ from typing import Literal
6
6
 
7
7
  from pydantic import (
8
8
  BaseModel,
9
9
  ConfigDict,
10
10
  StrictInt,
11
11
  StrictStr,
12
- field_validator,
13
12
  model_validator,
14
13
  )
15
14
 
16
- from cool_seq_tool.version import __version__
15
+ from cool_seq_tool import __version__
17
16
 
18
17
  _now = str(datetime.datetime.now(tz=datetime.timezone.utc))
19
18
 
@@ -34,11 +33,16 @@ class Strand(IntEnum):
34
33
 
35
34
 
36
35
class Assembly(str, Enum):
    """Supported genomic assemblies. Members must be declared in ascending order."""

    GRCH37 = "GRCh37"
    GRCH38 = "GRCh38"

    @classmethod
    def values(cls) -> list[str]:
        """Collect the value of every member, in declaration (ascending) order."""
        members = tuple(cls)
        return [member.value for member in members]
45
+
42
46
 
43
47
  class TranscriptPriority(str, Enum):
44
48
  """Create Enum for Transcript Priority labels"""
@@ -52,10 +56,55 @@ class TranscriptPriority(str, Enum):
52
56
  class ResidueMode(str, Enum):
53
57
  """Create Enum for residue modes.
54
58
 
59
+ We typically prefer to operate in inter-residue coordinates, but users should be
60
+ careful to define the coordinate mode of their data when calling ``cool-seq-tool``
61
+ functions.
62
+
55
63
  | | C | | T | | G | |
56
64
  ZERO | | 0 | | 1 | | 2 | |
57
65
  RESIDUE | | 1 | | 2 | | 3 | |
58
66
  INTER_RESIDUE | 0 | | 1 | | 2 | | 3 |
67
+
68
+ .. tabularcolumns:: |L|C|C|C|C|C|C|C|
69
+ .. list-table::
70
+ :header-rows: 1
71
+
72
+ * -
73
+ -
74
+ - C
75
+ -
76
+ - T
77
+ -
78
+ - G
79
+ -
80
+ * - ``ZERO``
81
+ -
82
+ - 0
83
+ -
84
+ - 1
85
+ -
86
+ - 2
87
+ -
88
+ * - ``RESIDUE``
89
+ -
90
+ - 1
91
+ -
92
+ - 2
93
+ -
94
+ - 3
95
+ -
96
+ * - ``INTER_RESIDUE``
97
+ - 0
98
+ -
99
+ - 1
100
+ -
101
+ - 2
102
+ -
103
+ - 3
104
+
105
+
106
+ See "Conventions that promote reliable data sharing" and figure 3 within the
107
+ `Variation Representation Schema (VRS) paper <https://www.ncbi.nlm.nih.gov/pmc/articles/pmid/35311178/>`_ for further discussion.
59
108
  """
60
109
 
61
110
  ZERO = "zero"
@@ -70,12 +119,12 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
70
119
  class GenomicRequestBody(BaseModelForbidExtra):
71
120
  """Define constraints for genomic to transcript exon coordinates request body"""
72
121
 
73
- chromosome: Union[StrictStr, StrictInt]
74
- start: Optional[StrictInt] = None
75
- end: Optional[StrictInt] = None
76
- strand: Optional[Strand] = None
77
- transcript: Optional[StrictStr] = None
78
- gene: Optional[StrictStr] = None
122
+ chromosome: StrictStr | StrictInt
123
+ start: StrictInt | None = None
124
+ end: StrictInt | None = None
125
+ strand: Strand | None = None
126
+ transcript: StrictStr | None = None
127
+ gene: StrictStr | None = None
79
128
  residue_mode: ResidueMode = ResidueMode.RESIDUE
80
129
 
81
130
  @model_validator(mode="after")
@@ -106,11 +155,11 @@ class TranscriptRequestBody(BaseModelForbidExtra):
106
155
  """Define constraints for transcript exon to genomic coordinates request body"""
107
156
 
108
157
  transcript: StrictStr
109
- gene: Optional[StrictStr] = None
110
- exon_start: Optional[StrictInt] = None
111
- exon_start_offset: Optional[StrictInt] = 0
112
- exon_end: Optional[StrictInt] = None
113
- exon_end_offset: Optional[StrictInt] = 0
158
+ gene: StrictStr | None = None
159
+ exon_start: StrictInt | None = None
160
+ exon_start_offset: StrictInt | None = 0
161
+ exon_end: StrictInt | None = None
162
+ exon_end_offset: StrictInt | None = 0
114
163
 
115
164
  @model_validator(mode="after")
116
165
  def check_exon_start_and_exon_end(cls, values):
@@ -166,12 +215,12 @@ class GenomicData(BaseModelForbidExtra):
166
215
 
167
216
  gene: StrictStr
168
217
  chr: StrictStr
169
- start: Optional[StrictInt] = None # Genomic start position
170
- end: Optional[StrictInt] = None # Genomic end position
171
- exon_start: Optional[StrictInt] = None
172
- exon_start_offset: Optional[StrictInt] = 0
173
- exon_end: Optional[StrictInt] = None
174
- exon_end_offset: Optional[StrictInt] = 0
218
+ start: StrictInt | None = None # Genomic start position
219
+ end: StrictInt | None = None # Genomic end position
220
+ exon_start: StrictInt | None = None
221
+ exon_start_offset: StrictInt | None = 0
222
+ exon_end: StrictInt | None = None
223
+ exon_end_offset: StrictInt | None = 0
175
224
  transcript: StrictStr
176
225
  strand: Strand
177
226
 
@@ -226,20 +275,9 @@ class ServiceMeta(BaseModelForbidExtra):
226
275
  name: Literal["cool_seq_tool"] = "cool_seq_tool"
227
276
  version: StrictStr
228
277
  response_datetime: datetime.datetime
229
- url: Literal[
278
+ url: Literal["https://github.com/GenomicMedLab/cool-seq-tool"] = (
230
279
  "https://github.com/GenomicMedLab/cool-seq-tool"
231
- ] = "https://github.com/GenomicMedLab/cool-seq-tool"
232
-
233
- @field_validator("version")
234
- def validate_version(cls, v):
235
- """Check version matches semantic versioning regex pattern.
236
- https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
237
- """
238
- version_regex = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
239
- if not re.match(version_regex, v):
240
- msg = f"Invalid version {v}"
241
- raise ValueError(msg)
242
- return v
280
+ )
243
281
 
244
282
  model_config = ConfigDict(
245
283
  json_schema_extra={
@@ -256,8 +294,8 @@ class ServiceMeta(BaseModelForbidExtra):
256
294
  class TranscriptExonDataResponse(BaseModelForbidExtra):
257
295
  """Response model for Transcript Exon Data"""
258
296
 
259
- transcript_exon_data: Optional[TranscriptExonData] = None
260
- warnings: List[StrictStr] = []
297
+ transcript_exon_data: TranscriptExonData | None = None
298
+ warnings: list[StrictStr] = []
261
299
  service_meta: ServiceMeta
262
300
 
263
301
  model_config = ConfigDict(
@@ -287,8 +325,8 @@ class TranscriptExonDataResponse(BaseModelForbidExtra):
287
325
  class GenomicDataResponse(BaseModelForbidExtra):
288
326
  """Response model for Genomic Data"""
289
327
 
290
- genomic_data: Optional[GenomicData] = None
291
- warnings: List[StrictStr] = []
328
+ genomic_data: GenomicData | None = None
329
+ warnings: list[StrictStr] = []
292
330
  service_meta: ServiceMeta
293
331
 
294
332
  model_config = ConfigDict(
@@ -323,7 +361,7 @@ class MappedManeData(BaseModel):
323
361
 
324
362
  gene: StrictStr
325
363
  refseq: StrictStr
326
- ensembl: Optional[StrictStr] = None
364
+ ensembl: StrictStr | None = None
327
365
  strand: Strand
328
366
  status: TranscriptPriority
329
367
  alt_ac: StrictStr
@@ -338,7 +376,7 @@ class MappedManeData(BaseModel):
338
376
  "strand": Strand.NEGATIVE,
339
377
  "status": TranscriptPriority.MANE_PLUS_CLINICAL,
340
378
  "alt_ac": "NC_000007.13",
341
- "assembly": "GRCh37",
379
+ "assembly": Assembly.GRCH37,
342
380
  }
343
381
  }
344
382
  )
@@ -347,8 +385,8 @@ class MappedManeData(BaseModel):
347
385
  class MappedManeDataService(BaseModelForbidExtra):
348
386
  """Service model response for mapped mane data"""
349
387
 
350
- mapped_mane_data: Optional[MappedManeData] = None
351
- warnings: List[StrictStr] = []
388
+ mapped_mane_data: MappedManeData | None = None
389
+ warnings: list[StrictStr] = []
352
390
  service_meta: ServiceMeta
353
391
 
354
392
  model_config = ConfigDict(
@@ -361,7 +399,7 @@ class MappedManeDataService(BaseModelForbidExtra):
361
399
  "strand": Strand.NEGATIVE,
362
400
  "status": TranscriptPriority.MANE_PLUS_CLINICAL,
363
401
  "alt_ac": "NC_000007.13",
364
- "assembly": "GRCh37",
402
+ "assembly": Assembly.GRCH37,
365
403
  },
366
404
  "warnings": [],
367
405
  "service_meta": {
@@ -378,10 +416,10 @@ class MappedManeDataService(BaseModelForbidExtra):
378
416
  class ManeData(BaseModel):
379
417
  """Define mane data fields"""
380
418
 
381
- gene: Optional[StrictStr] = None
382
- refseq: Optional[StrictStr] = None
383
- ensembl: Optional[StrictStr] = None
384
- pos: Tuple[int, int]
419
+ gene: StrictStr | None = None
420
+ refseq: StrictStr | None = None
421
+ ensembl: StrictStr | None = None
422
+ pos: tuple[int, int]
385
423
  strand: Strand
386
424
  status: TranscriptPriority
387
425
 
@@ -402,8 +440,8 @@ class ManeData(BaseModel):
402
440
  class ManeDataService(BaseModelForbidExtra):
403
441
  """Service model response for getting mane data"""
404
442
 
405
- mane_data: Optional[ManeData] = None
406
- warnings: List[StrictStr] = []
443
+ mane_data: ManeData | None = None
444
+ warnings: list[StrictStr] = []
407
445
  service_meta: ServiceMeta
408
446
 
409
447
  model_config = ConfigDict(
@@ -457,8 +495,8 @@ class CdnaRepresentation(BaseModelForbidExtra):
457
495
  class ToCdnaService(BaseModelForbidExtra):
458
496
  """Service model response for protein -> cDNA"""
459
497
 
460
- c_data: Optional[CdnaRepresentation] = None
461
- warnings: List[StrictStr] = []
498
+ c_data: CdnaRepresentation | None = None
499
+ warnings: list[StrictStr] = []
462
500
  service_meta: ServiceMeta
463
501
 
464
502
  model_config = ConfigDict(
@@ -506,8 +544,8 @@ class GenomicRepresentation(BaseModelForbidExtra):
506
544
  class ToGenomicService(BaseModelForbidExtra):
507
545
  """Service model response for cDNA -> genomic"""
508
546
 
509
- g_data: Optional[GenomicRepresentation] = None
510
- warnings: List[StrictStr] = []
547
+ g_data: GenomicRepresentation | None = None
548
+ warnings: list[StrictStr] = []
511
549
  service_meta: ServiceMeta
512
550
 
513
551
  model_config = ConfigDict(
@@ -1,4 +1,5 @@
1
1
  """Module for providing basic acquisition/setup for the various resources"""
2
+
2
3
  from .mane_transcript_mappings import ManeTranscriptMappings
3
4
  from .transcript_mappings import TranscriptMappings
4
5
  from .uta_database import UtaDatabase
@@ -1,15 +1,15 @@
1
1
  """Provide fast tabular access to MANE summary file. Enables retrieval of associated
2
2
  MANE transcripts for gene symbols, genomic positions, or transcript accessions.
3
3
  """
4
+
4
5
  import logging
5
6
  from pathlib import Path
6
- from typing import Dict, List
7
7
 
8
8
  import polars as pl
9
9
 
10
- from cool_seq_tool.paths import MANE_SUMMARY_PATH
10
+ from cool_seq_tool.resources.data_files import DataFile, get_data_file
11
11
 
12
- logger = logging.getLogger(__name__)
12
+ _logger = logging.getLogger(__name__)
13
13
 
14
14
 
15
15
  class ManeTranscriptMappings:
@@ -22,11 +22,18 @@ class ManeTranscriptMappings:
22
22
  See the `NCBI MANE page <https://www.ncbi.nlm.nih.gov/refseq/MANE/>`_ for more information.
23
23
  """
24
24
 
25
- def __init__(self, mane_data_path: Path = MANE_SUMMARY_PATH) -> None:
25
+ def __init__(
26
+ self, mane_data_path: Path | None = None, from_local: bool = False
27
+ ) -> None:
26
28
  """Initialize the MANE Transcript mappings class.
27
29
 
28
- :param Path mane_data_path: Path to RefSeq MANE summary data
30
+ :param mane_data_path: Path to RefSeq MANE summary data
31
+ :param from_local: if ``True``, don't check for or acquire latest version --
32
+ just provide most recent locally available file, if possible, and raise
33
+ error otherwise
29
34
  """
35
+ if not mane_data_path:
36
+ mane_data_path = get_data_file(DataFile.MANE_SUMMARY, from_local)
30
37
  self.mane_data_path = mane_data_path
31
38
  self.df = self._load_mane_transcript_data()
32
39
 
@@ -37,7 +44,7 @@ class ManeTranscriptMappings:
37
44
  """
38
45
  return pl.read_csv(self.mane_data_path, separator="\t")
39
46
 
40
- def get_gene_mane_data(self, gene_symbol: str) -> List[Dict]:
47
+ def get_gene_mane_data(self, gene_symbol: str) -> list[dict]:
41
48
  """Return MANE Transcript data for a gene.
42
49
 
43
50
  >>> from cool_seq_tool.sources import ManeTranscriptMappings
@@ -56,7 +63,7 @@ class ManeTranscriptMappings:
56
63
  data = self.df.filter(pl.col("symbol") == gene_symbol.upper())
57
64
 
58
65
  if len(data) == 0:
59
- logger.warning(
66
+ _logger.warning(
60
67
  "Unable to get MANE Transcript data for gene: %s", gene_symbol
61
68
  )
62
69
  return []
@@ -64,7 +71,7 @@ class ManeTranscriptMappings:
64
71
  data = data.sort(by="MANE_status", descending=True)
65
72
  return data.to_dicts()
66
73
 
67
- def get_mane_from_transcripts(self, transcripts: List[str]) -> List[Dict]:
74
+ def get_mane_from_transcripts(self, transcripts: list[str]) -> list[dict]:
68
75
  """Get mane transcripts from a list of transcripts
69
76
 
70
77
  :param List[str] transcripts: RefSeq transcripts on c. coordinate
@@ -77,7 +84,7 @@ class ManeTranscriptMappings:
77
84
 
78
85
  def get_mane_data_from_chr_pos(
79
86
  self, alt_ac: str, start: int, end: int
80
- ) -> List[Dict]:
87
+ ) -> list[dict]:
81
88
  """Get MANE data given a GRCh38 genomic position.
82
89
 
83
90
  :param str alt_ac: NC Accession