cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/__init__.py +7 -11
- cool_seq_tool/app.py +44 -24
- cool_seq_tool/handlers/__init__.py +1 -0
- cool_seq_tool/handlers/seqrepo_access.py +27 -25
- cool_seq_tool/mappers/__init__.py +3 -1
- cool_seq_tool/mappers/alignment.py +5 -6
- cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
- cool_seq_tool/mappers/liftover.py +90 -0
- cool_seq_tool/mappers/mane_transcript.py +208 -113
- cool_seq_tool/resources/__init__.py +1 -0
- cool_seq_tool/resources/data_files.py +93 -0
- cool_seq_tool/resources/status.py +153 -0
- cool_seq_tool/schemas.py +92 -54
- cool_seq_tool/sources/__init__.py +1 -0
- cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
- cool_seq_tool/sources/transcript_mappings.py +41 -32
- cool_seq_tool/sources/uta_database.py +96 -249
- cool_seq_tool/utils.py +44 -4
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
- cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
- cool_seq_tool/api.py +0 -42
- cool_seq_tool/data/__init__.py +0 -2
- cool_seq_tool/data/data_downloads.py +0 -89
- cool_seq_tool/paths.py +0 -28
- cool_seq_tool/routers/__init__.py +0 -16
- cool_seq_tool/routers/default.py +0 -125
- cool_seq_tool/routers/mane.py +0 -98
- cool_seq_tool/routers/mappings.py +0 -155
- cool_seq_tool/version.py +0 -2
- cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
- /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
- {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
"""Module for mapping to/from human genome assemblies.
|
2
|
+
|
3
|
+
Currently only supports GRCh37 <-> GRCh38
|
4
|
+
"""
|
5
|
+
|
6
|
+
import logging
|
7
|
+
from os import environ
|
8
|
+
|
9
|
+
from agct import Converter, Genome
|
10
|
+
|
11
|
+
from cool_seq_tool.schemas import Assembly
|
12
|
+
from cool_seq_tool.utils import process_chromosome_input
|
13
|
+
|
14
|
+
# Optional filesystem paths to the chain files handed to agct, looked up in the
# environment once at import time. Either name is ``None`` when its variable is
# not set.
LIFTOVER_CHAIN_37_TO_38, LIFTOVER_CHAIN_38_TO_37 = (
    environ.get("LIFTOVER_CHAIN_37_TO_38"),
    environ.get("LIFTOVER_CHAIN_38_TO_37"),
)


# Module-scoped logger, named after this module's import path
_logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
class LiftOver:
    """Class for mapping to/from human genome assemblies

    Currently only supports GRCh37 <-> GRCh38. One ``agct`` converter is created
    per direction when the instance is initialized and reused for every lookup.
    """

    def __init__(
        self,
        chain_file_37_to_38: str | None = None,
        chain_file_38_to_37: str | None = None,
    ) -> None:
        """Initialize liftover class

        :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
            This is used for ``agct``. If this is not provided, will check to see
            if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
            allow ``agct`` to download a chain file from UCSC
        :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
            This is used for ``agct``. If this is not provided, will check to see
            if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
            allow ``agct`` to download a chain file from UCSC
        """
        # An explicit argument wins over the environment variable; an empty
        # string is treated the same as "not provided".
        self.from_37_to_38 = Converter(
            chainfile=chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38,
            from_db=Genome.HG19,
            to_db=Genome.HG38,
        )
        self.from_38_to_37 = Converter(
            chainfile=chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37,
            from_db=Genome.HG38,
            to_db=Genome.HG19,
        )

    def get_liftover(
        self, chromosome: str, pos: int, liftover_to_assembly: Assembly
    ) -> tuple[str, int] | None:
        """Get new genome assembly data for a position on a chromosome.

        Use a UCSC-style chromosome name:

        >>> from cool_seq_tool.mappers import LiftOver
        >>> from cool_seq_tool.schemas import Assembly
        >>> lo = LiftOver()
        >>> lo.get_liftover("chr7", 140453136, Assembly.GRCH38)
        ('chr7', 140753336)

        Chromosome names can also be NCBI-style, without prefixes:

        >>> lo.get_liftover("7", 140453136, Assembly.GRCH38)
        ('chr7', 140753336)

        :param chromosome: The chromosome number, e.g. ``"chr7"``, ``"chrX"``, ``"5"``.
        :param pos: Position on the chromosome
        :param liftover_to_assembly: Assembly to liftover to. The input position is
            taken to be on the other supported assembly.
        :return: Target chromosome and target position for assembly. ``None`` if the
            target assembly is not supported or the position could not be converted;
            a warning is logged in either case.
        """
        # Normalize first so that input handling is identical for every
        # target assembly, including unsupported ones.
        chromosome = process_chromosome_input(chromosome, "LiftOver.get_liftover()")

        if liftover_to_assembly == Assembly.GRCH38:
            converter = self.from_37_to_38
        elif liftover_to_assembly == Assembly.GRCH37:
            converter = self.from_38_to_37
        else:
            # Return right away: falling through would also log a misleading
            # "does not exist" warning about a position that was never looked up.
            _logger.warning("%s assembly not supported", liftover_to_assembly)
            return None

        liftover = converter.convert_coordinate(chromosome, pos)
        if not liftover:
            _logger.warning("%s does not exist on %s", pos, chromosome)
            return None

        # Only the first match is reported, trimmed to its first two fields
        # (chromosome, position).
        return liftover[0][:2]
|