cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. cool_seq_tool/__init__.py +7 -11
  2. cool_seq_tool/app.py +44 -24
  3. cool_seq_tool/handlers/__init__.py +1 -0
  4. cool_seq_tool/handlers/seqrepo_access.py +27 -25
  5. cool_seq_tool/mappers/__init__.py +3 -1
  6. cool_seq_tool/mappers/alignment.py +5 -6
  7. cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
  8. cool_seq_tool/mappers/liftover.py +90 -0
  9. cool_seq_tool/mappers/mane_transcript.py +208 -113
  10. cool_seq_tool/resources/__init__.py +1 -0
  11. cool_seq_tool/resources/data_files.py +93 -0
  12. cool_seq_tool/resources/status.py +153 -0
  13. cool_seq_tool/schemas.py +92 -54
  14. cool_seq_tool/sources/__init__.py +1 -0
  15. cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
  16. cool_seq_tool/sources/transcript_mappings.py +41 -32
  17. cool_seq_tool/sources/uta_database.py +96 -249
  18. cool_seq_tool/utils.py +44 -4
  19. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
  20. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
  21. cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
  22. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
  23. cool_seq_tool/api.py +0 -42
  24. cool_seq_tool/data/__init__.py +0 -2
  25. cool_seq_tool/data/data_downloads.py +0 -89
  26. cool_seq_tool/paths.py +0 -28
  27. cool_seq_tool/routers/__init__.py +0 -16
  28. cool_seq_tool/routers/default.py +0 -125
  29. cool_seq_tool/routers/mane.py +0 -98
  30. cool_seq_tool/routers/mappings.py +0 -155
  31. cool_seq_tool/version.py +0 -2
  32. cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
  33. /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
  34. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,90 @@
1
+ """Module for mapping to/from human genome assemblies.
2
+
3
+ Currently only supports GRCh37 <-> GRCh38
4
+ """
5
+
6
+ import logging
7
+ from os import environ
8
+
9
+ from agct import Converter, Genome
10
+
11
+ from cool_seq_tool.schemas import Assembly
12
+ from cool_seq_tool.utils import process_chromosome_input
13
+
14
_logger = logging.getLogger(__name__)

# Optional chain file paths handed to agct, looked up in the environment once
# at import time. Each constant is ``None`` when its variable is not set.
LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38")
LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37")
20
+
21
+
22
class LiftOver:
    """Class for mapping to/from human genome assemblies

    Currently only supports GRCh37 <-> GRCh38
    """

    def __init__(
        self,
        chain_file_37_to_38: str | None = None,
        chain_file_38_to_37: str | None = None,
    ) -> None:
        """Initialize liftover class

        One ``agct`` converter is built per direction and stored on the instance
        as ``from_37_to_38`` and ``from_38_to_37``.

        :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly.
            This is used for ``agct``. If this is not provided, will check to see
            if ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will
            allow ``agct`` to download a chain file from UCSC
        :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly.
            This is used for ``agct``. If this is not provided, will check to see
            if ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will
            allow ``agct`` to download a chain file from UCSC
        """
        # An explicit argument wins; a falsy argument falls back to the
        # module-level value read from the environment (which may be None).
        self.from_37_to_38 = Converter(
            chainfile=chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38,
            from_db=Genome.HG19,
            to_db=Genome.HG38,
        )
        self.from_38_to_37 = Converter(
            chainfile=chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37,
            from_db=Genome.HG38,
            to_db=Genome.HG19,
        )

    def get_liftover(
        self, chromosome: str, pos: int, liftover_to_assembly: Assembly
    ) -> tuple[str, int] | None:
        """Get new genome assembly data for a position on a chromosome.

        Use a UCSC-style chromosome name:

        >>> from cool_seq_tool.mappers import LiftOver
        >>> from cool_seq_tool.schemas import Assembly
        >>> lo = LiftOver()
        >>> lo.get_liftover("chr7", 140453136, Assembly.GRCH38)
        ('chr7', 140753336)

        Chromosome names can also be NCBI-style, without prefixes:

        >>> lo.get_liftover("7", 140453136, Assembly.GRCH38)
        ('chr7', 140753336)

        :param chromosome: The chromosome number, e.g. ``"chr7"``, ``"chrX"``, ``"5"``.
        :param pos: Position on the chromosome
        :param liftover_to_assembly: Assembly to liftover to
        :return: Target chromosome and target position for assembly, or ``None``
            if the target assembly is not supported or the conversion yields
            no result
        """
        chromosome = process_chromosome_input(chromosome, "LiftOver.get_liftover()")

        # Pick the converter whose *target* is the requested assembly.
        if liftover_to_assembly == Assembly.GRCH38:
            converter = self.from_37_to_38
        elif liftover_to_assembly == Assembly.GRCH37:
            converter = self.from_38_to_37
        else:
            # Return right away: no lookup was attempted, so the
            # "does not exist" warning below would be misleading here.
            _logger.warning("%s assembly not supported", liftover_to_assembly)
            return None

        liftover = converter.convert_coordinate(chromosome, pos)
        if not liftover:
            _logger.warning("%s does not exist on %s", pos, chromosome)
            return None

        # Only the first result is used; its first two items are the target
        # chromosome and position.
        return liftover[0][:2]