cool-seq-tool 0.4.0.dev3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. cool_seq_tool/__init__.py +7 -11
  2. cool_seq_tool/app.py +44 -24
  3. cool_seq_tool/handlers/__init__.py +1 -0
  4. cool_seq_tool/handlers/seqrepo_access.py +27 -25
  5. cool_seq_tool/mappers/__init__.py +3 -1
  6. cool_seq_tool/mappers/alignment.py +5 -6
  7. cool_seq_tool/mappers/exon_genomic_coords.py +139 -124
  8. cool_seq_tool/mappers/liftover.py +90 -0
  9. cool_seq_tool/mappers/mane_transcript.py +208 -113
  10. cool_seq_tool/resources/__init__.py +1 -0
  11. cool_seq_tool/resources/data_files.py +93 -0
  12. cool_seq_tool/resources/status.py +153 -0
  13. cool_seq_tool/schemas.py +92 -54
  14. cool_seq_tool/sources/__init__.py +1 -0
  15. cool_seq_tool/sources/mane_transcript_mappings.py +16 -9
  16. cool_seq_tool/sources/transcript_mappings.py +41 -32
  17. cool_seq_tool/sources/uta_database.py +96 -249
  18. cool_seq_tool/utils.py +44 -4
  19. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/LICENSE +1 -1
  20. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/METADATA +16 -11
  21. cool_seq_tool-0.5.0.dist-info/RECORD +24 -0
  22. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/WHEEL +1 -1
  23. cool_seq_tool/api.py +0 -42
  24. cool_seq_tool/data/__init__.py +0 -2
  25. cool_seq_tool/data/data_downloads.py +0 -89
  26. cool_seq_tool/paths.py +0 -28
  27. cool_seq_tool/routers/__init__.py +0 -16
  28. cool_seq_tool/routers/default.py +0 -125
  29. cool_seq_tool/routers/mane.py +0 -98
  30. cool_seq_tool/routers/mappings.py +0 -155
  31. cool_seq_tool/version.py +0 -2
  32. cool_seq_tool-0.4.0.dev3.dist-info/RECORD +0 -29
  33. /cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
  34. {cool_seq_tool-0.4.0.dev3.dist-info → cool_seq_tool-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  """Provide mappings between gene symbols and RefSeq + Ensembl transcript accessions."""
2
+
2
3
  import csv
3
4
  from pathlib import Path
4
- from typing import Dict, List, Optional
5
5
 
6
- from cool_seq_tool.paths import LRG_REFSEQGENE_PATH, TRANSCRIPT_MAPPINGS_PATH
6
+ from cool_seq_tool.resources.data_files import DataFile, get_data_file
7
7
 
8
8
 
9
9
  class TranscriptMappings:
10
10
  """Provide mappings between gene symbols and RefSeq + Ensembl transcript accessions.
11
11
 
12
- Uses ``LRG_RefSeqGene`` and ``transcript_mappings.csv``, which will automatically
12
+ Uses ``LRG_RefSeqGene`` and ``transcript_mappings.tsv``, which will automatically
13
13
  be acquired if they aren't already available. See the
14
14
  :ref:`configuration <configuration>` section in the documentation for information
15
15
  about manual acquisition of data.
@@ -21,44 +21,53 @@ class TranscriptMappings:
21
21
 
22
22
  def __init__(
23
23
  self,
24
- transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH,
25
- lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH,
24
+ transcript_file_path: Path | None = None,
25
+ lrg_refseqgene_path: Path | None = None,
26
+ from_local: bool = False,
26
27
  ) -> None:
27
28
  """Initialize the transcript mappings class.
28
29
 
29
30
  :param transcript_file_path: Path to transcript mappings file
30
31
  :param lrg_refseqgene_path: Path to LRG RefSeqGene file
32
+ :param from_local: if ``True``, don't check for or acquire latest version --
33
+ just provide most recent locally available file, if possible, and raise
34
+ error otherwise
31
35
  """
32
36
  # ENSP <-> Gene Symbol
33
- self.ensembl_protein_version_for_gene_symbol: Dict[str, List[str]] = {}
34
- self.ensembl_protein_version_to_gene_symbol: Dict[str, str] = {}
35
- self.ensembl_protein_for_gene_symbol: Dict[str, List[str]] = {}
36
- self.ensembl_protein_to_gene_symbol: Dict[str, str] = {}
37
+ self.ensembl_protein_version_for_gene_symbol: dict[str, list[str]] = {}
38
+ self.ensembl_protein_version_to_gene_symbol: dict[str, str] = {}
39
+ self.ensembl_protein_for_gene_symbol: dict[str, list[str]] = {}
40
+ self.ensembl_protein_to_gene_symbol: dict[str, str] = {}
37
41
 
38
42
  # Gene Symbol <-> ENST
39
- self.ensembl_transcript_version_for_gene_symbol: Dict[str, List[str]] = {}
40
- self.ensembl_transcript_version_to_gene_symbol: Dict[str, str] = {}
41
- self.ensembl_transcript_for_gene_symbol: Dict[str, List[str]] = {}
42
- self.ensembl_transcript_to_gene_symbol: Dict[str, str] = {}
43
+ self.ensembl_transcript_version_for_gene_symbol: dict[str, list[str]] = {}
44
+ self.ensembl_transcript_version_to_gene_symbol: dict[str, str] = {}
45
+ self.ensembl_transcript_for_gene_symbol: dict[str, list[str]] = {}
46
+ self.ensembl_transcript_to_gene_symbol: dict[str, str] = {}
43
47
 
44
48
  # NP_ <-> Gene Symbol
45
- self.refseq_protein_for_gene_symbol: Dict[str, List[str]] = {}
46
- self.refseq_protein_to_gene_symbol: Dict[str, str] = {}
49
+ self.refseq_protein_for_gene_symbol: dict[str, list[str]] = {}
50
+ self.refseq_protein_to_gene_symbol: dict[str, str] = {}
47
51
 
48
52
  # NM_ <-> Gene Symbol
49
- self.refseq_rna_version_for_gene_symbol: Dict[str, List[str]] = {}
50
- self.refseq_rna_version_to_gene_symbol: Dict[str, str] = {}
51
- self.refseq_rna_for_gene_symbol: Dict[str, List[str]] = {}
52
- self.refseq_rna_to_gene_symbol: Dict[str, str] = {}
53
+ self.refseq_rna_version_for_gene_symbol: dict[str, list[str]] = {}
54
+ self.refseq_rna_version_to_gene_symbol: dict[str, str] = {}
55
+ self.refseq_rna_for_gene_symbol: dict[str, list[str]] = {}
56
+ self.refseq_rna_to_gene_symbol: dict[str, str] = {}
53
57
 
54
58
  # NP -> NM
55
- self.np_to_nm: Dict[str, str] = {}
59
+ self.np_to_nm: dict[str, str] = {}
56
60
 
57
61
  # ENSP -> ENST
58
- self.ensp_to_enst: Dict[str, str] = {}
62
+ self.ensp_to_enst: dict[str, str] = {}
59
63
 
60
- self._load_transcript_mappings_data(transcript_file_path)
61
- self._load_refseq_gene_symbol_data(lrg_refseqgene_path)
64
+ self._load_transcript_mappings_data(
65
+ transcript_file_path
66
+ or get_data_file(DataFile.TRANSCRIPT_MAPPINGS, from_local)
67
+ )
68
+ self._load_refseq_gene_symbol_data(
69
+ lrg_refseqgene_path or get_data_file(DataFile.LRG_REFSEQGENE, from_local)
70
+ )
62
71
 
63
72
  def _load_transcript_mappings_data(self, transcript_file_path: Path) -> None:
64
73
  """Load transcript mappings file to dictionaries.
@@ -99,9 +108,9 @@ class TranscriptMappings:
99
108
  ).append(transcript)
100
109
  self.ensembl_transcript_to_gene_symbol[transcript] = gene
101
110
  if versioned_transcript and versioned_protein_transcript:
102
- self.ensp_to_enst[
103
- versioned_protein_transcript
104
- ] = versioned_transcript
111
+ self.ensp_to_enst[versioned_protein_transcript] = (
112
+ versioned_transcript
113
+ )
105
114
 
106
115
  def _load_refseq_gene_symbol_data(self, lrg_refseqgene_path: Path) -> None:
107
116
  """Load data from RefSeq Gene Symbol file to dictionaries.
@@ -134,7 +143,7 @@ class TranscriptMappings:
134
143
  if refseq_transcript and rna_transcript:
135
144
  self.np_to_nm[refseq_transcript] = rna_transcript
136
145
 
137
- def protein_transcripts(self, identifier: str) -> List[str]:
146
+ def protein_transcripts(self, identifier: str) -> list[str]:
138
147
  """Return a list of protein transcripts for a gene symbol.
139
148
 
140
149
  >>> from cool_seq_tool.sources import TranscriptMappings
@@ -154,7 +163,7 @@ class TranscriptMappings:
154
163
  protein_transcripts += self.refseq_protein_for_gene_symbol.get(identifier, "")
155
164
  return list(set(protein_transcripts))
156
165
 
157
- def coding_dna_transcripts(self, identifier: str) -> List[str]:
166
+ def coding_dna_transcripts(self, identifier: str) -> list[str]:
158
167
  """Return transcripts from a coding dna refseq for a gene symbol.
159
168
 
160
169
  :param identifier: Gene identifier to find transcripts for
@@ -172,7 +181,7 @@ class TranscriptMappings:
172
181
  )
173
182
  return list(set(genomic_transcripts))
174
183
 
175
- def get_gene_symbol_from_ensembl_protein(self, q: str) -> Optional[str]:
184
+ def get_gene_symbol_from_ensembl_protein(self, q: str) -> str | None:
176
185
  """Return the gene symbol for an Ensembl Protein.
177
186
 
178
187
  :param q: ensembl protein accession
@@ -184,7 +193,7 @@ class TranscriptMappings:
184
193
  gene_symbol = self.ensembl_protein_to_gene_symbol.get(q)
185
194
  return gene_symbol
186
195
 
187
- def get_gene_symbol_from_refeq_protein(self, q: str) -> Optional[str]:
196
+ def get_gene_symbol_from_refeq_protein(self, q: str) -> str | None:
188
197
  """Return the gene symbol for a Refseq Protein.
189
198
 
190
199
  :param q: RefSeq protein accession
@@ -192,7 +201,7 @@ class TranscriptMappings:
192
201
  """
193
202
  return self.refseq_protein_to_gene_symbol.get(q)
194
203
 
195
- def get_gene_symbol_from_refseq_rna(self, q: str) -> Optional[str]:
204
+ def get_gene_symbol_from_refseq_rna(self, q: str) -> str | None:
196
205
  """Return gene symbol for a Refseq RNA Transcript.
197
206
 
198
207
  :param q: RefSeq RNA transcript accession
@@ -204,7 +213,7 @@ class TranscriptMappings:
204
213
  gene_symbol = self.refseq_rna_to_gene_symbol.get(q)
205
214
  return gene_symbol
206
215
 
207
- def get_gene_symbol_from_ensembl_transcript(self, q: str) -> Optional[str]:
216
+ def get_gene_symbol_from_ensembl_transcript(self, q: str) -> str | None:
208
217
  """Return gene symbol for an Ensembl Transcript.
209
218
 
210
219
  :param q: Ensembl transcript accession