cool-seq-tool 0.4.0.dev0__py3-none-any.whl → 0.4.0.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cool_seq_tool/data/__init__.py +1 -1
- cool_seq_tool/data/data_downloads.py +19 -13
- cool_seq_tool/data/transcript_mapping.tsv +256226 -0
- cool_seq_tool/handlers/seqrepo_access.py +2 -4
- cool_seq_tool/mappers/exon_genomic_coords.py +25 -26
- cool_seq_tool/mappers/mane_transcript.py +112 -106
- cool_seq_tool/routers/default.py +7 -9
- cool_seq_tool/routers/mane.py +2 -2
- cool_seq_tool/schemas.py +30 -21
- cool_seq_tool/sources/mane_transcript_mappings.py +1 -1
- cool_seq_tool/sources/transcript_mappings.py +13 -16
- cool_seq_tool/sources/uta_database.py +134 -153
- cool_seq_tool/utils.py +5 -2
- cool_seq_tool/version.py +1 -1
- {cool_seq_tool-0.4.0.dev0.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/METADATA +7 -6
- cool_seq_tool-0.4.0.dev2.dist-info/RECORD +29 -0
- cool_seq_tool-0.4.0.dev0.dist-info/RECORD +0 -28
- {cool_seq_tool-0.4.0.dev0.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/LICENSE +0 -0
- {cool_seq_tool-0.4.0.dev0.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/WHEEL +0 -0
- {cool_seq_tool-0.4.0.dev0.dist-info → cool_seq_tool-0.4.0.dev2.dist-info}/top_level.txt +0 -0
cool_seq_tool/data/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
"""Module for data"""
|
2
|
-
from .data_downloads import DataDownload
|
2
|
+
from .data_downloads import DataDownload
|
cool_seq_tool/data/data_downloads.py
CHANGED
@@ -4,7 +4,6 @@ import gzip
|
|
4
4
|
import logging
|
5
5
|
import shutil
|
6
6
|
from ftplib import FTP
|
7
|
-
from os import remove
|
8
7
|
from pathlib import Path
|
9
8
|
|
10
9
|
from dateutil import parser
|
@@ -38,18 +37,20 @@ class DataDownload:
|
|
38
37
|
files = ftp.nlst()
|
39
38
|
mane_summary_file = [f for f in files if f.endswith(".summary.txt.gz")]
|
40
39
|
if not mane_summary_file:
|
41
|
-
|
40
|
+
msg = "Unable to download MANE summary data"
|
41
|
+
raise Exception(msg)
|
42
42
|
mane_summary_file = mane_summary_file[0]
|
43
43
|
self._mane_summary_path = self._data_dir / mane_summary_file[:-3]
|
44
44
|
mane_data_path = self._data_dir / mane_summary_file
|
45
45
|
if not self._mane_summary_path.exists():
|
46
46
|
logger.info("Downloading MANE summary file from NCBI.")
|
47
|
-
with open(
|
47
|
+
with mane_data_path.open("wb") as fp:
|
48
48
|
ftp.retrbinary(f"RETR {mane_summary_file}", fp.write)
|
49
|
-
with gzip.open(
|
50
|
-
|
51
|
-
|
52
|
-
|
49
|
+
with gzip.open(
|
50
|
+
mane_data_path, "rb"
|
51
|
+
) as f_in, self._mane_summary_path.open("wb") as f_out:
|
52
|
+
shutil.copyfileobj(f_in, f_out)
|
53
|
+
mane_data_path.unlink()
|
53
54
|
logger.info("MANE summary file download complete.")
|
54
55
|
return self._mane_summary_path
|
55
56
|
|
@@ -66,18 +67,23 @@ class DataDownload:
|
|
66
67
|
ftp_file_path = f"{ftp_dir_path}{lrg_refseqgene_file}"
|
67
68
|
timestamp = ftp.voidcmd(f"MDTM {ftp_file_path}")[4:].strip()
|
68
69
|
date = str(parser.parse(timestamp)).split()[0]
|
69
|
-
version =
|
70
|
+
version = (
|
71
|
+
datetime.datetime.strptime(date, "%Y-%m-%d")
|
72
|
+
.astimezone(tz=datetime.timezone.utc)
|
73
|
+
.strftime("%Y%m%d")
|
74
|
+
)
|
70
75
|
fn_versioned = f"{lrg_refseqgene_file}_{version}"
|
71
76
|
lrg_refseqgene_path = self._data_dir / lrg_refseqgene_file
|
72
77
|
self._lrg_refseqgene_path = self._data_dir / fn_versioned
|
73
78
|
if not self._lrg_refseqgene_path.exists():
|
74
79
|
logger.info("Downloading LRG RefSeq data from NCBI.")
|
75
80
|
ftp.cwd(ftp_dir_path)
|
76
|
-
with open(
|
81
|
+
with lrg_refseqgene_path.open("wb") as fp:
|
77
82
|
ftp.retrbinary(f"RETR {lrg_refseqgene_file}", fp.write)
|
78
|
-
with open(
|
79
|
-
|
80
|
-
|
81
|
-
|
83
|
+
with lrg_refseqgene_path.open(
|
84
|
+
"rb"
|
85
|
+
) as f_in, self._lrg_refseqgene_path.open("wb") as f_out:
|
86
|
+
shutil.copyfileobj(f_in, f_out)
|
87
|
+
lrg_refseqgene_path.unlink()
|
82
88
|
logger.info("LRG RefSeq data download complete.")
|
83
89
|
return self._lrg_refseqgene_path
|