XspecT 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- xspect/definitions.py +7 -0
- xspect/{download_filters.py → download_models.py} +2 -2
- xspect/fastapi.py +2 -2
- xspect/main.py +61 -8
- xspect/mlst_feature/__init__.py +0 -0
- xspect/mlst_feature/mlst_helper.py +155 -0
- xspect/mlst_feature/pub_mlst_handler.py +119 -0
- xspect/model_management.py +3 -4
- xspect/models/probabilistic_filter_mlst_model.py +287 -0
- xspect/models/probabilistic_filter_model.py +2 -11
- xspect/models/probabilistic_filter_svm_model.py +3 -0
- xspect/models/probabilistic_single_filter_model.py +4 -6
- xspect/models/result.py +8 -7
- xspect/pipeline.py +1 -1
- xspect/run.py +1 -1
- xspect/train.py +2 -39
- xspect/train_filter/extract_and_concatenate.py +1 -1
- xspect/train_filter/ncbi_api/download_assemblies.py +2 -2
- xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +13 -13
- xspect/train_filter/ncbi_api/ncbi_children_tree.py +1 -1
- xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +2 -2
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/METADATA +16 -16
- xspect-0.2.7.dist-info/RECORD +33 -0
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/WHEEL +1 -1
- XspecT-0.2.5.dist-info/RECORD +0 -30
- xspect/train_filter/html_scrap.py +0 -114
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/LICENSE +0 -0
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/entry_points.txt +0 -0
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
xspect/definitions.py,sha256=Z6RiCOQnsA_i8DPIq-7SUhrKo7KYf1Azp36UZZpcOX4,1419
|
|
3
|
+
xspect/download_models.py,sha256=lml8pSyM0pF-MxghgSRC9noDT4pkUcBZraaTTImVfbA,739
|
|
4
|
+
xspect/fastapi.py,sha256=FDiGXJmLEeTLD83Hem8yV5aoPJ-GhSG5WmDIQRAA_w4,3257
|
|
5
|
+
xspect/file_io.py,sha256=zKhl6Fd9KZAYiD8YgIyje5TbDYk5lxMp1WUrNkGSBo8,2779
|
|
6
|
+
xspect/main.py,sha256=3HqmnMowjkLNwhaZWtY4aeJCyCyT6h_nZWzYIunHfKg,5325
|
|
7
|
+
xspect/model_management.py,sha256=xF-wjVNJbXYv64RajsIcpLfZUvicDyalJEdSeCx3nQI,3542
|
|
8
|
+
xspect/pipeline.py,sha256=kgxClqnm-5BaZ8BlcCYDCKhBpavt5vIONs8aBdEnjuE,7217
|
|
9
|
+
xspect/run.py,sha256=pastcT-IFNLkWQvl0BL-h1mPheDK8mrHH-UytZWbyUI,1187
|
|
10
|
+
xspect/train.py,sha256=wVGhirXyFK5yPWd24Ai35z7iRGiYyO6QR94xZgoWJ6I,9300
|
|
11
|
+
xspect/mlst_feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
xspect/mlst_feature/mlst_helper.py,sha256=cPl-8ZASEef4ElF1dqYFjdmpoeLMOU_lvx-yBEqYlss,5914
|
|
13
|
+
xspect/mlst_feature/pub_mlst_handler.py,sha256=oss3CkJNt6041p3qnMdOfoX8ZgUfpB93CUim-Yakc9A,5031
|
|
14
|
+
xspect/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
xspect/models/probabilistic_filter_mlst_model.py,sha256=JMc0yBJPo7J9b-GpvhDmzhwWPIKOwatAq0edDgM72PE,11735
|
|
16
|
+
xspect/models/probabilistic_filter_model.py,sha256=zCn5dcuq5Z4pvmsV9igS0lQ1plUi9-Kky_zRflfrIkI,9659
|
|
17
|
+
xspect/models/probabilistic_filter_svm_model.py,sha256=uabDrF1_CSuIWf9wWyQAkqjAuRUBzEZLkv3J6YHfJsM,5641
|
|
18
|
+
xspect/models/probabilistic_single_filter_model.py,sha256=TdGbQp8ylOif7dD13OSWaS-zFNJo8mXOb6BaQ0mcPdo,3810
|
|
19
|
+
xspect/models/result.py,sha256=_pvWERp-9SHsftyhVnbe05jfOOE6GdENr6_9t-prR-M,4832
|
|
20
|
+
xspect/train_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
xspect/train_filter/create_svm.py,sha256=w6gq40yHINVfNzLhJfYFykUaNCwpU9AEDcbkUfis3DY,1504
|
|
22
|
+
xspect/train_filter/extract_and_concatenate.py,sha256=g6iUTlZYoemddHdXzxL-Hq8AtE3va6bFaSBxiKd2gHE,4838
|
|
23
|
+
xspect/train_filter/ncbi_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
xspect/train_filter/ncbi_api/download_assemblies.py,sha256=PTXs22T_fH-_skAemzCfD1uS26qJ3Huw3MdqDciu0v8,1285
|
|
25
|
+
xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py,sha256=_G5oo281WyLQspI-hYzeWcizK0DKRLiDiL0u9l53NAo,3843
|
|
26
|
+
xspect/train_filter/ncbi_api/ncbi_children_tree.py,sha256=Tbiu740BTvXvRPBslPp9fSTL3pA3UolMKwrIrNtW8fc,1819
|
|
27
|
+
xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py,sha256=y0a2RnXsXiIBleMpCM-mYnQSjtsxJ4cjhIXl-Xr5oLA,1866
|
|
28
|
+
xspect-0.2.7.dist-info/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
|
|
29
|
+
xspect-0.2.7.dist-info/METADATA,sha256=mzC1AwcjQzSJl1N0bf1uJWjZEhi4cKI9hI8ye7hIf7Y,4714
|
|
30
|
+
xspect-0.2.7.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
|
31
|
+
xspect-0.2.7.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
|
|
32
|
+
xspect-0.2.7.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
|
|
33
|
+
xspect-0.2.7.dist-info/RECORD,,
|
XspecT-0.2.5.dist-info/RECORD
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
xspect/definitions.py,sha256=gg6NvT8ypNzlnJvMMo3nHsyh8DHFFu41lOfnILkRDpE,1215
|
|
3
|
-
xspect/download_filters.py,sha256=ByE7Oggx-AyJ02Wirk_wcJHNdRDrJMfjwhmUe5tgWbE,741
|
|
4
|
-
xspect/fastapi.py,sha256=C8pBBiqM6UdedLZgzfL_YYRuy98aPj8dcw_CLFrtMMc,3260
|
|
5
|
-
xspect/file_io.py,sha256=zKhl6Fd9KZAYiD8YgIyje5TbDYk5lxMp1WUrNkGSBo8,2779
|
|
6
|
-
xspect/main.py,sha256=eOA9PAeq3VvPWWoOZxXFErvPNW-ANzOxqMsbQJPCvDw,3651
|
|
7
|
-
xspect/model_management.py,sha256=w0aqjLUoixCokyKTYrcN1vih5IoLYLJG9p8aeYdVc8Y,3560
|
|
8
|
-
xspect/pipeline.py,sha256=h7duhVZ-hupwO_KQPstzFo8KMfMI2yleb9HmtTiMjic,7219
|
|
9
|
-
xspect/run.py,sha256=OJ7pCFqva3AhIYklKjVnqWGooVRO7S3b56kIAy-xabY,1189
|
|
10
|
-
xspect/train.py,sha256=khC1lldqfr4NvzLUiSJjSlh7DBG1ePielvQMiB29Hl8,10399
|
|
11
|
-
xspect/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
xspect/models/probabilistic_filter_model.py,sha256=ImyNRzR7jf2CBPGI65ItG0_eYmrQjo9soQYlsM0r-P0,9829
|
|
13
|
-
xspect/models/probabilistic_filter_svm_model.py,sha256=Z_aAigE_fC_gm80hRfxvROHGs6LuBqZnATHPpAkQGQE,5466
|
|
14
|
-
xspect/models/probabilistic_single_filter_model.py,sha256=nDAd_-_Ci2eH0KOJtf4wA-w63FMq9rGSR1LGiIA-gdw,3884
|
|
15
|
-
xspect/models/result.py,sha256=vHUEFXvbFyB8WmasXp99IrztjwaxH1f9QMFiRUPe40Q,4824
|
|
16
|
-
xspect/train_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
xspect/train_filter/create_svm.py,sha256=w6gq40yHINVfNzLhJfYFykUaNCwpU9AEDcbkUfis3DY,1504
|
|
18
|
-
xspect/train_filter/extract_and_concatenate.py,sha256=lLrczGgfZi2vAGqxq8fcEmJi5pvqyK33JkB_ZoCNYG8,4840
|
|
19
|
-
xspect/train_filter/html_scrap.py,sha256=76VV_ZbvD2I3IxRb62SiQwRPu2tr4fwn1HkfJQYaosM,3809
|
|
20
|
-
xspect/train_filter/ncbi_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
-
xspect/train_filter/ncbi_api/download_assemblies.py,sha256=MB_mxSjCTL05DqIt1WQem8AGU3PjtJnzPndeI9J-AOI,1285
|
|
22
|
-
xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py,sha256=puzDIws-yyBAEHwSAIYUM7g8FpLFmvOKh5xH1EsY8ZE,3830
|
|
23
|
-
xspect/train_filter/ncbi_api/ncbi_children_tree.py,sha256=_8puOsnsKp5lsMV2gZY1ijkfD_BZKG9eXZCX09qph5E,1819
|
|
24
|
-
xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py,sha256=O6JDXC4E6AYaf7NPnb34eSJyZhMB8r--bjoVF_ZsEdA,1868
|
|
25
|
-
XspecT-0.2.5.dist-info/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
|
|
26
|
-
XspecT-0.2.5.dist-info/METADATA,sha256=NDw2i1MawAjAkybDXzaQfIIGFI4sw86MSlQJ8z6vkWs,4834
|
|
27
|
-
XspecT-0.2.5.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
|
28
|
-
XspecT-0.2.5.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
|
|
29
|
-
XspecT-0.2.5.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
|
|
30
|
-
XspecT-0.2.5.dist-info/RECORD,,
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
""" HTML Scraping for the taxonomy check results from NCBI."""
|
|
2
|
-
|
|
3
|
-
__author__ = "Berger, Phillip"
|
|
4
|
-
|
|
5
|
-
import datetime
|
|
6
|
-
import pickle
|
|
7
|
-
import sys
|
|
8
|
-
import time
|
|
9
|
-
import requests
|
|
10
|
-
from loguru import logger
|
|
11
|
-
from xspect.definitions import get_xspect_root_path
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class TaxonomyCheck:
    """Collect the GCFs that passed the taxonomy check from NCBI.

    On construction, the timestamp listed for ``ANI_report_prokaryotes.txt``
    in the NCBI directory index is compared with the timestamp stored by the
    previous run. The report is downloaded again when the two differ or when
    no stored timestamp exists; when they match, or when no remote timestamp
    could be determined, the list pickled by the previous run is loaded
    instead. If neither timestamp is available the process exits with a
    non-zero status.
    """

    _main_url = "https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/ANI_report_prokaryotes.txt"
    _test_url = "https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/"
    # Pickle file holding the list of GCFs whose check status is "OK".
    _main_path = get_xspect_root_path() / "taxonomy_check.txt"
    # Pickle file holding the [date, time] pair of the last downloaded report.
    _test_path = get_xspect_root_path() / "tax_check_date.txt"
    # [datetime.date, datetime.time] of the remote report; None if not found.
    _new_time: list
    # Class-level fallback only; every instance rebinds its own list.
    _tax_check_ok = []

    def __init__(self):
        """Load the accepted GCFs, refreshing the local cache when needed."""
        old_time = self._get_old_tax_date()
        self._new_time = self._get_new_tax_date()

        if self._new_time and old_time:
            # Both dates are known: re-download only if the remote report
            # changed since it was last fetched.
            if self._new_time == old_time:
                logger.info("File was not updated")
                self._get_old_file()
            else:
                logger.info("Updating file")
                self._update_tax_check()
        elif self._new_time:
            # No stored date, so there is no usable cache yet.
            logger.info("No file was found. Creating new file")
            self._update_tax_check()
        elif old_time:
            # The remote date is unknown; fall back to the cached list.
            self._get_old_file()
        else:
            logger.error("Nothing was found")
            logger.error("Aborting")
            # Exit with a failure status; a bare sys.exit() would report
            # success (status 0) to the calling process.
            sys.exit(1)

    def _get_old_tax_date(self):
        """Return the timestamp stored by the previous run.

        Returns:
            The unpickled content of the timestamp file, or ``None`` when the
            file does not exist.
        """
        try:
            with open(self._test_path, "rb") as f:
                # NOTE(review): pickle.load executes arbitrary code from the
                # file; this is only safe while the file is written
                # exclusively by _save_time().
                return pickle.load(f)
        except FileNotFoundError:
            return None

    def _get_new_tax_date(self):
        """Read the report's timestamp from the NCBI directory listing.

        Returns:
            ``[datetime.date, datetime.time]`` taken from the first listing
            line that mentions the report and can be parsed, or ``None`` when
            no such line exists.
        """
        raw_response = requests.get(self._test_url, timeout=5)
        for line in raw_response.text.split("\n"):
            if "ANI_report_prokaryotes.txt" not in line:
                continue
            line_parts = line.split()
            try:
                # The date and the time are read from the third-to-last and
                # second-to-last whitespace-separated fields of the line.
                date_parts = line_parts[-3].split("-")
                date = datetime.date(
                    int(date_parts[0]), int(date_parts[1]), int(date_parts[2])
                )
                time_parts = line_parts[-2].split(":")
                combined_time = datetime.time(int(time_parts[0]), int(time_parts[1]))
            except (IndexError, ValueError):
                # Treat an unparsable entry like a missing one so that the
                # constructor can still fall back to the cached list.
                logger.warning(
                    "Could not parse the date of ANI_report_prokaryotes.txt"
                )
                continue
            return [date, combined_time]

        return None

    def _update_tax_check(self):
        """Download the report, extract the accepted GCFs and cache them.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        raw_response = requests.get(self._main_url, timeout=5)
        # Fail loudly rather than parsing and caching an error page.
        raw_response.raise_for_status()
        # Skip the first line (presumably the column header). splitlines()
        # yields no trailing empty entry, so the last record is kept whether
        # or not the body ends with a newline.
        all_tax_checks = raw_response.text.splitlines()[1:]
        self._get_gcf_ok(all_tax_checks)
        # Store the list before the timestamp: if writing the list fails, the
        # old timestamp remains and the next run downloads the report again.
        self._save_file()
        self._save_time()

    def _get_gcf_ok(self, all_tax_checks: list):
        """Keep the GCF of every report line whose last column is ``"OK"``.

        Args:
            all_tax_checks: Tab-separated report lines. The GCF is read from
                the second column and the check status from the last one.
                Lines with fewer than two columns are ignored.
        """
        rows = (line.split("\t") for line in all_tax_checks)
        self._tax_check_ok = [
            row[1] for row in rows if len(row) > 1 and row[-1] == "OK"
        ]

    def _save_time(self):
        """Pickle the timestamp of the remote report to the timestamp file."""
        with open(self._test_path, "wb") as f:
            pickle.dump(self._new_time, f)

    def _save_file(self):
        """Pickle the list of accepted GCFs to the cache file."""
        with open(self._main_path, "wb") as f:
            pickle.dump(self._tax_check_ok, f)

    def _get_old_file(self):
        """Load the list of accepted GCFs from the cache file.

        Raises:
            FileNotFoundError: If the cache file does not exist.
        """
        with open(self._main_path, "rb") as f:
            # NOTE(review): same pickle caveat as in _get_old_tax_date().
            self._tax_check_ok = pickle.load(f)

    @staticmethod
    def _get_current_time():
        """Return the current local time formatted as ``HH:MM:SS``."""
        return time.strftime("%H:%M:%S", time.localtime())

    def ani_gcf(self):
        """Returns ANI GCFs that passed the taxonomy check."""
        return self._tax_check_ok
|
|
File without changes
|
|
File without changes
|
|
File without changes
|