XspecT 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/METADATA +23 -29
- XspecT-0.2.0.dist-info/RECORD +30 -0
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/WHEEL +1 -1
- xspect/definitions.py +42 -0
- xspect/download_filters.py +11 -26
- xspect/fastapi.py +101 -0
- xspect/file_io.py +34 -103
- xspect/main.py +70 -66
- xspect/model_management.py +88 -0
- xspect/models/__init__.py +0 -0
- xspect/models/probabilistic_filter_model.py +277 -0
- xspect/models/probabilistic_filter_svm_model.py +169 -0
- xspect/models/probabilistic_single_filter_model.py +109 -0
- xspect/models/result.py +148 -0
- xspect/pipeline.py +201 -0
- xspect/run.py +38 -0
- xspect/train.py +304 -0
- xspect/train_filter/create_svm.py +6 -183
- xspect/train_filter/extract_and_concatenate.py +117 -121
- xspect/train_filter/html_scrap.py +16 -28
- xspect/train_filter/ncbi_api/download_assemblies.py +7 -8
- xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +9 -17
- xspect/train_filter/ncbi_api/ncbi_children_tree.py +3 -2
- xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +7 -5
- XspecT-0.1.2.dist-info/RECORD +0 -48
- xspect/BF_v2.py +0 -648
- xspect/Bootstrap.py +0 -29
- xspect/Classifier.py +0 -142
- xspect/OXA_Table.py +0 -53
- xspect/WebApp.py +0 -737
- xspect/XspecT_mini.py +0 -1377
- xspect/XspecT_trainer.py +0 -611
- xspect/map_kmers.py +0 -155
- xspect/search_filter.py +0 -504
- xspect/static/How-To.png +0 -0
- xspect/static/Logo.png +0 -0
- xspect/static/Logo2.png +0 -0
- xspect/static/Workflow_AspecT.png +0 -0
- xspect/static/Workflow_ClAssT.png +0 -0
- xspect/static/js.js +0 -615
- xspect/static/main.css +0 -280
- xspect/templates/400.html +0 -64
- xspect/templates/401.html +0 -62
- xspect/templates/404.html +0 -62
- xspect/templates/500.html +0 -62
- xspect/templates/about.html +0 -544
- xspect/templates/home.html +0 -51
- xspect/templates/layoutabout.html +0 -87
- xspect/templates/layouthome.html +0 -63
- xspect/templates/layoutspecies.html +0 -468
- xspect/templates/species.html +0 -33
- xspect/train_filter/get_paths.py +0 -35
- xspect/train_filter/interface_XspecT.py +0 -204
- xspect/train_filter/k_mer_count.py +0 -162
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/LICENSE +0 -0
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/entry_points.txt +0 -0
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
"""
|
|
1
|
+
""" Module for extracting and concatenating assemblies. """
|
|
4
2
|
|
|
5
3
|
__author__ = "Berger, Phillip"
|
|
6
4
|
|
|
@@ -8,121 +6,119 @@ import os
|
|
|
8
6
|
import shutil
|
|
9
7
|
from pathlib import Path
|
|
10
8
|
from Bio import SeqIO
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
for
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
for assembly in self._all_assemblies:
|
|
128
|
-
file.write(f"{assembly}\n")
|
|
9
|
+
from xspect import file_io
|
|
10
|
+
from xspect.definitions import get_xspect_tmp_path, fasta_endings
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def change_header(assemblies_path, species_accessions: dict):
|
|
14
|
+
"""Change the header of the assemblies to the species name."""
|
|
15
|
+
files = os.listdir(assemblies_path)
|
|
16
|
+
# Iterate through all species.
|
|
17
|
+
for name, accessions in species_accessions.items():
|
|
18
|
+
# Iterate through all accessions of the current species.
|
|
19
|
+
for accession in accessions:
|
|
20
|
+
# Iterate through all file names.
|
|
21
|
+
for file in files:
|
|
22
|
+
if accession in file:
|
|
23
|
+
file_path = assemblies_path / str(file)
|
|
24
|
+
# Change the header.
|
|
25
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
|
26
|
+
sequence = ""
|
|
27
|
+
for line in f.readlines():
|
|
28
|
+
if line[0] != ">":
|
|
29
|
+
sequence += line
|
|
30
|
+
new_header = f">{name}\n"
|
|
31
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
|
32
|
+
f.write(new_header)
|
|
33
|
+
f.write(sequence)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def copy_assemblies(unzipped_path, assemblies_path):
|
|
37
|
+
"""Copy all assemblies to a new directory."""
|
|
38
|
+
os.mkdir(assemblies_path)
|
|
39
|
+
for folder in os.listdir(unzipped_path):
|
|
40
|
+
for root, _, files in os.walk(unzipped_path / str(folder)):
|
|
41
|
+
for file in files:
|
|
42
|
+
file_ending = file.split(".")[-1]
|
|
43
|
+
if file_ending in fasta_endings:
|
|
44
|
+
file_path = Path(root) / file
|
|
45
|
+
shutil.copy(file_path, (assemblies_path / file))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def concatenate_bf(unzipped_path, concatenate_path):
|
|
49
|
+
"""Concatenate all assemblies for Bloom filter training."""
|
|
50
|
+
|
|
51
|
+
all_assemblies = []
|
|
52
|
+
|
|
53
|
+
# Make new directory
|
|
54
|
+
os.mkdir(concatenate_path)
|
|
55
|
+
|
|
56
|
+
# Open the fasta files for each species.
|
|
57
|
+
for folder in os.listdir(unzipped_path):
|
|
58
|
+
species_files = []
|
|
59
|
+
# Walk through dirs to get all fasta files.
|
|
60
|
+
for root, _, files in os.walk(unzipped_path / folder):
|
|
61
|
+
for file in files:
|
|
62
|
+
file_ending = file.split(".")[-1]
|
|
63
|
+
if file_ending in fasta_endings:
|
|
64
|
+
species_files.append(Path(root) / file)
|
|
65
|
+
all_assemblies.append(".".join(str(file).split(".")[:-1]))
|
|
66
|
+
|
|
67
|
+
# Gather all sequences and headers.
|
|
68
|
+
sequences = []
|
|
69
|
+
headers = []
|
|
70
|
+
for file in species_files:
|
|
71
|
+
records = SeqIO.parse(file, "fasta")
|
|
72
|
+
for record in records:
|
|
73
|
+
headers.append(record.id)
|
|
74
|
+
sequences.append(str(record.seq))
|
|
75
|
+
|
|
76
|
+
# Concatenate sequences
|
|
77
|
+
species_sequence = "".join(sequences)
|
|
78
|
+
species_header = ">" + " § ".join(headers) + "\n"
|
|
79
|
+
|
|
80
|
+
# Save concatenated sequences and headers
|
|
81
|
+
species_path = concatenate_path / (folder + ".fasta")
|
|
82
|
+
with open(species_path, "w", encoding="utf-8") as species_file:
|
|
83
|
+
species_file.write(species_header)
|
|
84
|
+
species_file.write(species_sequence)
|
|
85
|
+
|
|
86
|
+
return all_assemblies
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def save_all_assemblies(dir_path: Path, all_assemblies: list[str]):
|
|
90
|
+
"""Save all assemblies to a file."""
|
|
91
|
+
path = dir_path / "all_bf_assemblies.txt"
|
|
92
|
+
with open(path, "w", encoding="utf-8") as file:
|
|
93
|
+
for assembly in all_assemblies:
|
|
94
|
+
file.write(f"{assembly}\n")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def bf(dir_name: str, delete: bool):
|
|
98
|
+
"""Extract and concatenate assemblies for Bloom filter training."""
|
|
99
|
+
dir_path = get_xspect_tmp_path() / dir_name
|
|
100
|
+
zip_path = dir_path / "zip_files"
|
|
101
|
+
unzipped_path = dir_path / "zip_files_extracted"
|
|
102
|
+
concatenate_path = dir_path / "concatenate"
|
|
103
|
+
file_io.extract_zip(zip_path, unzipped_path)
|
|
104
|
+
all_assemblies = concatenate_bf(unzipped_path, concatenate_path)
|
|
105
|
+
save_all_assemblies(dir_path, all_assemblies)
|
|
106
|
+
if delete:
|
|
107
|
+
file_io.delete_zip_files(zip_path)
|
|
108
|
+
shutil.rmtree(zip_path, ignore_errors=False)
|
|
109
|
+
shutil.rmtree(unzipped_path, ignore_errors=False)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def svm(species_accessions: dict, dir_name: str, delete: bool):
|
|
113
|
+
"""Extract and concatenate assemblies for generating SVM training data."""
|
|
114
|
+
dir_path = get_xspect_tmp_path() / dir_name
|
|
115
|
+
zip_path = dir_path / "training_data_zipped"
|
|
116
|
+
unzipped_path = dir_path / "training_data_unzipped"
|
|
117
|
+
assemblies_path = dir_path / "training_data"
|
|
118
|
+
file_io.extract_zip(zip_path, unzipped_path)
|
|
119
|
+
copy_assemblies(unzipped_path, assemblies_path)
|
|
120
|
+
change_header(assemblies_path, species_accessions)
|
|
121
|
+
if delete:
|
|
122
|
+
file_io.delete_zip_files(zip_path)
|
|
123
|
+
shutil.rmtree(zip_path, ignore_errors=False)
|
|
124
|
+
shutil.rmtree(unzipped_path, ignore_errors=False)
|
|
@@ -1,26 +1,25 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
"""
|
|
1
|
+
""" HTML Scraping for the taxonomy check results from NCBI."""
|
|
4
2
|
|
|
5
3
|
__author__ = "Berger, Phillip"
|
|
6
4
|
|
|
7
5
|
import datetime
|
|
8
|
-
import requests
|
|
9
6
|
import pickle
|
|
7
|
+
import sys
|
|
10
8
|
import time
|
|
11
|
-
|
|
12
|
-
import os
|
|
13
|
-
|
|
9
|
+
import requests
|
|
14
10
|
from loguru import logger
|
|
11
|
+
from xspect.definitions import get_xspect_root_path
|
|
15
12
|
|
|
16
13
|
|
|
17
14
|
class TaxonomyCheck:
|
|
15
|
+
"""Class to get the GCFs that passed the taxonomy check from NCBI."""
|
|
16
|
+
|
|
18
17
|
_main_url = "https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/ANI_report_prokaryotes.txt"
|
|
19
18
|
_test_url = "https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/"
|
|
20
|
-
_main_path =
|
|
21
|
-
_test_path =
|
|
19
|
+
_main_path = get_xspect_root_path() / "taxonomy_check.txt"
|
|
20
|
+
_test_path = get_xspect_root_path() / "tax_check_date.txt"
|
|
22
21
|
_new_time: list
|
|
23
|
-
_tax_check_ok =
|
|
22
|
+
_tax_check_ok = []
|
|
24
23
|
|
|
25
24
|
def __init__(self):
|
|
26
25
|
old_time = self._get_old_tax_date()
|
|
@@ -48,7 +47,7 @@ class TaxonomyCheck:
|
|
|
48
47
|
else:
|
|
49
48
|
logger.error("Nothing was found")
|
|
50
49
|
logger.error("Aborting")
|
|
51
|
-
exit()
|
|
50
|
+
sys.exit()
|
|
52
51
|
|
|
53
52
|
def _get_old_tax_date(self):
|
|
54
53
|
try:
|
|
@@ -59,7 +58,7 @@ class TaxonomyCheck:
|
|
|
59
58
|
return None
|
|
60
59
|
|
|
61
60
|
def _get_new_tax_date(self):
|
|
62
|
-
raw_response = requests.get(self._test_url)
|
|
61
|
+
raw_response = requests.get(self._test_url, timeout=5)
|
|
63
62
|
data = raw_response.text.split("\n")
|
|
64
63
|
for line in data:
|
|
65
64
|
if "ANI_report_prokaryotes.txt" in line:
|
|
@@ -69,22 +68,22 @@ class TaxonomyCheck:
|
|
|
69
68
|
int(date_parts[0]), int(date_parts[1]), int(date_parts[2])
|
|
70
69
|
)
|
|
71
70
|
time_parts = line_parts[-2].split(":")
|
|
72
|
-
|
|
73
|
-
new_time = [date,
|
|
71
|
+
combined_time = datetime.time(int(time_parts[0]), int(time_parts[1]))
|
|
72
|
+
new_time = [date, combined_time]
|
|
74
73
|
|
|
75
74
|
return new_time
|
|
76
75
|
|
|
77
76
|
return None
|
|
78
77
|
|
|
79
78
|
def _update_tax_check(self):
|
|
80
|
-
raw_response = requests.get(self._main_url)
|
|
79
|
+
raw_response = requests.get(self._main_url, timeout=5)
|
|
81
80
|
all_tax_checks = raw_response.text.split("\n")[1:-1]
|
|
82
81
|
self._get_gcf_ok(all_tax_checks)
|
|
83
82
|
self._save_time()
|
|
84
83
|
self._save_file()
|
|
85
84
|
|
|
86
85
|
def _get_gcf_ok(self, all_tax_checks: list):
|
|
87
|
-
tax_check_ok =
|
|
86
|
+
tax_check_ok = []
|
|
88
87
|
for line in all_tax_checks:
|
|
89
88
|
line_parts = line.split("\t")
|
|
90
89
|
gcf = line_parts[1]
|
|
@@ -111,16 +110,5 @@ class TaxonomyCheck:
|
|
|
111
110
|
return time.asctime(time.localtime()).split()[3]
|
|
112
111
|
|
|
113
112
|
def ani_gcf(self):
|
|
113
|
+
"""Returns ANI GCFs that passed the taxonomy check."""
|
|
114
114
|
return self._tax_check_ok
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def main():
|
|
118
|
-
start = time.perf_counter()
|
|
119
|
-
tax_check = TaxonomyCheck()
|
|
120
|
-
end = time.perf_counter()
|
|
121
|
-
print(f"{end-start:.2f} s\n")
|
|
122
|
-
print(tax_check.ani_gcf()[:5])
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
if __name__ == "__main__":
|
|
126
|
-
main()
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
"""
|
|
1
|
+
"""This module contains methods to download assemblies from the NCBI database."""
|
|
4
2
|
|
|
5
3
|
__author__ = "Berger, Phillip"
|
|
6
4
|
|
|
5
|
+
# pylint: disable=line-too-long
|
|
6
|
+
|
|
7
7
|
import os
|
|
8
|
-
from pathlib import Path
|
|
9
8
|
import requests
|
|
9
|
+
from xspect.definitions import get_xspect_tmp_path
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def download_assemblies(accessions, dir_name, target_folder, zip_file_name):
|
|
@@ -21,12 +21,11 @@ def download_assemblies(accessions, dir_name, target_folder, zip_file_name):
|
|
|
21
21
|
:param zip_file_name: Name of the zip file. E.g. Klebsiella aerogenes.zip.
|
|
22
22
|
:type zip_file_name: str
|
|
23
23
|
"""
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
)
|
|
24
|
+
|
|
25
|
+
path = get_xspect_tmp_path() / dir_name / target_folder / zip_file_name
|
|
27
26
|
api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v1/genome/accession/{','.join(accessions)}/download"
|
|
28
27
|
parameters = {"include_annotation_type": "GENOME_FASTA", "filename": zip_file_name}
|
|
29
28
|
os.makedirs(os.path.dirname(path), exist_ok=True)
|
|
30
|
-
genome_download = requests.get(api_url, params=parameters)
|
|
29
|
+
genome_download = requests.get(api_url, params=parameters, timeout=20)
|
|
31
30
|
with open(path, "wb") as f:
|
|
32
31
|
f.write(genome_download.content)
|
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
"""
|
|
1
|
+
""" Collects metadata of assemblies from NCBI API """
|
|
4
2
|
|
|
5
3
|
__author__ = "Berger, Phillip"
|
|
6
4
|
|
|
7
|
-
from time import sleep
|
|
5
|
+
from time import sleep
|
|
8
6
|
|
|
9
7
|
import requests
|
|
10
8
|
|
|
@@ -12,6 +10,8 @@ from loguru import logger
|
|
|
12
10
|
|
|
13
11
|
|
|
14
12
|
class NCBIAssemblyMetadata:
|
|
13
|
+
"""Class to collect metadata of assemblies from the NCBI API."""
|
|
14
|
+
|
|
15
15
|
_all_metadata: dict
|
|
16
16
|
_count: int
|
|
17
17
|
_ani_gcf: list
|
|
@@ -28,9 +28,9 @@ class NCBIAssemblyMetadata:
|
|
|
28
28
|
|
|
29
29
|
self._set_parameters()
|
|
30
30
|
|
|
31
|
-
tmp_metadata =
|
|
31
|
+
tmp_metadata = {}
|
|
32
32
|
for tax_id, curr_metadata in self._all_metadata.items():
|
|
33
|
-
sleep(
|
|
33
|
+
sleep(2)
|
|
34
34
|
species_name = curr_metadata["sci_name"]
|
|
35
35
|
logger.info("Collecting metadata of {name}", name=species_name)
|
|
36
36
|
accessions = self._make_request(taxon=tax_id)
|
|
@@ -73,10 +73,10 @@ class NCBIAssemblyMetadata:
|
|
|
73
73
|
|
|
74
74
|
def _make_request(self, taxon: str):
|
|
75
75
|
api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v1/genome/taxon/{taxon}"
|
|
76
|
-
accessions =
|
|
76
|
+
accessions = []
|
|
77
77
|
count = 0
|
|
78
78
|
for request_type, parameters in self._parameters.items():
|
|
79
|
-
raw_response = requests.get(api_url, params=parameters)
|
|
79
|
+
raw_response = requests.get(api_url, params=parameters, timeout=5)
|
|
80
80
|
response = raw_response.json()
|
|
81
81
|
if response:
|
|
82
82
|
try:
|
|
@@ -95,7 +95,6 @@ class NCBIAssemblyMetadata:
|
|
|
95
95
|
else:
|
|
96
96
|
break
|
|
97
97
|
except KeyError:
|
|
98
|
-
pass
|
|
99
98
|
logger.debug(
|
|
100
99
|
"While requesting: {type} an error response was given",
|
|
101
100
|
type=request_type,
|
|
@@ -107,12 +106,5 @@ class NCBIAssemblyMetadata:
|
|
|
107
106
|
return accessions
|
|
108
107
|
|
|
109
108
|
def get_all_metadata(self):
|
|
109
|
+
"""Returns all metadata of the assemblies."""
|
|
110
110
|
return self._all_metadata_complete
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def main():
|
|
114
|
-
pass
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
if __name__ == "__main__":
|
|
118
|
-
main()
|
|
@@ -6,6 +6,7 @@ The children are only of the next lower rank of the parent taxon.
|
|
|
6
6
|
|
|
7
7
|
__author__ = "Berger, Phillip"
|
|
8
8
|
|
|
9
|
+
import sys
|
|
9
10
|
import requests
|
|
10
11
|
|
|
11
12
|
from loguru import logger
|
|
@@ -24,7 +25,7 @@ class NCBIChildrenTree:
|
|
|
24
25
|
def _request_tree(self):
|
|
25
26
|
"""Make the request for the children tree at the NCBI Datasets API."""
|
|
26
27
|
api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v1/taxonomy/taxon/{self._taxon}/filtered_subtree"
|
|
27
|
-
raw_response = requests.get(api_url)
|
|
28
|
+
raw_response = requests.get(api_url, timeout=5)
|
|
28
29
|
self._response = raw_response.json()["edges"]
|
|
29
30
|
self._parent_taxon_id = str(self._response["1"]["visible_children"][0])
|
|
30
31
|
try:
|
|
@@ -33,7 +34,7 @@ class NCBIChildrenTree:
|
|
|
33
34
|
logger.error("KeyError for key: {key}", key=self._parent_taxon_id)
|
|
34
35
|
logger.error("Response: {response}", response=str(self._response))
|
|
35
36
|
logger.error("Aborting")
|
|
36
|
-
exit()
|
|
37
|
+
sys.exit()
|
|
37
38
|
for child_id in tmp_children_ids:
|
|
38
39
|
self._children_taxon_ids.append(str(child_id))
|
|
39
40
|
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
"""
|
|
1
|
+
""" This module is used to retrieve metadata from the NCBI taxonomy database. """
|
|
4
2
|
|
|
5
3
|
__author__ = "Berger, Phillip"
|
|
6
4
|
|
|
@@ -10,19 +8,21 @@ from loguru import logger
|
|
|
10
8
|
|
|
11
9
|
|
|
12
10
|
class NCBITaxonMetadata:
|
|
11
|
+
"""Class to retrieve metadata from the NCBI taxonomy database."""
|
|
12
|
+
|
|
13
13
|
_taxon: str
|
|
14
14
|
_response: dict
|
|
15
15
|
_all_metadata: dict
|
|
16
16
|
|
|
17
17
|
def __init__(self, taxon: list[str]):
|
|
18
18
|
self._taxon = ",".join(taxon)
|
|
19
|
-
self._all_metadata =
|
|
19
|
+
self._all_metadata = {}
|
|
20
20
|
self._request_metadata()
|
|
21
21
|
self._collect_all_metadata()
|
|
22
22
|
|
|
23
23
|
def _request_metadata(self):
|
|
24
24
|
api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v1/taxonomy/taxon/{str(self._taxon)}"
|
|
25
|
-
raw_response = requests.get(api_url)
|
|
25
|
+
raw_response = requests.get(api_url, timeout=5)
|
|
26
26
|
self._response = raw_response.json()["taxonomy_nodes"]
|
|
27
27
|
|
|
28
28
|
def _collect_all_metadata(self):
|
|
@@ -47,7 +47,9 @@ class NCBITaxonMetadata:
|
|
|
47
47
|
logger.debug("{name} was not used for training", name=name)
|
|
48
48
|
|
|
49
49
|
def get_response(self):
|
|
50
|
+
"""Returns the raw response from the NCBI taxonomy database."""
|
|
50
51
|
return self._response
|
|
51
52
|
|
|
52
53
|
def get_metadata(self):
|
|
54
|
+
"""Returns the metadata from the NCBI taxonomy database."""
|
|
53
55
|
return self._all_metadata
|
XspecT-0.1.2.dist-info/RECORD
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
xspect/BF_v2.py,sha256=3zJgWY6VxfE-6eSqUTgoOY4Z_mp6IKBKDpuWu34FlKI,26080
|
|
2
|
-
xspect/Bootstrap.py,sha256=AYyEBo3MoOnPqhPAHe726mX8L9NuXDa5SATxZKLMv3s,830
|
|
3
|
-
xspect/Classifier.py,sha256=BgqpZiMYi2maaccTzJcgH2tjrtDH-U7COc7E4t4cQt8,3602
|
|
4
|
-
xspect/OXA_Table.py,sha256=1GxsyxMpUEgQirY0nJHtR3jl61DoPZh2Rb9L0VdMxD4,1632
|
|
5
|
-
xspect/WebApp.py,sha256=eo1EJOMjW5grCZyvX5g1J4ppwyZb_M9lYGCNuJidM0Q,25224
|
|
6
|
-
xspect/XspecT_mini.py,sha256=t_4OlhzLytRXkM0ig9lo0Szfm2QgJhls52TScUxFN1s,55411
|
|
7
|
-
xspect/XspecT_trainer.py,sha256=6Gj2mltyVyM8Rsh5EU8tSCGMG7niYBLfId664zYaVXI,21703
|
|
8
|
-
xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
xspect/download_filters.py,sha256=wSyX-IucjuKIEcVx-E0ClsA0XL0DI1FgMlO2UULgaXc,1048
|
|
10
|
-
xspect/file_io.py,sha256=IWae7xxAt-EmyEbxo0nDSe3RJHmLkQT5jNS2Z3qLKdg,4807
|
|
11
|
-
xspect/main.py,sha256=bF7ntgy_gR0ZNIB9JVxtXb-a6o0Lt0__tI_zzj03B24,2977
|
|
12
|
-
xspect/map_kmers.py,sha256=63iTQS_GZZBK2DxjEs5xoI4KgfpZOntCKul06rrgi5w,6000
|
|
13
|
-
xspect/search_filter.py,sha256=EZkM2917cjy4Q0zQDC9bJ0S-dyD-MBBmJqrAHQ1P260,17190
|
|
14
|
-
xspect/static/How-To.png,sha256=QO6HydIHcL3oM9feMxmfZcKE8M62fIRl2xs_5S_NL5M,119621
|
|
15
|
-
xspect/static/Logo.png,sha256=bvOWMpqxmBigg9jEvZtIMOsXncbSFwnYu4eYNSf1_Qw,296095
|
|
16
|
-
xspect/static/Logo2.png,sha256=V7hpGb3XYLN5vEQQNJdpNjQX_F2A_f1wKAP8N37NwGs,292730
|
|
17
|
-
xspect/static/Workflow_AspecT.png,sha256=RsFIN_18d71cSdupskRHb9X-P0xCsX2mNSS381bPzfc,80863
|
|
18
|
-
xspect/static/Workflow_ClAssT.png,sha256=92stJJHeLO7RhK1shuKehP-AGWeQLWMEbpX3KneSe4I,1136026
|
|
19
|
-
xspect/static/js.js,sha256=2sVwS3AToSO0IsEYvRxT_8HQRQPPJuzVFAQok16SDhg,21344
|
|
20
|
-
xspect/static/main.css,sha256=9Dkd6EaiF9-mLkXsOV0tQhDoO6Uuw-X2StqzKUGv2lE,4623
|
|
21
|
-
xspect/templates/400.html,sha256=XpJT3nyaCHgCYUpbNJymUziB3SBaccb02PV77d5QHjI,2672
|
|
22
|
-
xspect/templates/401.html,sha256=5LUGd5MEmsFaOBbXHL1nDQJBmEi3KEYTgc4gdM8s2wU,2504
|
|
23
|
-
xspect/templates/404.html,sha256=XjnJXkKHJfVMOhJ75xDS-a7VnbcikAXI7JS5sdxokVo,2487
|
|
24
|
-
xspect/templates/500.html,sha256=y5UIlBffXdg7c95hcT3qwWmxvXHqSlLUE8dUV7gkIFE,2570
|
|
25
|
-
xspect/templates/about.html,sha256=xLI9orZJAAoFnRDmrVczZuFnTkurbC3qmC_ITQdan2Y,26509
|
|
26
|
-
xspect/templates/home.html,sha256=MGQN1dJ2yaz7j8Z2N5pzvTDaZyiWdrlQvYwiTGXpjfA,1667
|
|
27
|
-
xspect/templates/layoutabout.html,sha256=ICC8g0DP8a7MLNrEYnXBgtnkwMjIktsimmqwqjMU2yw,3847
|
|
28
|
-
xspect/templates/layouthome.html,sha256=6EtVD-L6xlTc7XGk77f9CARKW7JLpv2iiyUci1BK00A,2870
|
|
29
|
-
xspect/templates/layoutspecies.html,sha256=MNGSDEvuKFvgsyXoRLCu-rma10gntUI9vP_9a2sNl7M,24008
|
|
30
|
-
xspect/templates/species.html,sha256=rD9fCmSgyI8hRcmy56mNQH7VR5jnmtriv9WlvTIJJjE,2412
|
|
31
|
-
xspect/train_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
|
-
xspect/train_filter/create_svm.py,sha256=E1QwBeUtAlOlKf6QKfmRtKaz_6idv7M8Hb-jbNb_wGk,6820
|
|
33
|
-
xspect/train_filter/extract_and_concatenate.py,sha256=kXGqCrOk3TbOkKLJV8nKC6nL8Zg0TWKDCJu2gq8K_cw,5239
|
|
34
|
-
xspect/train_filter/get_paths.py,sha256=JXPbv_Fx5BKHZQ4bkSIGU7yj5zjkmhsI0Z6U4nU0gug,941
|
|
35
|
-
xspect/train_filter/html_scrap.py,sha256=iQXREhG37SNUx7gHoP8eqayMEIH00QLFMTNmIMogb_M,3799
|
|
36
|
-
xspect/train_filter/interface_XspecT.py,sha256=HVCwVHqtvJ1EA9u6GByeKCve-6sADK5AceB5itPV62k,6735
|
|
37
|
-
xspect/train_filter/k_mer_count.py,sha256=0yHCxzsOH8LhO6tD35O7BjWodfE5lJDKWYzzcCrr0JE,5226
|
|
38
|
-
xspect/train_filter/ncbi_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
xspect/train_filter/ncbi_api/download_assemblies.py,sha256=iX1qK8R6p2b3RiHPfqVsLp-dV_7iZZv0AxY1xQ-Ad48,1171
|
|
40
|
-
xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py,sha256=RhHvxKiQ8HJgoSb6njYEgO_vPioBqEMPvT3lE2lHXp0,3766
|
|
41
|
-
xspect/train_filter/ncbi_api/ncbi_children_tree.py,sha256=pmzg6-fDGLinNSXNbBRv0v62lRgHxW4aXZ0uV1TJhOE,1793
|
|
42
|
-
xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py,sha256=uhBBGffgL4mcJpyp9KxVyOGUh8FxUTAI4xKzoLDav_Y,1577
|
|
43
|
-
XspecT-0.1.2.dist-info/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
|
|
44
|
-
XspecT-0.1.2.dist-info/METADATA,sha256=h4OX8L719oZsPj0Xcab4bx4ZstZiMUuPFpVcbZoGc_w,5475
|
|
45
|
-
XspecT-0.1.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
46
|
-
XspecT-0.1.2.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
|
|
47
|
-
XspecT-0.1.2.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
|
|
48
|
-
XspecT-0.1.2.dist-info/RECORD,,
|