XspecT 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of XspecT has been flagged by the registry; consult the registry's advisory page for details.
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/METADATA +23 -29
- XspecT-0.2.0.dist-info/RECORD +30 -0
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/WHEEL +1 -1
- xspect/definitions.py +42 -0
- xspect/download_filters.py +11 -26
- xspect/fastapi.py +101 -0
- xspect/file_io.py +34 -103
- xspect/main.py +70 -66
- xspect/model_management.py +88 -0
- xspect/models/__init__.py +0 -0
- xspect/models/probabilistic_filter_model.py +277 -0
- xspect/models/probabilistic_filter_svm_model.py +169 -0
- xspect/models/probabilistic_single_filter_model.py +109 -0
- xspect/models/result.py +148 -0
- xspect/pipeline.py +201 -0
- xspect/run.py +38 -0
- xspect/train.py +304 -0
- xspect/train_filter/create_svm.py +6 -183
- xspect/train_filter/extract_and_concatenate.py +117 -121
- xspect/train_filter/html_scrap.py +16 -28
- xspect/train_filter/ncbi_api/download_assemblies.py +7 -8
- xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +9 -17
- xspect/train_filter/ncbi_api/ncbi_children_tree.py +3 -2
- xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +7 -5
- XspecT-0.1.3.dist-info/RECORD +0 -49
- xspect/BF_v2.py +0 -637
- xspect/Bootstrap.py +0 -29
- xspect/Classifier.py +0 -142
- xspect/OXA_Table.py +0 -53
- xspect/WebApp.py +0 -724
- xspect/XspecT_mini.py +0 -1363
- xspect/XspecT_trainer.py +0 -611
- xspect/map_kmers.py +0 -155
- xspect/search_filter.py +0 -504
- xspect/static/How-To.png +0 -0
- xspect/static/Logo.png +0 -0
- xspect/static/Logo2.png +0 -0
- xspect/static/Workflow_AspecT.png +0 -0
- xspect/static/Workflow_ClAssT.png +0 -0
- xspect/static/js.js +0 -615
- xspect/static/main.css +0 -280
- xspect/templates/400.html +0 -64
- xspect/templates/401.html +0 -62
- xspect/templates/404.html +0 -62
- xspect/templates/500.html +0 -62
- xspect/templates/about.html +0 -544
- xspect/templates/home.html +0 -51
- xspect/templates/layoutabout.html +0 -87
- xspect/templates/layouthome.html +0 -63
- xspect/templates/layoutspecies.html +0 -468
- xspect/templates/species.html +0 -33
- xspect/train_filter/README_XspecT_Erweiterung.md +0 -119
- xspect/train_filter/get_paths.py +0 -35
- xspect/train_filter/interface_XspecT.py +0 -204
- xspect/train_filter/k_mer_count.py +0 -162
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/LICENSE +0 -0
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/entry_points.txt +0 -0
- {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/top_level.txt +0 -0
xspect/Classifier.py
DELETED
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
from sklearn.svm import SVC
|
|
2
|
-
import csv
|
|
3
|
-
from copy import deepcopy
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def cut_csv(csv_file, lst, table=False):
    """Return the selected slice of the training data in *csv_file*.

    The CSV is expected to have the layout ``File, <label cols...>, Label``:
    column 0 is a file name, the last column is the class label, and the
    columns in between are feature/label columns matching ``lst``.

    :param csv_file: path to the training-data CSV.
    :param lst: list of booleans, one per selectable column; index 8 is the
        "Added Genomes" toggle and is expanded to cover every extra column.
    :param table: when True, the returned matrix is decorated for display
        (file name prepended, label appended, header row inserted).
    :return: tuple ``(X_train, y_train)`` of feature rows and labels.
    """
    r = csv.reader(open(csv_file))
    m = list(r)
    # Copy so the caller's selection list is not mutated by `del` below.
    selected = deepcopy(lst)

    header = m[0]
    m = m[1:]
    labels = header[1:-1]

    X_train = []
    y_train = []
    files = []

    # Expand the single "Added Genomes" toggle (index 8) into one boolean
    # per extra genome column.
    # NOTE(review): the guard checks len(header[8:-1]) but the expansion uses
    # len(header[9:-1]) — presumably intentional (toggle column vs. data
    # columns), but confirm against the CSV layout.
    if selected[8] and len(header[8:-1]) > 0:
        # Added Genomes selected
        del selected[8]
        selected = selected + ([True] * len(header[9:-1]))

    else:
        # Added Genomes not selected
        del selected[8]
        selected = selected + ([False] * len(header[9:-1]))

    # creating matrix
    for i in range(len(m)):
        X_train.append(m[i][1:-1])
        y_train.append(m[i][-1])
        files.append(m[i][0])

    # Deleting Cols: drop every feature column whose toggle is False.
    # Iterates in reverse so the `del` does not shift pending indices.
    for i in range(len(X_train)):
        for j in range(len(X_train[i]) - 1, -1, -1):
            if selected[j]:
                pass
            else:
                del X_train[i][j]

    # Deleting Rows: keep only rows whose label belongs to a selected
    # column (or the literal "None" label, which is always valid).
    valid = ["None"]
    for i in range(len(selected)):
        if selected[i]:
            valid.append(labels[i])

    for i in range(len(X_train) - 1, -1, -1):
        if y_train[i] not in valid:
            del y_train[i]
            del X_train[i]
            del files[i]

    if table:
        # Inserting Infos for Table: decorate rows and header for display.
        for i in range(len(X_train)):
            X_train[i].insert(0, files[i])
            X_train[i].append(y_train[i])

        for i in range(len(header) - 1, -1, -1):
            if header[i] not in valid:
                del header[i]

        header.insert(0, "File")
        header.append("Label")

        X_train.insert(0, header)

    else:
        pass

    return X_train, y_train
def cut_csv_spec(csv_file):
    """Return the SVM training data stored in *csv_file*.

    The first CSV row is a header and is skipped; for every remaining row,
    column 0 (file name) and the last column (label) frame the feature
    values.

    :param csv_file: path to the training-data CSV.
    :return: tuple ``(X_train, y_train)`` where ``X_train`` is a list of
        feature-string rows and ``y_train`` the matching label strings.
    """
    # `with` closes the handle (the original leaked it); newline="" is the
    # documented way to open files for the csv module.
    with open(csv_file, newline="") as handle:
        rows = list(csv.reader(handle))

    # Build the classifier input matrix, skipping the header row.
    X_train = [row[1:-1] for row in rows[1:]]
    y_train = [row[-1] for row in rows[1:]]
    return X_train, y_train
|
95
|
-
def classify(csv_file, result, lst):
    """Classify a result vector with an SVM trained from *csv_file*.

    The second header cell decides the mode: ``"IC1"`` selects the ClAssT
    clone-type classifier (poly kernel), anything else the XspecT species
    classifier (RBF kernel).

    :param csv_file: path to the training-data CSV.
    :param result: score vector to classify (same column order as the
        training features).
    :param lst: column-selection booleans, forwarded to :func:`cut_csv`
        in ClAssT mode; ignored in XspecT mode.
    :return: the predicted label string, or a "no hit" label when every
        score is below the 0.3 confidence threshold.
    """
    # Only the header row is needed to pick the mode; `with` closes the
    # handle (the original read the whole file and leaked the handle).
    with open(csv_file, newline="") as handle:
        header = next(csv.reader(handle))

    # deciding which kernel-function will be used
    if header[1] == "IC1":
        mode = "ClAssT"
        X_train, y_train = cut_csv(csv_file, lst)
        svm = SVC(kernel="poly", C=1.0).fit(X_train, y_train)
    else:
        mode = "XspecT"
        X_train, y_train = cut_csv_spec(csv_file)
        svm = SVC(kernel="rbf", C=1.5).fit(X_train, y_train)

    # perform a prediction using the svm
    prediction = svm.predict([result])

    # Below the confidence threshold the SVM output is discarded and a
    # mode-specific "no hit" label is returned instead (the original
    # duplicated this check in both branches).
    if max(result) < 0.3:
        return "sp." if mode == "XspecT" else "None"

    return prediction[0]
120
|
-
def IC3_classify(result_2):
    """Classify a score against International Clonetype 3 references.

    A tiny one-feature SVM is trained on two real IC3 reference scores and
    two fictional "none" anchors, then applied to *result_2*.

    :param result_2: single-element score vector.
    :return: tuple ``(prediction_array, score)`` — the SVM prediction and
        the raw input score.
    """
    ic = "International Clonetype 3 (ST32 or ST250)"
    none_label = "NONE of the selected Clonetypes or Genomes"
    references = [
        ("GCF_000278625.1", 1.0, ic),
        ("GCF_001674185.1", 0.86, ic),
        ("fictional", 0.85, none_label),
        ("fictional", 0.01, none_label),
    ]

    # Each sample is a one-element feature vector: just the score.
    X = [[score] for _, score, _ in references]
    y = [label for _, _, label in references]

    svm_IC3 = SVC(kernel="poly", C=1).fit(X, y)

    return svm_IC3.predict([result_2]), result_2[0]
|
142
|
-
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
|
xspect/OXA_Table.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from Bio import SeqIO
|
|
3
|
-
import os
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class OXATable:
    """Consumable k-mer count table for OXA gene sequences.

    ``kmere`` maps a gene name to a dict of 20-mers and their remaining
    occurrence counts; :meth:`lookup` consumes one occurrence per hit.
    """

    def __init__(self):
        # gene name -> {20-mer string: remaining occurrence count}
        self.kmere = {}
        self.total = 0
        self.found = 0

    def create_table(self, directory):
        """Build a 20-mer count table for every fasta file in *directory*.

        Each file contributes one entry keyed by its name minus a 6-char
        extension (e.g. ``.fasta``).

        :param directory: directory containing the fasta files.
        :return: dict mapping gene name to its k-mer count dict.
        """
        # taking all fasta files for one big table
        oxas = {}
        for filename in os.listdir(directory):
            kmere = {}
            # os.path.join works whether or not `directory` has a trailing
            # separator (the original string concatenation required one).
            path = os.path.join(directory, filename)
            for sequence in SeqIO.parse(path, "fasta"):
                seq = str(sequence.seq)
                # Slide a 20-wide window over the sequence.
                for j in range(len(seq) - 20 + 1):
                    kmer = seq[j : j + 20]
                    kmere[kmer] = kmere.get(kmer, 0) + 1
            # NOTE(review): assumes a 6-character extension — confirm the
            # input files are always ".fasta".
            oxas[filename[:-6]] = kmere
        # Bug fix: the original built `oxas` and silently discarded it
        # (no return, no assignment to self). Returning it is backward
        # compatible, since the old return value was always None.
        return oxas

    def lookup(self, gene, kmer):
        """Consume one occurrence of *kmer* for *gene*.

        :return: True only if the k-mer is known for *gene* and at least
            one occurrence remained; the counter is decremented on success.
        """
        count = self.kmere[gene].get(kmer, 0)
        if count > 0:
            self.kmere[gene][kmer] = count - 1
            return True
        return False

    def save_dic(self, path):
        """Serialise the k-mer table to *path* as JSON."""
        # `with` closes the handle (the original leaked it).
        with open(path, "w") as handle:
            json.dump(self.kmere, handle)

    def read_dic(self, path):
        """Load the k-mer table from the JSON file at *path*."""
        with open(path) as handle:
            self.kmere = json.load(handle)

    def cleanup(self):
        """Drop the k-mer table to free memory."""
        self.kmere = {}

    def get_counter(self, path=r"filter/OXAs_dict/counter.txt"):
        """Load and return the JSON counter table stored at *path*."""
        with open(path) as handle:
            return json.load(handle)