XspecT-0.1.2-py3-none-any.whl → XspecT-0.2.0-py3-none-any.whl
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/METADATA +23 -29
- XspecT-0.2.0.dist-info/RECORD +30 -0
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/WHEEL +1 -1
- xspect/definitions.py +42 -0
- xspect/download_filters.py +11 -26
- xspect/fastapi.py +101 -0
- xspect/file_io.py +34 -103
- xspect/main.py +70 -66
- xspect/model_management.py +88 -0
- xspect/models/__init__.py +0 -0
- xspect/models/probabilistic_filter_model.py +277 -0
- xspect/models/probabilistic_filter_svm_model.py +169 -0
- xspect/models/probabilistic_single_filter_model.py +109 -0
- xspect/models/result.py +148 -0
- xspect/pipeline.py +201 -0
- xspect/run.py +38 -0
- xspect/train.py +304 -0
- xspect/train_filter/create_svm.py +6 -183
- xspect/train_filter/extract_and_concatenate.py +117 -121
- xspect/train_filter/html_scrap.py +16 -28
- xspect/train_filter/ncbi_api/download_assemblies.py +7 -8
- xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +9 -17
- xspect/train_filter/ncbi_api/ncbi_children_tree.py +3 -2
- xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +7 -5
- XspecT-0.1.2.dist-info/RECORD +0 -48
- xspect/BF_v2.py +0 -648
- xspect/Bootstrap.py +0 -29
- xspect/Classifier.py +0 -142
- xspect/OXA_Table.py +0 -53
- xspect/WebApp.py +0 -737
- xspect/XspecT_mini.py +0 -1377
- xspect/XspecT_trainer.py +0 -611
- xspect/map_kmers.py +0 -155
- xspect/search_filter.py +0 -504
- xspect/static/How-To.png +0 -0
- xspect/static/Logo.png +0 -0
- xspect/static/Logo2.png +0 -0
- xspect/static/Workflow_AspecT.png +0 -0
- xspect/static/Workflow_ClAssT.png +0 -0
- xspect/static/js.js +0 -615
- xspect/static/main.css +0 -280
- xspect/templates/400.html +0 -64
- xspect/templates/401.html +0 -62
- xspect/templates/404.html +0 -62
- xspect/templates/500.html +0 -62
- xspect/templates/about.html +0 -544
- xspect/templates/home.html +0 -51
- xspect/templates/layoutabout.html +0 -87
- xspect/templates/layouthome.html +0 -63
- xspect/templates/layoutspecies.html +0 -468
- xspect/templates/species.html +0 -33
- xspect/train_filter/get_paths.py +0 -35
- xspect/train_filter/interface_XspecT.py +0 -204
- xspect/train_filter/k_mer_count.py +0 -162
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/LICENSE +0 -0
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/entry_points.txt +0 -0
- {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/top_level.txt +0 -0
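The listing shows the Flask web app (xspect/WebApp.py, deleted below) being replaced by a FastAPI interface (xspect/fastapi.py) alongside a reworked CLI (xspect/main.py) and new model modules. As a minimal, hypothetical sketch of how the new app might be served locally with uvicorn — the import target "xspect.fastapi:app" is an assumption inferred from the file names in this diff, not something the diff confirms:

    # Hypothetical sketch only: serve the FastAPI app added in 0.2.0.
    # "xspect.fastapi:app" is assumed from the file listing above; check
    # the installed package for the actual module attribute name.
    import uvicorn

    if __name__ == "__main__":
        uvicorn.run("xspect.fastapi:app", host="127.0.0.1", port=8000)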
xspect/WebApp.py
DELETED
@@ -1,737 +0,0 @@
-"""XspecT Flask web app"""
-
-import re
-import sys
-import warnings
-import subprocess
-import os
-import csv
-import json
-import time
-import logging
-import pickle
-import secrets
-import pandas as pd
-from Bio import Entrez, Medline
-from flask import (
-    Flask,
-    render_template,
-    session,
-    request,
-    redirect,
-    abort,
-    make_response,
-    jsonify,
-)
-from xspect.Classifier import classify
-from xspect.search_filter import (
-    single_oxa,
-    get_added_genomes,
-    read_search,
-    read_search_spec,
-    pre_processing,
-    pre_processing_prefilter2,
-    read_search_pre,
-)
-from xspect.train_filter.interface_XspecT import load_translation_dict
-
-
-warnings.filterwarnings("ignore")
-
-# Source Logging and Error Handling
-# https://flask.palletsprojects.com/en/1.1.x/logging/
-# https://pythonise.com/series/learning-flask/flask-error-handling
-# Logging Source: https://stackoverflow.com/questions/17743019/flask-logging-cannot-get-it-to-write-to-a-file
-logging.basicConfig(filename="logger.log", level=logging.ERROR)
-
-# init WebApp with flask
-app = Flask(__name__)
-
-app.secret_key = "test"
-
-
-# Initialize an empty list for the options
-def load_saved_options():
-    try:
-        with open("saved_options.txt", "r") as file:
-            options = [line.strip() for line in file]
-            return options
-    except FileNotFoundError:
-        return []
-
-
-saved_options = load_saved_options()
-
-
-# Error Handling:
-# https://pythonise.com/series/learning-flask/flask-error-handling
-
-
-@app.route("/load_saved_options", methods=["GET"])
-def load_saved_options_route():
-    options = load_saved_options()
-    return jsonify({"options": options})
-
-
-@app.errorhandler(404)
-def not_found(e):
-    return render_template("404.html")
-
-
-@app.errorhandler(500)
-def not_found(e):
-    app.logger.error(f"SERVER ERROR 500 at route {request.url} with error message: {e}")
-    app.logger.error(
-        f'Parameters: IC_Lookup{session.get("IC_lookup")}, \n'
-        f'OXA: {session.get("OXA")}, \n'
-        f'QUICK: {session.get("quick")}, \n'
-        f'Filename: {session.get("filename")}, \n'
-        f'Vals OXA: {session.get("vals_oxa")}, \n'
-        f'Vals IC: {session.get("vals_ct")}, \n'
-        f'Hits IC: {session.get("hits_ct")}, \n'
-        f'Time: {session.get("time")}, \n'
-        f'Prediction: {session.get("prediction")}'
-    )
-    return render_template("500.html")
-
-
-@app.errorhandler(400)
-def not_found(e):
-    return render_template("400.html")
-
-
-@app.errorhandler(401)
-def not_found(e):
-    return render_template("401.html")
-
-
-# redirects to the homepage
-@app.route("/", methods=["GET", "POST"])
-def redirect_home():
-    return redirect("home")
-
-
-# home page
-@app.route("/home")
-def home():
-    """returns home page"""
-    return render_template("home.html")
-
-
-# Starts the assignment process for AspecT and leads to the result page
-@app.route("/assignspec")
-def assignspec():
-    """Uses user options to process the file, returns a signal to the loading page to go to the
-    result page when done"""
-
-    # getting user parameters back with session function
-    filename = session.get("filename", None)
-    quick = session.get("quick")
-    metagenome = session.get("metagenome")
-    added = session.get("added", None)
-    oxa = session.get("OXA", None)
-    genus = session.get("genus")
-    start = time.time()
-
-    if not (os.path.exists(filename)):
-        # in case the user types in the route of the loading screen
-        # or the file does not exist anymore
-        return redirect("/resultsspec")
-
-    else:
-        # Checking file type
-        # if the file is fasta -> concat lines
-        ext = filename.split(".")[-2]
-        with open(filename) as f:
-            reads = f.read().splitlines()
-
-        # Concat lines if not a .fq file
-        if ext != "fq" and ext != "fastq":
-            reads = "".join(reads)
-            reads = reads.split(">")
-            if quick:
-                quick = 1
-            else:
-                quick = 0
-            if metagenome:
-                quick = 4
-            reads.pop(0)
-        else:
-            if metagenome:
-                quick = 4
-            else:
-                quick = 2
-        # deleting file
-        os.remove(filename)
-
-        for i in range(len(reads)):
-            reads[i] = reads[i].upper()
-        # starts the lookup for a given sequence
-        if metagenome:
-            start_meta = time.time()
-            reads, reads_oxa = read_search_pre(reads, BF_Master_prefilter, ext)
-            end_meta = time.time()
-            needed_meta = round(end_meta - start_meta, 2)
-            print("Runtime filtering: ", needed_meta)
-
-        # Lookup in Bloom filter
-        # reads should be a list of sequences
-
-        score_ct, names_ct, hits_ct, predictions = read_search_spec(
-            reads, quick, BF_Master, ext, genus
-        )
-        print("Testing Step 2")
-
-        # preparing reads for oxa search
-        if metagenome:
-            reads = reads_oxa
-
-        # storing values in session for creating the plot
-        if metagenome:
-            reads_classified = score_ct
-            session["reads_classified"] = reads_classified
-            # assign reads to species
-            species_dict = {}
-            predictions_names = set()
-            for ele in predictions:
-                predictions_names.add(ele)
-            for species in predictions_names:
-                species_dict[species] = []
-            # dict with species as keys and reads as values for the oxa search
-            for i in range(len(predictions)):
-                species_dict[predictions[i]].append(reads[i])
-
-        if not metagenome:
-            session["vals_ct_spec"] = score_ct
-            session["names_ct_spec"] = names_ct
-            session["hits_ct_spec"] = hits_ct
-
-        if oxa:
-            if not metagenome:
-                (
-                    score_oxa,
-                    names_oxa,
-                    coordinates_forward,
-                    coordinates_reversed,
-                ) = single_oxa(reads, ext)
-                for k in range(len(score_oxa)):
-                    if score_oxa[k] > 1:
-                        score_oxa[k] = 1
-                session["vals_oxa_spec"] = score_oxa
-                session["names_oxa_spec"] = names_oxa
-                session["coordinates_forward"] = coordinates_forward
-                session["coordinates_reversed"] = coordinates_reversed
-            elif metagenome:
-                # lookup for individual species
-                score_oxa_list = []
-                names_oxa_list = []
-                coordinates_forward_list = []
-                coordinates_reversed_list = []
-                for species in species_dict:
-                    (
-                        score_oxa,
-                        names_oxa,
-                        coordinates_forward,
-                        coordinates_reversed,
-                    ) = single_oxa(species_dict[species], ext)
-                    for k in range(len(score_oxa)):
-                        if score_oxa[k] > 1:
-                            score_oxa[k] = 1
-                    score_oxa_list.append((score_oxa, species))
-                    names_oxa.append(names_oxa_list)
-                    coordinates_forward_list.append(coordinates_forward)
-                    coordinates_reversed_list.append(coordinates_reversed)
-                oxa_results = []
-                # The tuple approach does not work with the max function because of too many arguments
-                for i in range(len(score_oxa_list)):
-                    if max(score_oxa_list[i][0]) > 0:
-                        oxa_results.append(
-                            (
-                                score_oxa_list[i][1],
-                                names_oxa[
-                                    score_oxa_list[i][0].index(max(score_oxa_list[i][0]))
-                                ],
-                                max(score_oxa_list[i][0]),
-                            )
-                        )
-                # print(oxa_results)
-                session["oxa_results"] = oxa_results
-                session["vals_oxa_spec"] = score_oxa_list
-                session["names_oxa_spec"] = names_oxa_list
-                session["coordinates_forward"] = coordinates_forward_list
-                session["coordinates_reversed"] = coordinates_reversed_list
-
-        else:
-            session["oxa_results"] = "None"
-            session["vals_oxa_spec"] = "None"
-            session["names_oxa_spec"] = "None"
-
-        # making prediction
-        if not metagenome:
-            # add dynamic path
-            prediction = classify(
-                r"Training_data/" + genus + "_Training_data_spec.csv", score_ct, True
-            )
-            prediction_claast = prediction
-            if prediction == "sp.":
-                prediction = "NONE of the known " + genus + " species"
-            else:
-                translation_dict = load_translation_dict(genus)
-                prediction = translation_dict[prediction]
-        else:
-            prediction_claast = None
-            prediction = []
-            for species in reads_classified:
-                prediction.append(species)
-
-        session["prediction"] = prediction
-
-        end = time.time()
-        needed = round(end - start, 2)
-        print("Runtime: ", needed)
-        session["time"] = str(needed)
-
-        if prediction_claast == "470":
-            IC_lookup = [True, True, True, True, True, True, True, True, False]
-            score_claast, names_claast, hits_claast = read_search(IC_lookup, reads, quick=1)
-            # making prediction
-            prediction_claast = classify(
-                r"Training_data/Training_data_IC.csv", score_claast, IC_lookup
-            )
-            # Making the label look nicer
-            if "IC" in prediction_claast and len(prediction_claast) == 3:
-                prediction_claast = "International Clone " + prediction_claast[2]
-            elif prediction_claast == "None":
-                prediction_claast = "NONE of the selected Clones or Genomes"
-            else:
-                pass
-            session["prediction_claast"] = prediction_claast
-            session["vals_claast"] = score_claast
-            session["names_claast"] = names_claast
-            session["hits_claast"] = hits_claast
-            app.logger.info(
-                "Assignment done for " + str(filename) + ", Time needed: " + str(needed)
-            )
-            return redirect("/resultsspec")
-        else:
-            session["prediction_claast"] = "n/a"
-            session["vals_claast"] = [0, 0, 0, 0, 0, 0, 0, 0]
-            session["names_claast"] = [0, 0, 0, 0, 0, 0, 0, 0]
-            session["hits_claast"] = [0, 0, 0, 0, 0, 0, 0, 0]
-            app.logger.info(
-                "Assignment done for " + str(filename) + ", Time needed: " + str(needed)
-            )
-            return redirect("/resultsspec")
-
-        app.logger.info(
-            "Assignment done for " + str(filename) + ", Time needed: " + str(needed)
-        )
-        return redirect("/resultsspec")
-
-
-# about page
-@app.route("/about")
-def about():
-    """returns about page"""
-    counter = json.load(open(r"filter/OXAs_dict/counter.txt"))
-    ids = [*counter]
-    r = csv.reader(open(r"Training_data/Training_data_IC.csv"))
-    df = pd.DataFrame(data=list(r))
-    svm_table = df.to_html(index=False, header=False)
-    return render_template("about.html", svm_table=svm_table, oxa_ids=ids)
-
-
-# load new BF
-@app.route("/change_genus", methods=["GET", "POST"])
-def change_genus():
-    """Load new BF for selected genus"""
-    selected_genus = request.form.get("genus")
-    session["genus"] = selected_genus
-
-    global BF_Master
-    global BF_Master_prefilter
-    BF_Master = pre_processing(selected_genus)
-    BF_Master_prefilter = pre_processing_prefilter2(selected_genus)
-
-    # Return an empty response
-    return make_response("", 200)
-
-
-# train new genus
-@app.route("/train_new_genus", methods=["GET", "POST"])
-def train_new_genus():
-    """Train new genus"""
-    if request.method == "POST":
-        # extract genus name from request
-        genus_name = list(request.json.values())[0]
-
-        # Sys arguments for the Python program
-        system_arguments = [genus_name, "1"]
-
-        # Run XspecT_Trainer
-        subprocess.run([sys.executable, "XspecT_trainer.py"] + system_arguments)
-        print("")
-        print("Training done!")
-
-        # save genus in options
-        # Check whether the option already exists
-        if genus_name not in saved_options:
-            print("Saving new genus: " + genus_name)
-            # Add the option to the list
-            saved_options.append(genus_name)
-
-            # Save the options to the file (or database)
-            with open("saved_options.txt", "a") as file:
-                file.write(genus_name + "\n")
-
-        # Return a success response
-        return redirect("/species")
-
-    # Return an empty response
-    return make_response("", 200)
-
-
-# species assignment page
-@app.route("/species", methods=["GET", "POST"])
-def species():
-    """returns species page"""
-    added = get_added_genomes()
-    if request.method == "POST":
-        data = request.json
-        if data is not None:
-            filename = data[-4]
-            session["quick"] = data[-3]
-            session["OXA"] = data[-2]
-            session["metagenome"] = data[-1]
-            del data[-4:]
-
-            if not (os.path.exists("files")):
-                os.mkdir("files")
-            name = r"files/" + str(secrets.token_hex(8)) + filename + ".txt"
-
-            with open(name, "w") as filehandle:
-                for read in data:
-                    filehandle.write("%s\n" % read)
-
-            session["filename"] = name
-
-            # Returning a json signal to ajax to redirect to the loading page
-            # the loading page then triggers the assignment process
-            app.logger.info("Assignment started for " + filename)
-            return json.dumps({"success": True})
-
-        else:
-            # Source: https://flask-restplus.readthedocs.io/en/stable/errors.html
-            abort(400)
-    return render_template(
-        "species.html",
-        added=added,
-        results_oxa=[0, 0, 0, 0],
-        oxas="None",
-        results_ct=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        hits_ct=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        clonetypes=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        results_claast=[0, 0, 0, 0, 0, 0, 0, 0],
-        hits_claast=[0, 0, 0, 0, 0, 0, 0, 0],
-        clonetypes_claast=[0, 0, 0, 0, 0, 0, 0, 0],
-        filename="filename",
-        maxi=1,
-        time=0,
-        prediction="n/a",
-        prediction_claast="n/a",
-        literature="",
-        literature_content="",
-        literature_abstract="",
-        literature_authors=[[""], [""], [""], [""], [""], [""], [""], [""], [""], [""]],
-        literature_journal="",
-        literature_all="",
-        text="",
-        additional_info="",
-        metagenome=False,
-        oxa_labels="",
-        oxa_data="",
-    )
-
-
-@app.route("/resultsspec", methods=["GET", "POST"])
-def resultsspec():
-    """gets XspecT results, creates a plot and displays them on the page with further information"""
-
-    # CALCULATING RESULTS -----------------------------------------------------
-
-    metagenome = session.get("metagenome")
-
-    if not metagenome:
-        # Values of clonetypes, is None if not existing
-        filename = session.get("filename")
-        values_ct = session.get("vals_ct_spec")
-        hits_ct = session.get("hits_ct_spec")
-        clonetypes = session.get("names_ct_spec")
-        values_claast = session.get("vals_claast")
-        hits_claast = session.get("hits_claast")
-        clonetypes_claast = session.get("names_claast")
-        prediction = session.get("prediction")
-        prediction_claast = session.get("prediction_claast")
-        # Values of OXAs
-        values_oxa = session.get("vals_oxa_spec")
-        oxa_names = session.get("names_oxa_spec")
-        additional_info = "Score"
-        maxi = 1
-        text = "Most similar Acinetobacter species"
-        metagenome = False
-        oxa_labels = "None"
-        oxa_data = "None"
-
-        dic = {}
-        clonetypes_sorted = []
-        # the values will be sorted by highest values for better readability
-        for i in range(len(values_ct)):
-            dic[clonetypes[i]] = values_ct[i]
-        values_sorted = sorted(values_ct, reverse=True)
-        for i in sorted(dic, key=dic.get, reverse=True):
-            clonetypes_sorted.append(i)
-
-        # only the 10 biggest values will be shown for visibility
-        if len(values_sorted) > 10:
-            values_sorted = values_sorted[:10]
-            clonetypes_sorted = clonetypes_sorted[:10]
-
-        # if fewer than 5 values are found, add empty values
-        if len(values_sorted) < 5:
-            for i in range(5 - len(values_sorted)):
-                values_sorted.append(0)
-                clonetypes_sorted.append("n/a")
-
-        # Save results in csv file
-        # TODO later
-        # with open(r"Results/WebApp/results_" + filename[22:-4] + ".csv", 'w', newline='') as file:
-        #     file.write("XspecT Prediction, XspecT Score, ClAssT Prediction, ClAssT Score, Oxa Prediction, Oxa Score\n")
-        #     for i in range(len(values_sorted)):
-        #         file.write(clonetypes_sorted[i] + ", " + str(values_sorted[i]) + ", " + str(prediction_claast) + ", " + str(values_claast) + ", " + str(prediction) + ", " + str(values_oxa[i]) + "\n")
-
-    elif metagenome:
-        reads_classified = session.get("reads_classified")
-        # sort reads_classified by highest value of the second element
-        sorted_reads_classified = dict(
-            sorted(reads_classified.items(), key=lambda x: x[1][1], reverse=True)
-        )
-        # get key of reads_classified with highest value of the second element from the value
-        predictions = []
-        values = []
-        for key, value in sorted_reads_classified.items():
-            predictions.append(key)
-            values.append(value[1])
-        clonetypes_sorted = predictions[:12]
-        values_sorted = values[:12]
-        prediction = predictions[0]
-        maxi = values[0]
-        additional_info = []
-        metagenome = True
-        filename = session.get("filename")
-
-        # Save results in csv file
-        # TODO later
-        # with open(r"Results/WebApp/results_" + filename[22:-4] + ".csv", 'w', newline='') as file:
-        #     file.write("Prediction, Score Median, Number of Contigs, Contig-Length Median, Uniqueness, Bootstrap Median\n")
-        #     for key, value in sorted_reads_classified.items():
-        #         file.write(key + "," + str(value[0]) + "," + str(value[1]) + "," + str(value[2]) + "," + str(value[3]) + "," + str(value[4]) + "\n")
-
-        for key, value in sorted_reads_classified.items():
-            number_of_contigs = value[1]
-            value[0] = "Score Median: " + str(value[0])
-            value[1] = "Number of Contigs: " + str(number_of_contigs)
-            value[2] = "Contig-Length Median: " + str(value[2])
-            value[3] = "Uniqueness: " + str(value[3])
-            value[4] = "Bootstrap Median: " + str(value[4])
-            additional_info.append(
-                value[0]
-                + "\n"
-                + value[1]
-                + "\n"
-                + value[2]
-                + "\n"
-                + value[3]
-                + "\n"
-                + value[4]
-            )
-        text = "Detected Acinetobacter species"
-
-        # Values of clonetypes, is None if not existing
-        values_ct = session.get("vals_ct_spec")
-        hits_ct = session.get("hits_ct_spec")
-        clonetypes = session.get("names_ct_spec")
-        values_claast = session.get("vals_claast")
-        hits_claast = session.get("hits_claast")
-        clonetypes_claast = session.get("names_claast")
-        prediction_claast = session.get("prediction_claast")
-        # Values of OXAs
-        values_oxa = session.get("vals_oxa_spec")
-        oxa_names = session.get("names_oxa_spec")
-        oxa_results = session.get("oxa_results")
-
-        if oxa_results != "None":
-            oxa_labels = []
-            oxa_data = []
-            for results in oxa_results:
-                oxa_labels.append("A. " + results[0] + ": " + results[1])
-                oxa_data.append(results[2])
-        else:
-            oxa_labels = "None"
-            oxa_data = "None"
-
-    filename = session.get("filename")[22:]
-    filename = os.path.splitext(filename)[0]
-
-    # PUBMED LITERATURE SEARCH --------------------------------------------------------------------------------------------
-
-    # Pubmed literature search Source: https://gist.github.com/bonzanini/5a4c39e4c02502a8451d
-    # and https://biopython-tutorial.readthedocs.io/en/latest/notebooks/09%20-%20Accessing%20NCBIs%20Entrez%20databases.html
-    Entrez.email = "xspectBIOINF@web.de"
-    handle = Entrez.esearch(
-        db="pubmed", sort="relevance", retmax="10", retmode="xml", term=prediction
-    )
-    pubmed_results = Entrez.read(handle)
-
-    id_list = pubmed_results["IdList"]
-    literature = []
-    for i in id_list:
-        literature.append("https://pubmed.ncbi.nlm.nih.gov/" + str(i) + "/")
-    ids = ",".join(id_list)
-    handle = Entrez.efetch(db="pubmed", retmode="xml", id=ids)
-    papers = Entrez.read(handle)
-
-    handle2 = Entrez.efetch(db="pubmed", id=ids, rettype="medline")
-    literature_info = Medline.parse(handle2)
-    literature_info = list(literature_info)
-
-    literature_content = []
-    literature_abstract = []
-    literature_authors = []
-    literature_journal = []
-    literature_id = []
-    for paper in papers["PubmedArticle"]:
-        literature_content.append(paper["MedlineCitation"]["Article"]["ArticleTitle"])
-        try:
-            literature_abstract.append(
-                paper["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
-            )
-        except:
-            literature_abstract.append(["No abstract available"])
-
-    for i in range(len(literature_content)):
-        literature_id.append("paper_" + str(i))
-
-    for record in literature_info:
-        literature_authors.append(record.get("AU", "?"))
-        literature_journal.append(record.get("SO", "?"))
-
-    for i in range(len(literature_authors)):
-        literature_authors[i] = " ,".join(literature_authors[i])
-
-    for i in range(len(literature_abstract)):
-        literature_abstract[i] = " ".join(literature_abstract[i])
-
-    CLEANR = re.compile("<.*?>")
-
-    for i in range(len(literature_content)):
-        literature_content[i] = re.sub(CLEANR, "", literature_content[i])
-        literature_abstract[i] = re.sub(CLEANR, "", literature_abstract[i])
-
-    literature_all = [
-        literature,
-        literature_content,
-        literature_abstract,
-        literature_authors,
-        literature_journal,
-        literature_id,
-    ]
-
-    if request.method == "POST":
-        data = request.json
-        Entrez.email = "xspectBIOINF@web.de"
-        handle = Entrez.esearch(
-            db="pubmed",
-            sort=str(data[1]),
-            retmax=str(data[0]),
-            retmode="xml",
-            term=prediction,
-        )
-        pubmed_results = Entrez.read(handle)
-
-        id_list = pubmed_results["IdList"]
-        literature = []
-        for i in id_list:
-            literature.append("https://pubmed.ncbi.nlm.nih.gov/" + str(i) + "/")
-        ids = ",".join(id_list)
-        handle = Entrez.efetch(db="pubmed", retmode="xml", id=ids)
-        papers = Entrez.read(handle)
-
-        handle2 = Entrez.efetch(db="pubmed", id=ids, rettype="medline")
-        literature_info = Medline.parse(handle2)
-        literature_info = list(literature_info)
-
-        literature_content = []
-        literature_abstract = []
-        literature_authors = []
-        literature_journal = []
-        literature_id = []
-        for paper in papers["PubmedArticle"]:
-            literature_content.append(
-                paper["MedlineCitation"]["Article"]["ArticleTitle"]
-            )
-            literature_abstract.append(
-                paper["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
-            )
-
-        for i in range(len(literature_content)):
-            literature_id.append("paper_" + str(i))
-
-        for record in literature_info:
-            literature_authors.append(record.get("AU", "?"))
-            literature_journal.append(record.get("SO", "?"))
-
-        for i in range(len(literature_authors)):
-            literature_authors[i] = " ,".join(literature_authors[i])
-
-        for i in range(len(literature_abstract)):
-            literature_abstract[i] = " ".join(literature_abstract[i])
-
-        CLEANR = re.compile("<.*?>")
-
-        for i in range(len(literature_content)):
-            literature_content[i] = re.sub(CLEANR, "", literature_content[i])
-            literature_abstract[i] = re.sub(CLEANR, "", literature_abstract[i])
-
-        literature_all = [
-            literature,
-            literature_content,
-            literature_abstract,
-            literature_authors,
-            literature_journal,
-            literature_id,
-        ]
-
-        return json.dumps(literature_all)
-
-    return render_template(
-        "species.html",
-        results_oxa=values_oxa,
-        oxas=oxa_names,
-        results_ct=values_sorted,
-        hits_ct=hits_ct,
-        clonetypes=clonetypes_sorted,
-        results_claast=values_claast,
-        hits_claast=hits_claast,
-        clonetypes_claast=clonetypes_claast,
-        filename=filename,
-        maxi=maxi,
-        time=session.get("time"),
-        prediction=prediction,
-        prediction_claast=prediction_claast,
-        literature_all=literature_all,
-        additional_info=additional_info,
-        text=text,
-        metagenome=metagenome,
-        oxa_labels=oxa_labels,
-        oxa_data=oxa_data,
-    )
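The resultsspec() route above builds its literature panel with Biopython's Entrez and Medline modules. A minimal, self-contained sketch of that lookup pattern, with a placeholder contact address and search term (requires network access to NCBI):

    # Sketch of the Entrez/Medline pattern used in resultsspec() above.
    # The e-mail address and search term are placeholders, not values
    # from the package.
    from Bio import Entrez, Medline

    Entrez.email = "you@example.org"  # NCBI requires a contact address
    handle = Entrez.esearch(db="pubmed", sort="relevance", retmax="5",
                            retmode="xml", term="Acinetobacter baumannii")
    ids = ",".join(Entrez.read(handle)["IdList"])

    # Fetch the matching MEDLINE records and print title and journal
    handle = Entrez.efetch(db="pubmed", id=ids, rettype="medline", retmode="text")
    for record in Medline.parse(handle):
        print(record.get("TI", "?"), "-", record.get("SO", "?"))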