XspecT 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of XspecT has been flagged as a potentially problematic release.
Files changed (58)
  1. {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/METADATA +23 -29
  2. XspecT-0.2.0.dist-info/RECORD +30 -0
  3. {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/WHEEL +1 -1
  4. xspect/definitions.py +42 -0
  5. xspect/download_filters.py +11 -26
  6. xspect/fastapi.py +101 -0
  7. xspect/file_io.py +34 -103
  8. xspect/main.py +70 -66
  9. xspect/model_management.py +88 -0
  10. xspect/models/__init__.py +0 -0
  11. xspect/models/probabilistic_filter_model.py +277 -0
  12. xspect/models/probabilistic_filter_svm_model.py +169 -0
  13. xspect/models/probabilistic_single_filter_model.py +109 -0
  14. xspect/models/result.py +148 -0
  15. xspect/pipeline.py +201 -0
  16. xspect/run.py +38 -0
  17. xspect/train.py +304 -0
  18. xspect/train_filter/create_svm.py +6 -183
  19. xspect/train_filter/extract_and_concatenate.py +117 -121
  20. xspect/train_filter/html_scrap.py +16 -28
  21. xspect/train_filter/ncbi_api/download_assemblies.py +7 -8
  22. xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +9 -17
  23. xspect/train_filter/ncbi_api/ncbi_children_tree.py +3 -2
  24. xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +7 -5
  25. XspecT-0.1.3.dist-info/RECORD +0 -49
  26. xspect/BF_v2.py +0 -637
  27. xspect/Bootstrap.py +0 -29
  28. xspect/Classifier.py +0 -142
  29. xspect/OXA_Table.py +0 -53
  30. xspect/WebApp.py +0 -724
  31. xspect/XspecT_mini.py +0 -1363
  32. xspect/XspecT_trainer.py +0 -611
  33. xspect/map_kmers.py +0 -155
  34. xspect/search_filter.py +0 -504
  35. xspect/static/How-To.png +0 -0
  36. xspect/static/Logo.png +0 -0
  37. xspect/static/Logo2.png +0 -0
  38. xspect/static/Workflow_AspecT.png +0 -0
  39. xspect/static/Workflow_ClAssT.png +0 -0
  40. xspect/static/js.js +0 -615
  41. xspect/static/main.css +0 -280
  42. xspect/templates/400.html +0 -64
  43. xspect/templates/401.html +0 -62
  44. xspect/templates/404.html +0 -62
  45. xspect/templates/500.html +0 -62
  46. xspect/templates/about.html +0 -544
  47. xspect/templates/home.html +0 -51
  48. xspect/templates/layoutabout.html +0 -87
  49. xspect/templates/layouthome.html +0 -63
  50. xspect/templates/layoutspecies.html +0 -468
  51. xspect/templates/species.html +0 -33
  52. xspect/train_filter/README_XspecT_Erweiterung.md +0 -119
  53. xspect/train_filter/get_paths.py +0 -35
  54. xspect/train_filter/interface_XspecT.py +0 -204
  55. xspect/train_filter/k_mer_count.py +0 -162
  56. {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/LICENSE +0 -0
  57. {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/entry_points.txt +0 -0
  58. {XspecT-0.1.3.dist-info → XspecT-0.2.0.dist-info}/top_level.txt +0 -0
xspect/WebApp.py DELETED
@@ -1,724 +0,0 @@
-"""XspecT Flask web app"""
-
-import re
-import sys
-import warnings
-import subprocess
-import os
-import csv
-import json
-import time
-import logging
-import pickle
-import secrets
-import pandas as pd
-from Bio import Entrez, Medline, SeqIO
-from flask import (
-    Flask,
-    render_template,
-    session,
-    request,
-    redirect,
-    abort,
-    make_response,
-    jsonify,
-)
-from xspect.Classifier import classify
-from xspect.search_filter import (
-    single_oxa,
-    get_added_genomes,
-    read_search,
-    read_search_spec,
-    pre_processing,
-    pre_processing_prefilter2,
-    read_search_pre,
-)
-from xspect.train_filter.interface_XspecT import load_translation_dict
-
-
-warnings.filterwarnings("ignore")
-
-# Source Logging and Error Handling
-# https://flask.palletsprojects.com/en/1.1.x/logging/
-# https://pythonise.com/series/learning-flask/flask-error-handling
-# Logging Source: https://stackoverflow.com/questions/17743019/flask-logging-cannot-get-it-to-write-to-a-file
-logging.basicConfig(filename="logger.log", level=logging.ERROR)
-
-# init WebApp with flask
-app = Flask(__name__)
-
-app.secret_key = "test"
-
-
-# Initialisiere eine leere Liste für die Optionen
-def load_saved_options():
-    try:
-        with open("saved_options.txt", "r") as file:
-            options = [line.strip() for line in file]
-            return options
-    except FileNotFoundError:
-        return []
-
-
-saved_options = load_saved_options()
-
-
-# Error Handling:
-# https://pythonise.com/series/learning-flask/flask-error-handling
-
-
-@app.route("/load_saved_options", methods=["GET"])
-def load_saved_options_route():
-    options = load_saved_options()
-    return jsonify({"options": options})
-
-
-@app.errorhandler(404)
-def not_found(e):
-    return render_template("404.html")
-
-
-@app.errorhandler(500)
-def not_found(e):
-    app.logger.error(f"SERVER ERROR 500 at route {request.url} with error message: {e}")
-    app.logger.error(
-        f'Parameters: IC_Lookup{session.get("IC_lookup")}, \n'
-        f'OXA: {session.get("OXA")}, \n'
-        f'QUICK: {session.get("quick")}, \n'
-        f'Filename: {session.get("filename")}, \n'
-        f'Vals OXA: {session.get("vals_oxa")}, \n'
-        f'Vals IC: {session.get("vals_ct")}, \n'
-        f'Hits IC: {session.get("hits_ct")}, \n'
-        f'Time: {session.get("time")}, \n'
-        f'Prediction: {session.get("prediction")}'
-    )
-    return render_template("500.html")
-
-
-@app.errorhandler(400)
-def not_found(e):
-    return render_template("400.html")
-
-
-@app.errorhandler(401)
-def not_found(e):
-    return render_template("401.html")
-
-
-# redirects to the homepage
-@app.route("/", methods=["GET", "POST"])
-def redirect_home():
-    return redirect("home")
-
-
-# about page
-@app.route("/home")
-def home():
-    """returns home page"""
-    return render_template("home.html")
-
-
-# Starts Assignment-Process for AspecT and leads to result-page
-@app.route("/assignspec")
-def assignspec():
-    """Uses User Options to process the file, returns a signal to the loadingpage to go the the
-    result-page when done"""
-
-    # getting user parameters back with session function
-    filename = session.get("filename", None)
-    quick = session.get("quick")
-    metagenome = session.get("metagenome")
-    added = session.get("added", None)
-    oxa = session.get("OXA", None)
-    genus = session.get("genus")
-    start = time.time()
-
-    if not (os.path.exists(filename)):
-        # in case that user types in route of loading screen
-        # or file does not exist anymore
-        return redirect("/resultsspec")
-
-    else:
-        ext = filename.split(".")[-1]
-        with open(filename) as f:
-            reads = f.read().splitlines()
-
-        if ext == "fq" or ext == "fastq":
-            sequences = SeqIO.parse(filename, "fastq")
-            quick = 2
-        else:
-            if quick:
-                quick = 1
-            else:
-                quick = 0
-            sequences = SeqIO.parse(filename, "fasta")
-
-        reads = [str(sequence.seq).upper() for sequence in sequences]
-
-        # starts the lookup for a given sequence
-        if metagenome:
-            quick = 4
-            start_meta = time.time()
-            reads, reads_oxa = read_search_pre(reads, BF_Master_prefilter, ext)
-            end_meta = time.time()
-            needed_meta = round(end_meta - start_meta, 2)
-            print("Runtime filtering: ", needed_meta)
-
-        # Lookup in Bloomfilter
-        # reads should be a list of sequences
-
-        score_ct, names_ct, hits_ct, predictions = read_search_spec(
-            reads, quick, BF_Master, ext, genus
-        )
-        print("Testing Step 2")
-
-        # preparing reads for oxa search
-        if metagenome:
-            reads = reads_oxa
-
-        # storing values in session for creating plot
-        if metagenome:
-            reads_classified = score_ct
-            session["reads_classified"] = reads_classified
-            # assign reads to species
-            species_dict = {}
-            predictions_names = set()
-            for prediction in predictions:
-                predictions_names.add(prediction)
-            for species in predictions_names:
-                species_dict[species] = []
-            # dict with species as keys and reads as values for oxa search
-            for i in range(len(predictions)):
-                species_dict[predictions[i]].append(reads[i])
-
-        if not metagenome:
-            session["vals_ct_spec"] = score_ct
-            session["names_ct_spec"] = names_ct
-            session["hits_ct_spec"] = hits_ct
-
-        if oxa:
-            if not metagenome:
-                (
-                    score_oxa,
-                    names_oxa,
-                    coordinates_forward,
-                    coordinates_reversed,
-                ) = single_oxa(reads, ext)
-                for k in range(len(score_oxa)):
-                    if score_oxa[k] > 1:
-                        score_oxa[k] = 1
-                session["vals_oxa_spec"] = score_oxa
-                session["names_oxa_spec"] = names_oxa
-                session["coordinates_forward"] = coordinates_forward
-                session["coordinates_reversed"] = coordinates_reversed
-            elif metagenome:
-                # lookup for individual species
-                score_oxa_list = []
-                names_oxa_list = []
-                coordinates_forward_list = []
-                coordinates_reversed_list = []
-                for species in species_dict:
-                    (
-                        score_oxa,
-                        names_oxa,
-                        coordinates_forward,
-                        coordinates_reversed,
-                    ) = single_oxa(species_dict[species], ext)
-                    for k in range(len(score_oxa)):
-                        if score_oxa[k] > 1:
-                            score_oxa[k] = 1
-                    score_oxa_list.append((score_oxa, species))
-                    names_oxa.append(names_oxa_list)
-                    coordinates_forward_list.append(coordinates_forward)
-                    coordinates_reversed_list.append(coordinates_reversed)
-                oxa_results = []
-                # Ansatz mit den Tupeln geht nicht mit max funktion weil zu viele argumente
-                for i in range(len(score_oxa_list)):
-                    if max(score_oxa_list[i][0]) > 0:
-                        oxa_results.append(
-                            (
-                                score_oxa_list[i][1],
-                                names_oxa[
-                                    score_oxa_list[i][0].index(max(score_oxa_list[i][0]))
-                                ],
-                                max(score_oxa_list[i][0]),
-                            )
-                        )
-                # print(oxa_results)
-                session["oxa_results"] = oxa_results
-                session["vals_oxa_spec"] = score_oxa_list
-                session["names_oxa_spec"] = names_oxa_list
-                session["coordinates_forward"] = coordinates_forward_list
-                session["coordinates_reversed"] = coordinates_reversed_list
-
-        else:
-            session["oxa_results"] = "None"
-            session["vals_oxa_spec"] = "None"
-            session["names_oxa_spec"] = "None"
-
-        # making prediction
-        if not metagenome:
-            # add dynamic path
-            prediction = classify(
-                r"Training_data/" + genus + "_Training_data_spec.csv", score_ct, True
-            )
-            prediction_claast = prediction
-            if prediction == "sp.":
-                prediction = "NONE of the known " + genus + " species"
-            else:
-                translation_dict = load_translation_dict(genus)
-                prediction = translation_dict[prediction]
-        else:
-            prediction_claast = None
-            prediction = []
-            for species in reads_classified:
-                prediction.append(species)
-
-        session["prediction"] = prediction
-
-        end = time.time()
-        needed = round(end - start, 2)
-        print("Runtime: ", needed)
-        session["time"] = str(needed)
-
-        if prediction_claast == "470":
-            IC_lookup = [True, True, True, True, True, True, True, True, False]
-            score_claast, names_claast, hits_claast = read_search(IC_lookup, reads, quick=1)
-            # making prediction
-            prediction_claast = classify(
-                r"Training_data/Training_data_IC.csv", score_claast, IC_lookup
-            )
-            # Making Label look nicer
-            if "IC" in prediction_claast and len(prediction_claast) == 3:
-                prediction_claast = "International Clone " + prediction_claast[2]
-            elif prediction_claast == "None":
-                prediction_claast = "NONE of the selected Clones or Genomes"
-            else:
-                pass
-            session["prediction_claast"] = prediction_claast
-            session["vals_claast"] = score_claast
-            session["names_claast"] = names_claast
-            session["hits_claast"] = hits_claast
-            app.logger.info(
-                "Assignment done for " + str(filename) + ", Time needed: " + str(needed)
-            )
-            return redirect("/resultsspec")
-        else:
-            session["prediction_claast"] = "n/a"
-            session["vals_claast"] = [0, 0, 0, 0, 0, 0, 0, 0]
-            session["names_claast"] = [0, 0, 0, 0, 0, 0, 0, 0]
-            session["hits_claast"] = [0, 0, 0, 0, 0, 0, 0, 0]
-            app.logger.info(
-                "Assignment done for " + str(filename) + ", Time needed: " + str(needed)
-            )
-            return redirect("/resultsspec")
-
-
-# about page
-@app.route("/about")
-def about():
-    """returns about page"""
-    counter = json.load(open(r"filter/OXAs_dict/counter.txt"))
-    ids = [*counter]
-    r = csv.reader(open(r"Training_data/Training_data_IC.csv"))
-    df = pd.DataFrame(data=list(r))
-    svm_table = df.to_html(index=False, header=False)
-    return render_template("about.html", svm_table=svm_table, oxa_ids=ids)
-
-
-# load new BF
-@app.route("/change_genus", methods=["GET", "POST"])
-def change_genus():
-    """Load new BF for selected genus"""
-    selected_genus = request.form.get("genus")
-    session["genus"] = selected_genus
-
-    global BF_Master
-    global BF_Master_prefilter
-    BF_Master = pre_processing(selected_genus)
-    BF_Master_prefilter = pre_processing_prefilter2(selected_genus)
-
-    # Leere Antwort zurückgeben
-    return make_response("", 200)
-
-
-# train new genus
-@app.route("/train_new_genus", methods=["GET", "POST"])
-def train_new_genus():
-    """Train new genus"""
-    if request.method == "POST":
-        # extract genus name from request
-        genus_name = list(request.json.values())[0]
-
-        # Sys arguments for the Python program
-        system_arguments = [genus_name, "1"]
-
-        # Run XspecT_Trainer
-        subprocess.run([sys.executable, "XspecT_trainer.py"] + system_arguments)
-        print("")
-        print("Training done!")
-
-        # save genus in options
-        # Überprüfe, ob die Option bereits vorhanden ist
-        if genus_name not in saved_options:
-            print("Saving new genus: " + genus_name)
-            # Füge die Option zur Liste hinzu
-            saved_options.append(genus_name)

-            # Speichere die Optionen in der Datei (oder Datenbank)
-            with open("saved_options.txt", "a") as file:
-                file.write(genus_name + "\n")
-
-        # Erfolgreiche Antwort zurückgeben
-        return redirect("/species")
-
-    # Leere Antwort zurückgeben
-    return make_response("", 200)
-
-
-# species assignment page
-@app.route("/species", methods=["GET", "POST"])
-def species():
-    """returns species page"""
-    added = get_added_genomes()
-    if request.method == "POST":
-        data = request.json
-        if data is not None:
-            filename = data[-4]
-            session["quick"] = data[-3]
-            session["OXA"] = data[-2]
-            session["metagenome"] = data[-1]
-            del data[-4:]
-
-            if not (os.path.exists("files")):
-                os.mkdir("files")
-            name = r"files/" + str(secrets.token_hex(8)) + filename + ".txt"
-
-            with open(name, "w") as filehandle:
-                for read in data:
-                    filehandle.write("%s\n" % read)
-
-            session["filename"] = name
-
-            # Returning a json signal to ajax to redirect to loading page
-            # the loading page then triggers the assignment process
-            app.logger.info("Assignment started for " + filename)
-            return json.dumps({"success": True})
-
-        else:
-            # Source: https://flask-restplus.readthedocs.io/en/stable/errors.html
-            abort(400)
-    return render_template(
-        "species.html",
-        added=added,
-        results_oxa=[0, 0, 0, 0],
-        oxas="None",
-        results_ct=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        hits_ct=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        clonetypes=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
-        results_claast=[0, 0, 0, 0, 0, 0, 0, 0],
-        hits_claast=[0, 0, 0, 0, 0, 0, 0, 0],
-        clonetypes_claast=[0, 0, 0, 0, 0, 0, 0, 0],
-        filename="filename",
-        maxi=1,
-        time=0,
-        prediction="n/a",
-        prediction_claast="n/a",
-        literature="",
-        literature_content="",
-        literature_abstract="",
-        literature_authors=[[""], [""], [""], [""], [""], [""], [""], [""], [""], [""]],
-        literature_journal="",
-        literature_all="",
-        text="",
-        additional_info="",
-        metagenome=False,
-        oxa_labels="",
-        oxa_data="",
-    )
-
-
-@app.route("/resultsspec", methods=["GET", "POST"])
-def resultsspec():
-    """gets XspecT-Results, creates a Plot and displays them on page with further information"""
-
-    # CALCULATING RESULTS -----------------------------------------------------
-
-    metagenome = session.get("metagenome")
-
-    if not metagenome:
-        # Values of clonetypes, is None if not existing
-        filename = session.get("filename")
-        values_ct = session.get("vals_ct_spec")
-        hits_ct = session.get("hits_ct_spec")
-        clonetypes = session.get("names_ct_spec")
-        values_claast = session.get("vals_claast")
-        hits_claast = session.get("hits_claast")
-        clonetypes_claast = session.get("names_claast")
-        prediction = session.get("prediction")
-        prediction_claast = session.get("prediction_claast")
-        # Values of OXAs
-        values_oxa = session.get("vals_oxa_spec")
-        oxa_names = session.get("names_oxa_spec")
-        additional_info = "Score"
-        maxi = 1
-        text = "Most similar Acinetobacter species"
-        metagenome = False
-        oxa_labels = "None"
-        oxa_data = "None"
-
-        dic = {}
-        clonetypes_sorted = []
-        # the values will be sorted by highest values for better readability
-        for i in range(len(values_ct)):
-            dic[clonetypes[i]] = values_ct[i]
-        values_sorted = sorted(values_ct, reverse=True)
-        for i in sorted(dic, key=dic.get, reverse=True):
-            clonetypes_sorted.append(i)
-
-        # only the 10 biggest values will be shown for visibility
-        if len(values_sorted) > 10:
-            values_sorted = values_sorted[:10]
-            clonetypes_sorted = clonetypes_sorted[:10]
-
-        # if less then 5 values are found, add empty values
-        if len(values_sorted) < 5:
-            for i in range(5 - len(values_sorted)):
-                values_sorted.append(0)
-                clonetypes_sorted.append("n/a")
-
-        # Save results in csv file
-        # TODO later
-        # with open(r"Results/WebApp/results_" + filename[22:-4] + ".csv", 'w', newline='') as file:
-        # file.write("XspecT Prediction, XspecT Score, ClAssT Prediction, ClAssT Score, Oxa Prediction, Oxa Score\n")
-        # for i in range(len(values_sorted)):
-        # file.write(clonetypes_sorted[i] + ", " + str(values_sorted[i]) + ", " + str(prediction_claast) + ", " + str(values_claast) + ", " + str(prediction) + ", " + str(values_oxa[i]) + "\n")
-
-    elif metagenome:
-        reads_classified = session.get("reads_classified")
-        genus = session.get("genus")
-        # sort reads_classified by highest value of the second element
-        sorted_reads_classified = dict(
-            sorted(reads_classified.items(), key=lambda x: x[1][1], reverse=True)
-        )
-        # get key of reads_classified with highest value of the second element from the value
-        predictions = []
-        values = []
-        translation_dict = load_translation_dict(genus)
-        for key, value in sorted_reads_classified.items():
-            predictions.append(translation_dict[key])
-            values.append(value[1])
-        clonetypes_sorted = predictions[:12]
-        values_sorted = values[:12]
-        prediction = predictions[0]
-        maxi = values[0]
-        additional_info = []
-        metagenome = True
-        filename = session.get("filename")
-
-        # Save results in csv file
-        # TODO later
-        # with open(r"Results/WebApp/results_" + filename[22:-4] + ".csv", 'w', newline='') as file:
-        # file.write("Prediction, Score Median, Number of Contigs, Contig-Length Median, Uniqueness, Bootstrap Median\n")
-        # for key, value in sorted_reads_classified.items():
-        # file.write(key + "," + str(value[0]) + "," + str(value[1]) + "," + str(value[2]) + "," + str(value[3]) + "," + str(value[4]) + "\n")
-
-        for key, value in sorted_reads_classified.items():
-            number_of_contigs = value[1]
-            value[0] = "Score Median: " + str(value[0])
-            value[1] = "Number of Contigs: " + str(number_of_contigs)
-            value[2] = "Contig-Length Median: " + str(value[2])
-            value[3] = "Uniqueness: " + str(value[3])
-            value[4] = "Bootstrap Median: " + str(value[4])
-            additional_info.append(
-                value[0]
-                + "\n"
-                + value[1]
-                + "\n"
-                + value[2]
-                + "\n"
-                + value[3]
-                + "\n"
-                + value[4]
-            )
-        text = "Detected Acinetobacter species"
-
-        # Values of clonetypes, is None if not existing
-        values_ct = session.get("vals_ct_spec")
-        hits_ct = session.get("hits_ct_spec")
-        clonetypes = session.get("names_ct_spec")
-        values_claast = session.get("vals_claast")
-        hits_claast = session.get("hits_claast")
-        clonetypes_claast = session.get("names_claast")
-        prediction_claast = session.get("prediction_claast")
-        # Values of OXAs
-        values_oxa = session.get("vals_oxa_spec")
-        oxa_names = session.get("names_oxa_spec")
-        oxa_results = session.get("oxa_results")
-
-        if oxa_results != "None":
-            oxa_labels = []
-            oxa_data = []
-            for results in oxa_results:
-                oxa_labels.append("A. " + results[0] + ": " + results[1])
-                oxa_data.append(results[2])
-        else:
-            oxa_labels = "None"
-            oxa_data = "None"
-
-    filename = session.get("filename")[22:]
-    filename = os.path.splitext(filename)[0]
-
-    # PUBMED LITERATURE SEARCH --------------------------------------------------------------------------------------------
-
-    # Pubmed literature search Source: https://gist.github.com/bonzanini/5a4c39e4c02502a8451d
-    # and https://biopython-tutorial.readthedocs.io/en/latest/notebooks/09%20-%20Accessing%20NCBIs%20Entrez%20databases.html
-    Entrez.email = "xspectBIOINF@web.de"
-    handle = Entrez.esearch(
-        db="pubmed", sort="relevance", retmax="10", retmode="xml", term=prediction
-    )
-    pubmed_results = Entrez.read(handle)
-
-    id_list = pubmed_results["IdList"]
-    literature = []
-    for i in id_list:
-        literature.append("https://pubmed.ncbi.nlm.nih.gov/" + str(i) + "/")
-    ids = ",".join(id_list)
-    handle = Entrez.efetch(db="pubmed", retmode="xml", id=ids)
-    papers = Entrez.read(handle)
-
-    handle2 = Entrez.efetch(db="pubmed", id=ids, rettype="medline")
-    literature_info = Medline.parse(handle2)
-    literature_info = list(literature_info)
-
-    literature_content = []
-    literature_abstract = []
-    literature_authors = []
-    literature_journal = []
-    literature_id = []
-    for paper in papers["PubmedArticle"]:
-        literature_content.append(paper["MedlineCitation"]["Article"]["ArticleTitle"])
-        try:
-            literature_abstract.append(
-                paper["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
-            )
-        except:
-            literature_abstract.append(["No abstract available"])
-
-    for i in range(len(literature_content)):
-        literature_id.append("paper_" + str(i))
-
-    for record in literature_info:
-        literature_authors.append(record.get("AU", "?"))
-        literature_journal.append(record.get("SO", "?"))
-
-    for i in range(len(literature_authors)):
-        literature_authors[i] = " ,".join(literature_authors[i])
-
-    for i in range(len(literature_abstract)):
-        literature_abstract[i] = " ".join(literature_abstract[i])
-
-    CLEANR = re.compile("<.*?>")
-
-    for i in range(len(literature_content)):
-        literature_content[i] = re.sub(CLEANR, "", literature_content[i])
-        literature_abstract[i] = re.sub(CLEANR, "", literature_abstract[i])
-
-    literature_all = [
-        literature,
-        literature_content,
-        literature_abstract,
-        literature_authors,
-        literature_journal,
-        literature_id,
-    ]
-
-    if request.method == "POST":
-        data = request.json
-        Entrez.email = "xspectBIOINF@web.de"
-        handle = Entrez.esearch(
-            db="pubmed",
-            sort=str(data[1]),
-            retmax=str(data[0]),
-            retmode="xml",
-            term=prediction,
-        )
-        pubmed_results = Entrez.read(handle)
-
-        id_list = pubmed_results["IdList"]
-        literature = []
-        for i in id_list:
-            literature.append("https://pubmed.ncbi.nlm.nih.gov/" + str(i) + "/")
-        ids = ",".join(id_list)
-        handle = Entrez.efetch(db="pubmed", retmode="xml", id=ids)
-        papers = Entrez.read(handle)
-
-        handle2 = Entrez.efetch(db="pubmed", id=ids, rettype="medline")
-        literature_info = Medline.parse(handle2)
-        literature_info = list(literature_info)
-
-        literature_content = []
-        literature_abstract = []
-        literature_authors = []
-        literature_journal = []
-        literature_id = []
-        for paper in papers["PubmedArticle"]:
-            literature_content.append(
-                paper["MedlineCitation"]["Article"]["ArticleTitle"]
-            )
-            literature_abstract.append(
-                paper["MedlineCitation"]["Article"]["Abstract"]["AbstractText"]
-            )
-
-        for i in range(len(literature_content)):
-            literature_id.append("paper_" + str(i))
-
-        for record in literature_info:
-            literature_authors.append(record.get("AU", "?"))
-            literature_journal.append(record.get("SO", "?"))
-
-        for i in range(len(literature_authors)):
-            literature_authors[i] = " ,".join(literature_authors[i])
-
-        for i in range(len(literature_abstract)):
-            literature_abstract[i] = " ".join(literature_abstract[i])
-
-        CLEANR = re.compile("<.*?>")
-
-        for i in range(len(literature_content)):
-            literature_content[i] = re.sub(CLEANR, "", literature_content[i])
-            literature_abstract[i] = re.sub(CLEANR, "", literature_abstract[i])
-
-        literature_all = [
-            literature,
-            literature_content,
-            literature_abstract,
-            literature_authors,
-            literature_journal,
-            literature_id,
-        ]
-
-        return json.dumps(literature_all)
-
-    return render_template(
-        "species.html",
-        results_oxa=values_oxa,
-        oxas=oxa_names,
-        results_ct=values_sorted,
-        hits_ct=hits_ct,
-        clonetypes=clonetypes_sorted,
-        results_claast=values_claast,
-        hits_claast=hits_claast,
-        clonetypes_claast=clonetypes_claast,
-        filename=filename,
-        maxi=maxi,
-        time=session.get("time"),
-        prediction=prediction,
-        prediction_claast=prediction_claast,
-        literature_all=literature_all,
-        additional_info=additional_info,
-        text=text,
-        metagenome=metagenome,
-        oxa_labels=oxa_labels,
-        oxa_data=oxa_data,
-    )