XspecT 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of XspecT might be problematic. Click here for more details.

Files changed (57)
  1. {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/METADATA +23 -29
  2. XspecT-0.2.0.dist-info/RECORD +30 -0
  3. {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/WHEEL +1 -1
  4. xspect/definitions.py +42 -0
  5. xspect/download_filters.py +11 -26
  6. xspect/fastapi.py +101 -0
  7. xspect/file_io.py +34 -103
  8. xspect/main.py +70 -66
  9. xspect/model_management.py +88 -0
  10. xspect/models/__init__.py +0 -0
  11. xspect/models/probabilistic_filter_model.py +277 -0
  12. xspect/models/probabilistic_filter_svm_model.py +169 -0
  13. xspect/models/probabilistic_single_filter_model.py +109 -0
  14. xspect/models/result.py +148 -0
  15. xspect/pipeline.py +201 -0
  16. xspect/run.py +38 -0
  17. xspect/train.py +304 -0
  18. xspect/train_filter/create_svm.py +6 -183
  19. xspect/train_filter/extract_and_concatenate.py +117 -121
  20. xspect/train_filter/html_scrap.py +16 -28
  21. xspect/train_filter/ncbi_api/download_assemblies.py +7 -8
  22. xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +9 -17
  23. xspect/train_filter/ncbi_api/ncbi_children_tree.py +3 -2
  24. xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +7 -5
  25. XspecT-0.1.2.dist-info/RECORD +0 -48
  26. xspect/BF_v2.py +0 -648
  27. xspect/Bootstrap.py +0 -29
  28. xspect/Classifier.py +0 -142
  29. xspect/OXA_Table.py +0 -53
  30. xspect/WebApp.py +0 -737
  31. xspect/XspecT_mini.py +0 -1377
  32. xspect/XspecT_trainer.py +0 -611
  33. xspect/map_kmers.py +0 -155
  34. xspect/search_filter.py +0 -504
  35. xspect/static/How-To.png +0 -0
  36. xspect/static/Logo.png +0 -0
  37. xspect/static/Logo2.png +0 -0
  38. xspect/static/Workflow_AspecT.png +0 -0
  39. xspect/static/Workflow_ClAssT.png +0 -0
  40. xspect/static/js.js +0 -615
  41. xspect/static/main.css +0 -280
  42. xspect/templates/400.html +0 -64
  43. xspect/templates/401.html +0 -62
  44. xspect/templates/404.html +0 -62
  45. xspect/templates/500.html +0 -62
  46. xspect/templates/about.html +0 -544
  47. xspect/templates/home.html +0 -51
  48. xspect/templates/layoutabout.html +0 -87
  49. xspect/templates/layouthome.html +0 -63
  50. xspect/templates/layoutspecies.html +0 -468
  51. xspect/templates/species.html +0 -33
  52. xspect/train_filter/get_paths.py +0 -35
  53. xspect/train_filter/interface_XspecT.py +0 -204
  54. xspect/train_filter/k_mer_count.py +0 -162
  55. {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/LICENSE +0 -0
  56. {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/entry_points.txt +0 -0
  57. {XspecT-0.1.2.dist-info → XspecT-0.2.0.dist-info}/top_level.txt +0 -0
xspect/Classifier.py DELETED
@@ -1,142 +0,0 @@
1
- from sklearn.svm import SVC
2
- import csv
3
- from copy import deepcopy
4
-
5
-
6
- def cut_csv(csv_file, lst, table=False):
7
- """Returns desired data from Training_data"""
8
- r = csv.reader(open(csv_file))
9
- m = list(r)
10
- selected = deepcopy(lst)
11
-
12
- header = m[0]
13
- m = m[1:]
14
- labels = header[1:-1]
15
-
16
- X_train = []
17
- y_train = []
18
- files = []
19
-
20
- if selected[8] and len(header[8:-1]) > 0:
21
- # Added Genomes selected
22
- del selected[8]
23
- selected = selected + ([True] * len(header[9:-1]))
24
-
25
- else:
26
- # Added Genomes not selected
27
- del selected[8]
28
- selected = selected + ([False] * len(header[9:-1]))
29
-
30
- # creating matrix
31
- for i in range(len(m)):
32
- X_train.append(m[i][1:-1])
33
- y_train.append(m[i][-1])
34
- files.append(m[i][0])
35
-
36
- # Deleting Cols
37
- for i in range(len(X_train)):
38
- for j in range(len(X_train[i]) - 1, -1, -1):
39
- if selected[j]:
40
- pass
41
- else:
42
- del X_train[i][j]
43
-
44
- # Deleting Rows
45
- valid = ["None"]
46
- for i in range(len(selected)):
47
- if selected[i]:
48
- valid.append(labels[i])
49
-
50
- for i in range(len(X_train) - 1, -1, -1):
51
- if y_train[i] not in valid:
52
- del y_train[i]
53
- del X_train[i]
54
- del files[i]
55
-
56
- if table:
57
- # Inserting Infos for Table
58
- for i in range(len(X_train)):
59
- X_train[i].insert(0, files[i])
60
- X_train[i].append(y_train[i])
61
-
62
- for i in range(len(header) - 1, -1, -1):
63
- if header[i] not in valid:
64
- del header[i]
65
-
66
- header.insert(0, "File")
67
- header.append("Label")
68
-
69
- X_train.insert(0, header)
70
-
71
- else:
72
- pass
73
-
74
- return X_train, y_train
75
-
76
-
77
- def cut_csv_spec(csv_file):
78
- """Returns svm Training_data"""
79
- # read the training-data
80
- r = csv.reader(open(csv_file))
81
- m = list(r)
82
- header = m[0]
83
- m = m[1:]
84
- X_train = []
85
- y_train = []
86
-
87
- # creating matrix as input for the classifier
88
- for i in range(len(m)):
89
- X_train.append(m[i][1:-1])
90
- y_train.append(m[i][-1])
91
-
92
- return X_train, y_train
93
-
94
-
95
- def classify(csv_file, result, lst):
96
- """Classifys Result-vector and calculates needed vectors"""
97
- r = csv.reader(open(csv_file))
98
- m = list(r)
99
- # deciding which kernel-function will be used
100
- if m[0][1] == "IC1":
101
- mode = "ClAssT"
102
- X_train, y_train = cut_csv(csv_file, lst)
103
- svm = SVC(kernel="poly", C=1.0).fit(X_train, y_train)
104
- else:
105
- mode = "XspecT"
106
- X_train, y_train = cut_csv_spec(csv_file)
107
- svm = SVC(kernel="rbf", C=1.5).fit(X_train, y_train)
108
- # perform a prediction using the svm
109
- prediction = svm.predict([result])
110
- if mode == "XspecT":
111
- if max(result) < 0.3:
112
- prediction = ["sp.", 0]
113
- else:
114
- if max(result) < 0.3:
115
- prediction = ["None", 0]
116
-
117
- return prediction[0]
118
-
119
-
120
- def IC3_classify(result_2):
121
- ic = "International Clonetype 3 (ST32 or ST250)"
122
- m_3 = [
123
- ["GCF_000278625.1", 1.0, ic],
124
- ["GCF_001674185.1", 0.86, ic],
125
- ["fictional", 0.85, "NONE of the selected Clonetypes or Genomes"],
126
- ["fictional", 0.01, "NONE of the selected Clonetypes or Genomes"],
127
- ]
128
-
129
- X = []
130
- y = []
131
- for i in range(len(m_3)):
132
- X.append(m_3[i][1])
133
- y.append(m_3[i][2])
134
-
135
- for i in range(len(X)):
136
- X[i] = [X[i]]
137
- svm_IC3 = SVC(kernel="poly", C=1).fit(X, y)
138
-
139
- return svm_IC3.predict([result_2]), result_2[0]
140
-
141
-
142
- # https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
xspect/OXA_Table.py DELETED
@@ -1,53 +0,0 @@
1
- import json
2
- from Bio import SeqIO
3
- import os
4
-
5
-
6
- class OXATable:
7
- def __init__(self):
8
- self.kmere = {}
9
- self.total = 0
10
- self.found = 0
11
-
12
- def create_table(self, directory):
13
- """Reads in fasta file, creates Dictionary with k-mer counter"""
14
- # taking all fasta files for one big table
15
- oxas = {}
16
- files = os.listdir(directory)
17
- for i in range(len(files)):
18
- kmere = {}
19
- file = directory + files[i]
20
- for sequence in SeqIO.parse(file, "fasta"):
21
- for j in range(0, len(sequence.seq) - 20 + 1):
22
- kmer = str(sequence.seq[j : j + 20])
23
- count = kmere.get(kmer, 0)
24
- kmere[kmer] = count + 1
25
- oxas[files[i][:-6]] = kmere
26
-
27
- def lookup(self, gene, kmer):
28
- """Tests if kmer in dictionary, if so: reduces the counter"""
29
-
30
- # Only returns True if kmer has been found and there was one left
31
- if kmer in self.kmere[gene]:
32
- if self.kmere[gene][kmer] > 0:
33
- self.kmere[gene][kmer] -= 1
34
- return True
35
- else:
36
- return False
37
- else:
38
- return False
39
-
40
- def save_dic(self, path):
41
- """writes dictionary to file using json"""
42
- json.dump(self.kmere, open(path, "w"))
43
-
44
- def read_dic(self, path):
45
- """Reads dictionary from file using json"""
46
- self.kmere = json.load(open(path))
47
-
48
- def cleanup(self):
49
- self.kmere = {}
50
-
51
- def get_counter(self, path=r"filter/OXAs_dict/counter.txt"):
52
- counter = json.load(open(path))
53
- return counter