XspecT 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of XspecT might be problematic.

Files changed (78)
  1. xspect/classify.py +32 -0
  2. xspect/file_io.py +3 -9
  3. xspect/filter_sequences.py +56 -0
  4. xspect/main.py +13 -18
  5. xspect/mlst_feature/mlst_helper.py +102 -13
  6. xspect/mlst_feature/pub_mlst_handler.py +32 -6
  7. xspect/models/probabilistic_filter_mlst_model.py +160 -32
  8. xspect/models/probabilistic_filter_model.py +1 -0
  9. xspect/ncbi.py +8 -6
  10. xspect/train.py +13 -5
  11. xspect/web.py +173 -0
  12. xspect/xspect-web/.gitignore +24 -0
  13. xspect/xspect-web/README.md +54 -0
  14. xspect/xspect-web/components.json +21 -0
  15. xspect/xspect-web/dist/assets/index-CMG4V7fZ.js +290 -0
  16. xspect/xspect-web/dist/assets/index-jIKg1HIy.css +1 -0
  17. xspect/xspect-web/dist/index.html +14 -0
  18. xspect/xspect-web/dist/vite.svg +1 -0
  19. xspect/xspect-web/eslint.config.js +28 -0
  20. xspect/xspect-web/index.html +13 -0
  21. xspect/xspect-web/package-lock.json +6865 -0
  22. xspect/xspect-web/package.json +58 -0
  23. xspect/xspect-web/pnpm-lock.yaml +4317 -0
  24. xspect/xspect-web/public/vite.svg +1 -0
  25. xspect/xspect-web/src/App.tsx +29 -0
  26. xspect/xspect-web/src/api.tsx +62 -0
  27. xspect/xspect-web/src/assets/react.svg +1 -0
  28. xspect/xspect-web/src/components/classification-form.tsx +284 -0
  29. xspect/xspect-web/src/components/classify.tsx +18 -0
  30. xspect/xspect-web/src/components/data-table.tsx +78 -0
  31. xspect/xspect-web/src/components/dropdown-checkboxes.tsx +63 -0
  32. xspect/xspect-web/src/components/dropdown-slider.tsx +42 -0
  33. xspect/xspect-web/src/components/filter-form.tsx +423 -0
  34. xspect/xspect-web/src/components/filter.tsx +15 -0
  35. xspect/xspect-web/src/components/header.tsx +46 -0
  36. xspect/xspect-web/src/components/landing.tsx +7 -0
  37. xspect/xspect-web/src/components/models-details.tsx +138 -0
  38. xspect/xspect-web/src/components/models.tsx +53 -0
  39. xspect/xspect-web/src/components/result-chart.tsx +44 -0
  40. xspect/xspect-web/src/components/result.tsx +155 -0
  41. xspect/xspect-web/src/components/spinner.tsx +30 -0
  42. xspect/xspect-web/src/components/ui/accordion.tsx +64 -0
  43. xspect/xspect-web/src/components/ui/button.tsx +59 -0
  44. xspect/xspect-web/src/components/ui/card.tsx +92 -0
  45. xspect/xspect-web/src/components/ui/chart.tsx +351 -0
  46. xspect/xspect-web/src/components/ui/command.tsx +175 -0
  47. xspect/xspect-web/src/components/ui/dialog.tsx +135 -0
  48. xspect/xspect-web/src/components/ui/dropdown-menu.tsx +255 -0
  49. xspect/xspect-web/src/components/ui/file-upload.tsx +1459 -0
  50. xspect/xspect-web/src/components/ui/form.tsx +165 -0
  51. xspect/xspect-web/src/components/ui/input.tsx +21 -0
  52. xspect/xspect-web/src/components/ui/label.tsx +24 -0
  53. xspect/xspect-web/src/components/ui/navigation-menu.tsx +168 -0
  54. xspect/xspect-web/src/components/ui/popover.tsx +46 -0
  55. xspect/xspect-web/src/components/ui/select.tsx +183 -0
  56. xspect/xspect-web/src/components/ui/separator.tsx +26 -0
  57. xspect/xspect-web/src/components/ui/slider.tsx +61 -0
  58. xspect/xspect-web/src/components/ui/switch.tsx +29 -0
  59. xspect/xspect-web/src/components/ui/table.tsx +113 -0
  60. xspect/xspect-web/src/components/ui/tabs.tsx +64 -0
  61. xspect/xspect-web/src/index.css +120 -0
  62. xspect/xspect-web/src/lib/utils.ts +6 -0
  63. xspect/xspect-web/src/main.tsx +10 -0
  64. xspect/xspect-web/src/types.tsx +34 -0
  65. xspect/xspect-web/src/utils.tsx +6 -0
  66. xspect/xspect-web/src/vite-env.d.ts +1 -0
  67. xspect/xspect-web/tsconfig.app.json +32 -0
  68. xspect/xspect-web/tsconfig.json +13 -0
  69. xspect/xspect-web/tsconfig.node.json +24 -0
  70. xspect/xspect-web/vite.config.ts +24 -0
  71. {xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/METADATA +6 -8
  72. xspect-0.5.0.dist-info/RECORD +85 -0
  73. {xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/WHEEL +1 -1
  74. xspect/fastapi.py +0 -102
  75. xspect-0.4.1.dist-info/RECORD +0 -24
  76. {xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/entry_points.txt +0 -0
  77. {xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/licenses/LICENSE +0 -0
  78. {xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/top_level.txt +0 -0
xspect/models/probabilistic_filter_mlst_model.py CHANGED
@@ -24,6 +24,7 @@ class ProbabilisticFilterMlstSchemeModel:
         base_path: Path,
         fpr: float = 0.001,
     ) -> None:
+        """Initialise a ProbabilisticFilterMlstSchemeModel object."""
         if k < 1:
             raise ValueError("Invalid k value, must be greater than 0")
         if not isinstance(base_path, Path):
@@ -41,7 +42,12 @@ class ProbabilisticFilterMlstSchemeModel:
         self.indices = []
 
     def to_dict(self) -> dict:
-        """Returns a dictionary representation of the model"""
+        """
+        Returns a dictionary representation of the model.
+
+        Returns:
+            dict: The dictionary containing all metadata of an object.
+        """
         return {
             "k": self.k,
             "model_display_name": self.model_display_name,
@@ -54,14 +60,37 @@ class ProbabilisticFilterMlstSchemeModel:
         }
 
     def get_cobs_index_path(self, scheme: str, locus: str) -> Path:
-        """Returns the path to the cobs index"""
+        """
+        Get the path to the cobs indices.
+
+        This function creates a directory based on the scheme name, if it does not exist.
+        A COBS-Index file is created for every locus in a scheme.
+
+        Args:
+            scheme (str): The name of the scheme.
+            locus (str): The name of the locus.
+
+        Returns:
+            Path: The path to the COBS indices.
+        """
         # To differentiate from genus and species models
         cobs_path = self.base_path / f"{scheme}"
         cobs_path.mkdir(exist_ok=True, parents=True)
         return cobs_path / f"{locus}.cobs_compact"
 
     def fit(self, scheme_path: Path) -> None:
-        """Trains a COBS structure for every locus with all its alleles"""
+        """
+        Trains a COBS structure for every locus with all its alleles.
+
+        This function creates COBS-indices.
+        Many attributes of an object are set in this function.
+
+        Args:
+            scheme_path (Path): The path to the scheme directory with all loci.
+
+        Raises:
+            ValueError: If the scheme alleles have not been downloaded prior.
+        """
         if not scheme_path.exists():
             raise ValueError(
                 "Scheme not found. Please make sure to download the schemes prior!"
@@ -112,7 +141,15 @@ class ProbabilisticFilterMlstSchemeModel:
 
     @staticmethod
     def load(scheme_path: Path) -> "ProbabilisticFilterMlstSchemeModel":
-        """Loads the model from a JSON-file"""
+        """
+        Loads the model from a JSON-file.
+
+        Args:
+            scheme_path (Path): The path of the scheme model.
+
+        Returns:
+            ProbabilisticFilterMlstSchemeModel: A trained model from the disk in JSON format.
+        """
         scheme_name = str(scheme_path).split("/")[-1]
         json_path = scheme_path / f"{scheme_name}.json"
         with open(json_path, "r", encoding="utf-8") as file:
@@ -137,8 +174,30 @@ class ProbabilisticFilterMlstSchemeModel:
             model.indices.append(cobs_index.Search(str(entry), False))
         return model
 
-    def calculate_hits(self, path: Path, sequence: Seq, step: int = 1) -> list[dict]:
-        """Calculates the hits for a sequence"""
+    def calculate_hits(
+        self, cobs_path: Path, sequence: Seq, step: int = 1
+    ) -> list[dict]:
+        """
+        Calculates the hits for a sequence.
+
+        This function has two ways of identifying strain types.
+        Sequences with a length of up to 10000 base pairs are handled without preprocessing.
+        Sequences with a length >= 10000 base pairs are divided into substrings.
+        The results of each substring are added up to find the strain type.
+
+        Args:
+            cobs_path (Path): The path of the COBS-structure directory.
+            sequence (Seq): The input sequence for classification.
+            step (int, optional): The amount of kmers that are passed; defaults to one.
+
+        Returns:
+            list[dict]: The results of the prediction.
+
+        Raises:
+            ValueError: If the model has not been trained.
+            ValueError: If the sequence is shorter than k.
+            ValueError: If the sequence is not a Seq-object.
+        """
         if not isinstance(sequence, Seq):
             raise ValueError("Invalid sequence, must be a Bio.Seq object")
 
@@ -149,7 +208,7 @@ class ProbabilisticFilterMlstSchemeModel:
             raise ValueError("The Model has not been trained yet")
 
         scheme_path_list = []
-        for entry in sorted(path.iterdir()):
+        for entry in sorted(cobs_path.iterdir()):
             if str(entry).endswith(".json"):
                 continue
             file_name = str(entry).split("/")[-1]  # file_name = locus
@@ -166,11 +225,12 @@ class ProbabilisticFilterMlstSchemeModel:
                 split_sequence = self.sequence_splitter(str(sequence), allele_len)
                 for split in split_sequence:
                     res = index.search(split, step=step)
-                    split_result = self.get_cobs_result(res)
+                    split_result = self.get_cobs_result(res, True)
                     if not split_result:
                         continue
                     cobs_results.append(split_result)
 
+                # add all split results of an Allele id into one
                 all_counts = defaultdict(int)
                 for result in cobs_results:
                     for name, value in result.items():
@@ -179,21 +239,36 @@ class ProbabilisticFilterMlstSchemeModel:
                 sorted_counts = dict(
                     sorted(all_counts.items(), key=lambda item: -item[1])
                 )
-                first_key = next(iter(sorted_counts))
-                highest_result = sorted_counts[first_key]
-                result_dict[scheme_path_list[counter]] = sorted_counts
-                highest_results[scheme_path_list[counter]] = {first_key: highest_result}
+                if not sorted_counts:
+                    result_dict = "A Strain type could not be detected because of no kmer matches!"
+                    highest_results[scheme_path_list[counter]] = {"N/A": 0}
+                else:
+                    first_key = next(iter(sorted_counts))
+                    highest_result = sorted_counts[first_key]
+                    result_dict[scheme_path_list[counter]] = sorted_counts
+                    highest_results[scheme_path_list[counter]] = {
+                        first_key: highest_result
+                    }
                 counter += 1
         else:
             for index in self.indices:
                 res = index.search(
                     str(sequence), step=step
                 )  # COBS can't handle Seq-Objects
-                result_dict[scheme_path_list[counter]] = self.get_cobs_result(res)
-                highest_results[scheme_path_list[counter]] = (
-                    self.get_highest_cobs_result(res)
+                result_dict[scheme_path_list[counter]] = self.get_cobs_result(
+                    res, False
+                )
+                first_key, highest_result = next(
+                    iter(result_dict[scheme_path_list[counter]].items())
                 )
+                highest_results[scheme_path_list[counter]] = {first_key: highest_result}
                 counter += 1
+        # check if the strain type has sufficient amount of kmer hits
+        is_valid = self.has_sufficient_score(highest_results, self.avg_locus_bp_size)
+        if not is_valid:
+            highest_results["Attention:"] = (
+                "This strain type is not reliable due to low kmer hit rates!"
+            )
         return [{"Strain type": highest_results}, {"All results": result_dict}]
 
     def predict(
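Note: the reworked branch above sums per-split COBS hits per allele id and keeps the best-scoring allele, falling back to an explicit message when nothing matches. A standalone sketch of that aggregation with invented allele ids and counts (real keys come from the COBS document names):

```python
from collections import defaultdict

# Hypothetical per-split results: {allele_id: kmer_hit_count}
cobs_results = [
    {"Oxf_gltA_1": 120, "Oxf_gltA_4": 30},
    {"Oxf_gltA_1": 95},
]

all_counts = defaultdict(int)
for result in cobs_results:
    for name, value in result.items():
        all_counts[name] += value  # sum hits of each allele id across splits

sorted_counts = dict(sorted(all_counts.items(), key=lambda item: -item[1]))
if sorted_counts:
    first_key = next(iter(sorted_counts))  # best-scoring allele
    print({first_key: sorted_counts[first_key]})  # {'Oxf_gltA_1': 215}
else:
    print("no kmer matches for this locus")
```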
@@ -208,7 +283,20 @@ class ProbabilisticFilterMlstSchemeModel:
         ),
         step: int = 1,
     ) -> MlstResult:
-        """Returns scores for the sequence(s) based on the filters in the model"""
+        """
+        Get scores for the sequence(s) based on the filters in the model.
+
+        Args:
+            cobs_path (Path): The path of the COBS-structure directory.
+            sequence_input (Seq): The input sequence for classification
+            step (int, optional): The amount of kmers that are passed; defaults to one
+
+        Returns:
+            MlstResult: The results of the prediction.
+
+        Raises:
+            ValueError: If the sequence input is invalid.
+        """
         if isinstance(sequence_input, SeqRecord):
             if sequence_input.id == "<unknown id>":
                 sequence_input.id = "test"
@@ -238,31 +326,48 @@ class ProbabilisticFilterMlstSchemeModel:
             " SeqIO FastaIterator, or a SeqIO FastqPhredIterator"
         )
 
-    def get_highest_cobs_result(self, cobs_result: cobs_index.SearchResult) -> dict:
-        """Returns the first entry in a COBS search result."""
-        # counter = 1
-        # dictio = {}
-        for individual_result in cobs_result:
-            # COBS already sorts the result in descending order
-            # The first doc_name has the highest result which is needed to determine the allele
-            return {individual_result.doc_name: individual_result.score}
-
-    def get_cobs_result(self, cobs_result: cobs_index.SearchResult) -> dict:
-        """Returns all entries in a COBS search result."""
+    def get_cobs_result(
+        self, cobs_result: cobs_index.SearchResult, kmer_threshold: bool
+    ) -> dict:
+        """
+        Get every entry in a COBS search result.
+
+        Args:
+            cobs_result (SearchResult): The result of the prediction.
+            kmer_threshold (bool): Applying a kmer threshold to mitigate false positives
+
+        Returns:
+            dict: A dictionary storing the allele id of locus as key and the score as value.
+        """
         return {
             individual_result.doc_name: individual_result.score
             for individual_result in cobs_result
-            if individual_result.score > 50
+            if not kmer_threshold or individual_result.score > 50
         }
 
     def sequence_splitter(self, input_sequence: str, allele_len: int) -> list[str]:
-        """Returns an equally divided sequence in form of a list."""
+        """
+        Get an equally divided sequence in form of a list.
+
+        This function is splitting very long sequences into substrings.
+        The split is based on sequence and allele length.
+        Measures have been taken to not lose kmers while splitting.
+
+        Args:
+            input_sequence (str): The sequence of interest.
+            allele_len (int): The average length of an allele.
+
+        Returns:
+            list[str]: A list containing all substrings of a sequence greater than 10000 bp.
+
+        Raises:
+            ValueError: If the sequence input is invalid.
+        """
+
         # An input sequence will have 10000 or more base pairs.
         sequence_len = len(input_sequence)
 
-        if sequence_len < 100000:
-            substring_length = allele_len // 10
-        elif 100000 <= sequence_len < 1000000:
+        if sequence_len < 1000000:
             substring_length = allele_len
         elif 1000000 <= sequence_len < 10000000:
             substring_length = allele_len * 10
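Note: the sizing change in sequence_splitter drops the old allele_len // 10 tier, so anything under one million base pairs now uses the average allele length as the substring size. A sketch of just the branches visible in this hunk; the method's remaining tiers lie outside the shown context:

```python
def substring_length_for(sequence_len: int, allele_len: int) -> int:
    """Substring sizing as shown in the diff: longer inputs get coarser splits."""
    if sequence_len < 1_000_000:
        return allele_len
    if sequence_len < 10_000_000:
        return allele_len * 10
    # larger inputs are handled by branches outside the shown hunk context
    raise NotImplementedError("tier not visible in this hunk")
```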
@@ -285,3 +390,26 @@ class ProbabilisticFilterMlstSchemeModel:
         else:
             substring_list.append(remaining_substring)
         return substring_list
+
+    def has_sufficient_score(
+        self, highest_results: dict, locus_size: list[int]
+    ) -> bool:
+        """
+        Checks if at least one locus in highest_results has a score >= 0.5 * avg base pair size.
+
+        Args:
+            highest_results (dict): Dict where each key is a locus and each value is the kmer score.
+            locus_size (list[int]): List of average base pair sizes per locus (in directory order).
+
+        Returns:
+            bool: True if any locus score >= 0.5 * its avg base pair size, False otherwise.
+        """
+        for i, (locus, allele_score_dict) in enumerate(highest_results.items()):
+            if not allele_score_dict:
+                continue  # skip empty values
+
+            # Take the score (the only value) from the nested dict
+            score = next(iter(allele_score_dict.values()))
+            if score >= 0.5 * locus_size[i]:
+                return True
+        return False
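Note: the new has_sufficient_score helper pairs each locus in highest_results with the average base-pair size at the same index of locus_size and accepts the call if any best-allele score reaches half that size. A usage sketch with invented loci, scores, and sizes (real sizes come from the model's avg_locus_bp_size attribute):

```python
def has_sufficient_score(highest_results: dict, locus_size: list[int]) -> bool:
    # same loop as in the diff, reproduced here for a standalone run
    for i, (locus, allele_score_dict) in enumerate(highest_results.items()):
        if not allele_score_dict:
            continue
        score = next(iter(allele_score_dict.values()))
        if score >= 0.5 * locus_size[i]:
            return True
    return False

highest_results = {
    "gltA": {"gltA_1": 310},  # kmer score of the best allele per locus
    "gyrB": {"gyrB_7": 12},
}
avg_locus_bp_size = [480, 460]  # same order as the locus directories

print(has_sufficient_score(highest_results, avg_locus_bp_size))  # True: 310 >= 240
```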
xspect/models/probabilistic_filter_model.py CHANGED
@@ -56,6 +56,7 @@ class ProbabilisticFilterModel:
     def to_dict(self) -> dict:
         """Returns a dictionary representation of the model"""
         return {
+            "model_slug": self.slug(),
             "k": self.k,
             "model_display_name": self.model_display_name,
             "author": self.author,
xspect/ncbi.py CHANGED
@@ -2,8 +2,8 @@
 
 from enum import Enum
 from pathlib import Path
-import requests
 import time
+import requests
 
 # pylint: disable=line-too-long
 
@@ -55,14 +55,14 @@ class NCBIHandler:
         elapsed_time = now - self.last_request_time
         if elapsed_time < self.min_interval:
             time.sleep(self.min_interval - elapsed_time)
-        self.last_request_time = now  # Update last request time
+        self.last_request_time = now
 
-    def _make_request(self, endpoint: str, timeout: int = 5) -> dict:
+    def _make_request(self, endpoint: str, timeout: int = 15) -> dict:
         """Make a request to the NCBI Datasets API.
 
         Args:
             endpoint (str): The endpoint to make the request to.
-            timeout (int, optional): The timeout for the request in seconds. Defaults to 5.
+            timeout (int, optional): The timeout for the request in seconds. Defaults to 10.
 
         Returns:
             dict: The response from the API.
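Note: the ncbi.py changes raise the request timeout from 5 to 15 seconds and keep the sleep-based rate limiter. A reduced sketch of that pattern; the now = time.time() line and the 0.35 s interval are assumptions, as they are not shown in these hunks:

```python
import time
import requests

class RateLimitedClient:
    def __init__(self, min_interval: float = 0.35):
        self.min_interval = min_interval  # assumed value; not part of the diff
        self.last_request_time = 0.0

    def _enforce_rate_limit(self) -> None:
        now = time.time()  # assumed; the diff only shows the comparison and sleep
        elapsed_time = now - self.last_request_time
        if elapsed_time < self.min_interval:
            time.sleep(self.min_interval - elapsed_time)
        self.last_request_time = now

    def get(self, url: str, timeout: int = 15) -> requests.Response:
        self._enforce_rate_limit()
        response = requests.get(url, stream=True, timeout=timeout)
        response.raise_for_status()
        return response
```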
@@ -229,7 +229,9 @@ class NCBIHandler:
                 == "OK"
             ]
         except (IndexError, KeyError, TypeError):
-            print(f"Could not get accessions for taxon with ID: {taxon_id}. Skipping.")
+            print(
+                f"Could not get {assembly_level.value} accessions for taxon with ID: {taxon_id}. Skipping."
+            )
             return []
         return accessions[:count]  # Limit to count
 
@@ -255,7 +257,7 @@ class NCBIHandler:
 
         self._enforce_rate_limit()
 
-        response = requests.get(self.base_url + endpoint, stream=True, timeout=5)
+        response = requests.get(self.base_url + endpoint, stream=True, timeout=15)
         if response.status_code != 200:
             response.raise_for_status()
 
xspect/train.py CHANGED
@@ -243,11 +243,19 @@ def train_from_ncbi(
     cobs_dir.mkdir(parents=True, exist_ok=True)
     svm_dir.mkdir(parents=True, exist_ok=True)
 
-    ncbi_handler.download_assemblies(
-        accessions=sum(accessions.values(), []), output_dir=tmp_dir
-    )
-    extract_zip(tmp_dir, tmp_dir)
-    accession_paths = get_ncbi_dataset_accession_paths(tmp_dir / "ncbi_dataset")
+    # download assemblies
+    all_accessions = sum(accessions.values(), [])
+    batch_size = 100
+    accession_paths = {}
+    for i in range(0, len(all_accessions), batch_size):
+        batch = all_accessions[i : i + batch_size]
+        ncbi_handler.download_assemblies(accessions=batch, output_dir=tmp_dir)
+        extract_zip(
+            tmp_dir / "ncbi_dataset.zip", tmp_dir / f"batch-{i}-{i+batch_size}"
+        )
+        accession_paths.update(
+            get_ncbi_dataset_accession_paths(tmp_dir / f"batch-{i}-{i+batch_size}")
+        )
 
     # select accessions
     cobs_accessions = {}
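Note: train_from_ncbi now downloads assemblies in batches of 100 accessions, extracts each batch into its own batch-{i}-{i+batch_size} directory, and merges the per-batch accession paths. A reduced sketch of the chunk-and-merge loop with a stand-in for the download/extract step:

```python
def process_in_batches(all_accessions: list[str], batch_size: int = 100) -> dict[str, str]:
    """Chunk the accession list and merge per-batch results, as in the new loop."""
    accession_paths: dict[str, str] = {}
    for i in range(0, len(all_accessions), batch_size):
        batch = all_accessions[i : i + batch_size]
        # stand-in for: download the batch, extract ncbi_dataset.zip into
        # batch-{i}-{i+batch_size}, then collect accession -> file path mappings
        accession_paths.update(
            {acc: f"batch-{i}-{i+batch_size}/{acc}.fna" for acc in batch}
        )
    return accession_paths
```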
xspect/web.py ADDED
@@ -0,0 +1,173 @@
+"""FastAPI-based web application for XspecT."""
+
+from uuid import uuid4
+import json
+from shutil import copyfileobj
+import importlib.resources as pkg_resources
+from fastapi import APIRouter, FastAPI, HTTPException, UploadFile, BackgroundTasks
+from fastapi.responses import RedirectResponse
+from xspect.definitions import get_xspect_runs_path, get_xspect_upload_path
+from xspect.download_models import download_test_models
+import xspect.model_management as mm
+from xspect.train import train_from_ncbi
+from xspect import classify, filter_sequences
+from fastapi.staticfiles import StaticFiles
+
+app = FastAPI()
+app.mount(
+    "/xspect-web",
+    StaticFiles(directory=str(pkg_resources.files("xspect") / "xspect-web" / "dist")),
+    name="static",
+)
+router = APIRouter()
+
+
+@app.get("/")
+def root():
+    """Root endpoint, forwards to /xspect-web/index.html."""
+    return RedirectResponse(url="/xspect-web/index.html")
+
+
+@router.get("/download-filters")
+def download_filters():
+    """Download filters."""
+    download_test_models("http://assets.adrianromberg.com/xspect-models.zip")
+
+
+@router.get("/classification-result")
+def get_classification_result(uuid: str):
+    """Get classification result."""
+    result_path = get_xspect_runs_path() / f"result_{uuid}.json"
+    if not result_path.exists():
+        raise HTTPException(
+            status_code=404, detail="No result found for the specified uuid."
+        )
+    return json.loads(result_path.read_text())
+
+
+@router.post("/classify")
+def classify_post(
+    classification_type: str,
+    model: str,
+    file: str,
+    background_tasks: BackgroundTasks,
+    step: int = 1,
+):
+    """Classify uploaded sample."""
+    input_path = get_xspect_upload_path() / file
+    if not input_path.exists():
+        raise FileNotFoundError(f"File {input_path} does not exist.")
+
+    uuid = str(uuid4())
+
+    if classification_type == "Genus":
+        background_tasks.add_task(
+            classify.classify_genus,
+            model,
+            input_path,
+            get_xspect_runs_path() / f"result_{uuid}.json",
+            step=step,
+        )
+        return {"message": "Classification started.", "uuid": uuid}
+
+    elif classification_type == "Species":
+        background_tasks.add_task(
+            classify.classify_species,
+            model,
+            input_path,
+            get_xspect_runs_path() / f"result_{uuid}.json",
+            step=step,
+        )
+        return {"message": "Classification started.", "uuid": uuid}
+
+    raise NotImplementedError(
+        f"Classification type {classification_type} is not implemented."
+    )
+
+
+router.post("/filter")
+
+
+def filter_post(
+    filter_type: str,
+    model: str,
+    input_file: str,
+    threshold: float,
+    filter_species: str = None,
+):
+    """Filter sequences."""
+    input_path = get_xspect_upload_path() / input_file
+    output_path = get_xspect_upload_path() / f"filtered_{input_file}"
+
+    if not input_path.exists():
+        raise FileNotFoundError(f"File {input_path} does not exist.")
+
+    if filter_type == "Genus":
+        filter_sequences.filter_genus(model, input_path, output_path, threshold)
+        return {"message": "Genus Filtering started."}
+
+    elif filter_type == "Species":
+        filter_sequences.filter_species(
+            model, filter_species, input_path, output_path, threshold
+        )
+        return {"message": "Species Filtering started."}
+
+    raise NotImplementedError(f"Filter type {filter_type} is not implemented.")
+
+
+@router.post("/train")
+def train(genus: str, background_tasks: BackgroundTasks, svm_steps: int = 1):
+    """Train NCBI model."""
+    background_tasks.add_task(train_from_ncbi, genus, svm_steps)
+
+    return {"message": "Training started."}
+
+
+@router.get("/list-models")
+def list_models():
+    """List available models."""
+    return mm.get_models()
+
+
+@router.get("/model-metadata")
+def get_model_metadata(model_slug: str):
+    """Get metadata of a model."""
+    return mm.get_model_metadata(model_slug)
+
+
+@router.post("/model-metadata")
+def post_model_metadata(model_slug: str, author: str, author_email: str):
+    """Update metadata of a model."""
+    try:
+        mm.update_model_metadata(model_slug, author, author_email)
+    except ValueError as e:
+        return {"error": str(e)}
+    return {"message": "Metadata updated."}
+
+
+@router.post("/model-display-name")
+def post_model_display_name(model_slug: str, filter_id: str, display_name: str):
+    """Update display name of a filter in a model."""
+    try:
+        mm.update_model_display_name(model_slug, filter_id, display_name)
+    except ValueError as e:
+        return {"error": str(e)}
+    return {"message": "Display name updated."}
+
+
+@router.post("/upload-file")
+def upload_file(file: UploadFile):
+    """Upload file to the server."""
+    upload_path = get_xspect_upload_path() / file.filename
+
+    if not upload_path.exists():
+        try:
+            with upload_path.open("wb") as buffer:
+                copyfileobj(file.file, buffer)
+        finally:
+            file.file.close()
+
+    return {"filename": file.filename}
+
+
+app.include_router(router, prefix="/api", tags=["api"])
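Note: with the router mounted under /api, the endpoints above can be exercised end to end: upload a file, start a background classification, then poll for the result JSON. A hedged client sketch; serving the app with uvicorn on port 8000 and the model slug are assumptions, not part of this diff:

```python
# Assumes the FastAPI app is being served locally, e.g. `uvicorn xspect.web:app`
# on port 8000 (the serving command is an assumption, not part of this diff).
import time
import requests

BASE = "http://127.0.0.1:8000/api"

# 1. upload a FASTA file
with open("sample.fasta", "rb") as fh:
    upload = requests.post(f"{BASE}/upload-file", files={"file": fh}).json()

# 2. start a species classification as a background task
job = requests.post(
    f"{BASE}/classify",
    params={
        "classification_type": "Species",
        "model": "acinetobacter",  # hypothetical model slug
        "file": upload["filename"],
        "step": 1,
    },
).json()

# 3. poll until result_<uuid>.json has been written (404 until then)
while True:
    res = requests.get(f"{BASE}/classification-result", params={"uuid": job["uuid"]})
    if res.status_code == 200:
        print(res.json())
        break
    time.sleep(2)
```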
xspect/xspect-web/.gitignore ADDED
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
xspect/xspect-web/README.md ADDED
@@ -0,0 +1,54 @@
+# React + TypeScript + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
+
+## Expanding the ESLint configuration
+
+If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
+
+```js
+export default tseslint.config({
+  extends: [
+    // Remove ...tseslint.configs.recommended and replace with this
+    ...tseslint.configs.recommendedTypeChecked,
+    // Alternatively, use this for stricter rules
+    ...tseslint.configs.strictTypeChecked,
+    // Optionally, add this for stylistic rules
+    ...tseslint.configs.stylisticTypeChecked,
+  ],
+  languageOptions: {
+    // other options...
+    parserOptions: {
+      project: ['./tsconfig.node.json', './tsconfig.app.json'],
+      tsconfigRootDir: import.meta.dirname,
+    },
+  },
+})
+```
+
+You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
+
+```js
+// eslint.config.js
+import reactX from 'eslint-plugin-react-x'
+import reactDom from 'eslint-plugin-react-dom'
+
+export default tseslint.config({
+  plugins: {
+    // Add the react-x and react-dom plugins
+    'react-x': reactX,
+    'react-dom': reactDom,
+  },
+  rules: {
+    // other rules...
+    // Enable its recommended typescript rules
+    ...reactX.configs['recommended-typescript'].rules,
+    ...reactDom.configs.recommended.rules,
+  },
+})
+```
xspect/xspect-web/components.json ADDED
@@ -0,0 +1,21 @@
+{
+  "$schema": "https://ui.shadcn.com/schema.json",
+  "style": "new-york",
+  "rsc": false,
+  "tsx": true,
+  "tailwind": {
+    "config": "",
+    "css": "src/index.css",
+    "baseColor": "neutral",
+    "cssVariables": true,
+    "prefix": ""
+  },
+  "aliases": {
+    "components": "@/components",
+    "utils": "@/lib/utils",
+    "ui": "@/components/ui",
+    "lib": "@/lib",
+    "hooks": "@/hooks"
+  },
+  "iconLibrary": "lucide"
+}