XspecT 0.5.3__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of XspecT might be problematic.

xspect/classify.py CHANGED
@@ -1,11 +1,13 @@
+ """Classification module"""
+
  from pathlib import Path
- from xspect.mlst_feature.mlst_helper import pick_scheme_from_models_dir
+ from importlib import import_module
  import xspect.model_management as mm
- from xspect.models.probabilistic_filter_mlst_model import (
- ProbabilisticFilterMlstSchemeModel,
- )
  from xspect.file_io import prepare_input_output_paths

+ # inline imports lead to "invalid name" issues
+ # pylint: disable=invalid-name
+

  def classify_genus(
  model_genus: str, input_path: Path, output_path: Path, step: int = 1
@@ -22,7 +24,12 @@ def classify_genus(
  output_path (Path): The path to the output file where results will be saved.
  step (int): The amount of kmers to be skipped.
  """
- model = mm.get_genus_model(model_genus)
+ ProbabilisticSingleFilterModel = import_module(
+ "xspect.models.probabilistic_single_filter_model"
+ ).ProbabilisticSingleFilterModel
+
+ model_path = mm.get_genus_model_path(model_genus)
+ model = ProbabilisticSingleFilterModel.load(model_path)
  input_paths, get_output_path = prepare_input_output_paths(input_path)

  for idx, current_path in enumerate(input_paths):
@@ -34,7 +41,11 @@ def classify_genus(


  def classify_species(
- model_genus: str, input_path: Path, output_path: Path, step: int = 1
+ model_genus: str,
+ input_path: Path,
+ output_path: Path,
+ step: int = 1,
+ display_name: bool = False,
  ):
  """
  Classify the species of sequences.
@@ -47,12 +58,18 @@ def classify_species(
  input_path (Path): The path to the input file/directory containing sequences.
  output_path (Path): The path to the output file where results will be saved.
  step (int): The amount of kmers to be skipped.
+ display_name (bool): Includes a display name for each tax_ID.
  """
- model = mm.get_species_model(model_genus)
+ ProbabilisticFilterSVMModel = import_module(
+ "xspect.models.probabilistic_filter_svm_model"
+ ).ProbabilisticFilterSVMModel
+
+ model_path = mm.get_species_model_path(model_genus)
+ model = ProbabilisticFilterSVMModel.load(model_path)
  input_paths, get_output_path = prepare_input_output_paths(input_path)

  for idx, current_path in enumerate(input_paths):
- result = model.predict(current_path, step=step)
+ result = model.predict(current_path, step=step, display_name=display_name)
  result.input_source = current_path.name
  cls_path = get_output_path(idx, output_path)
  result.save(cls_path)
@@ -68,6 +85,12 @@ def classify_mlst(input_path: Path, output_path: Path, limit: bool):
  output_path (Path): The path to the output file where results will be saved.
  limit (bool): A limit for the highest allele_id results that are shown.
  """
+ pick_scheme_from_models_dir = import_module(
+ "xspect.mlst_feature.mlst_helper"
+ ).pick_scheme_from_models_dir
+ ProbabilisticFilterMlstSchemeModel = import_module(
+ "xspect.models.probabilistic_filter_mlst_model"
+ ).ProbabilisticFilterMlstSchemeModel

  scheme_path = pick_scheme_from_models_dir()
  model = ProbabilisticFilterMlstSchemeModel.load(scheme_path)
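For reference, a minimal usage sketch of the updated classify_species signature (the genus name and file paths below are placeholders, not values taken from the package):

    from pathlib import Path
    from xspect.classify import classify_species

    # hypothetical genus and paths; any trained species model and FASTA/FASTQ input work
    classify_species(
        "Acinetobacter",
        input_path=Path("sample.fasta"),
        output_path=Path("result.json"),
        step=500,
        display_name=True,  # new in 0.5.4: append display names to the taxonomy IDs in the result
    )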
xspect/definitions.py CHANGED
@@ -11,8 +11,9 @@ def get_xspect_root_path() -> Path:
  """
  Return the root path for XspecT data.

- Returns the path to the XspecT data directory, which can be located either in the user's home directory or in the current working directory.
- If neither exists, it creates the directory in the user's home directory.
+ Returns the path to the XspecT data directory, which can be located either in the user's home
+ directory or in the current working directory. If neither exists, it creates the directory in
+ the user's home directory.

  Returns:
  Path: The path to the XspecT data directory.
@@ -34,8 +35,8 @@ def get_xspect_model_path() -> Path:
  """
  Return the path to the XspecT models.

- Returns the path to the XspecT models directory, which is located within the XspecT data directory.
- If the directory does not exist, it creates the directory.
+ Returns the path to the XspecT models directory, which is located within the XspecT data
+ directory. If the directory does not exist, it creates the directory.

  Returns:
  Path: The path to the XspecT models directory.
@@ -49,8 +50,8 @@ def get_xspect_upload_path() -> Path:
  """
  Return the path to the XspecT upload directory.

- Returns the path to the XspecT uploads directory, which is located within the XspecT data directory.
- If the directory does not exist, it creates the directory.
+ Returns the path to the XspecT uploads directory, which is located within the XspecT data
+ directory. If the directory does not exist, it creates the directory.

  Returns:
  Path: The path to the XspecT uploads directory.
@@ -64,8 +65,8 @@ def get_xspect_runs_path() -> Path:
  """
  Return the path to the XspecT runs directory.

- Returns the path to the XspecT runs directory, which is located within the XspecT data directory.
- If the directory does not exist, it creates the directory.
+ Returns the path to the XspecT runs directory, which is located within the XspecT data
+ directory. If the directory does not exist, it creates the directory.

  Returns:
  Path: The path to the XspecT runs directory.
@@ -79,8 +80,8 @@ def get_xspect_mlst_path() -> Path:
  """
  Return the path to the XspecT MLST directory.

- Returns the path to the XspecT MLST directory, which is located within the XspecT data directory.
- If the directory does not exist, it creates the directory.
+ Returns the path to the XspecT MLST directory, which is located within the XspecT data
+ directory. If the directory does not exist, it creates the directory.

  Returns:
  Path: The path to the XspecT MLST directory.
xspect/file_io.py CHANGED
@@ -113,7 +113,8 @@ def concatenate_metagenome(fasta_dir: Path, meta_path: Path) -> None:
  Concatenate all fasta files in a directory into one file.

  This function searches for all fasta files in the specified directory and writes their contents
- into a single output file. The output file will contain the concatenated sequences from all fasta files.
+ into a single output file. The output file will contain the concatenated sequences from all
+ fasta files.

  Args:
  fasta_dir (Path): Path to the directory with the fasta files.
xspect/filter_sequences.py CHANGED
@@ -1,7 +1,13 @@
+ """Sequence filtering module"""
+
  from pathlib import Path
- from xspect.model_management import get_genus_model, get_species_model
+ from importlib import import_module
+ from xspect.model_management import get_genus_model_path, get_species_model_path
  from xspect.file_io import filter_sequences, prepare_input_output_paths

+ # inline imports lead to "invalid name" issues
+ # pylint: disable=invalid-name
+

  def filter_species(
  model_genus: str,
@@ -31,7 +37,12 @@ def filter_species(
  available species scores.
  sparse_sampling_step (int): The step size for sparse sampling. Defaults to 1.
  """
- species_model = get_species_model(model_genus)
+ ProbabilisticFilterSVMModel = import_module(
+ "xspect.models.probabilistic_filter_svm_model"
+ ).ProbabilisticFilterSVMModel
+
+ species_model_path = get_species_model_path(model_genus)
+ species_model = ProbabilisticFilterSVMModel.load(species_model_path)
  input_paths, get_output_path = prepare_input_output_paths(input_path)

  for idx, current_path in enumerate(input_paths):
@@ -82,11 +93,16 @@ def filter_genus(
  sparse_sampling_step (int): The step size for sparse sampling. Defaults to 1.

  """
- model = get_genus_model(model_genus)
+ ProbabilisticSingleFilterModel = import_module(
+ "xspect.models.probabilistic_single_filter_model"
+ ).ProbabilisticSingleFilterModel
+
+ genus_model_path = get_genus_model_path(model_genus)
+ genus_model = ProbabilisticSingleFilterModel.load(genus_model_path)
  input_paths, get_output_path = prepare_input_output_paths(input_path)

  for idx, current_path in enumerate(input_paths):
- result = model.predict(current_path, step=sparse_sampling_step)
+ result = genus_model.predict(current_path, step=sparse_sampling_step)
  result.input_source = current_path.name

  if classification_output_path:
xspect/main.py CHANGED
@@ -2,25 +2,12 @@

  from pathlib import Path
  from uuid import uuid4
+ from importlib import import_module
  import click
- import uvicorn
- from xspect import classify
- from xspect.web import app
- from xspect.download_models import download_test_models
- from xspect import filter_sequences
- from xspect.train import train_from_directory, train_from_ncbi
- from xspect.definitions import (
- get_xspect_model_path,
- )
- from xspect.mlst_feature.mlst_helper import pick_scheme
- from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
- from xspect.models.probabilistic_filter_mlst_model import (
- ProbabilisticFilterMlstSchemeModel,
- )
- from xspect.model_management import (
- get_model_metadata,
- get_models,
- )
+ from xspect.model_management import get_models
+
+ # inline imports lead to "invalid name" issues
+ # pylint: disable=invalid-name


  @click.group()
@@ -32,7 +19,10 @@ def cli():
  @cli.command()
  def web():
  """Open the XspecT web application."""
- uvicorn.run(app, host="0.0.0.0", port=8000)
+ app = import_module("xspect.web").app
+ run = import_module("uvicorn").run
+
+ run(app, host="0.0.0.0", port=8000)


  # # # # # # # # # # # # # # #
@@ -49,6 +39,8 @@ def models():
  def download():
  """Download models."""
  click.echo("Downloading models, this may take a while...")
+ download_test_models = import_module("xspect.download_models").download_test_models
+
  download_test_models(
  "https://assets.adrianromberg.com/science/xspect-models-07-08-2025.zip"
  )
@@ -64,7 +56,6 @@ def list_models():
  if not available_models:
  click.echo("No models found.")
  return
- # todo: make this machine readable
  click.echo("Models found:")
  click.echo("--------------")
  for model_type, names in available_models.items():
@@ -100,6 +91,8 @@ def train_ncbi(model_genus, svm_steps, author, author_email):
  """Train a species and a genus model based on NCBI data."""
  click.echo(f"Training {model_genus} species and genus metagenome model.")
  try:
+ train_from_ncbi = import_module("xspect.train").train_from_ncbi
+
  train_from_ncbi(model_genus, svm_steps, author, author_email)
  except ValueError as e:
  click.echo(f"Error: {e}")
@@ -143,6 +136,8 @@ def train_ncbi(model_genus, svm_steps, author, author_email):
  def train_directory(model_genus, input_path, svm_steps, meta, author, author_email):
  """Train a model based on data from a directory for a given genus."""
  click.echo(f"Training {model_genus} model with {svm_steps} SVM steps.")
+ train_from_directory = import_module("xspect.train").train_from_directory
+
  train_from_directory(
  model_genus,
  Path(input_path),
@@ -167,12 +162,28 @@ def train_directory(model_genus, input_path, svm_steps, meta, author, author_ema
  def train_mlst(choose_schemes):
  """Download alleles and train bloom filters."""
  click.echo("Updating alleles")
+ mlst_helper = import_module("xspect.mlst_feature.mlst_helper")
+ pick_scheme = mlst_helper.pick_scheme
+
+ pub_mlst_handler = import_module("xspect.mlst_feature.pub_mlst_handler")
+ PubMLSTHandler = pub_mlst_handler.PubMLSTHandler
+
+ probabilistic_filter_mlst_model = import_module(
+ "xspect.models.probabilistic_filter_mlst_model"
+ )
+ ProbabilisticFilterMlstSchemeModel = (
+ probabilistic_filter_mlst_model.ProbabilisticFilterMlstSchemeModel
+ )
+
+ definitions = import_module("xspect.definitions")
+ get_xspect_model_path = definitions.get_xspect_model_path
+
  handler = PubMLSTHandler()
  handler.download_alleles(choose_schemes)
  click.echo("Download finished")
  scheme_path = pick_scheme(handler.get_scheme_paths())
  species_name = str(scheme_path).split("/")[-2]
- scheme_name = str(scheme_path).split("/")[-1]
+ scheme_name = str(scheme_path).rsplit("/", maxsplit=1)[-1]
  scheme_url = handler.scheme_mapping[str(scheme_path)]
  model = ProbabilisticFilterMlstSchemeModel(
  31, f"{species_name}:{scheme_name}", get_xspect_model_path(), scheme_url
@@ -230,6 +241,8 @@ def classify_seqs():
  def classify_genus(model_genus, input_path, output_path, sparse_sampling_step):
  """Classify samples using a genus model."""
  click.echo("Classifying...")
+ classify = import_module("xspect.classify")
+
  classify.classify_genus(
  model_genus, Path(input_path), Path(output_path), sparse_sampling_step
  )
@@ -268,11 +281,25 @@ def classify_genus(model_genus, input_path, output_path, sparse_sampling_step):
  help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
  default=1,
  )
- def classify_species(model_genus, input_path, output_path, sparse_sampling_step):
+ @click.option(
+ "-n",
+ "--display-names",
+ help="Includes the display names next to taxonomy-IDs.",
+ is_flag=True,
+ )
+ def classify_species(
+ model_genus, input_path, output_path, sparse_sampling_step, display_names
+ ):
  """Classify samples using a species model."""
  click.echo("Classifying...")
+ classify = import_module("xspect.classify")
+
  classify.classify_species(
- model_genus, Path(input_path), Path(output_path), sparse_sampling_step
+ model_genus,
+ Path(input_path),
+ Path(output_path),
+ sparse_sampling_step,
+ display_names,
  )


@@ -301,6 +328,8 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
  def classify_mlst(input_path, output_path, limit):
  """MLST classify a sample."""
  click.echo("Classifying...")
+ classify = import_module("xspect.classify")
+
  classify.classify_mlst(Path(input_path), Path(output_path), limit)


@@ -372,6 +401,7 @@ def filter_genus(
  ):
  """Filter samples using a genus model."""
  click.echo("Filtering...")
+ filter_sequences = import_module("xspect.filter_sequences")

  filter_sequences.filter_genus(
  model_genus,
@@ -426,14 +456,16 @@ def filter_genus(
  "-t",
  "--threshold",
  type=float,
- help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
+ help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring "
+ "species.",
  default=0.7,
  prompt=True,
  )
  @click.option(
  "--sparse-sampling-step",
  type=int,
- help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+ help="Sparse sampling step (e. g. only every 500th kmer for "
+ "'--sparse-sampling-step 500').",
  default=1,
  )
  def filter_species(
@@ -449,9 +481,12 @@ def filter_species(

  if threshold != -1 and (threshold < 0 or threshold > 1):
  raise click.BadParameter(
- "Threshold must be between 0 and 1, or -1 for filtering by the highest scoring species."
+ "Threshold must be between 0 and 1, or -1 for filtering by the highest "
+ "scoring species."
  )

+ get_model_metadata = import_module("xspect.model_management").get_model_metadata
+
  available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
  available_species = {
  id: name.replace(f"{model_genus} ", "")
@@ -476,6 +511,8 @@ def filter_species(
  ][0]

  click.echo("Filtering...")
+ filter_sequences = import_module("xspect.filter_sequences")
+
  filter_sequences.filter_species(
  model_genus,
  model_species,
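The CLI now defers its heavy imports into the command bodies via importlib.import_module, so a command such as listing models no longer pays for loading uvicorn or the training stack at startup. A schematic sketch of the pattern (the command below is illustrative, not copied from main.py):

    from importlib import import_module
    import click

    @click.command()
    @click.argument("model_genus")
    def classify_genus_cmd(model_genus):
        """Classify samples using a genus model (schematic)."""
        click.echo("Classifying...")
        # the heavy module is only imported once the subcommand actually runs
        classify = import_module("xspect.classify")
        click.echo(f"Loaded {classify.__name__} for genus {model_genus}")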
xspect/mlst_feature/mlst_helper.py CHANGED
@@ -2,10 +2,10 @@

  __author__ = "Cetin, Oemer"

- import requests
  import json
- from io import StringIO
  from pathlib import Path
+ from io import StringIO
+ import requests
  from Bio import SeqIO
  from xspect.definitions import get_xspect_model_path

@@ -29,7 +29,7 @@ def create_fasta_files(locus_path: Path, fasta_batch: str) -> None:
  output_fasta_file = locus_path / f"Allele_ID_{number}.fasta"
  if output_fasta_file.exists():
  continue # Ignore existing ones
- with open(output_fasta_file, "w") as allele:
+ with open(output_fasta_file, "w", encoding="utf-8") as allele:
  SeqIO.write(record, allele, "fasta")


@@ -59,10 +59,9 @@ def pick_species_number_from_db(available_species: dict) -> str:
  if int(choice) in available_species.keys():
  chosen_species = available_species.get(int(choice))
  return chosen_species
- else:
- print(
- "Wrong input! Try again with a number that is available in the list above."
- )
+ print(
+ "Wrong input! Try again with a number that is available in the list above."
+ )
  except ValueError:
  print(
  "Wrong input! Try again with a number that is available in the list above."
@@ -95,10 +94,9 @@ def pick_scheme_number_from_db(available_schemes: dict) -> str:
  if int(choice) in available_schemes.keys():
  chosen_scheme = available_schemes.get(int(choice))[1]
  return chosen_scheme
- else:
- print(
- "Wrong input! Try again with a number that is available in the above list."
- )
+ print(
+ "Wrong input! Try again with a number that is available in the above list."
+ )
  except ValueError:
  print(
  "Wrong input! Try again with a number that is available in the above list."
@@ -162,12 +160,12 @@ def pick_scheme(available_schemes: dict) -> Path:
  for counter, scheme in available_schemes.items():
  # For Strain Typing with an API-POST Request to the db
  if str(scheme).startswith("http"):
- scheme_json = requests.get(scheme).json()
+ scheme_json = requests.get(scheme, timeout=10).json()
  print(str(counter) + ":" + scheme_json["description"])

  # To pick a scheme after download for fitting
  else:
- print(str(counter) + ":" + str(scheme).split("/")[-1])
+ print(str(counter) + ":" + str(scheme).rsplit("/", maxsplit=1)[-1])

  print("\nPick a scheme for strain type prediction")
  while True:
@@ -176,10 +174,9 @@ def pick_scheme(available_schemes: dict) -> Path:
  if int(choice) in available_schemes.keys():
  chosen_scheme = available_schemes.get(int(choice))
  return chosen_scheme
- else:
- print(
- "Wrong input! Try again with a number that is available in the above list."
- )
+ print(
+ "Wrong input! Try again with a number that is available in the above list."
+ )
  except ValueError:
  print(
  "Wrong input! Try again with a number that is available in the above list."
@@ -209,8 +206,7 @@ class MlstResult:
  Returns:
  dict: The result dictionary with s sequence ID as key and the Strain type as value.
  """
- results = {seq_id: result for seq_id, result in self.hits.items()}
- return results
+ return dict(self.hits.items())

  def to_dict(self) -> dict:
  """
xspect/mlst_feature/pub_mlst_handler.py CHANGED
@@ -2,8 +2,8 @@

  __author__ = "Cetin, Oemer"

- import requests
  import json
+ import requests
  from xspect.mlst_feature.mlst_helper import (
  create_fasta_files,
  pick_species_number_from_db,
@@ -51,7 +51,7 @@ class PubMLSTHandler:
  counter = 1
  # retrieve all available species
  species_url = PubMLSTHandler.base_url
- for species_databases in requests.get(species_url).json():
+ for species_databases in requests.get(species_url, timeout=10).json():
  for database in species_databases["databases"]:
  if database["name"].endswith("seqdef"):
  available_species[counter] = database["name"]
@@ -61,7 +61,7 @@ class PubMLSTHandler:

  counter = 1
  scheme_url = f"{species_url}/{chosen_species}/schemes"
- for scheme in requests.get(scheme_url).json()["schemes"]:
+ for scheme in requests.get(scheme_url, timeout=10).json()["schemes"]:
  # scheme["description"] stores the name of a scheme.
  # scheme["scheme"] stores the URL that is needed for downloading all loci.
  available_schemes[counter] = [scheme["description"], scheme["scheme"]]
@@ -70,11 +70,8 @@ class PubMLSTHandler:
  # Selection process of available scheme from a species for download (doubles are caught!)
  while True:
  chosen_scheme = pick_scheme_number_from_db(available_schemes)
- (
+ if chosen_scheme not in chosen_schemes:
  chosen_schemes.append(chosen_scheme)
- if chosen_scheme not in chosen_schemes
- else None
- )
  choice = input(
  "Do you want to pick another scheme to download? (y/n):"
  ).lower()
@@ -97,7 +94,7 @@ class PubMLSTHandler:
  self.choose_schemes() # changes the scheme_list attribute

  for scheme in self.scheme_list:
- scheme_json = requests.get(scheme).json()
+ scheme_json = requests.get(scheme, timeout=10).json()
  # We only want the name and the respective featured loci of a scheme
  scheme_name = scheme_json["description"]
  locus_list = scheme_json["loci"]
@@ -117,7 +114,7 @@ class PubMLSTHandler:
  if not locus_path.exists():
  locus_path.mkdir(exist_ok=True, parents=True)

- alleles = requests.get(f"{locus_url}/alleles_fasta").text
+ alleles = requests.get(f"{locus_url}/alleles_fasta", timeout=10).text
  create_fasta_files(locus_path, alleles)

  def assign_strain_type_by_db(self) -> None:
@@ -132,13 +129,15 @@ class PubMLSTHandler:
  str(pick_scheme(scheme_list_to_dict(self.scheme_list))) + "/sequence"
  )
  fasta_file = get_xspect_upload_path() / "Test.fna"
- with open(fasta_file, "r") as file:
+ with open(fasta_file, "r", encoding="utf-8") as file:
  data = file.read()
  payload = { # Essential API-POST-Body
  "sequence": data,
  "filetype": "fasta",
  }
- response = requests.post(scheme_url, data=json.dumps(payload)).json()
+ response = requests.post(
+ scheme_url, data=json.dumps(payload), timeout=10
+ ).json()

  for locus, meta_data in response["exact_matches"].items():
  # meta_data is a list containing a dictionary, therefore [0] and then key value.
@@ -170,18 +169,16 @@ class PubMLSTHandler:
  }
  }

- response = requests.post(post_url + "/designations", json=payload)
+ response = requests.post(post_url + "/designations", json=payload, timeout=10)

  if response.status_code == 200:
  data = response.json()
  if "fields" in data:
  post_response = data["fields"]
  return post_response
- else:
- post_response = "No matching Strain Type found in the database. "
- post_response += "Possibly a novel Strain Type."
- return post_response
- else:
- post_response = "Error:" + str(response.status_code)
- post_response += response.text
+ post_response = "No matching Strain Type found in the database. "
+ post_response += "Possibly a novel Strain Type."
  return post_response
+ post_response = "Error:" + str(response.status_code)
+ post_response += response.text
+ return post_response
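Every requests call against PubMLST now passes timeout=10, so a stalled connection raises instead of hanging the process; as far as this diff shows, the exception is left to propagate. A hedged sketch of how a caller could surface that condition (the URL is a placeholder):

    import requests

    scheme_url = "https://rest.pubmlst.org/db/example_seqdef/schemes/1"  # placeholder URL
    try:
        scheme_json = requests.get(scheme_url, timeout=10).json()
    except requests.exceptions.Timeout:
        print("PubMLST did not respond within 10 seconds.")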
xspect/model_management.py CHANGED
@@ -2,45 +2,41 @@

  from json import loads, dumps
  from pathlib import Path
- from xspect.models.probabilistic_single_filter_model import (
- ProbabilisticSingleFilterModel,
- )
- from xspect.models.probabilistic_filter_svm_model import ProbabilisticFilterSVMModel
  from xspect.definitions import get_xspect_model_path


- def get_genus_model(genus) -> ProbabilisticSingleFilterModel:
+ def get_genus_model_path(genus) -> Path:
  """
- Get a genus model for the specified genus.
+ Get a genus model path for the specified genus.

- This function retrieves a pre-trained genus classification model based on the provided genus name.
+ This function retrieves the path of a pre-trained genus classification model based on the
+ provided genus name.

  Args:
  genus (str): The genus name for which the model is to be retrieved.

  Returns:
- ProbabilisticSingleFilterModel: An instance of the genus classification model.
+ Path: The file path of the genus classification model.
  """
  genus_model_path = get_xspect_model_path() / (genus.lower() + "-genus.json")
- genus_filter_model = ProbabilisticSingleFilterModel.load(genus_model_path)
- return genus_filter_model
+ return genus_model_path


- def get_species_model(genus) -> ProbabilisticFilterSVMModel:
+ def get_species_model_path(genus) -> Path:
  """
- Get a species classification model for the specified genus.
+ Get a species model path for the specified genus.

- This function retrieves a pre-trained species classification model based on the provided genus name.
+ This function retrieves the path of a pre-trained species classification model based on the
+ provided genus name.

  Args:
  genus (str): The genus name for which the species model is to be retrieved.

  Returns:
- ProbabilisticFilterSVMModel: An instance of the species classification model.
+ Path: The file path of the species classification model.
  """
  species_model_path = get_xspect_model_path() / (genus.lower() + "-species.json")
- species_filter_model = ProbabilisticFilterSVMModel.load(species_model_path)
- return species_filter_model
+ return species_model_path


  def get_model_metadata(model: str | Path) -> dict:
@@ -121,7 +117,8 @@ def get_models() -> dict[str, list[dict]]:
  This function scans the model directory for JSON files and organizes them by their model type.

  Returns:
- dict[str, list[dict]]: A dictionary where keys are model types and values are lists of model display names.
+ dict[str, list[dict]]: A dictionary where keys are model types and values are lists of
+ model display names.
  """
  model_dict = {}
  for model_file in get_xspect_model_path().glob("*.json"):
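get_genus_model and get_species_model are replaced by path-returning helpers; loading now happens explicitly at the call site, which is what enables the deferred model imports in classify.py and filter_sequences.py. A minimal sketch of the new two-step pattern (the genus name is an example, not a bundled model):

    from xspect.model_management import get_species_model_path
    from xspect.models.probabilistic_filter_svm_model import ProbabilisticFilterSVMModel

    # resolves to "<models dir>/acinetobacter-species.json" per the helper above
    model_path = get_species_model_path("Acinetobacter")
    model = ProbabilisticFilterSVMModel.load(model_path)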
xspect/models/probabilistic_filter_mlst_model.py CHANGED
@@ -2,14 +2,14 @@

  __author__ = "Cetin, Oemer"

- import cobs_index
  import json
  from pathlib import Path
+ from collections import defaultdict
+ import cobs_index
+ from cobs_index import DocumentList
  from Bio import SeqIO
  from Bio.Seq import Seq
  from Bio.SeqRecord import SeqRecord
- from cobs_index import DocumentList
- from collections import defaultdict
  from xspect.file_io import get_record_iterator
  from xspect.mlst_feature.mlst_helper import MlstResult
  from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
@@ -100,11 +100,11 @@ class ProbabilisticFilterMlstSchemeModel:
  "Scheme not found. Please make sure to download the schemes prior!"
  )

- scheme = str(scheme_path).split("/")[-1]
+ scheme = str(scheme_path).rsplit("/", maxsplit=1)[-1]
  cobs_path = ""
  # COBS structure for every locus (default = 7 for Oxford or Pasteur scheme)
  for locus_path in sorted(scheme_path.iterdir()):
- locus = str(locus_path).split("/")[-1]
+ locus = str(locus_path).rsplit("/", maxsplit=1)[-1]
  # counts all fasta files that belong to a locus
  self.loci[locus] = sum(
  (1 for _ in locus_path.iterdir() if not str(_).endswith("cache"))
@@ -112,7 +112,7 @@ class ProbabilisticFilterMlstSchemeModel:

  # determine the avg base pair size of alleles
  fasta_file = next(locus_path.glob("*.fasta"), None)
- with open(fasta_file, "r") as handle:
+ with open(fasta_file, "r", encoding="utf-8") as handle:
  record = next(SeqIO.parse(handle, "fasta"))
  self.avg_locus_bp_size.append(len(record.seq))

@@ -134,7 +134,8 @@ class ProbabilisticFilterMlstSchemeModel:

  def save(self) -> None:
  """Saves the model to disk"""
- scheme = str(self.scheme_path).split("/")[-1] # [-1] contains the scheme name
+ # [-1] contains the scheme name
+ scheme = str(self.scheme_path).rsplit("/", maxsplit=1)[-1]
  json_path = self.base_path / scheme / f"{scheme}.json"
  json_object = json.dumps(self.to_dict(), indent=4)

@@ -152,7 +153,7 @@ class ProbabilisticFilterMlstSchemeModel:
  Returns:
  ProbabilisticFilterMlstSchemeModel: A trained model from the disk in JSON format.
  """
- scheme_name = str(scheme_path).split("/")[-1]
+ scheme_name = str(scheme_path).rsplit("/", maxsplit=1)[-1]
  json_path = scheme_path / f"{scheme_name}.json"
  with open(json_path, "r", encoding="utf-8") as file:
  json_object = file.read()
@@ -221,7 +222,7 @@ class ProbabilisticFilterMlstSchemeModel:
  for entry in sorted(cobs_path.iterdir()):
  if str(entry).endswith(".json"):
  continue
- file_name = str(entry).split("/")[-1] # file_name = locus
+ file_name = str(entry).rsplit("/", maxsplit=1)[-1] # file_name = locus
  scheme_path_list.append(file_name.split(".")[0]) # without the file ending

  result_dict = {}
@@ -442,7 +443,7 @@ class ProbabilisticFilterMlstSchemeModel:
  Returns:
  bool: True if any locus score >= 0.5 * its avg base pair size, False otherwise.
  """
- for i, (locus, allele_score_dict) in enumerate(highest_results.items()):
+ for i, (_, allele_score_dict) in enumerate(highest_results.items()):
  if not allele_score_dict:
  continue # skip empty values

xspect/models/probabilistic_filter_model.py CHANGED
@@ -135,8 +135,8 @@ class ProbabilisticFilterModel:
  display_names (dict | None): A dictionary mapping file names to display names.
  If None, uses file names as display names.
  training_accessions (dict[str, list[str]] | None): A dictionary mapping filter IDs to
- lists of accession numbers used for training the model. If None, no training accessions
- are set.
+ lists of accession numbers used for training the model. If None, no training
+ accessions are set.
  Raises:
  ValueError: If the directory path is invalid, does not exist, or is not a directory.
  """
@@ -230,6 +230,7 @@ class ProbabilisticFilterModel:
  ),
  filter_ids: list[str] = None,
  step: int = 1,
+ display_name: bool = False,
  ) -> ModelResult:
  """
  Returns a model result object for the sequence(s) based on the filters in the model
@@ -246,6 +247,7 @@ class ProbabilisticFilterModel:
  filter_ids (list[str]): A list of filter IDs to filter the results. If None,
  all results are returned.
  step (int): The step size for the k-mer search. Default is 1.
+ display_name (bool): Includes a display name for each tax_ID.

  Returns:
  ModelResult: An object containing the hits for each sequence, the number of kmers,
@@ -253,11 +255,12 @@ class ProbabilisticFilterModel:

  Raises:
  ValueError: If the input sequence is not valid, or if it is not a Seq object,
- a list of Seq objects, a SeqIO iterator, or a Path object to a fasta/fastq file.
+ a list of Seq objects, a SeqIO iterator, or a Path object to a fasta/fastq
+ file.
  """
  if isinstance(sequence_input, (SeqRecord)):
  return ProbabilisticFilterModel.predict(
- self, [sequence_input], filter_ids, step=step
+ self, [sequence_input], filter_ids, step=step, display_name=display_name
  )

  if self._is_sequence_list(sequence_input) | self._is_sequence_iterator(
@@ -272,12 +275,25 @@ class ProbabilisticFilterModel:
  num_kmers[individual_sequence.id] = self._count_kmers(
  individual_sequence, step=step
  )
+ if display_name:
+ individual_hits.update(
+ {
+ f"{key} -{self.display_names.get(key, 'Unknown').replace(
+ self.model_display_name, '', 1)}": individual_hits.pop(
+ key
+ )
+ for key in list(individual_hits.keys())
+ }
+ )
  hits[individual_sequence.id] = individual_hits
  return ModelResult(self.slug(), hits, num_kmers, sparse_sampling_step=step)

  if isinstance(sequence_input, Path):
  return ProbabilisticFilterModel.predict(
- self, get_record_iterator(sequence_input), step=step
+ self,
+ get_record_iterator(sequence_input),
+ step=step,
+ display_name=display_name,
  )

  raise ValueError(
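To illustrate the new display_name handling in predict: when the flag is set, each per-sequence hit key is rewritten from the bare taxonomy ID to "<id> -<name>" with the model's display name prefix stripped. A standalone sketch of that dictionary transformation with made-up values:

    # hypothetical values mirroring the renaming logic above
    display_names = {"470": "Acinetobacter baumannii"}
    model_display_name = "Acinetobacter"
    individual_hits = {"470": 12}

    renamed = {
        f"{key} -{display_names.get(key, 'Unknown').replace(model_display_name, '', 1)}": value
        for key, value in individual_hits.items()
    }
    assert renamed == {"470 - baumannii": 12}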
xspect/models/probabilistic_filter_svm_model.py CHANGED
@@ -55,10 +55,14 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
  base_path (Path): The base path where the model will be stored.
  kernel (str): The kernel type for the SVM (e.g., 'linear', 'rbf').
  c (float): Regularization parameter for the SVM.
- fpr (float, optional): False positive rate for the probabilistic filter. Defaults to 0.01.
- num_hashes (int, optional): Number of hashes for the probabilistic filter. Defaults to 7.
- training_accessions (dict[str, list[str]] | None, optional): Accessions used for training the probabilistic filter. Defaults to None.
- svm_accessions (dict[str, list[str]] | None, optional): Accessions used for training the SVM. Defaults to None.
+ fpr (float, optional): False positive rate for the probabilistic filter.
+ Defaults to 0.01.
+ num_hashes (int, optional): Number of hashes for the probabilistic filter.
+ Defaults to 7.
+ training_accessions (dict[str, list[str]] | None, optional): Accessions used for
+ training the probabilistic filter. Defaults to None.
+ svm_accessions (dict[str, list[str]] | None, optional): Accessions used for
+ training the SVM. Defaults to None.
  """
  super().__init__(
  k=k,
@@ -112,17 +116,18 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
  """
  Fit the SVM to the sequences and labels.

- This method first trains the probabilistic filter model and then
- calculates scores for the SVM training. It expects the sequences to be in
- the specified directory and the SVM training sequences to be in the
- specified SVM path. The scores are saved in a CSV file for later use.
+ This method first trains the probabilistic filter model and then calculates scores for
+ the SVM training. It expects the sequences to be in the specified directory and the SVM
+ training sequences to be in the specified SVM path. The scores are saved in a CSV file
+ for later use.

  Args:
  dir_path (Path): The directory containing the training sequences.
  svm_path (Path): The directory containing the SVM training sequences.
  display_names (dict[str, str] | None): A mapping of accession IDs to display names.
  svm_step (int): Step size for sparse sampling in SVM training.
- training_accessions (dict[str, list[str]] | None): Accessions used for training the probabilistic filter.
+ training_accessions (dict[str, list[str]] | None): Accessions used for training the
+ probabilistic filter.
  svm_accessions (dict[str, list[str]] | None): Accessions used for training the SVM.
  """

@@ -178,6 +183,7 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
  ),
  filter_ids: list[str] = None,
  step: int = 1,
+ display_name: bool = False,
  ) -> ModelResult:
  """
  Predict the labels of the sequences.
@@ -187,25 +193,33 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
  with the probabilistic filter model, and it will return a `ModelResult`.

  Args:
- sequence_input (SeqRecord | list[SeqRecord] | SeqIO.FastaIO.FastaIterator | SeqIO.QualityIO.FastqPhredIterator | Path): The input sequences to predict.
- filter_ids (list[str], optional): A list of IDs to filter the predictions. Defaults to None.
+ sequence_input (SeqRecord | list[SeqRecord] | SeqIO.FastaIO.FastaIterator |
+ SeqIO.QualityIO.FastqPhredIterator | Path): The input sequences to predict.
+ filter_ids (list[str], optional): A list of IDs to filter the predictions.
  step (int, optional): Step size for sparse sampling. Defaults to 1.
+ display_name (bool): Includes a display name for each tax_ID.

  Returns:
- ModelResult: The result of the prediction containing hits, number of kmers, and the predicted label.
+ ModelResult: The result of the prediction containing hits, number of kmers, and the
+ predicted label.
  """
  # get scores and format them for the SVM
- res = super().predict(sequence_input, filter_ids, step=step)
+ res = super().predict(sequence_input, filter_ids, step, display_name)
  svm_scores = dict(sorted(res.get_scores()["total"].items()))
  svm_scores = [list(svm_scores.values())]

  svm = self._get_svm(filter_ids)
+ svm_prediction = str(svm.predict(svm_scores)[0])
+ if display_name:
+ svm_prediction = f"{svm_prediction} -{self.display_names.get(svm_prediction, 'Unknown')}".replace(
+ self.model_display_name, "", 1
+ )
  return ModelResult(
  self.slug(),
  res.hits,
  res.num_kmers,
  sparse_sampling_step=step,
- prediction=str(svm.predict(svm_scores)[0]),
+ prediction=svm_prediction,
  )

  def _get_svm(self, id_keys) -> SVC:
@@ -217,7 +231,8 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
  training data to only include those keys.

  Args:
- id_keys (list[str] | None): A list of IDs to filter the training data. If None, all data is used.
+ id_keys (list[str] | None): A list of IDs to filter the training data.
+ If None, all data is used.

  Returns:
  SVC: The trained SVM model.
xspect/models/probabilistic_single_filter_model.py CHANGED
@@ -34,8 +34,8 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
  ) -> None:
  """Initialize probabilistic single filter model.

- This model uses a Bloom filter to store k-mers from the training sequences. It is designed to
- be used with a single filter, which is suitable e.g. for genus-level classification.
+ This model uses a Bloom filter to store k-mers from the training sequences. It is designed
+ to be used with a single filter, which is suitable e.g. for genus-level classification.

  Args:
  k (int): Length of the k-mers to use for filtering
@@ -45,7 +45,7 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
  model_type (str): Type of the model, e.g. "probabilistic_single_filter"
  base_path (Path): Base path where the model will be saved
  fpr (float): False positive rate for the Bloom filter, default is 0.01
- training_accessions (list[str] | None): List of accessions used for training, default is None
+ training_accessions (list[str] | None): List of accessions used for training
  """
  super().__init__(
  k=k,
@@ -75,7 +75,7 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
  Args:
  file_path (Path): Path to the file containing sequences in FASTA format
  display_name (str): Display name for the model
- training_accessions (list[str] | None): List of accessions used for training, default is None
+ training_accessions (list[str] | None): List of accessions used for training
  """
  self.training_accessions = training_accessions

@@ -104,7 +104,7 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
  Calculates the number of k-mers in the sequence that are present in the Bloom filter.

  Args:
- sequence (Seq | SeqRecord): Sequence to calculate hits for, can be a Bio.Seq or Bio.SeqRecord object
+ sequence (Seq | SeqRecord): Sequence to calculate hits for
  filter_ids (list[str] | None): List of filter IDs to use, default is None
  step (int): Step size for generating k-mers, default is 1
  Returns:
@@ -162,13 +162,15 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
  """
  Generate kmers from the sequence

- Generates k-mers from the sequence, considering both the forward and reverse complement strands.
+ Generates k-mers from the sequence, considering both the forward and reverse complement
+ strands.

  Args:
  sequence (Seq): Sequence to generate k-mers from
  step (int): Step size for generating k-mers, default is 1
  Yields:
- str: The minimizer k-mer (the lexicographically smallest k-mer between the forward and reverse complement)
+ str: The minimizer k-mer (the lexicographically smallest k-mer between the forward and
+ reverse complement)
  """
  num_kmers = ceil((len(sequence) - self.k + 1) / step)
  for i in range(num_kmers):
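The docstring above describes a canonical-k-mer scheme: for each window, the model keeps the lexicographically smaller of the k-mer and its reverse complement. An illustrative standalone sketch of that idea, not the package's implementation:

    from Bio.Seq import Seq

    def canonical_kmers(sequence: Seq, k: int, step: int = 1):
        """Yield the smaller of each k-mer and its reverse complement."""
        for i in range(0, len(sequence) - k + 1, step):
            kmer = sequence[i : i + k]
            yield str(min(kmer, kmer.reverse_complement(), key=str))

    print(list(canonical_kmers(Seq("ACGTTGCA"), k=4)))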
xspect/models/result.py CHANGED
@@ -50,7 +50,8 @@ class ModelResult:

  Returns:
  dict: A dictionary where keys are subsequence names and values are dictionaries
- with labels as keys and scores as values. Also includes a 'total' key for overall scores.
+ with labels as keys and scores as values. Also includes a 'total' key for
+ overall scores.
  """
  scores = {
  subsequence: {
@@ -78,7 +79,8 @@ class ModelResult:
  The total hits are calculated by summing the hits for each label across all subsequences.

  Returns:
- dict: A dictionary where keys are labels and values are the total number of hits for that label.
+ dict: A dictionary where keys are labels and values are the total number of hits for
+ that label.
  """
  total_hits = {label: 0 for label in list(self.hits.values())[0]}
  for _, subsequence_hits in self.hits.items():
@@ -97,8 +99,8 @@ class ModelResult:

  Args:
  label (str): The label for which to filter the subsequences.
- filter_threshold (float): The threshold for filtering subsequences. Must be between 0 and 1,
- or -1 to return the subsequence with the maximum score for the label.
+ filter_threshold (float): The threshold for filtering subsequences. Must be between 0
+ and 1, or -1 to return the subsequence with the maximum score for the label.

  Returns:
  dict[str, bool]: A dictionary where keys are subsequence names and values are booleans
@@ -114,11 +116,10 @@ class ModelResult:
  subsequence: score[label] >= filter_threshold
  for subsequence, score in scores.items()
  }
- else:
- return {
- subsequence: score[label] == max(score.values())
- for subsequence, score in scores.items()
- }
+ return {
+ subsequence: score[label] == max(score.values())
+ for subsequence, score in scores.items()
+ }

  def get_filtered_subsequence_labels(
  self, label: str, filter_threshold: float = 0.7
@@ -126,15 +127,17 @@ class ModelResult:
  """
  Return the labels of filtered subsequences.

- This method filters subsequences based on the scores for a given label and a filter threshold.
+ This method filters subsequences based on the scores for a given label and a filter
+ threshold.

  Args:
  label (str): The label for which to filter the subsequences.
- filter_threshold (float): The threshold for filtering subsequences. Must be between 0 and 1,
- or -1 to return the subsequence with the maximum score for the label.
+ filter_threshold (float): The threshold for filtering subsequences. Must be between 0
+ and 1, or -1 to return the subsequence with the maximum score for the label.

  Returns:
- list[str]: A list of subsequence names that meet the filter criteria for the given label.
+ list[str]: A list of subsequence names that meet the filter criteria for the given
+ label.
  """
  return [
  subsequence
@@ -148,11 +151,13 @@ class ModelResult:
  """
  Return the result as a dictionary.

- This method converts the ModelResult object into a dictionary format suitable for serialization.
+ This method converts the ModelResult object into a dictionary format suitable for
+ serialization.

  Returns:
  dict: A dictionary representation of the ModelResult object, including model slug,
- sparse sampling step, hits, scores, number of k-mers, input source, and prediction if available.
+ sparse sampling step, hits, scores, number of k-mers, input source, and prediction if
+ available.
  """
  res = {
  "model_slug": self.model_slug,
xspect/web.py CHANGED
@@ -1,17 +1,26 @@
  """FastAPI-based web application for XspecT."""

+ # pylint: disable=too-many-arguments,too-many-positional-arguments
+
+
  from uuid import uuid4
  import json
  from shutil import copyfileobj
  import importlib.resources as pkg_resources
- from fastapi import APIRouter, FastAPI, HTTPException, UploadFile, BackgroundTasks
+ from fastapi import (
+ APIRouter,
+ BackgroundTasks,
+ FastAPI,
+ HTTPException,
+ UploadFile,
+ )
  from fastapi.responses import FileResponse, RedirectResponse
+ from fastapi.staticfiles import StaticFiles
  from xspect.definitions import get_xspect_runs_path, get_xspect_upload_path
  from xspect.download_models import download_test_models
  import xspect.model_management as mm
  from xspect.train import train_from_ncbi
  from xspect import classify, filter_sequences
- from fastapi.staticfiles import StaticFiles

  app = FastAPI()
  app.mount(
@@ -72,7 +81,7 @@ def classify_post(
  )
  return {"message": "Classification started.", "uuid": uuid}

- elif classification_type == "Species":
+ if classification_type == "Species":
  background_tasks.add_task(
  classify.classify_species,
  model,
@@ -119,7 +128,7 @@ def filter_post(
  )
  return {"message": "Genus filtering started.", "uuid": uuid}

- elif filter_type == "Species":
+ if filter_type == "Species":
  if not filter_species:
  raise ValueError("filter_species must be provided for species filtering.")
  background_tasks.add_task(
xspect-0.5.3.dist-info/METADATA → xspect-0.5.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: XspecT
- Version: 0.5.3
+ Version: 0.5.4
  Summary: Tool to monitor and characterize pathogens using Bloom filters.
  License: MIT License

xspect-0.5.3.dist-info/RECORD → xspect-0.5.4.dist-info/RECORD CHANGED
@@ -1,23 +1,23 @@
  xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- xspect/classify.py,sha256=4E95JCMdLIaWQOlTVzWeNi3J4kAYsek__wMK8iiV_mI,3122
- xspect/definitions.py,sha256=FpdY1lzWG8mOLUIvL6YVCJX_jV_qdRSItk2yoodCQMg,2730
+ xspect/classify.py,sha256=SOTfUsEarZVbVtBuwNTlOYQ_MvoW0ADAqH7rTASWEI8,3936
+ xspect/definitions.py,sha256=8PpU8bpzcwv8PPacncywz-Na_MicMl-JsvjiX3e46yo,2734
  xspect/download_models.py,sha256=VALcnowzkUpR-OAvgB5BUdEq9WnyNbli0CxH3OT40Rc,1121
- xspect/file_io.py,sha256=Y9BX5O2Ni-EEhWxq2Nh0zn0mzWfai-6s5f6ASE83eUA,8116
- xspect/filter_sequences.py,sha256=LOmhvK7SmUVpx5zsrCSySDFB1n6wbemBKysK34b4N-s,4574
- xspect/main.py,sha256=Dt4ji-NgAUoR5DCcVXFxT0ZJSKgw7DJG8of3U9R6THw,13088
- xspect/model_management.py,sha256=KhWL_TtbqZNIM8sakm_QPZAJMFgAFUwConqYKM7ekMk,5290
+ xspect/file_io.py,sha256=QX2nBtlLAexBdfUr7rtHLlWOuXiaKvfRdpn1Dn0avnY,8120
+ xspect/filter_sequences.py,sha256=QKjgUCk3RBY3U9hHmyvSQeQt8n1voBna-NjOoTqdp3A,5196
+ xspect/main.py,sha256=vRCsSH_QVKY6usU5d5pjehPAhk4WJfZ_eyl_0xUGu5E,14137
+ xspect/model_management.py,sha256=yWbCk6tUn7-OYpzH0BViX2oWr4cdNkEBjrvnaw5GPdQ,4893
  xspect/ncbi.py,sha256=VRbFvtfGR4WTsc3buZE9UCabE3OJUTRphDRY20g63-E,11704
  xspect/train.py,sha256=jxjK4OqzTywmd5KGPou9A-doH8Nwhlv_xF4X7M6X_jI,11588
- xspect/web.py,sha256=XD5G232DvGxVzX56-cWgAkss7m2dxk8t7-7WSuNXCIA,6949
+ xspect/web.py,sha256=kM4BZ3fA0f731EEXScAaiGrJZvjjfep1iC1iZemfazw,7039
  xspect/mlst_feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- xspect/mlst_feature/mlst_helper.py,sha256=MsBzMtqz2KW_YnLYOqfYdFTPkx2gC2T8chY9E_2JgLU,8236
- xspect/mlst_feature/pub_mlst_handler.py,sha256=t84obSvp0FUccUEsmNBjVxBj42kbzblJo41AYTvHfUk,7686
+ xspect/mlst_feature/mlst_helper.py,sha256=pxRX_nRbrTSIFPf_FDV3dxR_FonmGtxttFgqNS7sIxE,8130
+ xspect/mlst_feature/pub_mlst_handler.py,sha256=gX0bgAqXTaW9weWgxcbsiD7UtMGuDD9veE9mj42Ffm8,7685
  xspect/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- xspect/models/probabilistic_filter_mlst_model.py,sha256=s6JnqEHvh38Wg8qLD3-J_aU51REDolCY5DNAQtGAjGc,17630
- xspect/models/probabilistic_filter_model.py,sha256=3fAdQlOLvMZba9T-ej_qeGuhO_Fri8vAY_aG2vlvPFw,18350
- xspect/models/probabilistic_filter_svm_model.py,sha256=kzHYdkGFRD-ICviFc8EyawBi8P_9dzjiq0dOJc2rXys,10708
- xspect/models/probabilistic_single_filter_model.py,sha256=CoJeVe3Xw-DDbiBaFPGCpO3CPgiwG7i1Remz01uipSA,6880
- xspect/models/result.py,sha256=gKuyTUPEeXCPoVhj3T6J9LrWXG2usy91nzqeVNKcNTk,7028
+ xspect/models/probabilistic_filter_mlst_model.py,sha256=w9ibUkAYA-DSOEkU8fBenlENrs8JwRRLaF5KO1HVKoM,17716
+ xspect/models/probabilistic_filter_model.py,sha256=pUgkN4E2EO-gePVR4BMndgMhJcyvOfVzfjVypjIz2JA,19047
+ xspect/models/probabilistic_filter_svm_model.py,sha256=n_HMARvcUMP1i-csiW8uvskcocrvhWMjue7kfsaKPpI,11146
+ xspect/models/probabilistic_single_filter_model.py,sha256=vJvKZrAybYHq_UdKQ2GvvVwgTYwqRrL-nDDQZxb6RRc,6828
+ xspect/models/result.py,sha256=Wpsm9EYrvMazDO0JAqF51Sb8BJqAZwYx4G6-SUOt5-c,7070
  xspect/xspect-web/.gitignore,sha256=_nGOe6uxTzy60tl_CIibnOUhXtP-DkOyuM-_s7m4ROg,253
  xspect/xspect-web/README.md,sha256=Fa5cCk66ohbqD_AAVgnXUZLhuzshnLxhlUFhxyscScc,1942
  xspect/xspect-web/components.json,sha256=5emhfq5JRW9J8Zga-1N5jAcj4B-r8VREXnH7Z6tZGNk,425
@@ -78,9 +78,9 @@ xspect/xspect-web/src/components/ui/switch.tsx,sha256=uIqRXtd41ba0eusIEUWVyYZv82
  xspect/xspect-web/src/components/ui/table.tsx,sha256=M2-TIHKwPFWuXrwysSufdQRSMJT-K9jPzGOokfU6PXo,2463
  xspect/xspect-web/src/components/ui/tabs.tsx,sha256=BImHKcdDCtrS3CCV1AGgn8qg0b65RB5P-QdH49IAhx0,1955
  xspect/xspect-web/src/lib/utils.ts,sha256=66ibdQiEHKftZBq1OMLmOKqWma1BkO-O60rc1IQYwLE,165
- xspect-0.5.3.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
- xspect-0.5.3.dist-info/METADATA,sha256=T9yTZsMadZgZXFx3Uezd75GGg9iTnELTwcAeiX5G51k,4569
- xspect-0.5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- xspect-0.5.3.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
- xspect-0.5.3.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
- xspect-0.5.3.dist-info/RECORD,,
+ xspect-0.5.4.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
+ xspect-0.5.4.dist-info/METADATA,sha256=T1EVSE_qesDZjlSCaq3xgnUN57n0NIFjOIQCi4swsEo,4569
+ xspect-0.5.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ xspect-0.5.4.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
+ xspect-0.5.4.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
+ xspect-0.5.4.dist-info/RECORD,,