XspecT 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of XspecT might be problematic. Click here for more details.

xspect/main.py CHANGED
@@ -18,6 +18,7 @@ from xspect.models.probabilistic_filter_mlst_model import (
18
18
  )
19
19
  from xspect.model_management import (
20
20
  get_genus_model,
21
+ get_model_metadata,
21
22
  get_models,
22
23
  get_species_model,
23
24
  )
@@ -41,7 +42,6 @@ def web():
41
42
  @cli.group()
42
43
  def models():
43
44
  """Model management commands."""
44
- pass
45
45
 
46
46
 
47
47
  @models.command(
@@ -77,7 +77,6 @@ def list_models():
77
77
  @models.group()
78
78
  def train():
79
79
  """Train models."""
80
- pass
81
80
 
82
81
 
83
82
  @train.command(
@@ -191,10 +190,12 @@ def train_mlst(choose_schemes):
191
190
  )
192
191
  def classify_seqs():
193
192
  """Classification commands."""
194
- pass
195
193
 
196
194
 
197
- @classify_seqs.command()
195
+ @classify_seqs.command(
196
+ name="genus",
197
+ help="Classify samples using a genus model.",
198
+ )
198
199
  @click.option(
199
200
  "-g",
200
201
  "--genus",
@@ -217,7 +218,7 @@ def classify_seqs():
217
218
  type=click.Path(dir_okay=True, file_okay=True),
218
219
  default=Path(".") / f"result_{uuid4()}.json",
219
220
  )
220
- def genus(model_genus, input_path, output_path):
221
+ def classify_genus(model_genus, input_path, output_path):
221
222
  """Classify samples using a genus model."""
222
223
  click.echo("Classifying...")
223
224
  genus_model = get_genus_model(model_genus)
@@ -226,7 +227,10 @@ def genus(model_genus, input_path, output_path):
226
227
  click.echo(f"Result saved as {output_path}.")
227
228
 
228
229
 
229
- @classify_seqs.command()
230
+ @classify_seqs.command(
231
+ name="species",
232
+ help="Classify samples using a species model.",
233
+ )
230
234
  @click.option(
231
235
  "-g",
232
236
  "--genus",
@@ -252,10 +256,10 @@ def genus(model_genus, input_path, output_path):
252
256
  @click.option(
253
257
  "--sparse-sampling-step",
254
258
  type=int,
255
- help="Sparse sampling step size (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
259
+ help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
256
260
  default=1,
257
261
  )
258
- def species(model_genus, input_path, output_path, sparse_sampling_step):
262
+ def classify_species(model_genus, input_path, output_path, sparse_sampling_step):
259
263
  """Classify samples using a species model."""
260
264
  click.echo("Classifying...")
261
265
  species_model = get_species_model(model_genus)
@@ -302,7 +306,6 @@ def classify_mlst(input_path, output_path):
302
306
  )
303
307
  def filter_seqs():
304
308
  """Filter commands."""
305
- pass
306
309
 
307
310
 
308
311
  @filter_seqs.command(
@@ -336,6 +339,7 @@ def filter_seqs():
336
339
  type=float,
337
340
  help="Threshold for filtering (default: 0.7).",
338
341
  default=0.7,
342
+ prompt=True,
339
343
  )
340
344
  def filter_genus(model_genus, input_path, output_path, threshold):
341
345
  """Filter samples using a genus model."""
@@ -368,12 +372,10 @@ def filter_genus(model_genus, input_path, output_path, threshold):
368
372
  prompt=True,
369
373
  )
370
374
  @click.option(
371
- # todo: this should be a choice of the species in the model w/ display names
372
375
  "-s",
373
376
  "--species",
374
377
  "model_species",
375
378
  help="Species of the model to filter for.",
376
- prompt=True,
377
379
  )
378
380
  @click.option(
379
381
  "-i",
@@ -392,11 +394,36 @@ def filter_genus(model_genus, input_path, output_path, threshold):
392
394
  @click.option(
393
395
  "--threshold",
394
396
  type=float,
395
- help="Threshold for filtering (default: 0.7).",
397
+ help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
396
398
  default=0.7,
399
+ prompt=True,
397
400
  )
398
401
  def filter_species(model_genus, model_species, input_path, output_path, threshold):
399
402
  """Filter a sample using the species model."""
403
+
404
+ available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
405
+ available_species = {
406
+ id: name.replace(f"{model_genus} ", "")
407
+ for id, name in available_species.items()
408
+ }
409
+ if not model_species:
410
+ sorted_available_species = sorted(available_species.values())
411
+ model_species = click.prompt(
412
+ f"Please enter the species name: {model_genus}",
413
+ type=click.Choice(sorted_available_species, case_sensitive=False),
414
+ )
415
+ if model_species not in available_species.values():
416
+ raise click.BadParameter(
417
+ f"Species '{model_species}' not found in the {model_genus} species model."
418
+ )
419
+
420
+ # get the species ID from the name
421
+ model_species = [
422
+ id
423
+ for id, name in available_species.items()
424
+ if name.lower() == model_species.lower()
425
+ ][0]
426
+
400
427
  click.echo("Filtering...")
401
428
  species_model = get_species_model(model_genus)
402
429
  result = species_model.predict(Path(input_path))
@@ -2,7 +2,6 @@
2
2
 
3
3
  from json import loads, dumps
4
4
  from pathlib import Path
5
- from xspect.models.probabilistic_filter_model import ProbabilisticFilterModel
6
5
  from xspect.models.probabilistic_single_filter_model import (
7
6
  ProbabilisticSingleFilterModel,
8
7
  )
@@ -24,23 +23,10 @@ def get_species_model(genus):
24
23
  return species_filter_model
25
24
 
26
25
 
27
- def get_model_by_slug(model_slug: str):
28
- """Get a model by its slug."""
29
- model_path = get_xspect_model_path() / (model_slug + ".json")
30
- model_metadata = get_model_metadata(model_path)
31
- if model_metadata["model_class"] == "ProbabilisticSingleFilterModel":
32
- return ProbabilisticSingleFilterModel.load(model_path)
33
- if model_metadata["model_class"] == "ProbabilisticFilterSVMModel":
34
- return ProbabilisticFilterSVMModel.load(model_path)
35
- if model_metadata["model_class"] == "ProbabilisticFilterModel":
36
- return ProbabilisticFilterModel.load(model_path)
37
- raise ValueError(f"Model class {model_metadata['model_class']} not recognized.")
38
-
39
-
40
26
  def get_model_metadata(model: str | Path):
41
27
  """Get the metadata of a model."""
42
28
  if isinstance(model, str):
43
- model_path = get_xspect_model_path() / (model + ".json")
29
+ model_path = get_xspect_model_path() / (model.lower() + ".json")
44
30
  elif isinstance(model, Path):
45
31
  model_path = model
46
32
  else:
xspect/models/result.py CHANGED
@@ -58,16 +58,28 @@ class ModelResult:
58
58
  return total_hits
59
59
 
60
60
  def get_filter_mask(self, label: str, filter_threshold: float) -> dict[str, bool]:
61
- """Return a mask for filtered subsequences."""
62
- if filter_threshold < 0 or filter_threshold > 1:
61
+ """Return a mask for filtered subsequences.
62
+
63
+ The mask is a dictionary with subsequence names as keys and boolean values
64
+ indicating whether the subsequence is above the filter threshold for the given label.
65
+ A value of -1 for filter_threshold indicates that the subsequence with the maximum score
66
+ for the given label should be returned.
67
+ """
68
+ if filter_threshold < 0 and not filter_threshold == -1 or filter_threshold > 1:
63
69
  raise ValueError("The filter threshold must be between 0 and 1.")
64
70
 
65
71
  scores = self.get_scores()
66
72
  scores.pop("total")
67
- return {
68
- subsequence: score[label] >= filter_threshold
69
- for subsequence, score in scores.items()
70
- }
73
+ if not filter_threshold == -1:
74
+ return {
75
+ subsequence: score[label] >= filter_threshold
76
+ for subsequence, score in scores.items()
77
+ }
78
+ else:
79
+ return {
80
+ subsequence: score[label] == max(score.values())
81
+ for subsequence, score in scores.items()
82
+ }
71
83
 
72
84
  def get_filtered_subsequence_labels(
73
85
  self, label: str, filter_threshold: float = 0.7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: XspecT
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Tool to monitor and characterize pathogens using Bloom filters.
5
5
  License: MIT License
6
6
 
@@ -54,6 +54,7 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
54
54
  Provides-Extra: test
55
55
  Requires-Dist: pytest; extra == "test"
56
56
  Requires-Dist: pytest-cov; extra == "test"
57
+ Requires-Dist: httpx; extra == "test"
57
58
  Dynamic: license-file
58
59
 
59
60
  # XspecT - Acinetobacter Species Assignment Tool
@@ -3,8 +3,8 @@ xspect/definitions.py,sha256=fVn_li_s2hriOSGJ69o_H8H-vkw1znvkryhBj7WMnF4,1219
3
3
  xspect/download_models.py,sha256=y1wFJZa1xOJfvUP78zKkRs46O-WqKBL90vmo5AYUio0,853
4
4
  xspect/fastapi.py,sha256=DOef3MqWPdBmdYBo8Z9SPmWrbJHOsQxQe3GrC4f__Rc,3165
5
5
  xspect/file_io.py,sha256=YmfoKEQdHHEi8dO2G5Kt4tSNi5LuWW0VZ74pyYRHiTo,5937
6
- xspect/main.py,sha256=uVj1fooDU5WW8sMug5YPwuAphb8zd3PDpNFNlTIyXBw,11155
7
- xspect/model_management.py,sha256=LItMidbfxZfttEZHa8da_nnkwkH7XVLWDM0uVrFUZ0Q,3753
6
+ xspect/main.py,sha256=twIn48wPDFOEejQroYN8JM8a40naEqT_BUgrMAwwYck,12154
7
+ xspect/model_management.py,sha256=UbmAr3YOZ4oy_9iVvApCLstYHGkcmneHEC_yftRIqCI,3010
8
8
  xspect/ncbi.py,sha256=sSJO3g8n89Qw6UJjAy13bpjOcIGSquTKNKVHNUMbDeM,10072
9
9
  xspect/train.py,sha256=7I7-inWGJe_VDzII9dLZ8U-8SUCZDIrhb-eNOZEyfss,10703
10
10
  xspect/mlst_feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,10 +15,10 @@ xspect/models/probabilistic_filter_mlst_model.py,sha256=JMc0yBJPo7J9b-GpvhDmzhwW
15
15
  xspect/models/probabilistic_filter_model.py,sha256=l8mhcRgHPso7qIgI56buCnE3ZleO3gPWOZEpgrycOBA,10029
16
16
  xspect/models/probabilistic_filter_svm_model.py,sha256=xXimcv3iWnG1JiFyrk6UqkP9hFIxWGDdb__fRdQYwro,6245
17
17
  xspect/models/probabilistic_single_filter_model.py,sha256=yxWnCt4IP-3ZRLP4pRA3f2VTHc0_4g17PDCyOFayDDg,4090
18
- xspect/models/result.py,sha256=fhTS43XYAIkNiiAMyNpaif0kM4Ab3xLBnVJnutkOuFU,3400
19
- xspect-0.4.0.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
20
- xspect-0.4.0.dist-info/METADATA,sha256=mmsNmdiRqOC0RCBe7yW6oofue2OctwErCWVyiJD86nI,4439
21
- xspect-0.4.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
22
- xspect-0.4.0.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
23
- xspect-0.4.0.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
24
- xspect-0.4.0.dist-info/RECORD,,
18
+ xspect/models/result.py,sha256=ELWiDlQPlxNG7ceLpth60Z_Hb1ZdopDJ3vgHBPgSRm8,3989
19
+ xspect-0.4.1.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
20
+ xspect-0.4.1.dist-info/METADATA,sha256=BLTAPyNGEjUxxFUqGvdgyFy8T0p9b8w8IOBbUBvnv28,4477
21
+ xspect-0.4.1.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
22
+ xspect-0.4.1.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
23
+ xspect-0.4.1.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
24
+ xspect-0.4.1.dist-info/RECORD,,
File without changes