XspecT 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- xspect/main.py +39 -12
- xspect/model_management.py +1 -15
- xspect/models/result.py +18 -6
- {xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/METADATA +2 -1
- {xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/RECORD +9 -9
- {xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/WHEEL +0 -0
- {xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/entry_points.txt +0 -0
- {xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/top_level.txt +0 -0
xspect/main.py
CHANGED
|
@@ -18,6 +18,7 @@ from xspect.models.probabilistic_filter_mlst_model import (
|
|
|
18
18
|
)
|
|
19
19
|
from xspect.model_management import (
|
|
20
20
|
get_genus_model,
|
|
21
|
+
get_model_metadata,
|
|
21
22
|
get_models,
|
|
22
23
|
get_species_model,
|
|
23
24
|
)
|
|
@@ -41,7 +42,6 @@ def web():
|
|
|
41
42
|
@cli.group()
|
|
42
43
|
def models():
|
|
43
44
|
"""Model management commands."""
|
|
44
|
-
pass
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
@models.command(
|
|
@@ -77,7 +77,6 @@ def list_models():
|
|
|
77
77
|
@models.group()
|
|
78
78
|
def train():
|
|
79
79
|
"""Train models."""
|
|
80
|
-
pass
|
|
81
80
|
|
|
82
81
|
|
|
83
82
|
@train.command(
|
|
@@ -191,10 +190,12 @@ def train_mlst(choose_schemes):
|
|
|
191
190
|
)
|
|
192
191
|
def classify_seqs():
|
|
193
192
|
"""Classification commands."""
|
|
194
|
-
pass
|
|
195
193
|
|
|
196
194
|
|
|
197
|
-
@classify_seqs.command()
|
|
195
|
+
@classify_seqs.command(
|
|
196
|
+
name="genus",
|
|
197
|
+
help="Classify samples using a genus model.",
|
|
198
|
+
)
|
|
198
199
|
@click.option(
|
|
199
200
|
"-g",
|
|
200
201
|
"--genus",
|
|
@@ -217,7 +218,7 @@ def classify_seqs():
|
|
|
217
218
|
type=click.Path(dir_okay=True, file_okay=True),
|
|
218
219
|
default=Path(".") / f"result_{uuid4()}.json",
|
|
219
220
|
)
|
|
220
|
-
def genus(model_genus, input_path, output_path):
|
|
221
|
+
def classify_genus(model_genus, input_path, output_path):
|
|
221
222
|
"""Classify samples using a genus model."""
|
|
222
223
|
click.echo("Classifying...")
|
|
223
224
|
genus_model = get_genus_model(model_genus)
|
|
@@ -226,7 +227,10 @@ def genus(model_genus, input_path, output_path):
|
|
|
226
227
|
click.echo(f"Result saved as {output_path}.")
|
|
227
228
|
|
|
228
229
|
|
|
229
|
-
@classify_seqs.command()
|
|
230
|
+
@classify_seqs.command(
|
|
231
|
+
name="species",
|
|
232
|
+
help="Classify samples using a species model.",
|
|
233
|
+
)
|
|
230
234
|
@click.option(
|
|
231
235
|
"-g",
|
|
232
236
|
"--genus",
|
|
@@ -252,10 +256,10 @@ def genus(model_genus, input_path, output_path):
|
|
|
252
256
|
@click.option(
|
|
253
257
|
"--sparse-sampling-step",
|
|
254
258
|
type=int,
|
|
255
|
-
help="Sparse sampling step
|
|
259
|
+
help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
|
|
256
260
|
default=1,
|
|
257
261
|
)
|
|
258
|
-
def
|
|
262
|
+
def classify_species(model_genus, input_path, output_path, sparse_sampling_step):
|
|
259
263
|
"""Classify samples using a species model."""
|
|
260
264
|
click.echo("Classifying...")
|
|
261
265
|
species_model = get_species_model(model_genus)
|
|
@@ -302,7 +306,6 @@ def classify_mlst(input_path, output_path):
|
|
|
302
306
|
)
|
|
303
307
|
def filter_seqs():
|
|
304
308
|
"""Filter commands."""
|
|
305
|
-
pass
|
|
306
309
|
|
|
307
310
|
|
|
308
311
|
@filter_seqs.command(
|
|
@@ -336,6 +339,7 @@ def filter_seqs():
|
|
|
336
339
|
type=float,
|
|
337
340
|
help="Threshold for filtering (default: 0.7).",
|
|
338
341
|
default=0.7,
|
|
342
|
+
prompt=True,
|
|
339
343
|
)
|
|
340
344
|
def filter_genus(model_genus, input_path, output_path, threshold):
|
|
341
345
|
"""Filter samples using a genus model."""
|
|
@@ -368,12 +372,10 @@ def filter_genus(model_genus, input_path, output_path, threshold):
|
|
|
368
372
|
prompt=True,
|
|
369
373
|
)
|
|
370
374
|
@click.option(
|
|
371
|
-
# todo: this should be a choice of the species in the model w/ display names
|
|
372
375
|
"-s",
|
|
373
376
|
"--species",
|
|
374
377
|
"model_species",
|
|
375
378
|
help="Species of the model to filter for.",
|
|
376
|
-
prompt=True,
|
|
377
379
|
)
|
|
378
380
|
@click.option(
|
|
379
381
|
"-i",
|
|
@@ -392,11 +394,36 @@ def filter_genus(model_genus, input_path, output_path, threshold):
|
|
|
392
394
|
@click.option(
|
|
393
395
|
"--threshold",
|
|
394
396
|
type=float,
|
|
395
|
-
help="Threshold for filtering (default: 0.7).",
|
|
397
|
+
help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
|
|
396
398
|
default=0.7,
|
|
399
|
+
prompt=True,
|
|
397
400
|
)
|
|
398
401
|
def filter_species(model_genus, model_species, input_path, output_path, threshold):
|
|
399
402
|
"""Filter a sample using the species model."""
|
|
403
|
+
|
|
404
|
+
available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
|
|
405
|
+
available_species = {
|
|
406
|
+
id: name.replace(f"{model_genus} ", "")
|
|
407
|
+
for id, name in available_species.items()
|
|
408
|
+
}
|
|
409
|
+
if not model_species:
|
|
410
|
+
sorted_available_species = sorted(available_species.values())
|
|
411
|
+
model_species = click.prompt(
|
|
412
|
+
f"Please enter the species name: {model_genus}",
|
|
413
|
+
type=click.Choice(sorted_available_species, case_sensitive=False),
|
|
414
|
+
)
|
|
415
|
+
if model_species not in available_species.values():
|
|
416
|
+
raise click.BadParameter(
|
|
417
|
+
f"Species '{model_species}' not found in the {model_genus} species model."
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
# get the species ID from the name
|
|
421
|
+
model_species = [
|
|
422
|
+
id
|
|
423
|
+
for id, name in available_species.items()
|
|
424
|
+
if name.lower() == model_species.lower()
|
|
425
|
+
][0]
|
|
426
|
+
|
|
400
427
|
click.echo("Filtering...")
|
|
401
428
|
species_model = get_species_model(model_genus)
|
|
402
429
|
result = species_model.predict(Path(input_path))
|
xspect/model_management.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
from json import loads, dumps
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from xspect.models.probabilistic_filter_model import ProbabilisticFilterModel
|
|
6
5
|
from xspect.models.probabilistic_single_filter_model import (
|
|
7
6
|
ProbabilisticSingleFilterModel,
|
|
8
7
|
)
|
|
@@ -24,23 +23,10 @@ def get_species_model(genus):
|
|
|
24
23
|
return species_filter_model
|
|
25
24
|
|
|
26
25
|
|
|
27
|
-
def get_model_by_slug(model_slug: str):
|
|
28
|
-
"""Get a model by its slug."""
|
|
29
|
-
model_path = get_xspect_model_path() / (model_slug + ".json")
|
|
30
|
-
model_metadata = get_model_metadata(model_path)
|
|
31
|
-
if model_metadata["model_class"] == "ProbabilisticSingleFilterModel":
|
|
32
|
-
return ProbabilisticSingleFilterModel.load(model_path)
|
|
33
|
-
if model_metadata["model_class"] == "ProbabilisticFilterSVMModel":
|
|
34
|
-
return ProbabilisticFilterSVMModel.load(model_path)
|
|
35
|
-
if model_metadata["model_class"] == "ProbabilisticFilterModel":
|
|
36
|
-
return ProbabilisticFilterModel.load(model_path)
|
|
37
|
-
raise ValueError(f"Model class {model_metadata['model_class']} not recognized.")
|
|
38
|
-
|
|
39
|
-
|
|
40
26
|
def get_model_metadata(model: str | Path):
|
|
41
27
|
"""Get the metadata of a model."""
|
|
42
28
|
if isinstance(model, str):
|
|
43
|
-
model_path = get_xspect_model_path() / (model + ".json")
|
|
29
|
+
model_path = get_xspect_model_path() / (model.lower() + ".json")
|
|
44
30
|
elif isinstance(model, Path):
|
|
45
31
|
model_path = model
|
|
46
32
|
else:
|
xspect/models/result.py
CHANGED
|
@@ -58,16 +58,28 @@ class ModelResult:
|
|
|
58
58
|
return total_hits
|
|
59
59
|
|
|
60
60
|
def get_filter_mask(self, label: str, filter_threshold: float) -> dict[str, bool]:
|
|
61
|
-
"""Return a mask for filtered subsequences.
|
|
62
|
-
|
|
61
|
+
"""Return a mask for filtered subsequences.
|
|
62
|
+
|
|
63
|
+
The mask is a dictionary with subsequence names as keys and boolean values
|
|
64
|
+
indicating whether the subsequence is above the filter threshold for the given label.
|
|
65
|
+
A value of -1 for filter_threshold indicates that the subsequence with the maximum score
|
|
66
|
+
for the given label should be returned.
|
|
67
|
+
"""
|
|
68
|
+
if filter_threshold < 0 and not filter_threshold == -1 or filter_threshold > 1:
|
|
63
69
|
raise ValueError("The filter threshold must be between 0 and 1.")
|
|
64
70
|
|
|
65
71
|
scores = self.get_scores()
|
|
66
72
|
scores.pop("total")
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
73
|
+
if not filter_threshold == -1:
|
|
74
|
+
return {
|
|
75
|
+
subsequence: score[label] >= filter_threshold
|
|
76
|
+
for subsequence, score in scores.items()
|
|
77
|
+
}
|
|
78
|
+
else:
|
|
79
|
+
return {
|
|
80
|
+
subsequence: score[label] == max(score.values())
|
|
81
|
+
for subsequence, score in scores.items()
|
|
82
|
+
}
|
|
71
83
|
|
|
72
84
|
def get_filtered_subsequence_labels(
|
|
73
85
|
self, label: str, filter_threshold: float = 0.7
|
|
{xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -54,6 +54,7 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
|
54
54
|
Provides-Extra: test
|
|
55
55
|
Requires-Dist: pytest; extra == "test"
|
|
56
56
|
Requires-Dist: pytest-cov; extra == "test"
|
|
57
|
+
Requires-Dist: httpx; extra == "test"
|
|
57
58
|
Dynamic: license-file
|
|
58
59
|
|
|
59
60
|
# XspecT - Acinetobacter Species Assignment Tool
|
|
{xspect-0.4.0.dist-info → xspect-0.4.1.dist-info}/RECORD
CHANGED
|
@@ -3,8 +3,8 @@ xspect/definitions.py,sha256=fVn_li_s2hriOSGJ69o_H8H-vkw1znvkryhBj7WMnF4,1219
|
|
|
3
3
|
xspect/download_models.py,sha256=y1wFJZa1xOJfvUP78zKkRs46O-WqKBL90vmo5AYUio0,853
|
|
4
4
|
xspect/fastapi.py,sha256=DOef3MqWPdBmdYBo8Z9SPmWrbJHOsQxQe3GrC4f__Rc,3165
|
|
5
5
|
xspect/file_io.py,sha256=YmfoKEQdHHEi8dO2G5Kt4tSNi5LuWW0VZ74pyYRHiTo,5937
|
|
6
|
-
xspect/main.py,sha256=
|
|
7
|
-
xspect/model_management.py,sha256=
|
|
6
|
+
xspect/main.py,sha256=twIn48wPDFOEejQroYN8JM8a40naEqT_BUgrMAwwYck,12154
|
|
7
|
+
xspect/model_management.py,sha256=UbmAr3YOZ4oy_9iVvApCLstYHGkcmneHEC_yftRIqCI,3010
|
|
8
8
|
xspect/ncbi.py,sha256=sSJO3g8n89Qw6UJjAy13bpjOcIGSquTKNKVHNUMbDeM,10072
|
|
9
9
|
xspect/train.py,sha256=7I7-inWGJe_VDzII9dLZ8U-8SUCZDIrhb-eNOZEyfss,10703
|
|
10
10
|
xspect/mlst_feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -15,10 +15,10 @@ xspect/models/probabilistic_filter_mlst_model.py,sha256=JMc0yBJPo7J9b-GpvhDmzhwW
|
|
|
15
15
|
xspect/models/probabilistic_filter_model.py,sha256=l8mhcRgHPso7qIgI56buCnE3ZleO3gPWOZEpgrycOBA,10029
|
|
16
16
|
xspect/models/probabilistic_filter_svm_model.py,sha256=xXimcv3iWnG1JiFyrk6UqkP9hFIxWGDdb__fRdQYwro,6245
|
|
17
17
|
xspect/models/probabilistic_single_filter_model.py,sha256=yxWnCt4IP-3ZRLP4pRA3f2VTHc0_4g17PDCyOFayDDg,4090
|
|
18
|
-
xspect/models/result.py,sha256=
|
|
19
|
-
xspect-0.4.0.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
|
|
20
|
-
xspect-0.4.
|
|
21
|
-
xspect-0.4.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
|
22
|
-
xspect-0.4.0.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
|
|
23
|
-
xspect-0.4.0.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
|
|
24
|
-
xspect-0.4.0.dist-info/RECORD,,
|
|
18
|
+
xspect/models/result.py,sha256=ELWiDlQPlxNG7ceLpth60Z_Hb1ZdopDJ3vgHBPgSRm8,3989
|
|
19
|
+
xspect-0.4.1.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
|
|
20
|
+
xspect-0.4.1.dist-info/METADATA,sha256=BLTAPyNGEjUxxFUqGvdgyFy8T0p9b8w8IOBbUBvnv28,4477
|
|
21
|
+
xspect-0.4.1.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
|
22
|
+
xspect-0.4.1.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
|
|
23
|
+
xspect-0.4.1.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
|
|
24
|
+
xspect-0.4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|