XspecT 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic. Click here for more details.
- {xspect-0.4.0 → xspect-0.4.1}/PKG-INFO +2 -1
- {xspect-0.4.0 → xspect-0.4.1}/pyproject.toml +2 -2
- {xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/PKG-INFO +2 -1
- {xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/SOURCES.txt +1 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/requires.txt +1 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/main.py +39 -12
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/model_management.py +1 -15
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/models/result.py +18 -6
- {xspect-0.4.0 → xspect-0.4.1}/tests/conftest.py +20 -0
- xspect-0.4.1/tests/test_cli.py +190 -0
- xspect-0.4.1/tests/test_fastapi.py +109 -0
- xspect-0.4.0/tests/test_cli.py +0 -93
- {xspect-0.4.0 → xspect-0.4.1}/.github/workflows/black.yml +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/.github/workflows/docs.yml +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/.github/workflows/pylint.yml +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/.github/workflows/pypi.yml +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/.github/workflows/test.yml +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/.gitignore +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/LICENSE +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/README.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/About.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/AddFilter.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/AddSpecies1.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/AddSpecies2.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/BF.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/ClAssT_Ergebnis1.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/ClAssT_Ergebnis2.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/ClAssT_Ergebnis3.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/ClAssT_Hauptseite.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/CommandLine_Input.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/CommandLine_results.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/CommandLine_whole.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/How2Use.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/HowtouseAspecT.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Ergebnis1.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Ergebnis2.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Ergebnis3.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Ergebnis4.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Hauptseite.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Runtime.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Runtime_Oxa.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/XspecT_Startseite.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/change_pw.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/modify_vecs.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Instructions/pictures/secretkey.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/Makefile +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/cli.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/conf.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/diagrams/probabilistic_filter_models.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/img/logo.png +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/index.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/input_data.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/installation.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/make.bat +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/quickstart.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/docs/web.md +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/setup.cfg +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/dependency_links.txt +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/entry_points.txt +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/top_level.txt +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/__init__.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/definitions.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/download_models.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/fastapi.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/file_io.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/mlst_feature/__init__.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/mlst_feature/mlst_helper.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/mlst_feature/pub_mlst_handler.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/models/__init__.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/models/probabilistic_filter_mlst_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/models/probabilistic_filter_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/models/probabilistic_filter_svm_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/models/probabilistic_single_filter_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/ncbi.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/src/xspect/train.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/__init__.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_file_io.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_model_management.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_model_result.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_ncbi.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_probabilisitc_filter_mlst_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_probabilistic_filter_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_probabilistic_filter_svm_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_probabilistic_single_filter_model.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_pub_mlst_handler.py +0 -0
- {xspect-0.4.0 → xspect-0.4.1}/tests/test_train.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -54,6 +54,7 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
|
54
54
|
Provides-Extra: test
|
|
55
55
|
Requires-Dist: pytest; extra == "test"
|
|
56
56
|
Requires-Dist: pytest-cov; extra == "test"
|
|
57
|
+
Requires-Dist: httpx; extra == "test"
|
|
57
58
|
Dynamic: license-file
|
|
58
59
|
|
|
59
60
|
# XspecT - Acinetobacter Species Assignment Tool
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "XspecT"
|
|
3
|
-
version = "0.4.0"
|
|
3
|
+
version = "0.4.1"
|
|
4
4
|
description = "Tool to monitor and characterize pathogens using Bloom filters."
|
|
5
5
|
readme = {file = "README.md", content-type = "text/markdown"}
|
|
6
6
|
license = {file = "LICENSE"}
|
|
@@ -51,4 +51,4 @@ pythonpath = [
|
|
|
51
51
|
|
|
52
52
|
[project.optional-dependencies]
|
|
53
53
|
docs = ["sphinx", "furo", "myst-parser", "sphinx-copybutton", "sphinx-autobuild"]
|
|
54
|
-
test = ["pytest", "pytest-cov"]
|
|
54
|
+
test = ["pytest", "pytest-cov", "httpx"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: XspecT
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Tool to monitor and characterize pathogens using Bloom filters.
|
|
5
5
|
License: MIT License
|
|
6
6
|
|
|
@@ -54,6 +54,7 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
|
54
54
|
Provides-Extra: test
|
|
55
55
|
Requires-Dist: pytest; extra == "test"
|
|
56
56
|
Requires-Dist: pytest-cov; extra == "test"
|
|
57
|
+
Requires-Dist: httpx; extra == "test"
|
|
57
58
|
Dynamic: license-file
|
|
58
59
|
|
|
59
60
|
# XspecT - Acinetobacter Species Assignment Tool
|
|
@@ -18,6 +18,7 @@ from xspect.models.probabilistic_filter_mlst_model import (
|
|
|
18
18
|
)
|
|
19
19
|
from xspect.model_management import (
|
|
20
20
|
get_genus_model,
|
|
21
|
+
get_model_metadata,
|
|
21
22
|
get_models,
|
|
22
23
|
get_species_model,
|
|
23
24
|
)
|
|
@@ -41,7 +42,6 @@ def web():
|
|
|
41
42
|
@cli.group()
|
|
42
43
|
def models():
|
|
43
44
|
"""Model management commands."""
|
|
44
|
-
pass
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
@models.command(
|
|
@@ -77,7 +77,6 @@ def list_models():
|
|
|
77
77
|
@models.group()
|
|
78
78
|
def train():
|
|
79
79
|
"""Train models."""
|
|
80
|
-
pass
|
|
81
80
|
|
|
82
81
|
|
|
83
82
|
@train.command(
|
|
@@ -191,10 +190,12 @@ def train_mlst(choose_schemes):
|
|
|
191
190
|
)
|
|
192
191
|
def classify_seqs():
|
|
193
192
|
"""Classification commands."""
|
|
194
|
-
pass
|
|
195
193
|
|
|
196
194
|
|
|
197
|
-
@classify_seqs.command()
|
|
195
|
+
@classify_seqs.command(
|
|
196
|
+
name="genus",
|
|
197
|
+
help="Classify samples using a genus model.",
|
|
198
|
+
)
|
|
198
199
|
@click.option(
|
|
199
200
|
"-g",
|
|
200
201
|
"--genus",
|
|
@@ -217,7 +218,7 @@ def classify_seqs():
|
|
|
217
218
|
type=click.Path(dir_okay=True, file_okay=True),
|
|
218
219
|
default=Path(".") / f"result_{uuid4()}.json",
|
|
219
220
|
)
|
|
220
|
-
def genus(model_genus, input_path, output_path):
|
|
221
|
+
def classify_genus(model_genus, input_path, output_path):
|
|
221
222
|
"""Classify samples using a genus model."""
|
|
222
223
|
click.echo("Classifying...")
|
|
223
224
|
genus_model = get_genus_model(model_genus)
|
|
@@ -226,7 +227,10 @@ def genus(model_genus, input_path, output_path):
|
|
|
226
227
|
click.echo(f"Result saved as {output_path}.")
|
|
227
228
|
|
|
228
229
|
|
|
229
|
-
@classify_seqs.command()
|
|
230
|
+
@classify_seqs.command(
|
|
231
|
+
name="species",
|
|
232
|
+
help="Classify samples using a species model.",
|
|
233
|
+
)
|
|
230
234
|
@click.option(
|
|
231
235
|
"-g",
|
|
232
236
|
"--genus",
|
|
@@ -252,10 +256,10 @@ def genus(model_genus, input_path, output_path):
|
|
|
252
256
|
@click.option(
|
|
253
257
|
"--sparse-sampling-step",
|
|
254
258
|
type=int,
|
|
255
|
-
help="Sparse sampling step
|
|
259
|
+
help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
|
|
256
260
|
default=1,
|
|
257
261
|
)
|
|
258
|
-
def species(model_genus, input_path, output_path, sparse_sampling_step):
|
|
262
|
+
def classify_species(model_genus, input_path, output_path, sparse_sampling_step):
|
|
259
263
|
"""Classify samples using a species model."""
|
|
260
264
|
click.echo("Classifying...")
|
|
261
265
|
species_model = get_species_model(model_genus)
|
|
@@ -302,7 +306,6 @@ def classify_mlst(input_path, output_path):
|
|
|
302
306
|
)
|
|
303
307
|
def filter_seqs():
|
|
304
308
|
"""Filter commands."""
|
|
305
|
-
pass
|
|
306
309
|
|
|
307
310
|
|
|
308
311
|
@filter_seqs.command(
|
|
@@ -336,6 +339,7 @@ def filter_seqs():
|
|
|
336
339
|
type=float,
|
|
337
340
|
help="Threshold for filtering (default: 0.7).",
|
|
338
341
|
default=0.7,
|
|
342
|
+
prompt=True,
|
|
339
343
|
)
|
|
340
344
|
def filter_genus(model_genus, input_path, output_path, threshold):
|
|
341
345
|
"""Filter samples using a genus model."""
|
|
@@ -368,12 +372,10 @@ def filter_genus(model_genus, input_path, output_path, threshold):
|
|
|
368
372
|
prompt=True,
|
|
369
373
|
)
|
|
370
374
|
@click.option(
|
|
371
|
-
# todo: this should be a choice of the species in the model w/ display names
|
|
372
375
|
"-s",
|
|
373
376
|
"--species",
|
|
374
377
|
"model_species",
|
|
375
378
|
help="Species of the model to filter for.",
|
|
376
|
-
prompt=True,
|
|
377
379
|
)
|
|
378
380
|
@click.option(
|
|
379
381
|
"-i",
|
|
@@ -392,11 +394,36 @@ def filter_genus(model_genus, input_path, output_path, threshold):
|
|
|
392
394
|
@click.option(
|
|
393
395
|
"--threshold",
|
|
394
396
|
type=float,
|
|
395
|
-
help="Threshold for filtering (default: 0.7).",
|
|
397
|
+
help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
|
|
396
398
|
default=0.7,
|
|
399
|
+
prompt=True,
|
|
397
400
|
)
|
|
398
401
|
def filter_species(model_genus, model_species, input_path, output_path, threshold):
|
|
399
402
|
"""Filter a sample using the species model."""
|
|
403
|
+
|
|
404
|
+
available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
|
|
405
|
+
available_species = {
|
|
406
|
+
id: name.replace(f"{model_genus} ", "")
|
|
407
|
+
for id, name in available_species.items()
|
|
408
|
+
}
|
|
409
|
+
if not model_species:
|
|
410
|
+
sorted_available_species = sorted(available_species.values())
|
|
411
|
+
model_species = click.prompt(
|
|
412
|
+
f"Please enter the species name: {model_genus}",
|
|
413
|
+
type=click.Choice(sorted_available_species, case_sensitive=False),
|
|
414
|
+
)
|
|
415
|
+
if model_species not in available_species.values():
|
|
416
|
+
raise click.BadParameter(
|
|
417
|
+
f"Species '{model_species}' not found in the {model_genus} species model."
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
# get the species ID from the name
|
|
421
|
+
model_species = [
|
|
422
|
+
id
|
|
423
|
+
for id, name in available_species.items()
|
|
424
|
+
if name.lower() == model_species.lower()
|
|
425
|
+
][0]
|
|
426
|
+
|
|
400
427
|
click.echo("Filtering...")
|
|
401
428
|
species_model = get_species_model(model_genus)
|
|
402
429
|
result = species_model.predict(Path(input_path))
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
from json import loads, dumps
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from xspect.models.probabilistic_filter_model import ProbabilisticFilterModel
|
|
6
5
|
from xspect.models.probabilistic_single_filter_model import (
|
|
7
6
|
ProbabilisticSingleFilterModel,
|
|
8
7
|
)
|
|
@@ -24,23 +23,10 @@ def get_species_model(genus):
|
|
|
24
23
|
return species_filter_model
|
|
25
24
|
|
|
26
25
|
|
|
27
|
-
def get_model_by_slug(model_slug: str):
|
|
28
|
-
"""Get a model by its slug."""
|
|
29
|
-
model_path = get_xspect_model_path() / (model_slug + ".json")
|
|
30
|
-
model_metadata = get_model_metadata(model_path)
|
|
31
|
-
if model_metadata["model_class"] == "ProbabilisticSingleFilterModel":
|
|
32
|
-
return ProbabilisticSingleFilterModel.load(model_path)
|
|
33
|
-
if model_metadata["model_class"] == "ProbabilisticFilterSVMModel":
|
|
34
|
-
return ProbabilisticFilterSVMModel.load(model_path)
|
|
35
|
-
if model_metadata["model_class"] == "ProbabilisticFilterModel":
|
|
36
|
-
return ProbabilisticFilterModel.load(model_path)
|
|
37
|
-
raise ValueError(f"Model class {model_metadata['model_class']} not recognized.")
|
|
38
|
-
|
|
39
|
-
|
|
40
26
|
def get_model_metadata(model: str | Path):
|
|
41
27
|
"""Get the metadata of a model."""
|
|
42
28
|
if isinstance(model, str):
|
|
43
|
-
model_path = get_xspect_model_path() / (model + ".json")
|
|
29
|
+
model_path = get_xspect_model_path() / (model.lower() + ".json")
|
|
44
30
|
elif isinstance(model, Path):
|
|
45
31
|
model_path = model
|
|
46
32
|
else:
|
|
@@ -58,16 +58,28 @@ class ModelResult:
|
|
|
58
58
|
return total_hits
|
|
59
59
|
|
|
60
60
|
def get_filter_mask(self, label: str, filter_threshold: float) -> dict[str, bool]:
|
|
61
|
-
"""Return a mask for filtered subsequences.
|
|
62
|
-
|
|
61
|
+
"""Return a mask for filtered subsequences.
|
|
62
|
+
|
|
63
|
+
The mask is a dictionary with subsequence names as keys and boolean values
|
|
64
|
+
indicating whether the subsequence is above the filter threshold for the given label.
|
|
65
|
+
A value of -1 for filter_threshold indicates that the subsequence with the maximum score
|
|
66
|
+
for the given label should be returned.
|
|
67
|
+
"""
|
|
68
|
+
if filter_threshold < 0 and not filter_threshold == -1 or filter_threshold > 1:
|
|
63
69
|
raise ValueError("The filter threshold must be between 0 and 1.")
|
|
64
70
|
|
|
65
71
|
scores = self.get_scores()
|
|
66
72
|
scores.pop("total")
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
73
|
+
if not filter_threshold == -1:
|
|
74
|
+
return {
|
|
75
|
+
subsequence: score[label] >= filter_threshold
|
|
76
|
+
for subsequence, score in scores.items()
|
|
77
|
+
}
|
|
78
|
+
else:
|
|
79
|
+
return {
|
|
80
|
+
subsequence: score[label] == max(score.values())
|
|
81
|
+
for subsequence, score in scores.items()
|
|
82
|
+
}
|
|
71
83
|
|
|
72
84
|
def get_filtered_subsequence_labels(
|
|
73
85
|
self, label: str, filter_threshold: float = 0.7
|
|
@@ -15,6 +15,7 @@ def pytest_sessionstart():
|
|
|
15
15
|
"GCF_000006945.2_ASM694v2_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_000006945.2/download?include_annotation_type=GENOME_FASTA",
|
|
16
16
|
"GCF_000018445.1_ASM1844v1_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_000018445.1/download?include_annotation_type=GENOME_FASTA",
|
|
17
17
|
"GCF_000069245.1_ASM6924v1_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_000069245.1/download?include_annotation_type=GENOME_FASTA",
|
|
18
|
+
"GCA_900444805.1_58932_B01_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCA_900444805.1/download?include_annotation_type=GENOME_FASTA",
|
|
18
19
|
}
|
|
19
20
|
if not os.path.exists("tests/test_assemblies"):
|
|
20
21
|
os.makedirs("tests/test_assemblies")
|
|
@@ -107,3 +108,22 @@ def concatenated_assembly_file_path(tmp_path):
|
|
|
107
108
|
) as infile:
|
|
108
109
|
shutil.copyfileobj(infile, outfile)
|
|
109
110
|
return (tmp_path / "concatenated_assembly.fna").as_posix()
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@pytest.fixture
|
|
114
|
+
def mixed_species_assembly_file_path(tmp_path):
|
|
115
|
+
"""Create a temporary directory a fasta file which contains two mixed species assemblies"""
|
|
116
|
+
# two acinetobacter assemblies
|
|
117
|
+
assemblies = [
|
|
118
|
+
"GCF_000018445.1_ASM1844v1_genomic.fna",
|
|
119
|
+
"GCA_900444805.1_58932_B01_genomic.fna",
|
|
120
|
+
]
|
|
121
|
+
with open(
|
|
122
|
+
tmp_path / "mixed_species_assembly.fna", "w", encoding="utf-8"
|
|
123
|
+
) as outfile:
|
|
124
|
+
for assembly in assemblies:
|
|
125
|
+
with open(
|
|
126
|
+
"tests/test_assemblies/" + assembly, "r", encoding="utf-8"
|
|
127
|
+
) as infile:
|
|
128
|
+
shutil.copyfileobj(infile, outfile)
|
|
129
|
+
return (tmp_path / "mixed_species_assembly.fna").as_posix()
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Test XspecT CLI"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import pytest
|
|
6
|
+
from click.testing import CliRunner
|
|
7
|
+
from xspect.main import cli
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_list_models():
|
|
11
|
+
"""Test the list models command"""
|
|
12
|
+
runner = CliRunner()
|
|
13
|
+
result = runner.invoke(cli, ["models", "list"])
|
|
14
|
+
assert result.exit_code == 0, f"Error: {result.output}"
|
|
15
|
+
assert "Genus" in result.output
|
|
16
|
+
assert "Species" in result.output
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.mark.parametrize(
|
|
20
|
+
"assembly_file_path",
|
|
21
|
+
[
|
|
22
|
+
"GCF_000069245.1_ASM6924v1_genomic.fna",
|
|
23
|
+
],
|
|
24
|
+
indirect=["assembly_file_path"],
|
|
25
|
+
)
|
|
26
|
+
def test_classify_genus(assembly_file_path, tmpdir):
|
|
27
|
+
"""Test the classify genus command"""
|
|
28
|
+
runner = CliRunner()
|
|
29
|
+
result = runner.invoke(
|
|
30
|
+
cli,
|
|
31
|
+
[
|
|
32
|
+
"classify",
|
|
33
|
+
"genus",
|
|
34
|
+
"-g",
|
|
35
|
+
"Acinetobacter",
|
|
36
|
+
"-i",
|
|
37
|
+
assembly_file_path,
|
|
38
|
+
"-o",
|
|
39
|
+
str(tmpdir) + "/classify_genus.json",
|
|
40
|
+
],
|
|
41
|
+
)
|
|
42
|
+
assert result.exit_code == 0, f"Error: {result.output}"
|
|
43
|
+
with open(str(tmpdir) + "/classify_genus.json", encoding="utf-8") as f:
|
|
44
|
+
result_content = json.load(f)
|
|
45
|
+
assert result_content["scores"]["total"]["Acinetobacter"] == 0.85
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@pytest.mark.parametrize(
|
|
49
|
+
["assembly_file_path", "genus", "species"],
|
|
50
|
+
[
|
|
51
|
+
(
|
|
52
|
+
"GCF_000069245.1_ASM6924v1_genomic.fna",
|
|
53
|
+
"Acinetobacter",
|
|
54
|
+
"470",
|
|
55
|
+
),
|
|
56
|
+
(
|
|
57
|
+
"GCF_000018445.1_ASM1844v1_genomic.fna",
|
|
58
|
+
"Acinetobacter",
|
|
59
|
+
"470",
|
|
60
|
+
),
|
|
61
|
+
("GCF_000006945.2_ASM694v2_genomic.fna", "Salmonella", "28901"),
|
|
62
|
+
],
|
|
63
|
+
indirect=["assembly_file_path"],
|
|
64
|
+
)
|
|
65
|
+
def test_classify_species(assembly_file_path, genus, species, tmpdir):
|
|
66
|
+
"""Test the species assignment"""
|
|
67
|
+
runner = CliRunner()
|
|
68
|
+
result = runner.invoke(
|
|
69
|
+
cli,
|
|
70
|
+
[
|
|
71
|
+
"classify",
|
|
72
|
+
"species",
|
|
73
|
+
"-g",
|
|
74
|
+
genus,
|
|
75
|
+
"-i",
|
|
76
|
+
assembly_file_path,
|
|
77
|
+
"-o",
|
|
78
|
+
str(tmpdir) + "/classify_species.json",
|
|
79
|
+
],
|
|
80
|
+
)
|
|
81
|
+
assert result.exit_code == 0, f"Error: {result.output}"
|
|
82
|
+
|
|
83
|
+
with open(str(tmpdir) + "/classify_species.json", encoding="utf-8") as f:
|
|
84
|
+
result_content = json.load(f)
|
|
85
|
+
assert result_content["prediction"] == species
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@pytest.mark.parametrize(
|
|
89
|
+
["assembly_file_path", "genus", "species"],
|
|
90
|
+
[
|
|
91
|
+
(
|
|
92
|
+
"GCF_000069245.1_ASM6924v1_genomic.fna",
|
|
93
|
+
"Acinetobacter",
|
|
94
|
+
"470",
|
|
95
|
+
),
|
|
96
|
+
],
|
|
97
|
+
indirect=["assembly_file_path"],
|
|
98
|
+
)
|
|
99
|
+
def test_filter_genus_and_classify_species(assembly_file_path, genus, species, tmpdir):
|
|
100
|
+
"""Test filtering by a genus and then classifying species ("metagenome mode")"""
|
|
101
|
+
runner = CliRunner()
|
|
102
|
+
result = runner.invoke(
|
|
103
|
+
cli,
|
|
104
|
+
[
|
|
105
|
+
"filter",
|
|
106
|
+
"genus",
|
|
107
|
+
"-g",
|
|
108
|
+
genus,
|
|
109
|
+
"-i",
|
|
110
|
+
assembly_file_path,
|
|
111
|
+
"-o",
|
|
112
|
+
str(tmpdir) + "/genus_filtered.fna",
|
|
113
|
+
],
|
|
114
|
+
)
|
|
115
|
+
assert result.exit_code == 0, f"Error: {result.output}"
|
|
116
|
+
result = runner.invoke(
|
|
117
|
+
cli,
|
|
118
|
+
[
|
|
119
|
+
"classify",
|
|
120
|
+
"species",
|
|
121
|
+
"-g",
|
|
122
|
+
genus,
|
|
123
|
+
"-i",
|
|
124
|
+
str(tmpdir) + "/genus_filtered.fna",
|
|
125
|
+
"-o",
|
|
126
|
+
str(tmpdir) + "/out.json",
|
|
127
|
+
],
|
|
128
|
+
)
|
|
129
|
+
assert result.exit_code == 0, f"Error: {result.output}"
|
|
130
|
+
with open(str(tmpdir) + "/out.json", encoding="utf-8") as f:
|
|
131
|
+
result_content = json.load(f)
|
|
132
|
+
assert result_content["prediction"] == species
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@pytest.mark.parametrize(
|
|
136
|
+
"assembly_file_path",
|
|
137
|
+
[
|
|
138
|
+
"GCF_000006945.2_ASM694v2_genomic.fna",
|
|
139
|
+
],
|
|
140
|
+
indirect=["assembly_file_path"],
|
|
141
|
+
)
|
|
142
|
+
def test_filter_species(assembly_file_path, tmpdir):
|
|
143
|
+
"""Test filtering by species"""
|
|
144
|
+
runner = CliRunner()
|
|
145
|
+
result = runner.invoke(
|
|
146
|
+
cli,
|
|
147
|
+
[
|
|
148
|
+
"filter",
|
|
149
|
+
"species",
|
|
150
|
+
"-g",
|
|
151
|
+
"Salmonella",
|
|
152
|
+
"-s",
|
|
153
|
+
"enterica",
|
|
154
|
+
"-i",
|
|
155
|
+
assembly_file_path,
|
|
156
|
+
"-o",
|
|
157
|
+
str(tmpdir) + "/species_filtered.fna",
|
|
158
|
+
],
|
|
159
|
+
)
|
|
160
|
+
assert result.exit_code == 0, f"Error: {result.output}"
|
|
161
|
+
assert Path(str(tmpdir) + "/species_filtered.fna").exists()
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_filter_species_max_scoring(mixed_species_assembly_file_path, tmpdir):
|
|
165
|
+
"""Test filtering by species"""
|
|
166
|
+
runner = CliRunner()
|
|
167
|
+
result = runner.invoke(
|
|
168
|
+
cli,
|
|
169
|
+
[
|
|
170
|
+
"filter",
|
|
171
|
+
"species",
|
|
172
|
+
"-g",
|
|
173
|
+
"Acinetobacter",
|
|
174
|
+
"-s",
|
|
175
|
+
"calcoaceticus",
|
|
176
|
+
"-i",
|
|
177
|
+
mixed_species_assembly_file_path,
|
|
178
|
+
"-o",
|
|
179
|
+
str(tmpdir) + "/mixed_species_filtered.fna",
|
|
180
|
+
"--threshold",
|
|
181
|
+
"-1",
|
|
182
|
+
],
|
|
183
|
+
)
|
|
184
|
+
assert result.exit_code == 0, f"Error: {result.output}"
|
|
185
|
+
assert Path(str(tmpdir) + "/mixed_species_filtered.fna").exists()
|
|
186
|
+
|
|
187
|
+
with open(str(tmpdir) + "/mixed_species_filtered.fna", encoding="utf-8") as f:
|
|
188
|
+
filtered_content = f.read()
|
|
189
|
+
assert "Acinetobacter calcoaceticus" in filtered_content
|
|
190
|
+
assert "Acinetobacter baumannii" not in filtered_content
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for the FastAPI module.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# pylint: disable=redefined-outer-name
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
from fastapi.testclient import TestClient
|
|
9
|
+
from xspect.fastapi import app
|
|
10
|
+
from xspect.model_management import get_model_metadata
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
|
|
15
|
+
def client():
|
|
16
|
+
"""Create a FastAPI test client."""
|
|
17
|
+
return TestClient(app)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture
|
|
21
|
+
def client_with_uploaded_file(client, request):
|
|
22
|
+
"""Create a FastAPI test client with an uploaded file."""
|
|
23
|
+
assembly_file_path = Path(request.param)
|
|
24
|
+
with open(assembly_file_path, "rb") as f:
|
|
25
|
+
response = client.post(
|
|
26
|
+
"/upload-file",
|
|
27
|
+
files={"file": (assembly_file_path.name, f)},
|
|
28
|
+
)
|
|
29
|
+
return client
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_list_models(client):
|
|
33
|
+
"""Test the /list-models endpoint."""
|
|
34
|
+
response = client.get("/list-models")
|
|
35
|
+
assert response.status_code == 200
|
|
36
|
+
assert "Genus" in response.json()
|
|
37
|
+
assert "Species" in response.json()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_get_model_metadata(client):
|
|
41
|
+
"""Test the /model-metadata endpoint."""
|
|
42
|
+
response = client.get(
|
|
43
|
+
"/model-metadata", params={"model_slug": "acinetobacter-species"}
|
|
44
|
+
)
|
|
45
|
+
assert response.status_code == 200
|
|
46
|
+
response_json = response.json()
|
|
47
|
+
assert response_json["model_display_name"] == "Acinetobacter"
|
|
48
|
+
assert response_json["display_names"]["471"] == "Acinetobacter calcoaceticus"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_post_model_metadata(client):
|
|
52
|
+
"""Test the /model-metadata endpoint."""
|
|
53
|
+
response = client.post(
|
|
54
|
+
"/model-metadata",
|
|
55
|
+
params={
|
|
56
|
+
"model_slug": "acinetobacter-species",
|
|
57
|
+
"author": "Test Author",
|
|
58
|
+
"author_email": "test@example.com",
|
|
59
|
+
},
|
|
60
|
+
)
|
|
61
|
+
assert response.status_code == 200
|
|
62
|
+
response_json = response.json()
|
|
63
|
+
assert response_json["message"] == "Metadata updated."
|
|
64
|
+
model_metadata = get_model_metadata("acinetobacter-species")
|
|
65
|
+
assert model_metadata["author"] == "Test Author"
|
|
66
|
+
assert model_metadata["author_email"] == "test@example.com"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_post_model_display_name(client):
|
|
70
|
+
"""Test the /model-display-name endpoint."""
|
|
71
|
+
response = client.post(
|
|
72
|
+
"/model-display-name",
|
|
73
|
+
params={
|
|
74
|
+
"model_slug": "acinetobacter-species",
|
|
75
|
+
"filter_id": "470",
|
|
76
|
+
"display_name": "AB",
|
|
77
|
+
},
|
|
78
|
+
)
|
|
79
|
+
assert response.status_code == 200
|
|
80
|
+
response_json = response.json()
|
|
81
|
+
assert response_json["message"] == "Display name updated."
|
|
82
|
+
model_metadata = get_model_metadata("acinetobacter-species")
|
|
83
|
+
assert model_metadata["display_names"]["470"] == "AB"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pytest.mark.parametrize(
|
|
87
|
+
["assembly_file_path", "client_with_uploaded_file"],
|
|
88
|
+
[
|
|
89
|
+
(
|
|
90
|
+
"tests/test_assemblies/GCF_000018445.1_ASM1844v1_genomic.fna",
|
|
91
|
+
"tests/test_assemblies/GCF_000018445.1_ASM1844v1_genomic.fna",
|
|
92
|
+
)
|
|
93
|
+
],
|
|
94
|
+
indirect=["client_with_uploaded_file"],
|
|
95
|
+
)
|
|
96
|
+
def test_classify(client_with_uploaded_file, assembly_file_path):
|
|
97
|
+
"""Test the /classify endpoint."""
|
|
98
|
+
response = client_with_uploaded_file.get(
|
|
99
|
+
"/classify",
|
|
100
|
+
params={
|
|
101
|
+
"genus": "acinetobacter",
|
|
102
|
+
"file": Path(assembly_file_path).name,
|
|
103
|
+
"step": 1,
|
|
104
|
+
"included_ids": ["470"],
|
|
105
|
+
},
|
|
106
|
+
)
|
|
107
|
+
assert response.status_code == 200
|
|
108
|
+
response_json = response.json()
|
|
109
|
+
assert response_json["prediction"] == "470"
|
xspect-0.4.0/tests/test_cli.py
DELETED
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
"""Test XspecT CLI"""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import pytest
|
|
5
|
-
from click.testing import CliRunner
|
|
6
|
-
from xspect.main import cli
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
@pytest.mark.parametrize(
|
|
10
|
-
["assembly_file_path", "genus", "species"],
|
|
11
|
-
[
|
|
12
|
-
(
|
|
13
|
-
"GCF_000069245.1_ASM6924v1_genomic.fna",
|
|
14
|
-
"Acinetobacter",
|
|
15
|
-
"470",
|
|
16
|
-
),
|
|
17
|
-
(
|
|
18
|
-
"GCF_000018445.1_ASM1844v1_genomic.fna",
|
|
19
|
-
"Acinetobacter",
|
|
20
|
-
"470",
|
|
21
|
-
),
|
|
22
|
-
("GCF_000006945.2_ASM694v2_genomic.fna", "Salmonella", "28901"),
|
|
23
|
-
],
|
|
24
|
-
indirect=["assembly_file_path"],
|
|
25
|
-
)
|
|
26
|
-
def test_species_assignment(assembly_file_path, genus, species):
|
|
27
|
-
"""Test the species assignment"""
|
|
28
|
-
runner = CliRunner()
|
|
29
|
-
result = runner.invoke(
|
|
30
|
-
cli,
|
|
31
|
-
[
|
|
32
|
-
"classify",
|
|
33
|
-
"species",
|
|
34
|
-
"-g",
|
|
35
|
-
genus,
|
|
36
|
-
"-i",
|
|
37
|
-
assembly_file_path,
|
|
38
|
-
"-o",
|
|
39
|
-
"out.json",
|
|
40
|
-
],
|
|
41
|
-
)
|
|
42
|
-
assert result.exit_code == 0, f"Error: {result.output}"
|
|
43
|
-
|
|
44
|
-
with open("out.json", encoding="utf-8") as f:
|
|
45
|
-
result_content = json.load(f)
|
|
46
|
-
assert result_content["prediction"] == species
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
@pytest.mark.parametrize(
|
|
50
|
-
["assembly_file_path", "genus", "species"],
|
|
51
|
-
[
|
|
52
|
-
(
|
|
53
|
-
"GCF_000069245.1_ASM6924v1_genomic.fna",
|
|
54
|
-
"Acinetobacter",
|
|
55
|
-
"470",
|
|
56
|
-
),
|
|
57
|
-
],
|
|
58
|
-
indirect=["assembly_file_path"],
|
|
59
|
-
)
|
|
60
|
-
def test_metagenome_mode(assembly_file_path, genus, species):
|
|
61
|
-
"""Test the metagenome mode"""
|
|
62
|
-
runner = CliRunner()
|
|
63
|
-
result = runner.invoke(
|
|
64
|
-
cli,
|
|
65
|
-
[
|
|
66
|
-
"filter",
|
|
67
|
-
"genus",
|
|
68
|
-
"-g",
|
|
69
|
-
genus,
|
|
70
|
-
"-i",
|
|
71
|
-
assembly_file_path,
|
|
72
|
-
"-o",
|
|
73
|
-
"filtered.fna",
|
|
74
|
-
],
|
|
75
|
-
)
|
|
76
|
-
assert result.exit_code == 0, f"Error: {result.output}"
|
|
77
|
-
result = runner.invoke(
|
|
78
|
-
cli,
|
|
79
|
-
[
|
|
80
|
-
"classify",
|
|
81
|
-
"species",
|
|
82
|
-
"-g",
|
|
83
|
-
genus,
|
|
84
|
-
"-i",
|
|
85
|
-
"filtered.fna",
|
|
86
|
-
"-o",
|
|
87
|
-
"out.json",
|
|
88
|
-
],
|
|
89
|
-
)
|
|
90
|
-
assert result.exit_code == 0, f"Error: {result.output}"
|
|
91
|
-
with open("out.json", encoding="utf-8") as f:
|
|
92
|
-
result_content = json.load(f)
|
|
93
|
-
assert result_content["prediction"] == species
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|