PyPI - XspecT - Versions diffs - 0.4.0__tar.gz → 0.4.1__tar.gz - Mend

XspecT 0.4.0 (tar.gz) → 0.4.1 (tar.gz)

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of XspecT might be problematic. Click here for more details.

Files changed (86) [hide / show]

{xspect-0.4.0 → xspect-0.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: XspecT
-Version: 0.4.0
+Version: 0.4.1
 Summary: Tool to monitor and characterize pathogens using Bloom filters.
 License: MIT License
@@ -54,6 +54,7 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
+Requires-Dist: httpx; extra == "test"
 Dynamic: license-file
 # XspecT - Acinetobacter Species Assignment Tool

{xspect-0.4.0 → xspect-0.4.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "XspecT"
-version = "0.4.0"
+version = "0.4.1"
 description = "Tool to monitor and characterize pathogens using Bloom filters."
 readme = {file = "README.md", content-type = "text/markdown"}
 license = {file = "LICENSE"}
@@ -51,4 +51,4 @@ pythonpath = [
 [project.optional-dependencies]
 docs = ["sphinx", "furo", "myst-parser", "sphinx-copybutton", "sphinx-autobuild"]
-test = ["pytest", "pytest-cov"]
+test = ["pytest", "pytest-cov", "httpx"]

{xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: XspecT
-Version: 0.4.0
+Version: 0.4.1
 Summary: Tool to monitor and characterize pathogens using Bloom filters.
 License: MIT License
@@ -54,6 +54,7 @@ Requires-Dist: sphinx-autobuild; extra == "docs"
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
+Requires-Dist: httpx; extra == "test"
 Dynamic: license-file
 # XspecT - Acinetobacter Species Assignment Tool

{xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/SOURCES.txt RENAMED Viewed

@@ -70,6 +70,7 @@ src/xspect/models/result.py
 tests/__init__.py
 tests/conftest.py
 tests/test_cli.py
+tests/test_fastapi.py
 tests/test_file_io.py
 tests/test_model_management.py
 tests/test_model_result.py

{xspect-0.4.0 → xspect-0.4.1}/src/XspecT.egg-info/requires.txt RENAMED Viewed

@@ -22,3 +22,4 @@ sphinx-autobuild
 [test]
 pytest
 pytest-cov
+httpx

{xspect-0.4.0 → xspect-0.4.1}/src/xspect/main.py RENAMED Viewed

@@ -18,6 +18,7 @@ from xspect.models.probabilistic_filter_mlst_model import (
 )
 from xspect.model_management import (
     get_genus_model,
+    get_model_metadata,
     get_models,
     get_species_model,
 )
@@ -41,7 +42,6 @@ def web():
 @cli.group()
 def models():
     """Model management commands."""
-    pass
 @models.command(
@@ -77,7 +77,6 @@ def list_models():
 @models.group()
 def train():
     """Train models."""
-    pass
 @train.command(
@@ -191,10 +190,12 @@ def train_mlst(choose_schemes):
 )
 def classify_seqs():
     """Classification commands."""
-    pass
-@classify_seqs.command()
+@classify_seqs.command(
+    name="genus",
+    help="Classify samples using a genus model.",
+)
 @click.option(
     "-g",
     "--genus",
@@ -217,7 +218,7 @@ def classify_seqs():
     type=click.Path(dir_okay=True, file_okay=True),
     default=Path(".") / f"result_{uuid4()}.json",
 )
-def genus(model_genus, input_path, output_path):
+def classify_genus(model_genus, input_path, output_path):
     """Classify samples using a genus model."""
     click.echo("Classifying...")
     genus_model = get_genus_model(model_genus)
@@ -226,7 +227,10 @@ def genus(model_genus, input_path, output_path):
     click.echo(f"Result saved as {output_path}.")
-@classify_seqs.command()
+@classify_seqs.command(
+    name="species",
+    help="Classify samples using a species model.",
+)
 @click.option(
     "-g",
     "--genus",
@@ -252,10 +256,10 @@ def genus(model_genus, input_path, output_path):
 @click.option(
     "--sparse-sampling-step",
     type=int,
-    help="Sparse sampling step size (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+    help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
     default=1,
 )
-def species(model_genus, input_path, output_path, sparse_sampling_step):
+def classify_species(model_genus, input_path, output_path, sparse_sampling_step):
     """Classify samples using a species model."""
     click.echo("Classifying...")
     species_model = get_species_model(model_genus)
@@ -302,7 +306,6 @@ def classify_mlst(input_path, output_path):
 )
 def filter_seqs():
     """Filter commands."""
-    pass
 @filter_seqs.command(
@@ -336,6 +339,7 @@ def filter_seqs():
     type=float,
     help="Threshold for filtering (default: 0.7).",
     default=0.7,
+    prompt=True,
 )
 def filter_genus(model_genus, input_path, output_path, threshold):
     """Filter samples using a genus model."""
@@ -368,12 +372,10 @@ def filter_genus(model_genus, input_path, output_path, threshold):
     prompt=True,
 )
 @click.option(
-    # todo: this should be a choice of the species in the model w/ display names
     "-s",
     "--species",
     "model_species",
     help="Species of the model to filter for.",
-    prompt=True,
 )
 @click.option(
     "-i",
@@ -392,11 +394,36 @@ def filter_genus(model_genus, input_path, output_path, threshold):
 @click.option(
     "--threshold",
     type=float,
-    help="Threshold for filtering (default: 0.7).",
+    help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
     default=0.7,
+    prompt=True,
 )
 def filter_species(model_genus, model_species, input_path, output_path, threshold):
     """Filter a sample using the species model."""
+    available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
+    available_species = {
+        id: name.replace(f"{model_genus} ", "")
+        for id, name in available_species.items()
+    }
+    if not model_species:
+        sorted_available_species = sorted(available_species.values())
+        model_species = click.prompt(
+            f"Please enter the species name: {model_genus}",
+            type=click.Choice(sorted_available_species, case_sensitive=False),
+        )
+    if model_species not in available_species.values():
+        raise click.BadParameter(
+            f"Species '{model_species}' not found in the {model_genus} species model."
+        )
+    # get the species ID from the name
+    model_species = [
+        id
+        for id, name in available_species.items()
+        if name.lower() == model_species.lower()
+    ][0]
     click.echo("Filtering...")
     species_model = get_species_model(model_genus)
     result = species_model.predict(Path(input_path))

{xspect-0.4.0 → xspect-0.4.1}/src/xspect/model_management.py RENAMED Viewed

@@ -2,7 +2,6 @@
 from json import loads, dumps
 from pathlib import Path
-from xspect.models.probabilistic_filter_model import ProbabilisticFilterModel
 from xspect.models.probabilistic_single_filter_model import (
     ProbabilisticSingleFilterModel,
 )
@@ -24,23 +23,10 @@ def get_species_model(genus):
     return species_filter_model
-def get_model_by_slug(model_slug: str):
-    """Get a model by its slug."""
-    model_path = get_xspect_model_path() / (model_slug + ".json")
-    model_metadata = get_model_metadata(model_path)
-    if model_metadata["model_class"] == "ProbabilisticSingleFilterModel":
-        return ProbabilisticSingleFilterModel.load(model_path)
-    if model_metadata["model_class"] == "ProbabilisticFilterSVMModel":
-        return ProbabilisticFilterSVMModel.load(model_path)
-    if model_metadata["model_class"] == "ProbabilisticFilterModel":
-        return ProbabilisticFilterModel.load(model_path)
-    raise ValueError(f"Model class {model_metadata['model_class']} not recognized.")
 def get_model_metadata(model: str | Path):
     """Get the metadata of a model."""
     if isinstance(model, str):
-        model_path = get_xspect_model_path() / (model + ".json")
+        model_path = get_xspect_model_path() / (model.lower() + ".json")
     elif isinstance(model, Path):
         model_path = model
     else:

{xspect-0.4.0 → xspect-0.4.1}/src/xspect/models/result.py RENAMED Viewed

@@ -58,16 +58,28 @@ class ModelResult:
         return total_hits
     def get_filter_mask(self, label: str, filter_threshold: float) -> dict[str, bool]:
-        """Return a mask for filtered subsequences."""
-        if filter_threshold < 0 or filter_threshold > 1:
+        """Return a mask for filtered subsequences.
+        The mask is a dictionary with subsequence names as keys and boolean values
+        indicating whether the subsequence is above the filter threshold for the given label.
+        A value of -1 for filter_threshold indicates that the subsequence with the maximum score
+        for the given label should be returned.
+        """
+        if filter_threshold < 0 and not filter_threshold == -1 or filter_threshold > 1:
             raise ValueError("The filter threshold must be between 0 and 1.")
         scores = self.get_scores()
         scores.pop("total")
-        return {
-            subsequence: score[label] >= filter_threshold
-            for subsequence, score in scores.items()
-        }
+        if not filter_threshold == -1:
+            return {
+                subsequence: score[label] >= filter_threshold
+                for subsequence, score in scores.items()
+            }
+        else:
+            return {
+                subsequence: score[label] == max(score.values())
+                for subsequence, score in scores.items()
+            }
     def get_filtered_subsequence_labels(
         self, label: str, filter_threshold: float = 0.7

{xspect-0.4.0 → xspect-0.4.1}/tests/conftest.py RENAMED Viewed

@@ -15,6 +15,7 @@ def pytest_sessionstart():
         "GCF_000006945.2_ASM694v2_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_000006945.2/download?include_annotation_type=GENOME_FASTA",
         "GCF_000018445.1_ASM1844v1_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_000018445.1/download?include_annotation_type=GENOME_FASTA",
         "GCF_000069245.1_ASM6924v1_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_000069245.1/download?include_annotation_type=GENOME_FASTA",
+        "GCA_900444805.1_58932_B01_genomic.fna": "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCA_900444805.1/download?include_annotation_type=GENOME_FASTA",
     }
     if not os.path.exists("tests/test_assemblies"):
         os.makedirs("tests/test_assemblies")
@@ -107,3 +108,22 @@ def concatenated_assembly_file_path(tmp_path):
             ) as infile:
                 shutil.copyfileobj(infile, outfile)
     return (tmp_path / "concatenated_assembly.fna").as_posix()
+@pytest.fixture
+def mixed_species_assembly_file_path(tmp_path):
+    """Create a temporary directory a fasta file which contains two mixed species assemblies"""
+    # two acinetobacter assemblies
+    assemblies = [
+        "GCF_000018445.1_ASM1844v1_genomic.fna",
+        "GCA_900444805.1_58932_B01_genomic.fna",
+    ]
+    with open(
+        tmp_path / "mixed_species_assembly.fna", "w", encoding="utf-8"
+    ) as outfile:
+        for assembly in assemblies:
+            with open(
+                "tests/test_assemblies/" + assembly, "r", encoding="utf-8"
+            ) as infile:
+                shutil.copyfileobj(infile, outfile)
+    return (tmp_path / "mixed_species_assembly.fna").as_posix()

xspect-0.4.1/tests/test_cli.py ADDED Viewed

@@ -0,0 +1,190 @@
+"""Test XspecT CLI"""
+import json
+from pathlib import Path
+import pytest
+from click.testing import CliRunner
+from xspect.main import cli
+def test_list_models():
+    """Test the list models command"""
+    runner = CliRunner()
+    result = runner.invoke(cli, ["models", "list"])
+    assert result.exit_code == 0, f"Error: {result.output}"
+    assert "Genus" in result.output
+    assert "Species" in result.output
+@pytest.mark.parametrize(
+    "assembly_file_path",
+    [
+        "GCF_000069245.1_ASM6924v1_genomic.fna",
+    ],
+    indirect=["assembly_file_path"],
+)
+def test_classify_genus(assembly_file_path, tmpdir):
+    """Test the classify genus command"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "classify",
+            "genus",
+            "-g",
+            "Acinetobacter",
+            "-i",
+            assembly_file_path,
+            "-o",
+            str(tmpdir) + "/classify_genus.json",
+        ],
+    )
+    assert result.exit_code == 0, f"Error: {result.output}"
+    with open(str(tmpdir) + "/classify_genus.json", encoding="utf-8") as f:
+        result_content = json.load(f)
+        assert result_content["scores"]["total"]["Acinetobacter"] == 0.85
+@pytest.mark.parametrize(
+    ["assembly_file_path", "genus", "species"],
+    [
+        (
+            "GCF_000069245.1_ASM6924v1_genomic.fna",
+            "Acinetobacter",
+            "470",
+        ),
+        (
+            "GCF_000018445.1_ASM1844v1_genomic.fna",
+            "Acinetobacter",
+            "470",
+        ),
+        ("GCF_000006945.2_ASM694v2_genomic.fna", "Salmonella", "28901"),
+    ],
+    indirect=["assembly_file_path"],
+)
+def test_classify_species(assembly_file_path, genus, species, tmpdir):
+    """Test the species assignment"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "classify",
+            "species",
+            "-g",
+            genus,
+            "-i",
+            assembly_file_path,
+            "-o",
+            str(tmpdir) + "/classify_species.json",
+        ],
+    )
+    assert result.exit_code == 0, f"Error: {result.output}"
+    with open(str(tmpdir) + "/classify_species.json", encoding="utf-8") as f:
+        result_content = json.load(f)
+        assert result_content["prediction"] == species
+@pytest.mark.parametrize(
+    ["assembly_file_path", "genus", "species"],
+    [
+        (
+            "GCF_000069245.1_ASM6924v1_genomic.fna",
+            "Acinetobacter",
+            "470",
+        ),
+    ],
+    indirect=["assembly_file_path"],
+)
+def test_filter_genus_and_classify_species(assembly_file_path, genus, species, tmpdir):
+    """Test filtering by a genus and then classifying species ("metagenome mode")"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "filter",
+            "genus",
+            "-g",
+            genus,
+            "-i",
+            assembly_file_path,
+            "-o",
+            str(tmpdir) + "/genus_filtered.fna",
+        ],
+    )
+    assert result.exit_code == 0, f"Error: {result.output}"
+    result = runner.invoke(
+        cli,
+        [
+            "classify",
+            "species",
+            "-g",
+            genus,
+            "-i",
+            str(tmpdir) + "/genus_filtered.fna",
+            "-o",
+            str(tmpdir) + "/out.json",
+        ],
+    )
+    assert result.exit_code == 0, f"Error: {result.output}"
+    with open(str(tmpdir) + "/out.json", encoding="utf-8") as f:
+        result_content = json.load(f)
+        assert result_content["prediction"] == species
+@pytest.mark.parametrize(
+    "assembly_file_path",
+    [
+        "GCF_000006945.2_ASM694v2_genomic.fna",
+    ],
+    indirect=["assembly_file_path"],
+)
+def test_filter_species(assembly_file_path, tmpdir):
+    """Test filtering by species"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "filter",
+            "species",
+            "-g",
+            "Salmonella",
+            "-s",
+            "enterica",
+            "-i",
+            assembly_file_path,
+            "-o",
+            str(tmpdir) + "/species_filtered.fna",
+        ],
+    )
+    assert result.exit_code == 0, f"Error: {result.output}"
+    assert Path(str(tmpdir) + "/species_filtered.fna").exists()
+def test_filter_species_max_scoring(mixed_species_assembly_file_path, tmpdir):
+    """Test filtering by species"""
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "filter",
+            "species",
+            "-g",
+            "Acinetobacter",
+            "-s",
+            "calcoaceticus",
+            "-i",
+            mixed_species_assembly_file_path,
+            "-o",
+            str(tmpdir) + "/mixed_species_filtered.fna",
+            "--threshold",
+            "-1",
+        ],
+    )
+    assert result.exit_code == 0, f"Error: {result.output}"
+    assert Path(str(tmpdir) + "/mixed_species_filtered.fna").exists()
+    with open(str(tmpdir) + "/mixed_species_filtered.fna", encoding="utf-8") as f:
+        filtered_content = f.read()
+        assert "Acinetobacter calcoaceticus" in filtered_content
+        assert "Acinetobacter baumannii" not in filtered_content

xspect-0.4.1/tests/test_fastapi.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""
+Tests for the FastAPI module.
+"""
+# pylint: disable=redefined-outer-name
+import pytest
+from fastapi.testclient import TestClient
+from xspect.fastapi import app
+from xspect.model_management import get_model_metadata
+from pathlib import Path
+@pytest.fixture
+def client():
+    """Create a FastAPI test client."""
+    return TestClient(app)
+@pytest.fixture
+def client_with_uploaded_file(client, request):
+    """Create a FastAPI test client with an uploaded file."""
+    assembly_file_path = Path(request.param)
+    with open(assembly_file_path, "rb") as f:
+        response = client.post(
+            "/upload-file",
+            files={"file": (assembly_file_path.name, f)},
+        )
+    return client
+def test_list_models(client):
+    """Test the /list-models endpoint."""
+    response = client.get("/list-models")
+    assert response.status_code == 200
+    assert "Genus" in response.json()
+    assert "Species" in response.json()
+def test_get_model_metadata(client):
+    """Test the /model-metadata endpoint."""
+    response = client.get(
+        "/model-metadata", params={"model_slug": "acinetobacter-species"}
+    )
+    assert response.status_code == 200
+    response_json = response.json()
+    assert response_json["model_display_name"] == "Acinetobacter"
+    assert response_json["display_names"]["471"] == "Acinetobacter calcoaceticus"
+def test_post_model_metadata(client):
+    """Test the /model-metadata endpoint."""
+    response = client.post(
+        "/model-metadata",
+        params={
+            "model_slug": "acinetobacter-species",
+            "author": "Test Author",
+            "author_email": "test@example.com",
+        },
+    )
+    assert response.status_code == 200
+    response_json = response.json()
+    assert response_json["message"] == "Metadata updated."
+    model_metadata = get_model_metadata("acinetobacter-species")
+    assert model_metadata["author"] == "Test Author"
+    assert model_metadata["author_email"] == "test@example.com"
+def test_post_model_display_name(client):
+    """Test the /model-display-name endpoint."""
+    response = client.post(
+        "/model-display-name",
+        params={
+            "model_slug": "acinetobacter-species",
+            "filter_id": "470",
+            "display_name": "AB",
+        },
+    )
+    assert response.status_code == 200
+    response_json = response.json()
+    assert response_json["message"] == "Display name updated."
+    model_metadata = get_model_metadata("acinetobacter-species")
+    assert model_metadata["display_names"]["470"] == "AB"
+@pytest.mark.parametrize(
+    ["assembly_file_path", "client_with_uploaded_file"],
+    [
+        (
+            "tests/test_assemblies/GCF_000018445.1_ASM1844v1_genomic.fna",
+            "tests/test_assemblies/GCF_000018445.1_ASM1844v1_genomic.fna",
+        )
+    ],
+    indirect=["client_with_uploaded_file"],
+)
+def test_classify(client_with_uploaded_file, assembly_file_path):
+    """Test the /classify endpoint."""
+    response = client_with_uploaded_file.get(
+        "/classify",
+        params={
+            "genus": "acinetobacter",
+            "file": Path(assembly_file_path).name,
+            "step": 1,
+            "included_ids": ["470"],
+        },
+    )
+    assert response.status_code == 200
+    response_json = response.json()
+    assert response_json["prediction"] == "470"

xspect-0.4.0/tests/test_cli.py DELETED Viewed

@@ -1,93 +0,0 @@
-"""Test XspecT CLI"""
-import json
-import pytest
-from click.testing import CliRunner
-from xspect.main import cli
-@pytest.mark.parametrize(
-    ["assembly_file_path", "genus", "species"],
-    [
-        (
-            "GCF_000069245.1_ASM6924v1_genomic.fna",
-            "Acinetobacter",
-            "470",
-        ),
-        (
-            "GCF_000018445.1_ASM1844v1_genomic.fna",
-            "Acinetobacter",
-            "470",
-        ),
-        ("GCF_000006945.2_ASM694v2_genomic.fna", "Salmonella", "28901"),
-    ],
-    indirect=["assembly_file_path"],
-)
-def test_species_assignment(assembly_file_path, genus, species):
-    """Test the species assignment"""
-    runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        [
-            "classify",
-            "species",
-            "-g",
-            genus,
-            "-i",
-            assembly_file_path,
-            "-o",
-            "out.json",
-        ],
-    )
-    assert result.exit_code == 0, f"Error: {result.output}"
-    with open("out.json", encoding="utf-8") as f:
-        result_content = json.load(f)
-        assert result_content["prediction"] == species
-@pytest.mark.parametrize(
-    ["assembly_file_path", "genus", "species"],
-    [
-        (
-            "GCF_000069245.1_ASM6924v1_genomic.fna",
-            "Acinetobacter",
-            "470",
-        ),
-    ],
-    indirect=["assembly_file_path"],
-)
-def test_metagenome_mode(assembly_file_path, genus, species):
-    """Test the metagenome mode"""
-    runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        [
-            "filter",
-            "genus",
-            "-g",
-            genus,
-            "-i",
-            assembly_file_path,
-            "-o",
-            "filtered.fna",
-        ],
-    )
-    assert result.exit_code == 0, f"Error: {result.output}"
-    result = runner.invoke(
-        cli,
-        [
-            "classify",
-            "species",
-            "-g",
-            genus,
-            "-i",
-            "filtered.fna",
-            "-o",
-            "out.json",
-        ],
-    )
-    assert result.exit_code == 0, f"Error: {result.output}"
-    with open("out.json", encoding="utf-8") as f:
-        result_content = json.load(f)
-        assert result_content["prediction"] == species