PyPI - XspecT - Versions diffs - 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

XspecT 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of XspecT might be problematic. Click here for more details.

Files changed (15)

xspect/classify.py +41 -6
xspect/definitions.py +11 -3
xspect/filter_sequences.py +104 -22
xspect/main.py +52 -32
xspect/models/probabilistic_filter_model.py +7 -7
xspect/models/probabilistic_filter_svm_model.py +7 -7
xspect/models/probabilistic_single_filter_model.py +7 -4
xspect/ncbi.py +3 -2
xspect/train.py +17 -10
{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/METADATA +11 -5
{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/RECORD +15 -15
{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/WHEEL +1 -1
{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/entry_points.txt +0 -0
{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/licenses/LICENSE +0 -0
{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/top_level.txt +0 -0

xspect/classify.py CHANGED Viewed

@@ -4,6 +4,7 @@ import xspect.model_management as mm
 from xspect.models.probabilistic_filter_mlst_model import (
     ProbabilisticFilterMlstSchemeModel,
 )
+from xspect.definitions import fasta_endings, fastq_endings
 def classify_genus(
@@ -11,17 +12,51 @@ def classify_genus(
 ):
     """Classify the input file using the genus model."""
     model = mm.get_genus_model(model_genus)
-    result = model.predict(input_path, step=step)
-    result.input_source = input_path.name
-    result.save(output_path)
+    input_paths = []
+    input_is_dir = input_path.is_dir()
+    ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
+    if input_is_dir:
+        input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
+    elif input_path.is_file():
+        input_paths = [input_path]
+    for idx, current_path in enumerate(input_paths):
+        result = model.predict(current_path, step=step)
+        result.input_source = current_path.name
+        output_name = (
+            f"{output_path.stem}_{idx+1}{output_path.suffix}"
+            if input_is_dir
+            else output_path.name
+        )
+        result.save(output_path.parent / output_name)
+        print(f"Saved result as {output_name}")
 def classify_species(model_genus, input_path, output_path, step=1):
     """Classify the input file using the species model."""
     model = mm.get_species_model(model_genus)
-    result = model.predict(input_path, step=step)
-    result.input_source = input_path.name
-    result.save(output_path)
+    input_paths = []
+    input_is_dir = input_path.is_dir()
+    ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
+    if input_is_dir:
+        input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
+    elif input_path.is_file():
+        input_paths = [input_path]
+    for idx, current_path in enumerate(input_paths):
+        result = model.predict(current_path, step=step)
+        result.input_source = current_path.name
+        output_name = (
+            f"{output_path.stem}_{idx+1}{output_path.suffix}"
+            if input_is_dir
+            else output_path.name
+        )
+        result.save(output_path.parent / output_name)
+        print(f"Saved result as {output_name}")
 def classify_mlst(input_path, output_path):

xspect/definitions.py CHANGED Viewed

@@ -9,9 +9,17 @@ fastq_endings = ["fastq", "fq"]
 def get_xspect_root_path():
     """Return the root path for XspecT data."""
-    root_path = Path(getcwd()) / "xspect-data"
-    root_path.mkdir(exist_ok=True, parents=True)
-    return root_path
+    home_based_dir = Path.home() / "xspect-data"
+    if home_based_dir.exists():
+        return home_based_dir
+    cwd_based_dir = Path(getcwd()) / "xspect-data"
+    if cwd_based_dir.exists():
+        return cwd_based_dir
+    home_based_dir.mkdir(exist_ok=True, parents=True)
+    return home_based_dir
 def get_xspect_model_path():

xspect/filter_sequences.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from pathlib import Path
 from xspect.model_management import get_genus_model, get_species_model
 from xspect.file_io import filter_sequences
+from xspect.definitions import fasta_endings, fastq_endings
 def filter_species(
@@ -9,6 +10,7 @@ def filter_species(
     input_path: Path,
     output_path: Path,
     threshold: float,
+    classification_output_path: Path | None = None,
 ):
     """Filter sequences by species.
     This function filters sequences from the input file based on the species model.
@@ -20,20 +22,56 @@ def filter_species(
         model_species (str): The species model slug.
         input_path (Path): The path to the input file containing sequences.
         output_path (Path): The path to the output file where filtered sequences will be saved.
+            above this threshold will be included in the output file. A threshold of -1 will
+            include only sequences if the species score is the highest among the
+            available species scores.
+        classification_output_path (Path): Optional path to save the classification results.
         threshold (float): The threshold for filtering sequences. Only sequences with a score
-            above this threshold will be included in the output file.
+            above this threshold will be included in the output file. A threshold of -1 will
+            include only sequences if the species score is the highest among the
+            available species scores.
     """
     species_model = get_species_model(model_genus)
-    result = species_model.predict(input_path)
-    included_ids = result.get_filtered_subsequence_labels(model_species, threshold)
-    if not included_ids:
-        print("No sequences found for the given species.")
-        return
-    filter_sequences(
-        input_path,
-        output_path,
-        included_ids,
-    )
+    input_paths = []
+    input_is_dir = input_path.is_dir()
+    ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
+    if input_is_dir:
+        input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
+    elif input_path.is_file():
+        input_paths = [input_path]
+    for idx, current_path in enumerate(input_paths):
+        result = species_model.predict(current_path)
+        result.input_source = current_path.name
+        if classification_output_path:
+            classification_output_name = (
+                f"{classification_output_path.stem}_{idx+1}{classification_output_path.suffix}"
+                if input_is_dir
+                else classification_output_path.name
+            )
+            result.save(classification_output_path.parent / classification_output_name)
+            print(
+                f"Saved classification results from {current_path.name} as {classification_output_name}"
+            )
+        included_ids = result.get_filtered_subsequence_labels(model_species, threshold)
+        if not included_ids:
+            print(f"No sequences found for the given species in {current_path.name}.")
+            continue
+        output_name = (
+            f"{output_path.stem}_{idx+1}{output_path.suffix}"
+            if input_is_dir
+            else output_path.name
+        )
+        filter_sequences(
+            current_path,
+            output_path.parent / output_name,
+            included_ids,
+        )
+        print(f"Saved filtered sequences from {current_path.name} as {output_name}")
 def filter_genus(
@@ -41,16 +79,60 @@ def filter_genus(
     input_path: Path,
     output_path: Path,
     threshold: float,
+    classification_output_path: Path | None = None,
 ):
+    """Filter sequences by genus.
+    This function filters sequences from the input file based on the genus model.
+    It uses the genus model to identify the genus of the sequences and then applies
+    the filtering based on the provided threshold.
+    Args:
+        model_genus (str): The genus model slug.
+        input_path (Path): The path to the input file containing sequences.
+        output_path (Path): The path to the output file where filtered sequences will be saved.
+        threshold (float): The threshold for filtering sequences. Only sequences with a score
+            above this threshold will be included in the output file.
+        classification_output_path (Path): Optional path to save the classification results.
+    """
     genus_model = get_genus_model(model_genus)
-    result = genus_model.predict(Path(input_path))
-    included_ids = result.get_filtered_subsequence_labels(model_genus, threshold)
-    if not included_ids:
-        print("No sequences found for the given genus.")
-        return
-    filter_sequences(
-        input_path,
-        output_path,
-        included_ids,
-    )
+    input_paths = []
+    input_is_dir = input_path.is_dir()
+    ending_wildcards = [f"*.{ending}" for ending in fasta_endings + fastq_endings]
+    if input_is_dir:
+        input_paths = [p for e in ending_wildcards for p in input_path.glob(e)]
+    elif input_path.is_file():
+        input_paths = [input_path]
+    for idx, current_path in enumerate(input_paths):
+        result = genus_model.predict(current_path)
+        result.input_source = current_path.name
+        if classification_output_path:
+            classification_output_name = (
+                f"{classification_output_path.stem}_{idx+1}{classification_output_path.suffix}"
+                if input_is_dir
+                else classification_output_path.name
+            )
+            result.save(classification_output_path.parent / classification_output_name)
+            print(
+                f"Saved classification results from {current_path.name} as {classification_output_name}"
+            )
+        included_ids = result.get_filtered_subsequence_labels(model_genus, threshold)
+        if not included_ids:
+            print(f"No sequences found for the given genus in {current_path.name}.")
+            continue
+        output_name = (
+            f"{output_path.stem}_{idx+1}{output_path.suffix}"
+            if input_is_dir
+            else output_path.name
+        )
+        filter_sequences(
+            current_path,
+            output_path.parent / output_name,
+            included_ids,
+        )
+        print(f"Saved filtered sequences from {current_path.name} as {output_name}")

xspect/main.py CHANGED Viewed

@@ -7,12 +7,12 @@ import uvicorn
 from xspect import classify
 from xspect.web import app
 from xspect.download_models import download_test_models
-from xspect.file_io import filter_sequences
+from xspect import filter_sequences
 from xspect.train import train_from_directory, train_from_ncbi
 from xspect.definitions import (
     get_xspect_model_path,
 )
-from xspect.mlst_feature.mlst_helper import pick_scheme, pick_scheme_from_models_dir
+from xspect.mlst_feature.mlst_helper import pick_scheme
 from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
 from xspect.models.probabilistic_filter_mlst_model import (
     ProbabilisticFilterMlstSchemeModel,
@@ -211,19 +211,19 @@ def classify_seqs():
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     default=Path(".") / f"result_{uuid4()}.json",
 )
 def classify_genus(model_genus, input_path, output_path):
     """Classify samples using a genus model."""
     click.echo("Classifying...")
     classify.classify_genus(model_genus, Path(input_path), Path(output_path))
-    click.echo(f"Result saved as {output_path}.")
 @classify_seqs.command(
@@ -244,12 +244,13 @@ def classify_genus(model_genus, input_path, output_path):
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     default=Path(".") / f"result_{uuid4()}.json",
 )
 @click.option(
@@ -264,7 +265,6 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
     classify.classify_species(
         model_genus, Path(input_path), Path(output_path), sparse_sampling_step
     )
-    click.echo(f"Result saved as {output_path}.")
 @classify_seqs.command(
@@ -275,15 +275,14 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
     "-i",
     "--input-path",
     help="Path to FASTA-file for mlst identification.",
-    type=click.Path(exists=True, dir_okay=True, file_okay=True),
+    type=click.Path(exists=True, dir_okay=False, file_okay=True),
     prompt=True,
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
-    default=Path(".") / f"result_{uuid4()}.json",
+    type=click.Path(dir_okay=False, file_okay=True),
 )
 def classify_mlst(input_path, output_path):
     """MLST classify a sample."""
@@ -321,37 +320,42 @@ def filter_seqs():
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     prompt=True,
+    default=Path(".") / f"genus_filtered_{uuid4()}.fasta",
 )
 @click.option(
+    "--classification-output-path",
+    help="Optional path to the classification output file.",
+    type=click.Path(dir_okay=False, file_okay=True),
+)
+@click.option(
+    "-t",
     "--threshold",
-    type=float,
+    type=click.FloatRange(0, 1),
     help="Threshold for filtering (default: 0.7).",
     default=0.7,
     prompt=True,
 )
-def filter_genus(model_genus, input_path, output_path, threshold):
+def filter_genus(
+    model_genus, input_path, output_path, classification_output_path, threshold
+):
     """Filter samples using a genus model."""
     click.echo("Filtering...")
-    genus_model = get_genus_model(model_genus)
-    result = genus_model.predict(Path(input_path))
-    included_ids = result.get_filtered_subsequence_labels(model_genus, threshold)
-    if not included_ids:
-        click.echo("No sequences found for the given genus.")
-        return
-    filter_sequences(
+    filter_sequences.filter_genus(
+        model_genus,
         Path(input_path),
         Path(output_path),
-        included_ids=included_ids,
+        threshold,
+        Path(classification_output_path) if classification_output_path else None,
     )
-    click.echo(f"Filtered sequences saved at {output_path}.")
 @filter_seqs.command(
@@ -378,24 +382,44 @@ def filter_genus(model_genus, input_path, output_path, threshold):
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     prompt=True,
+    default=Path(".") / f"species_filtered_{uuid4()}.fasta",
+)
+@click.option(
+    "--classification-output-path",
+    help="Optional path to the classification output file.",
+    type=click.Path(dir_okay=False, file_okay=True),
 )
 @click.option(
+    "-t",
     "--threshold",
     type=float,
     help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
     default=0.7,
     prompt=True,
 )
-def filter_species(model_genus, model_species, input_path, output_path, threshold):
+def filter_species(
+    model_genus,
+    model_species,
+    input_path,
+    output_path,
+    threshold,
+    classification_output_path,
+):
     """Filter a sample using the species model."""
+    if threshold != -1 and (threshold < 0 or threshold > 1):
+        raise click.BadParameter(
+            "Threshold must be between 0 and 1, or -1 for filtering by the highest scoring species."
+        )
     available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
     available_species = {
         id: name.replace(f"{model_genus} ", "")
@@ -420,18 +444,14 @@ def filter_species(model_genus, model_species, input_path, output_path, threshol
     ][0]
     click.echo("Filtering...")
-    species_model = get_species_model(model_genus)
-    result = species_model.predict(Path(input_path))
-    included_ids = result.get_filtered_subsequence_labels(model_species, threshold)
-    if not included_ids:
-        click.echo("No sequences found for the given species.")
-        return
-    filter_sequences(
+    filter_sequences.filter_species(
+        model_genus,
+        model_species,
         Path(input_path),
         Path(output_path),
-        included_ids=included_ids,
+        threshold,
+        Path(classification_output_path) if classification_output_path else None,
     )
-    click.echo(f"Filtered sequences saved at {output_path}.")
 if __name__ == "__main__":

xspect/models/probabilistic_filter_model.py CHANGED Viewed

@@ -20,13 +20,13 @@ class ProbabilisticFilterModel:
         self,
         k: int,
         model_display_name: str,
-        author: str,
-        author_email: str,
+        author: str | None,
+        author_email: str | None,
         model_type: str,
         base_path: Path,
         fpr: float = 0.01,
         num_hashes: int = 7,
-        training_accessions: dict[str, list[str]] = None,
+        training_accessions: dict[str, list[str]] | None = None,
     ) -> None:
         if k < 1:
             raise ValueError("Invalid k value, must be greater than 0")
@@ -49,7 +49,7 @@ class ProbabilisticFilterModel:
         self.index = None
         self.training_accessions = training_accessions
-    def get_cobs_index_path(self) -> Path:
+    def get_cobs_index_path(self) -> str:
         """Returns the path to the cobs index"""
         return str(self.base_path / self.slug() / "index.cobs_classic")
@@ -76,8 +76,8 @@ class ProbabilisticFilterModel:
     def fit(
         self,
         dir_path: Path,
-        display_names: dict = None,
-        training_accessions: dict[str, list[str]] = None,
+        display_names: dict | None = None,
+        training_accessions: dict[str, list[str]] | None = None,
     ) -> None:
         """Adds filters to the model"""
@@ -123,7 +123,7 @@ class ProbabilisticFilterModel:
         self.index = cobs.Search(self.get_cobs_index_path(), True)
     def calculate_hits(
-        self, sequence: Seq, filter_ids: list[str] = None, step: int = 1
+        self, sequence: Seq, filter_ids: list[str] | None = None, step: int = 1
     ) -> dict:
         """Calculates the hits for a sequence"""

xspect/models/probabilistic_filter_svm_model.py CHANGED Viewed

@@ -21,16 +21,16 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
         self,
         k: int,
         model_display_name: str,
-        author: str,
-        author_email: str,
+        author: str | None,
+        author_email: str | None,
         model_type: str,
         base_path: Path,
         kernel: str,
         c: float,
         fpr: float = 0.01,
         num_hashes: int = 7,
-        training_accessions: dict[str, list[str]] = None,
-        svm_accessions: dict[str, list[str]] = None,
+        training_accessions: dict[str, list[str]] | None = None,
+        svm_accessions: dict[str, list[str]] | None = None,
     ) -> None:
         super().__init__(
             k=k,
@@ -64,10 +64,10 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
         self,
         dir_path: Path,
         svm_path: Path,
-        display_names: dict = None,
+        display_names: dict[str, str] | None = None,
         svm_step: int = 1,
-        training_accessions: list[str] = None,
-        svm_accessions: list[str] = None,
+        training_accessions: dict[str, list[str]] | None = None,
+        svm_accessions: dict[str, list[str]] | None = None,
     ) -> None:
         """Fit the SVM to the sequences and labels"""

xspect/models/probabilistic_single_filter_model.py CHANGED Viewed

@@ -20,12 +20,12 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
         self,
         k: int,
         model_display_name: str,
-        author: str,
-        author_email: str,
+        author: str | None,
+        author_email: str | None,
         model_type: str,
         base_path: Path,
         fpr: float = 0.01,
-        training_accessions: list[str] = None,
+        training_accessions: list[str] | None = None,
     ) -> None:
         super().__init__(
             k=k,
@@ -41,7 +41,10 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
         self.bf = None
     def fit(
-        self, file_path: Path, display_name: str, training_accessions: list[str] = None
+        self,
+        file_path: Path,
+        display_name: str,
+        training_accessions: list[str] | None = None,
     ) -> None:
         """Fit the cobs classic index to the sequences and labels"""
         self.training_accessions = training_accessions

xspect/ncbi.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from enum import Enum
 from pathlib import Path
 import time
+from loguru import logger
 import requests
 # pylint: disable=line-too-long
@@ -34,7 +35,7 @@ class NCBIHandler:
     def __init__(
         self,
-        api_key: str = None,
+        api_key: str | None = None,
     ):
         """Initialise the NCBI handler."""
         self.api_key = api_key
@@ -229,7 +230,7 @@ class NCBIHandler:
                 == "OK"
             ]
         except (IndexError, KeyError, TypeError):
-            print(
+            logger.debug(
                 f"Could not get {assembly_level.value} accessions for taxon with ID: {taxon_id}. Skipping."
             )
             return []

xspect/train.py CHANGED Viewed

@@ -25,12 +25,12 @@ def train_from_directory(
     display_name: str,
     dir_path: Path,
     meta: bool = False,
-    training_accessions: dict[str, list[str]] = None,
-    svm_accessions: list[str] = None,
+    training_accessions: dict[str, list[str]] | None = None,
+    svm_accessions: dict[str, list[str]] | None = None,
     svm_step: int = 1,
-    translation_dict: dict[str, str] = None,
-    author: str = None,
-    author_email: str = None,
+    translation_dict: dict[str, str] | None = None,
+    author: str | None = None,
+    author_email: str | None = None,
 ):
     """
     Train a model from a directory containing training data.
@@ -113,10 +113,11 @@ def train_from_directory(
         species_dir = tmp_dir / "species"
         species_dir.mkdir(parents=True, exist_ok=True)
-        # concatenate files in cobs_training_data for each species
+        logger.info("Concatenating genomes for species training...")
         concatenate_species_fasta_files(cobs_folders, species_dir)
         if svm_path.exists():
+            logger.info("Training species SVM model...")
             species_model = ProbabilisticFilterSVMModel(
                 k=21,
                 model_display_name=display_name,
@@ -136,6 +137,7 @@ def train_from_directory(
                 svm_accessions=svm_accessions,
             )
         else:
+            logger.info("Training species model...")
             species_model = ProbabilisticFilterModel(
                 k=21,
                 model_display_name=display_name,
@@ -153,9 +155,11 @@ def train_from_directory(
         species_model.save()
         if meta:
+            logger.info("Concatenating genomes for metagenome training...")
             meta_fasta = tmp_dir / f"{display_name}.fasta"
             concatenate_metagenome(species_dir, meta_fasta)
+            logger.info("Training metagenome model...")
             genus_model = ProbabilisticSingleFilterModel(
                 k=21,
                 model_display_name=display_name,
@@ -179,8 +183,9 @@ def train_from_directory(
 def train_from_ncbi(
     genus: str,
     svm_step: int = 1,
-    author: str = None,
-    author_email: str = None,
+    author: str | None = None,
+    author_email: str | None = None,
+    ncbi_api_key: str | None = None,
 ):
     """Train a model using NCBI assembly data for a given genus.
@@ -193,6 +198,7 @@ def train_from_ncbi(
         svm_step (int, optional): Step size for SVM training. Defaults to 1.
         author (str, optional): Author of the model. Defaults to None.
         author_email (str, optional): Author's email. Defaults to None.
+        ncbi_api_key (str, optional): NCBI API key for accessing NCBI resources. Defaults to None.
     Raises:
         TypeError: If `genus` is not a string.
@@ -205,7 +211,8 @@ def train_from_ncbi(
     if not isinstance(genus, str):
         raise TypeError("genus must be a string")
-    ncbi_handler = NCBIHandler()
+    logger.info("Getting NCBI metadata...")
+    ncbi_handler = NCBIHandler(api_key=ncbi_api_key)
     genus_tax_id = ncbi_handler.get_genus_taxon_id(genus)
     species_ids = ncbi_handler.get_species(genus_tax_id)
     species_names = ncbi_handler.get_taxon_names(species_ids)
@@ -243,7 +250,7 @@ def train_from_ncbi(
         cobs_dir.mkdir(parents=True, exist_ok=True)
         svm_dir.mkdir(parents=True, exist_ok=True)
-        # download assemblies
+        logger.info("Downloading genomes from NCBI...")
         all_accessions = sum(accessions.values(), [])
         batch_size = 100
         accession_paths = {}

{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: XspecT
-Version: 0.5.0
+Version: 0.5.1
 Summary: Tool to monitor and characterize pathogens using Bloom filters.
 License: MIT License
@@ -55,7 +55,7 @@ Requires-Dist: pytest-retry; extra == "test"
 Requires-Dist: httpx; extra == "test"
 Dynamic: license-file
-# XspecT - Acinetobacter Species Assignment Tool
+# XspecT
 <!-- start intro -->
 ![Test](https://github.com/bionf/xspect2/actions/workflows/test.yml/badge.svg)
 [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint)
@@ -63,7 +63,7 @@ Dynamic: license-file
 XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [kmer indices] and a [Support Vector Machine].
-XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
+XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a kmer index. Probabilistic data structures ensure a fast lookup in this process. For a final prediction, the results are classified using a Support Vector Machine.
 The tool is available as a web-based application and as a command line interface.
@@ -91,16 +91,22 @@ xspect models train ncbi
 ```
 ### How to run the web app
-To run the web app, install and run [XspecT Web](https://github.com/aromberg/xspect-web). Additionally, run XspecT in API mode:
+To run the web app, simply execute:
 ```
 xspect web
 ```
+This will start a local web server. You can access the web app by navigating to `http://localhost:8000` in your web browser.
 ### How to use the XspecT command line interface
-Run XspecT with the configuration you want to run it with as arguments.
+To use the XspecT command line interface, execute `xspect` with the desired subcommand and parameters.
+**Example**:
 ```
 xspect classify species
 ```
+If you do not provide the required parameters, the command line interface will prompt you for them.
 For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
 ```
 xspect --help

{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/RECORD RENAMED Viewed

@@ -1,22 +1,22 @@
 xspect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-xspect/classify.py,sha256=e0soOihf9KuVFuZ7VXs0EZKYzOFsM4-4IpOsYJtOORg,1148
-xspect/definitions.py,sha256=fVn_li_s2hriOSGJ69o_H8H-vkw1znvkryhBj7WMnF4,1219
+xspect/classify.py,sha256=ZLTtgsaPK_Eo-B52zvYIlBOr46lSJOO1UlsXvTn7BaY,2426
+xspect/definitions.py,sha256=s3oGQiF3ZQAyqUCcH7qLytvOZB6uRUZhaEW-bH0lfUM,1407
 xspect/download_models.py,sha256=y1wFJZa1xOJfvUP78zKkRs46O-WqKBL90vmo5AYUio0,853
 xspect/file_io.py,sha256=-3xm7IfOlmWJHxfrKPX5Qqit10zqsBUVt5Z7z1No2AI,5669
-xspect/filter_sequences.py,sha256=UeIPmn5MquT-vVGFYna-IZRmDzaStI-d2wExYA7M7u0,1839
-xspect/main.py,sha256=_-H6xFB3Z_rATblunTF1YhjxP6V_k8nPrnLcwAR3dcw,11913
+xspect/filter_sequences.py,sha256=KNTjTQuv2eeCcOHdjYLNUnqNYP5WIBWZppZP2mmPZJk,5698
+xspect/main.py,sha256=8fh43RFw88DtUzR-Egmj9vV-879LEfJvHq-VUmLnqt0,12138
 xspect/model_management.py,sha256=UbmAr3YOZ4oy_9iVvApCLstYHGkcmneHEC_yftRIqCI,3010
-xspect/ncbi.py,sha256=PDoVraZ8ntZIqBESLS27ZcxzgW5nqFc8ipvrHpIKb38,10100
-xspect/train.py,sha256=dnmLIhDrHTKbXzLjcF1VZAdsDDmnpv_6qqk-st4E9V8,11053
+xspect/ncbi.py,sha256=Zn5YIIzbclM3rHAnpOcUZAqopcbix7_K0tl3mUyuIBI,10140
+xspect/train.py,sha256=nUrj4kbAF4rR_MZjsd1nVHTjdRwuNEUC2DSId11Mfc8,11583
 xspect/web.py,sha256=M4fQUbmCnkpmdJeL-j-FD8r115EctWtWQZttuZWEsL8,5115
 xspect/mlst_feature/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xspect/mlst_feature/mlst_helper.py,sha256=Ff0kUdu-80969ZyYL6qUJNwEqf9eq72CC8zUyuoDElk,8114
 xspect/mlst_feature/pub_mlst_handler.py,sha256=Ez5YHKfhsLsKdHf1aNMfz7JJVVV_DpA27mah9fgNeJc,5919
 xspect/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xspect/models/probabilistic_filter_mlst_model.py,sha256=v9yriJX_i8-SRzU8W8RvDPrBvlR_ONSMVypZWGAJpv8,16164
-xspect/models/probabilistic_filter_model.py,sha256=K2gWpte2BCb03msgY-x18NVRMv1xHpDVWQWhbf4vWU4,10068
-xspect/models/probabilistic_filter_svm_model.py,sha256=xXimcv3iWnG1JiFyrk6UqkP9hFIxWGDdb__fRdQYwro,6245
-xspect/models/probabilistic_single_filter_model.py,sha256=yxWnCt4IP-3ZRLP4pRA3f2VTHc0_4g17PDCyOFayDDg,4090
+xspect/models/probabilistic_filter_model.py,sha256=CX-D3BFQ_l1SqV09knsJ7ueik-VbxjvZMA6LBnIDsHc,10109
+xspect/models/probabilistic_filter_svm_model.py,sha256=ZWErmMgoEC-mlwnzhf4IiBuntkw8p85fmu4v3RYmVH4,6326
+xspect/models/probabilistic_single_filter_model.py,sha256=QGjcTYrx7He2I-Jr_oJSrDCl6zyxdjnv42LmdaZL-XI,4143
 xspect/models/result.py,sha256=ELWiDlQPlxNG7ceLpth60Z_Hb1ZdopDJ3vgHBPgSRm8,3989
 xspect/xspect-web/.gitignore,sha256=_nGOe6uxTzy60tl_CIibnOUhXtP-DkOyuM-_s7m4ROg,253
 xspect/xspect-web/README.md,sha256=Fa5cCk66ohbqD_AAVgnXUZLhuzshnLxhlUFhxyscScc,1942
@@ -77,9 +77,9 @@ xspect/xspect-web/src/components/ui/switch.tsx,sha256=uIqRXtd41ba0eusIEUWVyYZv82
 xspect/xspect-web/src/components/ui/table.tsx,sha256=M2-TIHKwPFWuXrwysSufdQRSMJT-K9jPzGOokfU6PXo,2463
 xspect/xspect-web/src/components/ui/tabs.tsx,sha256=BImHKcdDCtrS3CCV1AGgn8qg0b65RB5P-QdH49IAhx0,1955
 xspect/xspect-web/src/lib/utils.ts,sha256=66ibdQiEHKftZBq1OMLmOKqWma1BkO-O60rc1IQYwLE,165
-xspect-0.5.0.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
-xspect-0.5.0.dist-info/METADATA,sha256=Fjk4LvE94MuvwKcssLdFaDeVZ-rE4o4fduMr4HQcHds,4421
-xspect-0.5.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-xspect-0.5.0.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
-xspect-0.5.0.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
-xspect-0.5.0.dist-info/RECORD,,
+xspect-0.5.1.dist-info/licenses/LICENSE,sha256=bhBGDKIRUVwYIHGOGO5hshzuVHyqFJajvSOA3XXOLKI,1094
+xspect-0.5.1.dist-info/METADATA,sha256=o5zoUCtrA5rkvaLDMBnda5W5mTNAdQwV__LrWg4UJ3A,4569
+xspect-0.5.1.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+xspect-0.5.1.dist-info/entry_points.txt,sha256=L7qliX3pIuwupQxpuOSsrBJCSHYPOPNEzH8KZKQGGUw,43
+xspect-0.5.1.dist-info/top_level.txt,sha256=hdoa4cnBv6OVzpyhMmyxpJxEydH5n2lDciy8urc1paE,7
+xspect-0.5.1.dist-info/RECORD,,

{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.7.1)
+Generator: setuptools (80.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{xspect-0.5.0.dist-info → xspect-0.5.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

XspecT 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

XspecT 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl