PyPI - XspecT - Versions diffs - 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl - Mend

XspecT 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of XspecT might be problematic. Click here for more details.

Files changed (33) hide show

xspect/classify.py +61 -13
xspect/definitions.py +61 -13
xspect/download_models.py +10 -2
xspect/file_io.py +115 -48
xspect/filter_sequences.py +81 -29
xspect/main.py +90 -39
xspect/mlst_feature/mlst_helper.py +3 -0
xspect/mlst_feature/pub_mlst_handler.py +43 -1
xspect/model_management.py +84 -14
xspect/models/probabilistic_filter_mlst_model.py +75 -37
xspect/models/probabilistic_filter_model.py +201 -19
xspect/models/probabilistic_filter_svm_model.py +106 -13
xspect/models/probabilistic_single_filter_model.py +73 -9
xspect/models/result.py +77 -10
xspect/ncbi.py +48 -12
xspect/train.py +19 -11
xspect/web.py +68 -12
xspect/xspect-web/dist/assets/index-Ceo58xui.css +1 -0
xspect/xspect-web/dist/assets/{index-CMG4V7fZ.js → index-Dt_UlbgE.js} +82 -77
xspect/xspect-web/dist/index.html +2 -2
xspect/xspect-web/src/App.tsx +4 -2
xspect/xspect-web/src/api.tsx +23 -1
xspect/xspect-web/src/components/filter-form.tsx +16 -3
xspect/xspect-web/src/components/filtering-result.tsx +65 -0
xspect/xspect-web/src/components/result.tsx +2 -2
xspect/xspect-web/src/types.tsx +5 -0
{xspect-0.5.0.dist-info → xspect-0.5.2.dist-info}/METADATA +11 -5
{xspect-0.5.0.dist-info → xspect-0.5.2.dist-info}/RECORD +32 -31
{xspect-0.5.0.dist-info → xspect-0.5.2.dist-info}/WHEEL +1 -1
xspect/xspect-web/dist/assets/index-jIKg1HIy.css +0 -1
{xspect-0.5.0.dist-info → xspect-0.5.2.dist-info}/entry_points.txt +0 -0
{xspect-0.5.0.dist-info → xspect-0.5.2.dist-info}/licenses/LICENSE +0 -0
{xspect-0.5.0.dist-info → xspect-0.5.2.dist-info}/top_level.txt +0 -0

xspect/main.py CHANGED Viewed

@@ -7,21 +7,19 @@ import uvicorn
 from xspect import classify
 from xspect.web import app
 from xspect.download_models import download_test_models
-from xspect.file_io import filter_sequences
+from xspect import filter_sequences
 from xspect.train import train_from_directory, train_from_ncbi
 from xspect.definitions import (
     get_xspect_model_path,
 )
-from xspect.mlst_feature.mlst_helper import pick_scheme, pick_scheme_from_models_dir
+from xspect.mlst_feature.mlst_helper import pick_scheme
 from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
 from xspect.models.probabilistic_filter_mlst_model import (
     ProbabilisticFilterMlstSchemeModel,
 )
 from xspect.model_management import (
-    get_genus_model,
     get_model_metadata,
     get_models,
-    get_species_model,
 )
@@ -173,8 +171,9 @@ def train_mlst(choose_schemes):
     scheme_path = pick_scheme(handler.get_scheme_paths())
     species_name = str(scheme_path).split("/")[-2]
     scheme_name = str(scheme_path).split("/")[-1]
+    scheme_url = handler.scheme_mapping[str(scheme_path)]
     model = ProbabilisticFilterMlstSchemeModel(
-        31, f"{species_name}:{scheme_name}", get_xspect_model_path()
+        31, f"{species_name}:{scheme_name}", get_xspect_model_path(), scheme_url
     )
     click.echo("Creating mlst model")
     model.fit(scheme_path)
@@ -211,19 +210,27 @@ def classify_seqs():
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     default=Path(".") / f"result_{uuid4()}.json",
 )
-def classify_genus(model_genus, input_path, output_path):
+@click.option(
+    "--sparse-sampling-step",
+    type=int,
+    help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+    default=1,
+)
+def classify_genus(model_genus, input_path, output_path, sparse_sampling_step):
     """Classify samples using a genus model."""
     click.echo("Classifying...")
-    classify.classify_genus(model_genus, Path(input_path), Path(output_path))
-    click.echo(f"Result saved as {output_path}.")
+    classify.classify_genus(
+        model_genus, Path(input_path), Path(output_path), sparse_sampling_step
+    )
 @classify_seqs.command(
@@ -244,12 +251,13 @@ def classify_genus(model_genus, input_path, output_path):
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     default=Path(".") / f"result_{uuid4()}.json",
 )
 @click.option(
@@ -264,7 +272,6 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
     classify.classify_species(
         model_genus, Path(input_path), Path(output_path), sparse_sampling_step
     )
-    click.echo(f"Result saved as {output_path}.")
 @classify_seqs.command(
@@ -277,19 +284,22 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
     help="Path to FASTA-file for mlst identification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
-    default=Path(".") / f"result_{uuid4()}.json",
+    type=click.Path(dir_okay=False, file_okay=True),
+    default=Path(".") / f"MLST_result_{uuid4()}.json",
 )
-def classify_mlst(input_path, output_path):
+@click.option(
+    "-l", "--limit", is_flag=True, help="Limit the output to 5 results for each locus."
+)
+def classify_mlst(input_path, output_path, limit):
     """MLST classify a sample."""
     click.echo("Classifying...")
-    classify.classify_mlst(Path(input_path), Path(output_path))
-    click.echo(f"Result saved as {output_path}.")
+    classify.classify_mlst(Path(input_path), Path(output_path), limit)
 # # # # # # # # # # # # # # #
@@ -321,37 +331,54 @@ def filter_seqs():
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     prompt=True,
+    default=Path(".") / f"genus_filtered_{uuid4()}.fasta",
 )
 @click.option(
+    "--classification-output-path",
+    help="Optional path to the classification output file.",
+    type=click.Path(dir_okay=False, file_okay=True),
+)
+@click.option(
+    "-t",
     "--threshold",
-    type=float,
+    type=click.FloatRange(0, 1),
     help="Threshold for filtering (default: 0.7).",
     default=0.7,
     prompt=True,
 )
-def filter_genus(model_genus, input_path, output_path, threshold):
+@click.option(
+    "--sparse-sampling-step",
+    type=int,
+    help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+    default=1,
+)
+def filter_genus(
+    model_genus,
+    input_path,
+    output_path,
+    classification_output_path,
+    threshold,
+    sparse_sampling_step,
+):
     """Filter samples using a genus model."""
     click.echo("Filtering...")
-    genus_model = get_genus_model(model_genus)
-    result = genus_model.predict(Path(input_path))
-    included_ids = result.get_filtered_subsequence_labels(model_genus, threshold)
-    if not included_ids:
-        click.echo("No sequences found for the given genus.")
-        return
-    filter_sequences(
+    filter_sequences.filter_genus(
+        model_genus,
         Path(input_path),
         Path(output_path),
-        included_ids=included_ids,
+        threshold,
+        Path(classification_output_path) if classification_output_path else None,
+        sparse_sampling_step=sparse_sampling_step,
     )
-    click.echo(f"Filtered sequences saved at {output_path}.")
 @filter_seqs.command(
@@ -378,24 +405,51 @@ def filter_genus(model_genus, input_path, output_path, threshold):
     help="Path to FASTA or FASTQ file for classification.",
     type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
-    type=click.Path(dir_okay=True, file_okay=True),
+    type=click.Path(dir_okay=False, file_okay=True),
     prompt=True,
+    default=Path(".") / f"species_filtered_{uuid4()}.fasta",
 )
 @click.option(
+    "--classification-output-path",
+    help="Optional path to the classification output file.",
+    type=click.Path(dir_okay=False, file_okay=True),
+)
+@click.option(
+    "-t",
     "--threshold",
     type=float,
     help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring species.",
     default=0.7,
     prompt=True,
 )
-def filter_species(model_genus, model_species, input_path, output_path, threshold):
+@click.option(
+    "--sparse-sampling-step",
+    type=int,
+    help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+    default=1,
+)
+def filter_species(
+    model_genus,
+    model_species,
+    input_path,
+    output_path,
+    threshold,
+    classification_output_path,
+    sparse_sampling_step,
+):
     """Filter a sample using the species model."""
+    if threshold != -1 and (threshold < 0 or threshold > 1):
+        raise click.BadParameter(
+            "Threshold must be between 0 and 1, or -1 for filtering by the highest scoring species."
+        )
     available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
     available_species = {
         id: name.replace(f"{model_genus} ", "")
@@ -420,18 +474,15 @@ def filter_species(model_genus, model_species, input_path, output_path, threshol
     ][0]
     click.echo("Filtering...")
-    species_model = get_species_model(model_genus)
-    result = species_model.predict(Path(input_path))
-    included_ids = result.get_filtered_subsequence_labels(model_species, threshold)
-    if not included_ids:
-        click.echo("No sequences found for the given species.")
-        return
-    filter_sequences(
+    filter_sequences.filter_species(
+        model_genus,
+        model_species,
         Path(input_path),
         Path(output_path),
-        included_ids=included_ids,
+        threshold,
+        Path(classification_output_path) if classification_output_path else None,
+        sparse_sampling_step=sparse_sampling_step,
     )
-    click.echo(f"Filtered sequences saved at {output_path}.")
 if __name__ == "__main__":

xspect/mlst_feature/mlst_helper.py CHANGED Viewed

@@ -194,11 +194,13 @@ class MlstResult:
         scheme_model: str,
         steps: int,
         hits: dict[str, list[dict]],
+        input_source: str = None,
     ):
         """Initialise an MlstResult object."""
         self.scheme_model = scheme_model
         self.steps = steps
         self.hits = hits
+        self.input_source = input_source
     def get_results(self) -> dict:
         """
@@ -221,6 +223,7 @@ class MlstResult:
             "Scheme": self.scheme_model,
             "Steps": self.steps,
             "Results": self.get_results(),
+            "Input_source": self.input_source,
         }
         return result

xspect/mlst_feature/pub_mlst_handler.py CHANGED Viewed

@@ -17,7 +17,7 @@ from xspect.definitions import get_xspect_mlst_path, get_xspect_upload_path
 class PubMLSTHandler:
     """Class for communicating with PubMLST and downloading alleles (FASTA-Format) from all loci."""
-    base_url = "http://rest.pubmlst.org/db"
+    base_url = "https://rest.pubmlst.org/db"
     def __init__(self):
         """Initialise a PubMLSTHandler object."""
@@ -27,6 +27,7 @@ class PubMLSTHandler:
             self.base_url + "/pubmlst_abaumannii_seqdef/schemes/2",
         ]
         self.scheme_paths = []
+        self.scheme_mapping = {}
     def get_scheme_paths(self) -> dict:
         """
@@ -103,6 +104,7 @@ class PubMLSTHandler:
             species_name = scheme.split("_")[1]  # name = pubmlst_abaumannii_seqdef
             scheme_path = get_xspect_mlst_path() / species_name / scheme_name
+            self.scheme_mapping[str(scheme_path)] = scheme
             self.scheme_paths.append(scheme_path)
             for locus_url in locus_list:
@@ -143,3 +145,43 @@ class PubMLSTHandler:
             # Example: 'Pas_fusA': [{'href': some URL, 'allele_id': '2'}]
             print(locus + ":" + meta_data[0]["allele_id"], end="; ")
         print("\nStrain Type:", response["fields"])
+    def get_strain_type_name(self, highest_results: dict, post_url: str) -> str:
+        """
+        Send an API-POST request to PubMLST with the highest result of each locus as payload.
+        This function formats the highest_result dict into an accepted input for the request.
+        It gets a response from the site which is the strain type name.
+        The name is based on the allele id with the highest score for each locus.
+        Example of post_url for the oxford scheme of A.baumannii:
+        https://rest.pubmlst.org/db/pubmlst_abaumannii_seqdef/schemes/1/designations
+        Args:
+            highest_results (dict): The allele ids with the highest kmer matches.
+            post_url (str): The specific url for the scheme of a species
+        Returns:
+            str: The response (ST name or No ST found) of the POST request.
+        """
+        payload = {
+            "designations": {
+                locus: [{"allele": str(allele)}]
+                for locus, allele in highest_results.items()
+            }
+        }
+        response = requests.post(post_url + "/designations", json=payload)
+        if response.status_code == 200:
+            data = response.json()
+            if "fields" in data:
+                post_response = data["fields"]
+                return post_response
+            else:
+                post_response = "No matching Strain Type found in the database. "
+                post_response += "Possibly a novel Strain Type."
+                return post_response
+        else:
+            post_response = "Error:" + str(response.status_code)
+            post_response += response.text
+            return post_response

xspect/model_management.py CHANGED Viewed

@@ -9,22 +9,55 @@ from xspect.models.probabilistic_filter_svm_model import ProbabilisticFilterSVMM
 from xspect.definitions import get_xspect_model_path
-def get_genus_model(genus):
-    """Get a metagenomic model for the specified genus."""
+def get_genus_model(genus) -> ProbabilisticSingleFilterModel:
+    """
+    Get a genus model for the specified genus.
+    This function retrieves a pre-trained genus classification model based on the provided genus name.
+    Args:
+        genus (str): The genus name for which the model is to be retrieved.
+    Returns:
+        ProbabilisticSingleFilterModel: An instance of the genus classification model.
+    """
     genus_model_path = get_xspect_model_path() / (genus.lower() + "-genus.json")
     genus_filter_model = ProbabilisticSingleFilterModel.load(genus_model_path)
     return genus_filter_model
-def get_species_model(genus):
-    """Get a species classification model for the specified genus."""
+def get_species_model(genus) -> ProbabilisticFilterSVMModel:
+    """
+    Get a species classification model for the specified genus.
+    This function retrieves a pre-trained species classification model based on the provided genus name.
+    Args:
+        genus (str): The genus name for which the species model is to be retrieved.
+    Returns:
+        ProbabilisticFilterSVMModel: An instance of the species classification model.
+    """
     species_model_path = get_xspect_model_path() / (genus.lower() + "-species.json")
     species_filter_model = ProbabilisticFilterSVMModel.load(species_model_path)
     return species_filter_model
-def get_model_metadata(model: str | Path):
-    """Get the metadata of a model."""
+def get_model_metadata(model: str | Path) -> dict:
+    """
+    Get metadata of a specified model.
+    This function retrieves the metadata of a model from its JSON file.
+    Args:
+        model (str | Path): The slug of the model (as a string) or the path to the model JSON file.
+    Returns:
+        dict: A dictionary containing the model metadata.
+    Raises:
+        ValueError: If the model does not exist or is not a valid file.
+    """
     if isinstance(model, str):
         model_path = get_xspect_model_path() / (model.lower() + ".json")
     elif isinstance(model, Path):
@@ -40,8 +73,17 @@ def get_model_metadata(model: str | Path):
         return model_json
-def update_model_metadata(model_slug: str, author: str, author_email: str):
-    """Update the metadata of a model."""
+def update_model_metadata(model_slug: str, author: str, author_email: str) -> None:
+    """
+    Update the metadata of a model.
+    This function updates the author and author email in the model's metadata JSON file.
+    Args:
+        model_slug (str): The slug of the model to update.
+        author (str): The name of the author to set in the metadata.
+        author_email (str): The email of the author to set in the metadata.
+    """
     model_metadata = get_model_metadata(model_slug)
     model_metadata["author"] = author
     model_metadata["author_email"] = author_email
@@ -51,8 +93,19 @@ def update_model_metadata(model_slug: str, author: str, author_email: str):
         file.write(dumps(model_metadata, indent=4))
-def update_model_display_name(model_slug: str, filter_id: str, display_name: str):
-    """Update the display name of a filter in a model."""
+def update_model_display_name(
+    model_slug: str, filter_id: str, display_name: str
+) -> None:
+    """
+    Update the display name of a filter in a model.
+    This function updates the display name of a specific filter in the model's metadata JSON file.
+    Args:
+        model_slug (str): The slug of the model to update.
+        filter_id (str): The ID of the filter whose display name is to be updated.
+        display_name (str): The new display name for the filter.
+    """
     model_metadata = get_model_metadata(model_slug)
     model_metadata["display_names"][filter_id] = display_name
@@ -61,8 +114,15 @@ def update_model_display_name(model_slug: str, filter_id: str, display_name: str
         file.write(dumps(model_metadata, indent=4))
-def get_models():
-    """Get a list of all available models in a dictionary by type."""
+def get_models() -> dict[str, list[dict]]:
+    """
+    Get a list of all available models in a dictionary by type.
+    This function scans the model directory for JSON files and organizes them by their model type.
+    Returns:
+        dict[str, list[dict]]: A dictionary where keys are model types and values are lists of model display names.
+    """
     model_dict = {}
     for model_file in get_xspect_model_path().glob("*.json"):
         model_metadata = get_model_metadata(model_file)
@@ -73,7 +133,17 @@ def get_models():
     return model_dict
-def get_model_display_names(model_slug: str):
-    """Get the display names included in a model."""
+def get_model_display_names(model_slug: str) -> list[str]:
+    """
+    Get the display names included in a model.
+    This function retrieves the display names of individual filters from the model's metadata.
+    Args:
+        model_slug (str): The slug of the model for which to retrieve display names.
+    Returns:
+        list[str]: A list of display names for the individual filters in the model.
+    """
     model_metadata = get_model_metadata(model_slug)
     return list(model_metadata["display_names"].values())

XspecT 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

Potentially problematic release.

XspecT 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl