PyPI - XspecT - Versions diffs - 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

XspecT 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of XspecT might be problematic. Click here for more details.

Files changed (78)

xspect/classify.py +32 -0
xspect/file_io.py +3 -9
xspect/filter_sequences.py +56 -0
xspect/main.py +13 -18
xspect/mlst_feature/mlst_helper.py +102 -13
xspect/mlst_feature/pub_mlst_handler.py +32 -6
xspect/models/probabilistic_filter_mlst_model.py +160 -32
xspect/models/probabilistic_filter_model.py +1 -0
xspect/ncbi.py +8 -6
xspect/train.py +13 -5
xspect/web.py +173 -0
xspect/xspect-web/.gitignore +24 -0
xspect/xspect-web/README.md +54 -0
xspect/xspect-web/components.json +21 -0
xspect/xspect-web/dist/assets/index-CMG4V7fZ.js +290 -0
xspect/xspect-web/dist/assets/index-jIKg1HIy.css +1 -0
xspect/xspect-web/dist/index.html +14 -0
xspect/xspect-web/dist/vite.svg +1 -0
xspect/xspect-web/eslint.config.js +28 -0
xspect/xspect-web/index.html +13 -0
xspect/xspect-web/package-lock.json +6865 -0
xspect/xspect-web/package.json +58 -0
xspect/xspect-web/pnpm-lock.yaml +4317 -0
xspect/xspect-web/public/vite.svg +1 -0
xspect/xspect-web/src/App.tsx +29 -0
xspect/xspect-web/src/api.tsx +62 -0
xspect/xspect-web/src/assets/react.svg +1 -0
xspect/xspect-web/src/components/classification-form.tsx +284 -0
xspect/xspect-web/src/components/classify.tsx +18 -0
xspect/xspect-web/src/components/data-table.tsx +78 -0
xspect/xspect-web/src/components/dropdown-checkboxes.tsx +63 -0
xspect/xspect-web/src/components/dropdown-slider.tsx +42 -0
xspect/xspect-web/src/components/filter-form.tsx +423 -0
xspect/xspect-web/src/components/filter.tsx +15 -0
xspect/xspect-web/src/components/header.tsx +46 -0
xspect/xspect-web/src/components/landing.tsx +7 -0
xspect/xspect-web/src/components/models-details.tsx +138 -0
xspect/xspect-web/src/components/models.tsx +53 -0
xspect/xspect-web/src/components/result-chart.tsx +44 -0
xspect/xspect-web/src/components/result.tsx +155 -0
xspect/xspect-web/src/components/spinner.tsx +30 -0
xspect/xspect-web/src/components/ui/accordion.tsx +64 -0
xspect/xspect-web/src/components/ui/button.tsx +59 -0
xspect/xspect-web/src/components/ui/card.tsx +92 -0
xspect/xspect-web/src/components/ui/chart.tsx +351 -0
xspect/xspect-web/src/components/ui/command.tsx +175 -0
xspect/xspect-web/src/components/ui/dialog.tsx +135 -0
xspect/xspect-web/src/components/ui/dropdown-menu.tsx +255 -0
xspect/xspect-web/src/components/ui/file-upload.tsx +1459 -0
xspect/xspect-web/src/components/ui/form.tsx +165 -0
xspect/xspect-web/src/components/ui/input.tsx +21 -0
xspect/xspect-web/src/components/ui/label.tsx +24 -0
xspect/xspect-web/src/components/ui/navigation-menu.tsx +168 -0
xspect/xspect-web/src/components/ui/popover.tsx +46 -0
xspect/xspect-web/src/components/ui/select.tsx +183 -0
xspect/xspect-web/src/components/ui/separator.tsx +26 -0
xspect/xspect-web/src/components/ui/slider.tsx +61 -0
xspect/xspect-web/src/components/ui/switch.tsx +29 -0
xspect/xspect-web/src/components/ui/table.tsx +113 -0
xspect/xspect-web/src/components/ui/tabs.tsx +64 -0
xspect/xspect-web/src/index.css +120 -0
xspect/xspect-web/src/lib/utils.ts +6 -0
xspect/xspect-web/src/main.tsx +10 -0
xspect/xspect-web/src/types.tsx +34 -0
xspect/xspect-web/src/utils.tsx +6 -0
xspect/xspect-web/src/vite-env.d.ts +1 -0
xspect/xspect-web/tsconfig.app.json +32 -0
xspect/xspect-web/tsconfig.json +13 -0
xspect/xspect-web/tsconfig.node.json +24 -0
xspect/xspect-web/vite.config.ts +24 -0
{xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/METADATA +6 -8
xspect-0.5.0.dist-info/RECORD +85 -0
{xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/WHEEL +1 -1
xspect/fastapi.py +0 -102
xspect-0.4.1.dist-info/RECORD +0 -24
{xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/entry_points.txt +0 -0
{xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/licenses/LICENSE +0 -0
{xspect-0.4.1.dist-info → xspect-0.5.0.dist-info}/top_level.txt +0 -0

xspect/classify.py ADDED Viewed

@@ -0,0 +1,32 @@
+from pathlib import Path
+from xspect.mlst_feature.mlst_helper import pick_scheme_from_models_dir
+import xspect.model_management as mm
+from xspect.models.probabilistic_filter_mlst_model import (
+    ProbabilisticFilterMlstSchemeModel,
+)
+def classify_genus(
+    model_genus: str, input_path: Path, output_path: Path, step: int = 1
+):
+    """Classify the input file using the genus model."""
+    model = mm.get_genus_model(model_genus)
+    result = model.predict(input_path, step=step)
+    result.input_source = input_path.name
+    result.save(output_path)
+def classify_species(model_genus, input_path, output_path, step=1):
+    """Classify the input file using the species model."""
+    model = mm.get_species_model(model_genus)
+    result = model.predict(input_path, step=step)
+    result.input_source = input_path.name
+    result.save(output_path)
+def classify_mlst(input_path, output_path):
+    """Classify the input file using the MLST model."""
+    scheme_path = pick_scheme_from_models_dir()
+    model = ProbabilisticFilterMlstSchemeModel.load(scheme_path)
+    result = model.predict(scheme_path, input_path)
+    result.save(output_path)

xspect/file_io.py CHANGED Viewed

@@ -20,17 +20,11 @@ def delete_zip_files(dir_path):
 def extract_zip(zip_path: Path, unzipped_path: Path):
-    """Extracts all files from a directory with zip files."""
-    # Make new directory.
+    """Extracts all files from a zip file."""
     unzipped_path.mkdir(parents=True, exist_ok=True)
-    file_names = os.listdir(zip_path)
-    for file in file_names:
-        file_path = zip_path / file
-        if zipfile.is_zipfile(file_path):
-            with zipfile.ZipFile(file_path) as item:
-                directory = unzipped_path / file.replace(".zip", "")
-                item.extractall(directory)
+    with zipfile.ZipFile(zip_path) as item:
+        item.extractall(unzipped_path)
 def concatenate_meta(path: Path, genus: str):

xspect/filter_sequences.py ADDED Viewed

@@ -0,0 +1,56 @@
+from pathlib import Path
+from xspect.model_management import get_genus_model, get_species_model
+from xspect.file_io import filter_sequences
+def filter_species(
+    model_genus: str,
+    model_species: str,
+    input_path: Path,
+    output_path: Path,
+    threshold: float,
+):
+    """Filter sequences by species.
+    This function filters sequences from the input file based on the species model.
+    It uses the genus model to identify the genus of the sequences and then applies
+    the species model to filter the sequences.
+    Args:
+        model_genus (str): The genus model slug.
+        model_species (str): The species model slug.
+        input_path (Path): The path to the input file containing sequences.
+        output_path (Path): The path to the output file where filtered sequences will be saved.
+        threshold (float): The threshold for filtering sequences. Only sequences with a score
+            above this threshold will be included in the output file.
+    """
+    species_model = get_species_model(model_genus)
+    result = species_model.predict(input_path)
+    included_ids = result.get_filtered_subsequence_labels(model_species, threshold)
+    if not included_ids:
+        print("No sequences found for the given species.")
+        return
+    filter_sequences(
+        input_path,
+        output_path,
+        included_ids,
+    )
+def filter_genus(
+    model_genus: str,
+    input_path: Path,
+    output_path: Path,
+    threshold: float,
+):
+    genus_model = get_genus_model(model_genus)
+    result = genus_model.predict(Path(input_path))
+    included_ids = result.get_filtered_subsequence_labels(model_genus, threshold)
+    if not included_ids:
+        print("No sequences found for the given genus.")
+        return
+    filter_sequences(
+        input_path,
+        output_path,
+        included_ids,
+    )

xspect/main.py CHANGED Viewed

@@ -4,7 +4,8 @@ from pathlib import Path
 from uuid import uuid4
 import click
 import uvicorn
-from xspect import fastapi
+from xspect import classify
+from xspect.web import app
 from xspect.download_models import download_test_models
 from xspect.file_io import filter_sequences
 from xspect.train import train_from_directory, train_from_ncbi
@@ -33,7 +34,7 @@ def cli():
 @cli.command()
 def web():
     """Open the XspecT web application."""
-    uvicorn.run(fastapi.app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
 # # # # # # # # # # # # # # #
@@ -50,7 +51,7 @@ def models():
 def download():
     """Download models."""
     click.echo("Downloading models, this may take a while...")
-    download_test_models("http://assets.adrianromberg.com/xspect-models.zip")
+    download_test_models("http://assets.adrianromberg.com/ake/xspect-models.zip")
 @models.command(
@@ -201,7 +202,7 @@ def classify_seqs():
     "--genus",
     "model_genus",
     help="Genus of the model to classify.",
-    type=click.Choice(get_models().get("Genus"), None),
+    type=click.Choice(get_models().get("Genus", [])),
     prompt=True,
 )
 @click.option(
@@ -221,9 +222,7 @@ def classify_seqs():
 def classify_genus(model_genus, input_path, output_path):
     """Classify samples using a genus model."""
     click.echo("Classifying...")
-    genus_model = get_genus_model(model_genus)
-    result = genus_model.predict(Path(input_path))
-    result.save(output_path)
+    classify.classify_genus(model_genus, Path(input_path), Path(output_path))
     click.echo(f"Result saved as {output_path}.")
@@ -236,7 +235,7 @@ def classify_genus(model_genus, input_path, output_path):
     "--genus",
     "model_genus",
     help="Genus of the model to classify.",
-    type=click.Choice(get_models().get("Species"), None),
+    type=click.Choice(get_models().get("Species", [])),
     prompt=True,
 )
 @click.option(
@@ -262,9 +261,9 @@ def classify_genus(model_genus, input_path, output_path):
 def classify_species(model_genus, input_path, output_path, sparse_sampling_step):
     """Classify samples using a species model."""
     click.echo("Classifying...")
-    species_model = get_species_model(model_genus)
-    result = species_model.predict(Path(input_path), step=sparse_sampling_step)
-    result.save(output_path)
+    classify.classify_species(
+        model_genus, Path(input_path), Path(output_path), sparse_sampling_step
+    )
     click.echo(f"Result saved as {output_path}.")
@@ -289,11 +288,7 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
 def classify_mlst(input_path, output_path):
     """MLST classify a sample."""
     click.echo("Classifying...")
-    input_path = Path(input_path)
-    scheme_path = pick_scheme_from_models_dir()
-    model = ProbabilisticFilterMlstSchemeModel.load(scheme_path)
-    result = model.predict(scheme_path, input_path)
-    result.save(output_path)
+    classify.classify_mlst(Path(input_path), Path(output_path))
     click.echo(f"Result saved as {output_path}.")
@@ -317,7 +312,7 @@ def filter_seqs():
     "--genus",
     "model_genus",
     help="Genus of the model to use for filtering.",
-    type=click.Choice(get_models().get("Species"), None),
+    type=click.Choice(get_models().get("Species", [])),
     prompt=True,
 )
 @click.option(
@@ -368,7 +363,7 @@ def filter_genus(model_genus, input_path, output_path, threshold):
     "--genus",
     "model_genus",
     help="Genus of the model to use for filtering.",
-    type=click.Choice(get_models().get("Species"), None),
+    type=click.Choice(get_models().get("Species", [])),
     prompt=True,
 )
 @click.option(

xspect/mlst_feature/mlst_helper.py CHANGED Viewed

@@ -7,11 +7,22 @@ import json
 from io import StringIO
 from pathlib import Path
 from Bio import SeqIO
-from xspect.definitions import get_xspect_model_path, get_xspect_runs_path
+from xspect.definitions import get_xspect_model_path
-def create_fasta_files(locus_path: Path, fasta_batch: str):
-    """Create Fasta-Files for every allele of a locus."""
+def create_fasta_files(locus_path: Path, fasta_batch: str) -> None:
+    """
+    Create Fasta-Files for every allele of a locus.
+    This function creates a fasta file for each record in the batch-string of a locus.
+    The batch originates from an API-GET-request to PubMLST.
+    The files are named after the record ID.
+    If a fasta file already exists, it will be skipped.
+    Args:
+        locus_path (Path): The directory where the fasta-files will be saved.
+        fasta_batch (str): A string containing every record of a locus from PubMLST.
+    """
     # fasta_batch = full string of a fasta file containing every allele sequence of a locus
     for record in SeqIO.parse(StringIO(fasta_batch), "fasta"):
         number = record.id.split("_")[-1]  # example id = Oxf_cpn60_263
@@ -23,7 +34,21 @@ def create_fasta_files(locus_path: Path, fasta_batch: str):
 def pick_species_number_from_db(available_species: dict) -> str:
-    """Returns the chosen species from all available ones in the database."""
+    """
+    Get the chosen species from all available ones in the database.
+    This function lists all available species of PubMLST.
+    The user is then asked to pick a species by its associated number.
+    Args:
+        available_species (dict): A dictionary storing all available species.
+    Returns:
+        str: The name of the chosen species.
+    Raises:
+        ValueError: If the user input is not valid.
+    """
     # The "database" string can look like this: pubmlst_abaumannii_seqdef
     for counter, database in available_species.items():
         print(str(counter) + ":" + database.split("_")[1])
@@ -45,7 +70,21 @@ def pick_species_number_from_db(available_species: dict) -> str:
 def pick_scheme_number_from_db(available_schemes: dict) -> str:
-    """Returns the chosen schemes from all available ones of a species."""
+    """
+    Get the chosen scheme from all available ones of a species.
+    This function lists all available schemes of a species.
+    The user is then asked to pick a scheme by its associated number.
+    Args:
+        available_schemes (dict): A dictionary storing all available schemes.
+    Returns:
+        str: The name of the chosen scheme.
+    Raises:
+        ValueError: If the user input is not valid.
+    """
     # List all available schemes of a species database
     for counter, scheme in available_schemes.items():
         print(str(counter) + ":" + scheme[0])
@@ -67,12 +106,28 @@ def pick_scheme_number_from_db(available_schemes: dict) -> str:
 def scheme_list_to_dict(scheme_list: list[str]):
-    """Converts the scheme list attribute into a dictionary with a number as the key."""
+    """
+    Converts the scheme list into a dictionary.
+    Args:
+        scheme_list (list[str]): A list storing all chosen schemes.
+    Returns:
+        dict: The converted dictionary.
+    """
     return dict(zip(range(1, len(scheme_list) + 1), scheme_list))
 def pick_scheme_from_models_dir() -> Path:
-    """Returns the chosen scheme from models that have been fitted prior."""
+    """
+    Get the chosen scheme from models that have been fitted prior.
+    This function creates a dictionary containing all trained models.
+    The dictionary is used as an argument for the "pick_scheme" function.
+    Returns:
+        Path: The path to the chosen model (trained).
+    """
     schemes = {}
     counter = 1
     for entry in sorted((get_xspect_model_path() / "MLST").iterdir()):
@@ -82,7 +137,21 @@ def pick_scheme_from_models_dir() -> Path:
 def pick_scheme(available_schemes: dict) -> Path:
-    """Returns the chosen scheme from the scheme list."""
+    """
+    Get the chosen scheme from the scheme dictionary.
+    This function lists all available schemes of a species that have been downloaded.
+    The user is then asked to pick a scheme by its associated number.
+    Args:
+        available_schemes (dict): A dictionary storing all available schemes.
+    Returns:
+        Path: The path to the chosen model (trained).
+    Raises:
+        ValueError: If the user input is not valid or if no scheme was downloaded prior.
+    """
     if not available_schemes:
         raise ValueError("No scheme has been chosen for download yet!")
@@ -118,7 +187,7 @@ def pick_scheme(available_schemes: dict) -> Path:
 class MlstResult:
-    """Class for storing mlst results."""
+    """Class for storing MLST results."""
     def __init__(
         self,
@@ -126,17 +195,28 @@ class MlstResult:
         steps: int,
         hits: dict[str, list[dict]],
     ):
+        """Initialise an MlstResult object."""
         self.scheme_model = scheme_model
         self.steps = steps
         self.hits = hits
     def get_results(self) -> dict:
-        """Stores the result of a prediction in a dictionary."""
+        """
+        Stores the result of a prediction in a dictionary.
+        Returns:
+            dict: The result dictionary with s sequence ID as key and the Strain type as value.
+        """
         results = {seq_id: result for seq_id, result in self.hits.items()}
         return results
     def to_dict(self) -> dict:
-        """Converts all attributes into one dictionary."""
+        """
+        Converts all attributes into one dictionary.
+        Returns:
+            dict: The dictionary containing all metadata of a run.
+        """
         result = {
             "Scheme": self.scheme_model,
             "Steps": self.steps,
@@ -144,8 +224,17 @@ class MlstResult:
         }
         return result
-    def save(self, output_path: Path) -> None:
-        """Saves the result as a JSON file."""
+    def save(self, output_path: Path | str) -> None:
+        """
+        Saves the result as a JSON file.
+        Args:
+            output_path (Path,str): The path where the results are saved.
+        """
+        if isinstance(output_path, str):
+            output_path = Path(output_path)
         output_path.parent.mkdir(exist_ok=True, parents=True)
         json_object = json.dumps(self.to_dict(), indent=4)

xspect/mlst_feature/pub_mlst_handler.py CHANGED Viewed

@@ -20,6 +20,7 @@ class PubMLSTHandler:
     base_url = "http://rest.pubmlst.org/db"
     def __init__(self):
+        """Initialise a PubMLSTHandler object."""
         # Default values: Oxford (1) and Pasteur (2) schemes of A.baumannii species
         self.scheme_list = [
             self.base_url + "/pubmlst_abaumannii_seqdef/schemes/1",
@@ -28,11 +29,21 @@ class PubMLSTHandler:
         self.scheme_paths = []
     def get_scheme_paths(self) -> dict:
-        """Returns the scheme paths in a dictionary"""
+        """
+        Get the scheme paths in a dictionary.
+        Returns:
+            dict: A dictionary containing the scheme paths.
+        """
         return scheme_list_to_dict(self.scheme_paths)
     def choose_schemes(self) -> None:
-        """Changes the scheme list attribute to feature other schemes from some species"""
+        """
+        Changes the scheme list attribute to feature other schemes from another species.
+        This function lets the user pick schemes to download all alleles that belong to it.
+        The scheme has to be available in the database.
+        """
         available_species = {}
         available_schemes = {}
         chosen_schemes = []
@@ -70,8 +81,17 @@ class PubMLSTHandler:
                 break
         self.scheme_list = chosen_schemes
-    def download_alleles(self, choice: False):
-        """Downloads every allele FASTA-file from all loci of the scheme list attribute"""
+    def download_alleles(self, choice: False) -> None:
+        """
+        Downloads every allele FASTA-file from all loci of the scheme list attribute.
+        This function sends API-GET requests to PubMLST.
+        It downloads all alleles based on the scheme_list attribute.
+        The default schemes are the Oxford and Pasteur schemes of A.baumannii
+        Args:
+            choice (bool): The decision to download different schemes, defaults to False.
+        """
         if choice:  # pick an own scheme if not Oxford or Pasteur
             self.choose_schemes()  # changes the scheme_list attribute
@@ -98,8 +118,14 @@ class PubMLSTHandler:
                 alleles = requests.get(f"{locus_url}/alleles_fasta").text
                 create_fasta_files(locus_path, alleles)
-    def assign_strain_type_by_db(self):
-        """Sends an API-POST-Request to the database for MLST without bloom filters"""
+    def assign_strain_type_by_db(self) -> None:
+        """
+        Sends an API-POST-Request to the database for MLST without bloom filters.
+        This function sends API-POST requests to PubMLST.
+        It is a different way to determine strain types based on a BLAST-Search.
+        This function is only used for testing and comparing results.
+        """
         scheme_url = (
             str(pick_scheme(scheme_list_to_dict(self.scheme_list))) + "/sequence"
         )

XspecT 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

Potentially problematic release.

XspecT 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl