PyPI - XspecT - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

XspecT 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of XspecT might be problematic. Click here for more details.

Files changed (33) hide show

xspect/classify.py +51 -38
xspect/definitions.py +50 -10
xspect/download_models.py +10 -2
xspect/file_io.py +115 -48
xspect/filter_sequences.py +36 -66
xspect/main.py +44 -11
xspect/mlst_feature/mlst_helper.py +3 -0
xspect/mlst_feature/pub_mlst_handler.py +43 -1
xspect/model_management.py +84 -14
xspect/models/probabilistic_filter_mlst_model.py +75 -37
xspect/models/probabilistic_filter_model.py +194 -12
xspect/models/probabilistic_filter_svm_model.py +99 -6
xspect/models/probabilistic_single_filter_model.py +66 -5
xspect/models/result.py +77 -10
xspect/ncbi.py +48 -12
xspect/train.py +2 -1
xspect/web.py +71 -13
xspect/xspect-web/dist/assets/index-Ceo58xui.css +1 -0
xspect/xspect-web/dist/assets/{index-CMG4V7fZ.js → index-Dt_UlbgE.js} +82 -77
xspect/xspect-web/dist/index.html +2 -2
xspect/xspect-web/src/App.tsx +4 -2
xspect/xspect-web/src/api.tsx +23 -1
xspect/xspect-web/src/components/filter-form.tsx +16 -3
xspect/xspect-web/src/components/filtering-result.tsx +65 -0
xspect/xspect-web/src/components/result.tsx +2 -2
xspect/xspect-web/src/types.tsx +5 -0
{xspect-0.5.1.dist-info → xspect-0.5.3.dist-info}/METADATA +1 -1
{xspect-0.5.1.dist-info → xspect-0.5.3.dist-info}/RECORD +32 -31
{xspect-0.5.1.dist-info → xspect-0.5.3.dist-info}/WHEEL +1 -1
xspect/xspect-web/dist/assets/index-jIKg1HIy.css +0 -1
{xspect-0.5.1.dist-info → xspect-0.5.3.dist-info}/entry_points.txt +0 -0
{xspect-0.5.1.dist-info → xspect-0.5.3.dist-info}/licenses/LICENSE +0 -0
{xspect-0.5.1.dist-info → xspect-0.5.3.dist-info}/top_level.txt +0 -0

xspect/main.py CHANGED Viewed

@@ -18,10 +18,8 @@ from xspect.models.probabilistic_filter_mlst_model import (
     ProbabilisticFilterMlstSchemeModel,
 )
 from xspect.model_management import (
-    get_genus_model,
     get_model_metadata,
     get_models,
-    get_species_model,
 )
@@ -51,7 +49,9 @@ def models():
 def download():
     """Download models."""
     click.echo("Downloading models, this may take a while...")
-    download_test_models("http://assets.adrianromberg.com/ake/xspect-models.zip")
+    download_test_models(
+        "https://assets.adrianromberg.com/science/xspect-models-07-08-2025.zip"
+    )
 @models.command(
@@ -173,8 +173,9 @@ def train_mlst(choose_schemes):
     scheme_path = pick_scheme(handler.get_scheme_paths())
     species_name = str(scheme_path).split("/")[-2]
     scheme_name = str(scheme_path).split("/")[-1]
+    scheme_url = handler.scheme_mapping[str(scheme_path)]
     model = ProbabilisticFilterMlstSchemeModel(
-        31, f"{species_name}:{scheme_name}", get_xspect_model_path()
+        31, f"{species_name}:{scheme_name}", get_xspect_model_path(), scheme_url
     )
     click.echo("Creating mlst model")
     model.fit(scheme_path)
@@ -220,10 +221,18 @@ def classify_seqs():
     type=click.Path(dir_okay=False, file_okay=True),
     default=Path(".") / f"result_{uuid4()}.json",
 )
-def classify_genus(model_genus, input_path, output_path):
+@click.option(
+    "--sparse-sampling-step",
+    type=int,
+    help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+    default=1,
+)
+def classify_genus(model_genus, input_path, output_path, sparse_sampling_step):
     """Classify samples using a genus model."""
     click.echo("Classifying...")
-    classify.classify_genus(model_genus, Path(input_path), Path(output_path))
+    classify.classify_genus(
+        model_genus, Path(input_path), Path(output_path), sparse_sampling_step
+    )
 @classify_seqs.command(
@@ -275,20 +284,24 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
     "-i",
     "--input-path",
     help="Path to FASTA-file for mlst identification.",
-    type=click.Path(exists=True, dir_okay=False, file_okay=True),
+    type=click.Path(exists=True, dir_okay=True, file_okay=True),
     prompt=True,
+    default=Path("."),
 )
 @click.option(
     "-o",
     "--output-path",
     help="Path to the output file.",
     type=click.Path(dir_okay=False, file_okay=True),
+    default=Path(".") / f"MLST_result_{uuid4()}.json",
 )
-def classify_mlst(input_path, output_path):
+@click.option(
+    "-l", "--limit", is_flag=True, help="Limit the output to 5 results for each locus."
+)
+def classify_mlst(input_path, output_path, limit):
     """MLST classify a sample."""
     click.echo("Classifying...")
-    classify.classify_mlst(Path(input_path), Path(output_path))
-    click.echo(f"Result saved as {output_path}.")
+    classify.classify_mlst(Path(input_path), Path(output_path), limit)
 # # # # # # # # # # # # # # #
@@ -343,8 +356,19 @@ def filter_seqs():
     default=0.7,
     prompt=True,
 )
+@click.option(
+    "--sparse-sampling-step",
+    type=int,
+    help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+    default=1,
+)
 def filter_genus(
-    model_genus, input_path, output_path, classification_output_path, threshold
+    model_genus,
+    input_path,
+    output_path,
+    classification_output_path,
+    threshold,
+    sparse_sampling_step,
 ):
     """Filter samples using a genus model."""
     click.echo("Filtering...")
@@ -355,6 +379,7 @@ def filter_genus(
         Path(output_path),
         threshold,
         Path(classification_output_path) if classification_output_path else None,
+        sparse_sampling_step=sparse_sampling_step,
     )
@@ -405,6 +430,12 @@ def filter_genus(
     default=0.7,
     prompt=True,
 )
+@click.option(
+    "--sparse-sampling-step",
+    type=int,
+    help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
+    default=1,
+)
 def filter_species(
     model_genus,
     model_species,
@@ -412,6 +443,7 @@ def filter_species(
     output_path,
     threshold,
     classification_output_path,
+    sparse_sampling_step,
 ):
     """Filter a sample using the species model."""
@@ -451,6 +483,7 @@ def filter_species(
         Path(output_path),
         threshold,
         Path(classification_output_path) if classification_output_path else None,
+        sparse_sampling_step=sparse_sampling_step,
     )

xspect/mlst_feature/mlst_helper.py CHANGED Viewed

@@ -194,11 +194,13 @@ class MlstResult:
         scheme_model: str,
         steps: int,
         hits: dict[str, list[dict]],
+        input_source: str = None,
     ):
         """Initialise an MlstResult object."""
         self.scheme_model = scheme_model
         self.steps = steps
         self.hits = hits
+        self.input_source = input_source
     def get_results(self) -> dict:
         """
@@ -221,6 +223,7 @@ class MlstResult:
             "Scheme": self.scheme_model,
             "Steps": self.steps,
             "Results": self.get_results(),
+            "Input_source": self.input_source,
         }
         return result

xspect/mlst_feature/pub_mlst_handler.py CHANGED Viewed

@@ -17,7 +17,7 @@ from xspect.definitions import get_xspect_mlst_path, get_xspect_upload_path
 class PubMLSTHandler:
     """Class for communicating with PubMLST and downloading alleles (FASTA-Format) from all loci."""
-    base_url = "http://rest.pubmlst.org/db"
+    base_url = "https://rest.pubmlst.org/db"
     def __init__(self):
         """Initialise a PubMLSTHandler object."""
@@ -27,6 +27,7 @@ class PubMLSTHandler:
             self.base_url + "/pubmlst_abaumannii_seqdef/schemes/2",
         ]
         self.scheme_paths = []
+        self.scheme_mapping = {}
     def get_scheme_paths(self) -> dict:
         """
@@ -103,6 +104,7 @@ class PubMLSTHandler:
             species_name = scheme.split("_")[1]  # name = pubmlst_abaumannii_seqdef
             scheme_path = get_xspect_mlst_path() / species_name / scheme_name
+            self.scheme_mapping[str(scheme_path)] = scheme
             self.scheme_paths.append(scheme_path)
             for locus_url in locus_list:
@@ -143,3 +145,43 @@ class PubMLSTHandler:
             # Example: 'Pas_fusA': [{'href': some URL, 'allele_id': '2'}]
             print(locus + ":" + meta_data[0]["allele_id"], end="; ")
         print("\nStrain Type:", response["fields"])
+    def get_strain_type_name(self, highest_results: dict, post_url: str) -> str:
+        """
+        Send an API-POST request to PubMLST with the highest result of each locus as payload.
+        This function formats the highest_result dict into an accepted input for the request.
+        It gets a response from the site which is the strain type name.
+        The name is based on the allele id with the highest score for each locus.
+        Example of post_url for the oxford scheme of A.baumannii:
+        https://rest.pubmlst.org/db/pubmlst_abaumannii_seqdef/schemes/1/designations
+        Args:
+            highest_results (dict): The allele ids with the highest kmer matches.
+            post_url (str): The specific url for the scheme of a species
+        Returns:
+            str: The response (ST name or No ST found) of the POST request.
+        """
+        payload = {
+            "designations": {
+                locus: [{"allele": str(allele)}]
+                for locus, allele in highest_results.items()
+            }
+        }
+        response = requests.post(post_url + "/designations", json=payload)
+        if response.status_code == 200:
+            data = response.json()
+            if "fields" in data:
+                post_response = data["fields"]
+                return post_response
+            else:
+                post_response = "No matching Strain Type found in the database. "
+                post_response += "Possibly a novel Strain Type."
+                return post_response
+        else:
+            post_response = "Error:" + str(response.status_code)
+            post_response += response.text
+            return post_response

xspect/model_management.py CHANGED Viewed

@@ -9,22 +9,55 @@ from xspect.models.probabilistic_filter_svm_model import ProbabilisticFilterSVMM
 from xspect.definitions import get_xspect_model_path
-def get_genus_model(genus):
-    """Get a metagenomic model for the specified genus."""
+def get_genus_model(genus) -> ProbabilisticSingleFilterModel:
+    """
+    Get a genus model for the specified genus.
+    This function retrieves a pre-trained genus classification model based on the provided genus name.
+    Args:
+        genus (str): The genus name for which the model is to be retrieved.
+    Returns:
+        ProbabilisticSingleFilterModel: An instance of the genus classification model.
+    """
     genus_model_path = get_xspect_model_path() / (genus.lower() + "-genus.json")
     genus_filter_model = ProbabilisticSingleFilterModel.load(genus_model_path)
     return genus_filter_model
-def get_species_model(genus):
-    """Get a species classification model for the specified genus."""
+def get_species_model(genus) -> ProbabilisticFilterSVMModel:
+    """
+    Get a species classification model for the specified genus.
+    This function retrieves a pre-trained species classification model based on the provided genus name.
+    Args:
+        genus (str): The genus name for which the species model is to be retrieved.
+    Returns:
+        ProbabilisticFilterSVMModel: An instance of the species classification model.
+    """
     species_model_path = get_xspect_model_path() / (genus.lower() + "-species.json")
     species_filter_model = ProbabilisticFilterSVMModel.load(species_model_path)
     return species_filter_model
-def get_model_metadata(model: str | Path):
-    """Get the metadata of a model."""
+def get_model_metadata(model: str | Path) -> dict:
+    """
+    Get metadata of a specified model.
+    This function retrieves the metadata of a model from its JSON file.
+    Args:
+        model (str | Path): The slug of the model (as a string) or the path to the model JSON file.
+    Returns:
+        dict: A dictionary containing the model metadata.
+    Raises:
+        ValueError: If the model does not exist or is not a valid file.
+    """
     if isinstance(model, str):
         model_path = get_xspect_model_path() / (model.lower() + ".json")
     elif isinstance(model, Path):
@@ -40,8 +73,17 @@ def get_model_metadata(model: str | Path):
         return model_json
-def update_model_metadata(model_slug: str, author: str, author_email: str):
-    """Update the metadata of a model."""
+def update_model_metadata(model_slug: str, author: str, author_email: str) -> None:
+    """
+    Update the metadata of a model.
+    This function updates the author and author email in the model's metadata JSON file.
+    Args:
+        model_slug (str): The slug of the model to update.
+        author (str): The name of the author to set in the metadata.
+        author_email (str): The email of the author to set in the metadata.
+    """
     model_metadata = get_model_metadata(model_slug)
     model_metadata["author"] = author
     model_metadata["author_email"] = author_email
@@ -51,8 +93,19 @@ def update_model_metadata(model_slug: str, author: str, author_email: str):
         file.write(dumps(model_metadata, indent=4))
-def update_model_display_name(model_slug: str, filter_id: str, display_name: str):
-    """Update the display name of a filter in a model."""
+def update_model_display_name(
+    model_slug: str, filter_id: str, display_name: str
+) -> None:
+    """
+    Update the display name of a filter in a model.
+    This function updates the display name of a specific filter in the model's metadata JSON file.
+    Args:
+        model_slug (str): The slug of the model to update.
+        filter_id (str): The ID of the filter whose display name is to be updated.
+        display_name (str): The new display name for the filter.
+    """
     model_metadata = get_model_metadata(model_slug)
     model_metadata["display_names"][filter_id] = display_name
@@ -61,8 +114,15 @@ def update_model_display_name(model_slug: str, filter_id: str, display_name: str
         file.write(dumps(model_metadata, indent=4))
-def get_models():
-    """Get a list of all available models in a dictionary by type."""
+def get_models() -> dict[str, list[dict]]:
+    """
+    Get a list of all available models in a dictionary by type.
+    This function scans the model directory for JSON files and organizes them by their model type.
+    Returns:
+        dict[str, list[dict]]: A dictionary where keys are model types and values are lists of model display names.
+    """
     model_dict = {}
     for model_file in get_xspect_model_path().glob("*.json"):
         model_metadata = get_model_metadata(model_file)
@@ -73,7 +133,17 @@ def get_models():
     return model_dict
-def get_model_display_names(model_slug: str):
-    """Get the display names included in a model."""
+def get_model_display_names(model_slug: str) -> list[str]:
+    """
+    Get the display names included in a model.
+    This function retrieves the display names of individual filters from the model's metadata.
+    Args:
+        model_slug (str): The slug of the model for which to retrieve display names.
+    Returns:
+        list[str]: A list of display names for the individual filters in the model.
+    """
     model_metadata = get_model_metadata(model_slug)
     return list(model_metadata["display_names"].values())

xspect/models/probabilistic_filter_mlst_model.py CHANGED Viewed

@@ -12,6 +12,7 @@ from cobs_index import DocumentList
 from collections import defaultdict
 from xspect.file_io import get_record_iterator
 from xspect.mlst_feature.mlst_helper import MlstResult
+from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
 class ProbabilisticFilterMlstSchemeModel:
@@ -19,20 +20,22 @@ class ProbabilisticFilterMlstSchemeModel:
     def __init__(
         self,
-        k: int,
-        model_display_name: str,
+        k_value: int,
+        model_name: str,
         base_path: Path,
+        scheme_url: str,
         fpr: float = 0.001,
     ) -> None:
         """Initialise a ProbabilisticFilterMlstSchemeModel object."""
-        if k < 1:
+        if k_value < 1:
             raise ValueError("Invalid k value, must be greater than 0")
         if not isinstance(base_path, Path):
             raise ValueError("Invalid base path, must be a pathlib.Path object")
-        self.k = k
-        self.model_display_name = model_display_name
+        self.k_value = k_value
+        self.model_name = model_name
         self.base_path = base_path / "MLST"
+        self.scheme_url = scheme_url
         self.fpr = fpr
         self.model_type = "Strain"
         self.loci = {}
@@ -49,9 +52,10 @@ class ProbabilisticFilterMlstSchemeModel:
             dict: The dictionary containing all metadata of an object.
         """
         return {
-            "k": self.k,
-            "model_display_name": self.model_display_name,
+            "k_value": self.k_value,
+            "model_name": self.model_name,
             "model_type": self.model_type,
+            "scheme_url": str(self.scheme_url),
             "fpr": self.fpr,
             "scheme_path": str(self.scheme_path),
             "cobs_path": str(self.cobs_path),
@@ -115,7 +119,7 @@ class ProbabilisticFilterMlstSchemeModel:
             # COBS only accepts strings as paths
             doclist = DocumentList(str(locus_path))
             index_params = cobs_index.CompactIndexParameters()
-            index_params.term_size = self.k  # k-mer size
+            index_params.term_size = self.k_value  # k-mer size
             index_params.clobber = True  # overwrite output and temporary files
             index_params.false_positive_rate = self.fpr
@@ -130,9 +134,7 @@ class ProbabilisticFilterMlstSchemeModel:
     def save(self) -> None:
         """Saves the model to disk"""
-        scheme = str(self.scheme_path).split("/")[
-            -1
-        ]  # [-1] -> contains the scheme name
+        scheme = str(self.scheme_path).split("/")[-1]  # [-1] contains the scheme name
         json_path = self.base_path / scheme / f"{scheme}.json"
         json_object = json.dumps(self.to_dict(), indent=4)
@@ -156,9 +158,10 @@ class ProbabilisticFilterMlstSchemeModel:
             json_object = file.read()
             model_json = json.loads(json_object)
             model = ProbabilisticFilterMlstSchemeModel(
-                model_json["k"],
-                model_json["model_display_name"],
+                model_json["k_value"],
+                model_json["model_name"],
                 json_path.parent,
+                model_json["scheme_url"],
                 model_json["fpr"],
             )
             model.scheme_path = model_json["scheme_path"]
@@ -175,7 +178,12 @@ class ProbabilisticFilterMlstSchemeModel:
             return model
     def calculate_hits(
-        self, cobs_path: Path, sequence: Seq, step: int = 1
+        self,
+        cobs_path: Path,
+        sequence: Seq,
+        step: int = 1,
+        limit: bool = False,
+        limit_number: int = 5,
     ) -> list[dict]:
         """
         Calculates the hits for a sequence.
@@ -189,6 +197,8 @@ class ProbabilisticFilterMlstSchemeModel:
             cobs_path (Path): The path of the COBS-structure directory.
             sequence (Seq): The input sequence for classification.
             step (int, optional): The amount of kmers that are passed; defaults to one.
+            limit (bool): Applying a filter that limits the best result.
+            limit_number (int): The amount of results when the filter is set to true.
         Returns:
             list[dict]: The results of the prediction.
@@ -201,7 +211,7 @@ class ProbabilisticFilterMlstSchemeModel:
         if not isinstance(sequence, Seq):
             raise ValueError("Invalid sequence, must be a Bio.Seq object")
-        if not len(sequence) > self.k:
+        if not len(sequence) > self.k_value:
             raise ValueError("Invalid sequence, must be longer than k")
         if not self.indices:
@@ -239,6 +249,10 @@ class ProbabilisticFilterMlstSchemeModel:
                 sorted_counts = dict(
                     sorted(all_counts.items(), key=lambda item: -item[1])
                 )
+                if limit:
+                    sorted_counts = dict(list(sorted_counts.items())[:limit_number])
                 if not sorted_counts:
                     result_dict = "A Strain type could not be detected because of no kmer matches!"
                     highest_results[scheme_path_list[counter]] = {"N/A": 0}
@@ -250,25 +264,37 @@ class ProbabilisticFilterMlstSchemeModel:
                         first_key: highest_result
                     }
                 counter += 1
-        else:
+        else:  # No split procedure is needed, when the sequence is short
             for index in self.indices:
-                res = index.search(
+                res = index.search(  # COBS can't handle Seq-Objects
                     str(sequence), step=step
-                )  # COBS can't handle Seq-Objects
-                result_dict[scheme_path_list[counter]] = self.get_cobs_result(
-                    res, False
                 )
-                first_key, highest_result = next(
-                    iter(result_dict[scheme_path_list[counter]].items())
+                result = self.get_cobs_result(res, False)
+                result = (
+                    dict(sorted(result.items(), key=lambda x: -x[1])[:limit_number])
+                    if limit
+                    else result
                 )
+                result_dict[scheme_path_list[counter]] = result
+                first_key, highest_result = next(iter(result.items()))
                 highest_results[scheme_path_list[counter]] = {first_key: highest_result}
                 counter += 1
         # check if the strain type has sufficient amount of kmer hits
         is_valid = self.has_sufficient_score(highest_results, self.avg_locus_bp_size)
         if not is_valid:
             highest_results["Attention:"] = (
                 "This strain type is not reliable due to low kmer hit rates!"
             )
+        else:
+            handler = PubMLSTHandler()
+            # allele_id is of type dict
+            flattened = {
+                locus: int(list(allele_id.keys())[0].split("_")[-1])
+                for locus, allele_id in highest_results.items()
+            }
+            strain_type_name = handler.get_strain_type_name(flattened, self.scheme_url)
+            highest_results["ST_Name"] = strain_type_name
         return [{"Strain type": highest_results}, {"All results": result_dict}]
     def predict(
@@ -282,6 +308,7 @@ class ProbabilisticFilterMlstSchemeModel:
             | Path
         ),
         step: int = 1,
+        limit: bool = False,
     ) -> MlstResult:
         """
         Get scores for the sequence(s) based on the filters in the model.
@@ -290,6 +317,7 @@ class ProbabilisticFilterMlstSchemeModel:
             cobs_path (Path): The path of the COBS-structure directory.
             sequence_input (Seq): The input sequence for classification
             step (int, optional): The amount of kmers that are passed; defaults to one
+            limit (bool, optional): Applying a filter that limits the best result.
         Returns:
             MlstResult: The results of the prediction.
@@ -301,13 +329,19 @@ class ProbabilisticFilterMlstSchemeModel:
             if sequence_input.id == "<unknown id>":
                 sequence_input.id = "test"
             hits = {
-                sequence_input.id: self.calculate_hits(cobs_path, sequence_input.seq)
+                sequence_input.id: self.calculate_hits(
+                    cobs_path, sequence_input.seq, step, limit
+                )
             }
-            return MlstResult(self.model_display_name, step, hits)
+            return MlstResult(self.model_name, step, hits, None)
         if isinstance(sequence_input, Path):
             return ProbabilisticFilterMlstSchemeModel.predict(
-                self, cobs_path, get_record_iterator(sequence_input), step=step
+                self,
+                cobs_path,
+                get_record_iterator(sequence_input),
+                step=step,
+                limit=limit,
             )
         if isinstance(
@@ -317,33 +351,35 @@ class ProbabilisticFilterMlstSchemeModel:
             hits = {}
             # individual_seq is a SeqRecord-Object
             for individual_seq in sequence_input:
-                individual_hits = self.calculate_hits(cobs_path, individual_seq.seq)
+                individual_hits = self.calculate_hits(
+                    cobs_path, individual_seq.seq, step, limit
+                )
                 hits[individual_seq.id] = individual_hits
-            return MlstResult(self.model_display_name, step, hits)
+            return MlstResult(self.model_name, step, hits, None)
         raise ValueError(
             "Invalid sequence input, must be a Seq object, a list of Seq objects, a"
             " SeqIO FastaIterator, or a SeqIO FastqPhredIterator"
         )
     def get_cobs_result(
-        self, cobs_result: cobs_index.SearchResult, kmer_threshold: bool
+        self,
+        cobs_result: cobs_index.SearchResult,
+        kmer_threshold: bool,
     ) -> dict:
         """
         Get every entry in a COBS search result.
         Args:
             cobs_result (SearchResult): The result of the prediction.
-            kmer_threshold (bool): Applying a kmer threshold to mitigate false positives
+            kmer_threshold (bool): Applying a kmer threshold to mitigate false positives.
         Returns:
             dict: A dictionary storing the allele id of locus as key and the score as value.
         """
-        return {
-            individual_result.doc_name: individual_result.score
-            for individual_result in cobs_result
-            if not kmer_threshold or individual_result.score > 50
-        }
+        hits = [
+            result for result in cobs_result if not kmer_threshold or result.score > 50
+        ]
+        return {result.doc_name: result.score for result in hits}
     def sequence_splitter(self, input_sequence: str, allele_len: int) -> list[str]:
         """
@@ -379,13 +415,15 @@ class ProbabilisticFilterMlstSchemeModel:
         while start + substring_length <= sequence_len:
             substring_list.append(input_sequence[start : start + substring_length])
-            start += substring_length - self.k + 1  # To not lose kmers when dividing
+            start += (
+                substring_length - self.k_value + 1
+            )  # To not lose kmers when dividing
         # The remaining string is either appended to the list or added to the last entry.
         if start < len(input_sequence):
             remaining_substring = input_sequence[start:]
             # A substring needs to be at least of size k for COBS.
-            if len(remaining_substring) < self.k:
+            if len(remaining_substring) < self.k_value:
                 substring_list[-1] += remaining_substring
             else:
                 substring_list.append(remaining_substring)

XspecT 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

Potentially problematic release.

XspecT 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl