XspecT-0.2.5-py3-none-any.whl → xspect-0.2.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic.
- xspect/definitions.py +7 -0
- xspect/{download_filters.py → download_models.py} +2 -2
- xspect/fastapi.py +2 -2
- xspect/main.py +61 -8
- xspect/mlst_feature/__init__.py +0 -0
- xspect/mlst_feature/mlst_helper.py +155 -0
- xspect/mlst_feature/pub_mlst_handler.py +119 -0
- xspect/model_management.py +3 -4
- xspect/models/probabilistic_filter_mlst_model.py +287 -0
- xspect/models/probabilistic_filter_model.py +2 -11
- xspect/models/probabilistic_filter_svm_model.py +3 -0
- xspect/models/probabilistic_single_filter_model.py +4 -6
- xspect/models/result.py +8 -7
- xspect/pipeline.py +1 -1
- xspect/run.py +1 -1
- xspect/train.py +2 -39
- xspect/train_filter/extract_and_concatenate.py +1 -1
- xspect/train_filter/ncbi_api/download_assemblies.py +2 -2
- xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py +13 -13
- xspect/train_filter/ncbi_api/ncbi_children_tree.py +1 -1
- xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py +2 -2
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/METADATA +16 -16
- xspect-0.2.7.dist-info/RECORD +33 -0
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/WHEEL +1 -1
- XspecT-0.2.5.dist-info/RECORD +0 -30
- xspect/train_filter/html_scrap.py +0 -114
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/LICENSE +0 -0
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/entry_points.txt +0 -0
- {XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/top_level.txt +0 -0
xspect/models/probabilistic_filter_mlst_model.py
ADDED
@@ -0,0 +1,287 @@
+"""Probabilistic filter MLST model for sequence data"""
+
+__author__ = "Cetin, Oemer"
+
+import cobs_index
+import json
+from pathlib import Path
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from cobs_index import DocumentList
+from collections import defaultdict
+from xspect.file_io import get_record_iterator
+from xspect.mlst_feature.mlst_helper import MlstResult
+
+
+class ProbabilisticFilterMlstSchemeModel:
+    """Probabilistic filter MLST scheme model for sequence data"""
+
+    def __init__(
+        self,
+        k: int,
+        model_display_name: str,
+        base_path: Path,
+        fpr: float = 0.001,
+    ) -> None:
+        if k < 1:
+            raise ValueError("Invalid k value, must be greater than 0")
+        if not isinstance(base_path, Path):
+            raise ValueError("Invalid base path, must be a pathlib.Path object")
+
+        self.k = k
+        self.model_display_name = model_display_name
+        self.base_path = base_path / "MLST"
+        self.fpr = fpr
+        self.model_type = "Strain"
+        self.loci = {}
+        self.scheme_path = ""
+        self.cobs_path = ""
+        self.avg_locus_bp_size = []
+        self.indices = []
+
+    def to_dict(self) -> dict:
+        """Returns a dictionary representation of the model"""
+        return {
+            "k": self.k,
+            "model_display_name": self.model_display_name,
+            "model_type": self.model_type,
+            "fpr": self.fpr,
+            "scheme_path": str(self.scheme_path),
+            "cobs_path": str(self.cobs_path),
+            "average_locus_base_pair_size": self.avg_locus_bp_size,
+            "loci": self.loci,
+        }
+
+    def get_cobs_index_path(self, scheme: str, locus: str) -> Path:
+        """Returns the path to the cobs index"""
+        # To differentiate from genus and species models
+        cobs_path = self.base_path / f"{scheme}"
+        cobs_path.mkdir(exist_ok=True, parents=True)
+        return cobs_path / f"{locus}.cobs_compact"
+
+    def fit(self, scheme_path: Path) -> None:
+        """Trains a COBS structure for every locus with all its alleles"""
+        if not scheme_path.exists():
+            raise ValueError(
+                "Scheme not found. Please make sure to download the schemes prior!"
+            )
+
+        scheme = str(scheme_path).split("/")[-1]
+        cobs_path = ""
+        # COBS structure for every locus (default = 7 for Oxford or Pasteur scheme)
+        for locus_path in sorted(scheme_path.iterdir()):
+            locus = str(locus_path).split("/")[-1]
+            # counts all fasta files that belong to a locus
+            self.loci[locus] = sum(
+                (1 for _ in locus_path.iterdir() if not str(_).endswith("cache"))
+            )
+
+            # determine the avg base pair size of alleles
+            fasta_file = next(locus_path.glob("*.fasta"), None)
+            with open(fasta_file, "r") as handle:
+                record = next(SeqIO.parse(handle, "fasta"))
+                self.avg_locus_bp_size.append(len(record.seq))
+
+            # COBS only accepts strings as paths
+            doclist = DocumentList(str(locus_path))
+            index_params = cobs_index.CompactIndexParameters()
+            index_params.term_size = self.k  # k-mer size
+            index_params.clobber = True  # overwrite output and temporary files
+            index_params.false_positive_rate = self.fpr
+
+            # Creates COBS data structure for each locus
+            cobs_path = self.get_cobs_index_path(scheme, locus)
+            cobs_index.compact_construct_list(doclist, str(cobs_path), index_params)
+            # Saves COBS-file inside the "indices" attribute
+            self.indices.append(cobs_index.Search(str(cobs_path)))
+
+        self.scheme_path = scheme_path
+        self.cobs_path = cobs_path.parent
+
+    def save(self) -> None:
+        """Saves the model to disk"""
+        scheme = str(self.scheme_path).split("/")[
+            -1
+        ]  # [-1] -> contains the scheme name
+        json_path = self.base_path / scheme / f"{scheme}.json"
+        json_object = json.dumps(self.to_dict(), indent=4)
+
+        with open(json_path, "w", encoding="utf-8") as file:
+            file.write(json_object)
+
+    @staticmethod
+    def load(scheme_path: Path) -> "ProbabilisticFilterMlstSchemeModel":
+        """Loads the model from a JSON-file"""
+        scheme_name = str(scheme_path).split("/")[-1]
+        json_path = scheme_path / f"{scheme_name}.json"
+        with open(json_path, "r", encoding="utf-8") as file:
+            json_object = file.read()
+        model_json = json.loads(json_object)
+        model = ProbabilisticFilterMlstSchemeModel(
+            model_json["k"],
+            model_json["model_display_name"],
+            json_path.parent,
+            model_json["fpr"],
+        )
+        model.scheme_path = model_json["scheme_path"]
+        model.cobs_path = model_json["cobs_path"]
+        model.avg_locus_bp_size = model_json["average_locus_base_pair_size"]
+        model.loci = model_json["loci"]
+
+        for entry in sorted(json_path.parent.iterdir()):
+            if not entry.exists():
+                raise FileNotFoundError(f"Index file not found at {entry}")
+            if str(entry).endswith(".json"):  # only COBS-files
+                continue
+            model.indices.append(cobs_index.Search(str(entry), False))
+        return model
+
+    def calculate_hits(self, path: Path, sequence: Seq, step: int = 1) -> list[dict]:
+        """Calculates the hits for a sequence"""
+        if not isinstance(sequence, Seq):
+            raise ValueError("Invalid sequence, must be a Bio.Seq object")
+
+        if not len(sequence) > self.k:
+            raise ValueError("Invalid sequence, must be longer than k")
+
+        if not self.indices:
+            raise ValueError("The Model has not been trained yet")
+
+        scheme_path_list = []
+        for entry in sorted(path.iterdir()):
+            if str(entry).endswith(".json"):
+                continue
+            file_name = str(entry).split("/")[-1]  # file_name = locus
+            scheme_path_list.append(file_name.split(".")[0])  # without the file ending
+
+        result_dict = {}
+        highest_results = {}
+        counter = 0
+        # split the sequence in parts based on sequence length
+        if len(sequence) >= 10000:
+            for index in self.indices:
+                cobs_results = []
+                allele_len = self.avg_locus_bp_size[counter]
+                split_sequence = self.sequence_splitter(str(sequence), allele_len)
+                for split in split_sequence:
+                    res = index.search(split, step=step)
+                    split_result = self.get_cobs_result(res)
+                    if not split_result:
+                        continue
+                    cobs_results.append(split_result)
+
+                all_counts = defaultdict(int)
+                for result in cobs_results:
+                    for name, value in result.items():
+                        all_counts[name] += value
+
+                sorted_counts = dict(
+                    sorted(all_counts.items(), key=lambda item: -item[1])
+                )
+                first_key = next(iter(sorted_counts))
+                highest_result = sorted_counts[first_key]
+                result_dict[scheme_path_list[counter]] = sorted_counts
+                highest_results[scheme_path_list[counter]] = {first_key: highest_result}
+                counter += 1
+        else:
+            for index in self.indices:
+                res = index.search(
+                    str(sequence), step=step
+                )  # COBS can't handle Seq-Objects
+                result_dict[scheme_path_list[counter]] = self.get_cobs_result(res)
+                highest_results[scheme_path_list[counter]] = (
+                    self.get_highest_cobs_result(res)
+                )
+                counter += 1
+        return [{"Strain type": highest_results}, {"All results": result_dict}]
+
+    def predict(
+        self,
+        cobs_path: Path,
+        sequence_input: (
+            SeqRecord
+            | list[SeqRecord]
+            | SeqIO.FastaIO.FastaIterator
+            | SeqIO.QualityIO.FastqPhredIterator
+            | Path
+        ),
+        step: int = 1,
+    ) -> MlstResult:
+        """Returns scores for the sequence(s) based on the filters in the model"""
+        if isinstance(sequence_input, SeqRecord):
+            if sequence_input.id == "<unknown id>":
+                sequence_input.id = "test"
+            hits = {
+                sequence_input.id: self.calculate_hits(cobs_path, sequence_input.seq)
+            }
+            return MlstResult(self.model_display_name, step, hits)
+
+        if isinstance(sequence_input, Path):
+            return ProbabilisticFilterMlstSchemeModel.predict(
+                self, cobs_path, get_record_iterator(sequence_input), step=step
+            )
+
+        if isinstance(
+            sequence_input,
+            (SeqIO.FastaIO.FastaIterator, SeqIO.QualityIO.FastqPhredIterator),
+        ):
+            hits = {}
+            # individual_seq is a SeqRecord-Object
+            for individual_seq in sequence_input:
+                individual_hits = self.calculate_hits(cobs_path, individual_seq.seq)
+                hits[individual_seq.id] = individual_hits
+            return MlstResult(self.model_display_name, step, hits)
+
+        raise ValueError(
+            "Invalid sequence input, must be a Seq object, a list of Seq objects, a"
+            " SeqIO FastaIterator, or a SeqIO FastqPhredIterator"
+        )
+
+    def get_highest_cobs_result(self, cobs_result: cobs_index.SearchResult) -> dict:
+        """Returns the first entry in a COBS search result."""
+        # counter = 1
+        # dictio = {}
+        for individual_result in cobs_result:
+            # COBS already sorts the result in descending order
+            # The first doc_name has the highest result which is needed to determine the allele
+            return {individual_result.doc_name: individual_result.score}
+
+    def get_cobs_result(self, cobs_result: cobs_index.SearchResult) -> dict:
+        """Returns all entries in a COBS search result."""
+        return {
+            individual_result.doc_name: individual_result.score
+            for individual_result in cobs_result
+            if individual_result.score > 50
+        }
+
+    def sequence_splitter(self, input_sequence: str, allele_len: int) -> list[str]:
+        """Returns an equally divided sequence in form of a list."""
+        # An input sequence will have 10000 or more base pairs.
+        sequence_len = len(input_sequence)
+
+        if sequence_len < 100000:
+            substring_length = allele_len // 10
+        elif 100000 <= sequence_len < 1000000:
+            substring_length = allele_len
+        elif 1000000 <= sequence_len < 10000000:
+            substring_length = allele_len * 10
+        else:
+            substring_length = allele_len * 100
+
+        substring_list = []
+        start = 0
+
+        while start + substring_length <= sequence_len:
+            substring_list.append(input_sequence[start : start + substring_length])
+            start += substring_length - self.k + 1  # To not lose kmers when dividing
+
+        # The remaining string is either appended to the list or added to the last entry.
+        if start < len(input_sequence):
+            remaining_substring = input_sequence[start:]
+            # A substring needs to be at least of size k for COBS.
+            if len(remaining_substring) < self.k:
+                substring_list[-1] += remaining_substring
+            else:
+                substring_list.append(remaining_substring)
+        return substring_list
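For orientation, the new class wires training, persistence, and prediction together. A minimal usage sketch follows; the paths, scheme name, and k value are illustrative placeholders, not values shipped with this release — only the method calls come from the file above:

```python
# Hypothetical driver for the new MLST scheme model; all paths and names
# below are assumptions for illustration.
from pathlib import Path
from xspect.models.probabilistic_filter_mlst_model import (
    ProbabilisticFilterMlstSchemeModel,
)

base_path = Path("xspect-data")
scheme_path = Path("xspect-data/schemes/oxford")  # one subdirectory per locus

model = ProbabilisticFilterMlstSchemeModel(21, "Oxford", base_path)
model.fit(scheme_path)  # builds one COBS compact index per locus
model.save()            # writes <scheme>.json next to the indices

# Reload and type an assembly; predict() iterates the index directory.
loaded = ProbabilisticFilterMlstSchemeModel.load(model.cobs_path)
result = loaded.predict(Path(loaded.cobs_path), Path("assembly.fasta"))
```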
xspect/models/probabilistic_filter_model.py
CHANGED
@@ -8,6 +8,7 @@ from Bio.SeqRecord import SeqRecord
 from Bio import SeqIO
 from slugify import slugify
 import cobs_index as cobs
+from xspect.definitions import fasta_endings, fastq_endings
 from xspect.file_io import get_record_iterator
 from xspect.models.result import ModelResult
 
@@ -64,10 +65,6 @@ class ProbabilisticFilterModel:
             "num_hashes": self.num_hashes,
         }
 
-    def __dict__(self) -> dict:
-        """Returns a dictionary representation of the model"""
-        return self.to_dict()
-
     def slug(self) -> str:
         """Returns a slug representation of the model"""
         return slugify(self.model_display_name + "-" + str(self.model_type))
@@ -89,13 +86,7 @@ class ProbabilisticFilterModel:
 
         doclist = cobs.DocumentList()
         for file in dir_path.iterdir():
-            if file.is_file() and file.suffix in [
-                ".fasta",
-                ".fna",
-                ".fa",
-                ".fastq",
-                ".fq",
-            ]:
+            if file.is_file() and file.suffix[1:] in fasta_endings + fastq_endings:
                 # cobs only uses the file name to the first "." as the document name
                 if file.name in display_names:
                     self.display_names[file.name.split(".")[0]] = display_names[
xspect/models/probabilistic_filter_svm_model.py
CHANGED
@@ -65,8 +65,11 @@ class ProbabilisticFilterSVMModel(ProbabilisticFilterModel):
     ) -> None:
         """Fit the SVM to the sequences and labels"""
 
+        # Since the SVM works with score data, we need to train
+        # the underlying data structure for score generation first
         super().fit(dir_path, display_names=display_names)
 
+        # calculate scores for SVM training
        score_list = []
         for file in svm_path.iterdir():
             if not file.is_file():
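The added comments spell out the two-stage fit: the base filters are trained first, and their per-class scores then become the feature vectors for the SVM. A rough sketch of that idea in isolation — the use of scikit-learn and the toy numbers here are assumptions for illustration, not XspecT's actual backend:

```python
# Toy illustration of training an SVM on filter score vectors; the data and
# the scikit-learn backend are assumptions, not taken from XspecT.
from sklearn.svm import SVC

score_vectors = [[0.91, 0.12], [0.08, 0.87], [0.88, 0.15]]  # per-genome scores
labels = ["baumannii", "pittii", "baumannii"]

svm = SVC(kernel="linear")
svm.fit(score_vectors, labels)
print(svm.predict([[0.90, 0.10]]))  # -> ['baumannii']
```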
xspect/models/probabilistic_single_filter_model.py
CHANGED
@@ -1,4 +1,4 @@
-"""
+"""Base probabilistic filter model for sequence data"""
 
 # pylint: disable=no-name-in-module, too-many-instance-attributes
 
@@ -14,7 +14,7 @@ from xspect.file_io import get_record_iterator
 
 
 class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
-    """
+    """Base probabilistic filter model for sequence data"""
 
     def __init__(
         self,
@@ -25,7 +25,6 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
         model_type: str,
         base_path: Path,
         fpr: float = 0.01,
-        num_hashes: int = 7,
     ) -> None:
         super().__init__(
             k=k,
@@ -35,12 +34,12 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
             model_type=model_type,
             base_path=base_path,
             fpr=fpr,
-            num_hashes=
+            num_hashes=1,
         )
         self.bf = None
 
     def fit(self, file_path: Path, display_name: str) -> None:
-        """Fit the
+        """Fit the cobs classic index to the sequences and labels"""
         # estimate number of kmers
         total_length = 0
         for record in get_record_iterator(file_path):
@@ -89,7 +88,6 @@ class ProbabilisticSingleFilterModel(ProbabilisticFilterModel):
             model_json["model_type"],
             path.parent,
             fpr=model_json["fpr"],
-            num_hashes=model_json["num_hashes"],
         )
         model.display_names = model_json["display_names"]
         bloom_path = model.base_path / model.slug() / "filter.bloom"
xspect/models/result.py
CHANGED
@@ -1,14 +1,15 @@
-"""
+"""Module for storing the results of XspecT models."""
 
 from enum import Enum
 
 
 def get_last_processing_step(result: "ModelResult") -> "ModelResult":
     """Get the last subprocessing step of the result. First path only."""
-
-
-
-
+
+    # traverse result tree to get last step
+    while result.subprocessing_steps:
+        result = result.subprocessing_steps[-1].result
+    return result
 
 
 class StepType(Enum):
@@ -82,9 +83,9 @@ class ModelResult:
         scores = {
             subsequence: {
                 label: round(hits / self.num_kmers[subsequence], 2)
-                for label, hits in
+                for label, hits in subsequence_hits.items()
             }
-            for subsequence,
+            for subsequence, subsequence_hits in self.hits.items()
         }
 
         # calculate total scores
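The completed comprehension normalizes each subsequence's hit counts by that subsequence's k-mer count. A worked example with made-up numbers:

```python
# The hunk's comprehension, run on illustrative values.
hits = {"read_1": {"A. baumannii": 820, "A. pittii": 40}}
num_kmers = {"read_1": 1000}

scores = {
    subsequence: {
        label: round(label_hits / num_kmers[subsequence], 2)
        for label, label_hits in subsequence_hits.items()
    }
    for subsequence, subsequence_hits in hits.items()
}
print(scores)  # {'read_1': {'A. baumannii': 0.82, 'A. pittii': 0.04}}
```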
xspect/pipeline.py
CHANGED
xspect/run.py
CHANGED
xspect/train.py
CHANGED
@@ -22,7 +22,6 @@ from xspect.train_filter.ncbi_api import (
 )
 from xspect.train_filter import (
     create_svm,
-    html_scrap,
     extract_and_concatenate,
 )
 
@@ -40,7 +39,7 @@ def check_user_input(user_input: str):
     rank = metadata["rank"]
     lineage = metadata["lineage"]
     bacteria_id = 2
-    if
+    if user_input not in (sci_name, tax_id):
         print(
             f"{get_current_time()}| The given genus: {user_input} was found as"
             f" genus: {sci_name} ID: {tax_id}"
@@ -60,38 +59,6 @@ def check_user_input(user_input: str):
         sys.exit()
 
 
-def copy_custom_data(bf_path: str, svm_path: str, dir_name: str):
-    """
-
-    :param bf_path:
-    :param svm_path:
-    :param dir_name:
-    :return:
-    """
-    path = Path(os.getcwd()) / "genus_metadata" / dir_name
-    new_bf_path = path / "concatenate"
-    new_svm_path = path / "training_data"
-
-    # Make the new directories.
-    path.mkdir(exist_ok=True)
-    new_bf_path.mkdir(exist_ok=True)
-    new_svm_path.mkdir(exist_ok=True)
-
-    # Move bloomfilter files.
-    bf_files = os.listdir(bf_path)
-    for file in bf_files:
-        file_path = Path(bf_path) / file
-        new_file_path = new_bf_path / file
-        shutil.copy2(file_path, new_file_path)
-
-    # Move svm files.
-    svm_files = os.listdir(svm_path)
-    for file in svm_files:
-        file_path = Path(svm_path) / file
-        new_file_path = new_svm_path / file
-        shutil.copy2(file_path, new_file_path)
-
-
 def set_logger(dir_name: str):
     """Sets the logger parameters.
 
@@ -168,14 +135,10 @@ def train_ncbi(genus: str, svm_step: int = 1):
     children_ids = ncbi_children_tree.NCBIChildrenTree(genus).children_ids()
     species_dict = ncbi_taxon_metadata.NCBITaxonMetadata(children_ids).get_metadata()
 
-    # Get all gcf accessions that have Taxonomy check result OK.
-    logger.info("Checking ANI data for updates")
-    ani_gcf = html_scrap.TaxonomyCheck().ani_gcf()
-
     # Look for up to 8 assembly accessions per species.
     logger.info("Getting assembly metadata")
     all_metadata = ncbi_assembly_metadata.NCBIAssemblyMetadata(
-        all_metadata=species_dict,
+        all_metadata=species_dict, count=8, contig_n50=10000
     )
     all_metadata = all_metadata.get_all_metadata()
 
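With the HTML scraping removed, assembly selection now runs entirely on API metadata, configured inline with count=8 and contig_n50=10000. A sketch of invoking the updated entry point, whose signature appears in the hunk header above (the genus name is a placeholder example):

```python
# Example call of the updated trainer; "Acinetobacter" is a placeholder genus.
from xspect.train import train_ncbi

train_ncbi("Acinetobacter", svm_step=1)
```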
xspect/train_filter/ncbi_api/download_assemblies.py
CHANGED
@@ -23,9 +23,9 @@ def download_assemblies(accessions, dir_name, target_folder, zip_file_name):
     """
 
     path = get_xspect_tmp_path() / dir_name / target_folder / zip_file_name
-    api_url = f"https://api.ncbi.nlm.nih.gov/datasets/
+    api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{','.join(accessions)}/download"
     parameters = {"include_annotation_type": "GENOME_FASTA", "filename": zip_file_name}
     os.makedirs(os.path.dirname(path), exist_ok=True)
-    genome_download = requests.get(api_url, params=parameters, timeout=
+    genome_download = requests.get(api_url, params=parameters, timeout=30)
     with open(path, "wb") as f:
         f.write(genome_download.content)
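The download URL is now pinned to v2 of the NCBI Datasets API, and the request gets an explicit 30-second timeout. Stripped of XspecT's path handling, the call reduces to roughly the following; the accession and output filename are placeholders, not values from this release:

```python
# Bare-bones version of the pinned v2 download request; accession and output
# name are placeholder assumptions.
import requests

accessions = ["GCF_000069245.1"]
api_url = (
    "https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/"
    f"{','.join(accessions)}/download"
)
parameters = {"include_annotation_type": "GENOME_FASTA", "filename": "genomes.zip"}
genome_download = requests.get(api_url, params=parameters, timeout=30)
with open("genomes.zip", "wb") as f:
    f.write(genome_download.content)
```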
xspect/train_filter/ncbi_api/ncbi_assembly_metadata.py
CHANGED
@@ -1,4 +1,4 @@
-"""
+"""Collects metadata of assemblies from NCBI API"""
 
 __author__ = "Berger, Phillip"
 
@@ -14,16 +14,14 @@ class NCBIAssemblyMetadata:
 
     _all_metadata: dict
     _count: int
-    _ani_gcf: list
     _parameters: dict
     _accessions: list[str]
     _contig_n50: int
     _all_metadata_complete: dict
 
-    def __init__(self, all_metadata: dict,
+    def __init__(self, all_metadata: dict, count=8, contig_n50=10000):
         self._all_metadata = all_metadata
         self._count = count
-        self._ani_gcf = ani_gcf
         self._contig_n50 = contig_n50
 
         self._set_parameters()
@@ -72,7 +70,7 @@ class NCBIAssemblyMetadata:
         }
 
     def _make_request(self, taxon: str):
-        api_url = f"https://api.ncbi.nlm.nih.gov/datasets/
+        api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/taxon/{taxon}/dataset_report"
         accessions = []
         count = 0
         for request_type, parameters in self._parameters.items():
@@ -80,17 +78,19 @@ class NCBIAssemblyMetadata:
             response = raw_response.json()
             if response:
                 try:
-
-                    for
-
-
-
+                    reports = response["reports"]
+                    for report in reports:
+                        accession = report["accession"]
+                        contig_n50 = report["assembly_stats"]["contig_n50"]
+                        taxonomy_check_status = report["average_nucleotide_identity"][
+                            "taxonomy_check_status"
+                        ]
                         if count < self._count:
                             if (
-
-                                and
+                                taxonomy_check_status == "OK"
+                                and contig_n50 > self._contig_n50
                             ):
-                                accessions.append(
+                                accessions.append(accession)
                                 count += 1
                             else:
                                 break
xspect/train_filter/ncbi_api/ncbi_children_tree.py
CHANGED
@@ -24,7 +24,7 @@ class NCBIChildrenTree:
 
     def _request_tree(self):
         """Make the request for the children tree at the NCBI Datasets API."""
-        api_url = f"https://api.ncbi.nlm.nih.gov/datasets/
+        api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/taxonomy/taxon/{self._taxon}/filtered_subtree"
         raw_response = requests.get(api_url, timeout=5)
         self._response = raw_response.json()["edges"]
         self._parent_taxon_id = str(self._response["1"]["visible_children"][0])
xspect/train_filter/ncbi_api/ncbi_taxon_metadata.py
CHANGED
@@ -1,4 +1,4 @@
-"""
+"""This module is used to retrieve metadata from the NCBI taxonomy database."""
 
 __author__ = "Berger, Phillip"
 
@@ -21,7 +21,7 @@ class NCBITaxonMetadata:
         self._collect_all_metadata()
 
     def _request_metadata(self):
-        api_url = f"https://api.ncbi.nlm.nih.gov/datasets/
+        api_url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/taxonomy/taxon/{str(self._taxon)}"
         raw_response = requests.get(api_url, timeout=5)
         self._response = raw_response.json()["taxonomy_nodes"]
 
{XspecT-0.2.5.dist-info → xspect-0.2.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: XspecT
-Version: 0.2.5
+Version: 0.2.7
 Summary: Tool to monitor and characterize pathogens using Bloom filters.
 License: MIT License
 
@@ -46,14 +46,14 @@ Requires-Dist: fastapi
 Requires-Dist: uvicorn
 Requires-Dist: python-multipart
 Provides-Extra: docs
-Requires-Dist: sphinx ; extra == 'docs'
-Requires-Dist: furo ; extra == 'docs'
-Requires-Dist: myst-parser ; extra == 'docs'
-Requires-Dist: sphinx-copybutton ; extra == 'docs'
-Requires-Dist: sphinx-autobuild ; extra == 'docs'
+Requires-Dist: sphinx; extra == "docs"
+Requires-Dist: furo; extra == "docs"
+Requires-Dist: myst-parser; extra == "docs"
+Requires-Dist: sphinx-copybutton; extra == "docs"
+Requires-Dist: sphinx-autobuild; extra == "docs"
 Provides-Extra: test
-Requires-Dist: pytest ; extra == 'test'
-Requires-Dist: pytest-cov ; extra == 'test'
+Requires-Dist: pytest; extra == "test"
+Requires-Dist: pytest-cov; extra == "test"
 
 # XspecT - Acinetobacter Species Assignment Tool
 
@@ -63,7 +63,7 @@ Requires-Dist: pytest-cov ; extra == 'test'
 <img src="/docs/img/logo.png" height="50%" width="50%">
 
 <!-- start intro -->
-XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or
+XspecT is a Python-based tool to taxonomically classify sequence-reads (or assembled genomes) on the species and/or MLST level using [Bloom Filters] and a [Support Vector Machine].
 <br/><br/>
 
 XspecT utilizes the uniqueness of kmers and compares extracted kmers from the input-data to a reference database. Bloom Filter ensure a fast lookup in this process. For a final prediction the results are classified using a Support Vector Machine.
@@ -88,14 +88,14 @@ pip install xspect
 Please note that Windows and Alpine Linux is currently not supported.
 
 ## Usage
-### Get the
-To download basic pre-trained
+### Get the models
+To download basic pre-trained models, you can use the built-in command:
 ```
-xspect download-
+xspect download-models
 ```
-Additional species
+Additional species models can be trained using:
 ```
-xspect train you-ncbi-genus-name
+xspect train-species you-ncbi-genus-name
 ```
 
 ### How to run the web app
@@ -107,7 +107,7 @@ xspect api
 ### How to use the XspecT command line interface
 Run xspect with the configuration you want to run it with as arguments.
 ```
-xspect classify your-genus path/to/your/input-set
+xspect classify-species your-genus path/to/your/input-set
 ```
 For further instructions on how to use the command line interface, please refer to the [documentation] or execute:
 ```