XspecT 0.5.3__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of XspecT might be problematic.
- xspect/classify.py +38 -8
- xspect/definitions.py +30 -10
- xspect/file_io.py +2 -1
- xspect/filter_sequences.py +20 -4
- xspect/main.py +126 -28
- xspect/misclassification_detection/__init__.py +0 -0
- xspect/misclassification_detection/mapping.py +168 -0
- xspect/misclassification_detection/point_pattern_analysis.py +102 -0
- xspect/misclassification_detection/simulate_reads.py +55 -0
- xspect/mlst_feature/mlst_helper.py +15 -19
- xspect/mlst_feature/pub_mlst_handler.py +16 -19
- xspect/model_management.py +14 -17
- xspect/models/probabilistic_filter_mlst_model.py +11 -10
- xspect/models/probabilistic_filter_model.py +142 -8
- xspect/models/probabilistic_filter_svm_model.py +29 -14
- xspect/models/probabilistic_single_filter_model.py +9 -7
- xspect/models/result.py +22 -15
- xspect/ncbi.py +82 -7
- xspect/train.py +21 -4
- xspect/web.py +13 -4
- {xspect-0.5.3.dist-info → xspect-0.6.0.dist-info}/METADATA +4 -1
- {xspect-0.5.3.dist-info → xspect-0.6.0.dist-info}/RECORD +26 -22
- {xspect-0.5.3.dist-info → xspect-0.6.0.dist-info}/WHEEL +0 -0
- {xspect-0.5.3.dist-info → xspect-0.6.0.dist-info}/entry_points.txt +0 -0
- {xspect-0.5.3.dist-info → xspect-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {xspect-0.5.3.dist-info → xspect-0.6.0.dist-info}/top_level.txt +0 -0
xspect/classify.py
CHANGED
@@ -1,11 +1,13 @@
+"""Classification module"""
+
 from pathlib import Path
-from
+from importlib import import_module
 import xspect.model_management as mm
-from xspect.models.probabilistic_filter_mlst_model import (
-    ProbabilisticFilterMlstSchemeModel,
-)
 from xspect.file_io import prepare_input_output_paths

+# inline imports lead to "invalid name" issues
+# pylint: disable=invalid-name
+

 def classify_genus(
     model_genus: str, input_path: Path, output_path: Path, step: int = 1
@@ -22,7 +24,12 @@ def classify_genus(
         output_path (Path): The path to the output file where results will be saved.
         step (int): The amount of kmers to be skipped.
     """
-
+    ProbabilisticSingleFilterModel = import_module(
+        "xspect.models.probabilistic_single_filter_model"
+    ).ProbabilisticSingleFilterModel
+
+    model_path = mm.get_genus_model_path(model_genus)
+    model = ProbabilisticSingleFilterModel.load(model_path)
     input_paths, get_output_path = prepare_input_output_paths(input_path)

     for idx, current_path in enumerate(input_paths):
@@ -34,7 +41,12 @@ def classify_genus(


 def classify_species(
-    model_genus: str,
+    model_genus: str,
+    input_path: Path,
+    output_path: Path,
+    step: int = 1,
+    display_name: bool = False,
+    validation: bool = False,
 ):
     """
     Classify the species of sequences.
@@ -47,12 +59,24 @@ def classify_species(
         input_path (Path): The path to the input file/directory containing sequences.
         output_path (Path): The path to the output file where results will be saved.
         step (int): The amount of kmers to be skipped.
+        display_name (bool): Includes a display name for each tax_ID.
+        validation (bool): Sorts out misclassified reads.
     """
-
+    ProbabilisticFilterSVMModel = import_module(
+        "xspect.models.probabilistic_filter_svm_model"
+    ).ProbabilisticFilterSVMModel
+
+    model_path = mm.get_species_model_path(model_genus)
+    model = ProbabilisticFilterSVMModel.load(model_path)
     input_paths, get_output_path = prepare_input_output_paths(input_path)

     for idx, current_path in enumerate(input_paths):
-        result = model.predict(
+        result = model.predict(
+            current_path,
+            step=step,
+            display_name=display_name,
+            validation=validation,
+        )
         result.input_source = current_path.name
         cls_path = get_output_path(idx, output_path)
         result.save(cls_path)
@@ -68,6 +92,12 @@ def classify_mlst(input_path: Path, output_path: Path, limit: bool):
         output_path (Path): The path to the output file where results will be saved.
         limit (bool): A limit for the highest allele_id results that are shown.
     """
+    pick_scheme_from_models_dir = import_module(
+        "xspect.mlst_feature.mlst_helper"
+    ).pick_scheme_from_models_dir
+    ProbabilisticFilterMlstSchemeModel = import_module(
+        "xspect.models.probabilistic_filter_mlst_model"
+    ).ProbabilisticFilterMlstSchemeModel

     scheme_path = pick_scheme_from_models_dir()
     model = ProbabilisticFilterMlstSchemeModel.load(scheme_path)
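
For orientation, the pattern used throughout this release replaces module-level imports with importlib lookups inside the functions, so the model classes are only resolved when a command actually runs. A minimal sketch of the same pattern (the wrapper function below is illustrative, not part of the package):

from importlib import import_module
from pathlib import Path


def load_genus_model(model_path: Path):
    # Resolve the class at call time, mirroring the import_module(...) usage in the hunks above.
    ProbabilisticSingleFilterModel = import_module(
        "xspect.models.probabilistic_single_filter_model"
    ).ProbabilisticSingleFilterModel
    return ProbabilisticSingleFilterModel.load(model_path)

This keeps importing xspect.classify cheap while deferring the model modules to the first call.
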
xspect/definitions.py
CHANGED
@@ -11,8 +11,9 @@ def get_xspect_root_path() -> Path:
     """
     Return the root path for XspecT data.

-    Returns the path to the XspecT data directory, which can be located either in the user's home
-    If neither exists, it creates the directory in
+    Returns the path to the XspecT data directory, which can be located either in the user's home
+    directory or in the current working directory. If neither exists, it creates the directory in
+    the user's home directory.

     Returns:
         Path: The path to the XspecT data directory.
@@ -34,8 +35,8 @@ def get_xspect_model_path() -> Path:
     """
     Return the path to the XspecT models.

-    Returns the path to the XspecT models directory, which is located within the XspecT data
-    If the directory does not exist, it creates the directory.
+    Returns the path to the XspecT models directory, which is located within the XspecT data
+    directory. If the directory does not exist, it creates the directory.

     Returns:
         Path: The path to the XspecT models directory.
@@ -49,8 +50,8 @@ def get_xspect_upload_path() -> Path:
     """
     Return the path to the XspecT upload directory.

-    Returns the path to the XspecT uploads directory, which is located within the XspecT data
-    If the directory does not exist, it creates the directory.
+    Returns the path to the XspecT uploads directory, which is located within the XspecT data
+    directory. If the directory does not exist, it creates the directory.

     Returns:
         Path: The path to the XspecT uploads directory.
@@ -64,8 +65,8 @@ def get_xspect_runs_path() -> Path:
     """
     Return the path to the XspecT runs directory.

-    Returns the path to the XspecT runs directory, which is located within the XspecT data
-    If the directory does not exist, it creates the directory.
+    Returns the path to the XspecT runs directory, which is located within the XspecT data
+    directory. If the directory does not exist, it creates the directory.

     Returns:
         Path: The path to the XspecT runs directory.
@@ -79,8 +80,8 @@ def get_xspect_mlst_path() -> Path:
     """
     Return the path to the XspecT MLST directory.

-    Returns the path to the XspecT MLST directory, which is located within the XspecT data
-    If the directory does not exist, it creates the directory.
+    Returns the path to the XspecT MLST directory, which is located within the XspecT data
+    directory. If the directory does not exist, it creates the directory.

     Returns:
         Path: The path to the XspecT MLST directory.
@@ -88,3 +89,22 @@ def get_xspect_mlst_path() -> Path:
     mlst_path = get_xspect_root_path() / "mlst"
     mlst_path.mkdir(exist_ok=True, parents=True)
     return mlst_path
+
+
+def get_xspect_misclassification_path() -> Path:
+    """
+    Notes:
+        Developed by Oemer Cetin as part of a Bsc thesis at Goethe University Frankfurt am Main (2025).
+        (An Integration of Alignment-Free and Alignment-Based Approaches for Bacterial Taxon Assignment)
+
+    Return the path to the XspecT Misclassification directory.
+
+    Returns the path to the XspecT Misclassification directory, which is located within the XspecT data
+    directory. If the directory does not exist, it creates the directory.
+
+    Returns:
+        Path: The path to the XspecT Misclassification directory.
+    """
+    misclassification_path = get_xspect_root_path() / "misclassification"
+    misclassification_path.mkdir(exist_ok=True, parents=True)
+    return misclassification_path
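
The new helper mirrors the existing path helpers: it derives the directory from the XspecT root and creates it on demand. A minimal usage sketch (the report file name is a placeholder, not something the package itself writes):

from xspect.definitions import get_xspect_misclassification_path

# The directory is created on the first call; later calls return the same Path.
out_dir = get_xspect_misclassification_path()
report_path = out_dir / "example_run.tsv"  # placeholder name for illustration
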
xspect/file_io.py
CHANGED
@@ -113,7 +113,8 @@ def concatenate_metagenome(fasta_dir: Path, meta_path: Path) -> None:
     Concatenate all fasta files in a directory into one file.

     This function searches for all fasta files in the specified directory and writes their contents
-    into a single output file. The output file will contain the concatenated sequences from all
+    into a single output file. The output file will contain the concatenated sequences from all
+    fasta files.

     Args:
         fasta_dir (Path): Path to the directory with the fasta files.
xspect/filter_sequences.py
CHANGED
@@ -1,7 +1,13 @@
+"""Sequence filtering module"""
+
 from pathlib import Path
-from
+from importlib import import_module
+from xspect.model_management import get_genus_model_path, get_species_model_path
 from xspect.file_io import filter_sequences, prepare_input_output_paths

+# inline imports lead to "invalid name" issues
+# pylint: disable=invalid-name
+

 def filter_species(
     model_genus: str,
@@ -31,7 +37,12 @@ def filter_species(
             available species scores.
         sparse_sampling_step (int): The step size for sparse sampling. Defaults to 1.
     """
-
+    ProbabilisticFilterSVMModel = import_module(
+        "xspect.models.probabilistic_filter_svm_model"
+    ).ProbabilisticFilterSVMModel
+
+    species_model_path = get_species_model_path(model_genus)
+    species_model = ProbabilisticFilterSVMModel.load(species_model_path)
     input_paths, get_output_path = prepare_input_output_paths(input_path)

     for idx, current_path in enumerate(input_paths):
@@ -82,11 +93,16 @@ def filter_genus(
         sparse_sampling_step (int): The step size for sparse sampling. Defaults to 1.

     """
-
+    ProbabilisticSingleFilterModel = import_module(
+        "xspect.models.probabilistic_single_filter_model"
+    ).ProbabilisticSingleFilterModel
+
+    genus_model_path = get_genus_model_path(model_genus)
+    genus_model = ProbabilisticSingleFilterModel.load(genus_model_path)
     input_paths, get_output_path = prepare_input_output_paths(input_path)

     for idx, current_path in enumerate(input_paths):
-        result =
+        result = genus_model.predict(current_path, step=sparse_sampling_step)
         result.input_source = current_path.name

         if classification_output_path:
xspect/main.py
CHANGED
@@ -2,25 +2,12 @@

 from pathlib import Path
 from uuid import uuid4
+from importlib import import_module
 import click
-import
-
-
-
-from xspect import filter_sequences
-from xspect.train import train_from_directory, train_from_ncbi
-from xspect.definitions import (
-    get_xspect_model_path,
-)
-from xspect.mlst_feature.mlst_helper import pick_scheme
-from xspect.mlst_feature.pub_mlst_handler import PubMLSTHandler
-from xspect.models.probabilistic_filter_mlst_model import (
-    ProbabilisticFilterMlstSchemeModel,
-)
-from xspect.model_management import (
-    get_model_metadata,
-    get_models,
-)
+from xspect.model_management import get_models
+
+# inline imports lead to "invalid name" issues
+# pylint: disable=invalid-name


 @click.group()
@@ -32,7 +19,10 @@ def cli():
 @cli.command()
 def web():
     """Open the XspecT web application."""
-
+    app = import_module("xspect.web").app
+    run = import_module("uvicorn").run
+
+    run(app, host="0.0.0.0", port=8000)


 # # # # # # # # # # # # # # #
@@ -49,6 +39,8 @@ def models():
 def download():
     """Download models."""
     click.echo("Downloading models, this may take a while...")
+    download_test_models = import_module("xspect.download_models").download_test_models
+
     download_test_models(
         "https://assets.adrianromberg.com/science/xspect-models-07-08-2025.zip"
     )
@@ -64,7 +56,6 @@ def list_models():
     if not available_models:
         click.echo("No models found.")
         return
-    # todo: make this machine readable
     click.echo("Models found:")
     click.echo("--------------")
     for model_type, names in available_models.items():
@@ -96,11 +87,62 @@ def train():
     help="Email of the author.",
     default=None,
 )
-def train_ncbi(model_genus, svm_steps, author, author_email):
+@click.option(
+    "--min-n50",
+    type=int,
+    help="Minimum contig N50 to filter the accessions (default: 10000).",
+    default=10000,
+)
+@click.option(
+    "--include-atypical/--exclude-atypical",
+    help="Include or exclude atypical accessions (default: exclude).",
+    default=False,
+)
+@click.option(
+    "--allow-inconclusive",
+    is_flag=True,
+    help="Allow the use of accessions with inconclusive taxonomy check status for training.",
+    default=False,
+)
+@click.option(
+    "--allow-candidatus",
+    is_flag=True,
+    help="Allow the use of Candidatus species for training.",
+    default=False,
+)
+@click.option(
+    "--allow-sp",
+    is_flag=True,
+    help="Allow the use of species with 'sp.' in their names for training.",
+    default=False,
+)
+def train_ncbi(
+    model_genus,
+    svm_steps,
+    author,
+    author_email,
+    min_n50,
+    include_atypical,
+    allow_inconclusive,
+    allow_candidatus,
+    allow_sp,
+):
     """Train a species and a genus model based on NCBI data."""
     click.echo(f"Training {model_genus} species and genus metagenome model.")
     try:
-        train_from_ncbi
+        train_from_ncbi = import_module("xspect.train").train_from_ncbi
+
+        train_from_ncbi(
+            model_genus,
+            svm_steps,
+            author,
+            author_email,
+            min_n50=min_n50,
+            exclude_atypical=not include_atypical,
+            allow_inconclusive=allow_inconclusive,
+            allow_candidatus=allow_candidatus,
+            allow_sp=allow_sp,
+        )
     except ValueError as e:
         click.echo(f"Error: {e}")
         return
@@ -143,6 +185,8 @@ def train_ncbi(model_genus, svm_steps, author, author_email):
 def train_directory(model_genus, input_path, svm_steps, meta, author, author_email):
     """Train a model based on data from a directory for a given genus."""
     click.echo(f"Training {model_genus} model with {svm_steps} SVM steps.")
+    train_from_directory = import_module("xspect.train").train_from_directory
+
     train_from_directory(
         model_genus,
         Path(input_path),
@@ -167,12 +211,28 @@ def train_directory(model_genus, input_path, svm_steps, meta, author, author_ema
 def train_mlst(choose_schemes):
     """Download alleles and train bloom filters."""
     click.echo("Updating alleles")
+    mlst_helper = import_module("xspect.mlst_feature.mlst_helper")
+    pick_scheme = mlst_helper.pick_scheme
+
+    pub_mlst_handler = import_module("xspect.mlst_feature.pub_mlst_handler")
+    PubMLSTHandler = pub_mlst_handler.PubMLSTHandler
+
+    probabilistic_filter_mlst_model = import_module(
+        "xspect.models.probabilistic_filter_mlst_model"
+    )
+    ProbabilisticFilterMlstSchemeModel = (
+        probabilistic_filter_mlst_model.ProbabilisticFilterMlstSchemeModel
+    )
+
+    definitions = import_module("xspect.definitions")
+    get_xspect_model_path = definitions.get_xspect_model_path
+
     handler = PubMLSTHandler()
     handler.download_alleles(choose_schemes)
     click.echo("Download finished")
     scheme_path = pick_scheme(handler.get_scheme_paths())
     species_name = str(scheme_path).split("/")[-2]
-    scheme_name = str(scheme_path).
+    scheme_name = str(scheme_path).rsplit("/", maxsplit=1)[-1]
     scheme_url = handler.scheme_mapping[str(scheme_path)]
     model = ProbabilisticFilterMlstSchemeModel(
         31, f"{species_name}:{scheme_name}", get_xspect_model_path(), scheme_url
@@ -230,6 +290,8 @@ def classify_seqs():
 def classify_genus(model_genus, input_path, output_path, sparse_sampling_step):
     """Classify samples using a genus model."""
     click.echo("Classifying...")
+    classify = import_module("xspect.classify")
+
     classify.classify_genus(
         model_genus, Path(input_path), Path(output_path), sparse_sampling_step
     )
@@ -268,11 +330,37 @@ def classify_genus(model_genus, input_path, output_path, sparse_sampling_step):
     help="Sparse sampling step (e. g. only every 500th kmer for '--sparse-sampling-step 500').",
     default=1,
 )
-def classify_species(model_genus, input_path, output_path, sparse_sampling_step):
+@click.option(
+    "-n",
+    "--display-names",
+    help="Includes the display names next to taxonomy-IDs.",
+    is_flag=True,
+)
+@click.option(
+    "-v",
+    "--validation",
+    help="Detects misclassification for small reads or contigs.",
+    is_flag=True,
+)
+def classify_species(
+    model_genus,
+    input_path,
+    output_path,
+    sparse_sampling_step,
+    display_names,
+    validation,
+):
     """Classify samples using a species model."""
     click.echo("Classifying...")
+    classify = import_module("xspect.classify")
+
     classify.classify_species(
-        model_genus,
+        model_genus,
+        Path(input_path),
+        Path(output_path),
+        sparse_sampling_step,
+        display_names,
+        validation,
     )


@@ -301,6 +389,8 @@ def classify_species(model_genus, input_path, output_path, sparse_sampling_step)
 def classify_mlst(input_path, output_path, limit):
     """MLST classify a sample."""
     click.echo("Classifying...")
+    classify = import_module("xspect.classify")
+
     classify.classify_mlst(Path(input_path), Path(output_path), limit)


@@ -372,6 +462,7 @@ def filter_genus(
 ):
     """Filter samples using a genus model."""
     click.echo("Filtering...")
+    filter_sequences = import_module("xspect.filter_sequences")

     filter_sequences.filter_genus(
         model_genus,
@@ -426,14 +517,16 @@ def filter_genus(
     "-t",
     "--threshold",
     type=float,
-    help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring
+    help="Threshold for filtering (default: 0.7). Use -1 to filter for the highest scoring "
+    "species.",
     default=0.7,
     prompt=True,
 )
 @click.option(
     "--sparse-sampling-step",
     type=int,
-    help="Sparse sampling step (e. g. only every 500th kmer for
+    help="Sparse sampling step (e. g. only every 500th kmer for "
+    "'--sparse-sampling-step 500').",
     default=1,
 )
 def filter_species(
@@ -449,9 +542,12 @@ def filter_species(

     if threshold != -1 and (threshold < 0 or threshold > 1):
         raise click.BadParameter(
-            "Threshold must be between 0 and 1, or -1 for filtering by the highest
+            "Threshold must be between 0 and 1, or -1 for filtering by the highest "
+            "scoring species."
         )

+    get_model_metadata = import_module("xspect.model_management").get_model_metadata
+
     available_species = get_model_metadata(f"{model_genus}-species")["display_names"]
     available_species = {
         id: name.replace(f"{model_genus} ", "")
@@ -476,6 +572,8 @@ def filter_species(
     ][0]

     click.echo("Filtering...")
+    filter_sequences = import_module("xspect.filter_sequences")
+
     filter_sequences.filter_species(
         model_genus,
         model_species,
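
The new CLI flags map onto keyword arguments of train_from_ncbi, with --include-atypical/--exclude-atypical inverted into exclude_atypical. A minimal sketch of the equivalent direct call (genus, step count, and author values are placeholders; the argument names come from the hunk above):

from importlib import import_module

train_from_ncbi = import_module("xspect.train").train_from_ncbi
train_from_ncbi(
    "Acinetobacter",        # model_genus (placeholder)
    5,                      # svm_steps (placeholder)
    "Jane Doe",             # author (placeholder)
    "jane@example.org",     # author_email (placeholder)
    min_n50=10000,
    exclude_atypical=True,  # the CLI default, --exclude-atypical
    allow_inconclusive=False,
    allow_candidatus=False,
    allow_sp=False,
)
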
xspect/misclassification_detection/__init__.py
ADDED
File without changes

xspect/misclassification_detection/mapping.py
ADDED
@@ -0,0 +1,168 @@
+"""
+Mapping handler for the alignment-based misclassification detection.
+
+Notes:
+    Developed by Oemer Cetin as part of a Bsc thesis at Goethe University Frankfurt am Main (2025).
+    (An Integration of Alignment-Free and Alignment-Based Approaches for Bacterial Taxon Assignment)
+"""
+
+import mappy, pysam, os, csv
+from Bio import SeqIO
+from xspect.definitions import fasta_endings
+
+__author__ = "Cetin, Oemer"
+
+
+class MappingHandler:
+    """Handler class for all mapping related procedures."""
+
+    def __init__(self, ref_genome_path: str, reads_path: str) -> None:
+        """
+        Initialise the mapping handler.
+
+        This method sets up the paths to the reference genome and query sequences.
+        Additionally, the paths to the output formats (SAM, BAM and TSV) are generated.
+
+        Args:
+            ref_genome_path (str): The path to the reference genome.
+            reads_path (str): The path to the query sequences.
+        """
+        if not os.path.isfile(ref_genome_path):
+            raise ValueError("The path to the reference genome does not exist.")
+
+        if not os.path.isfile(reads_path):
+            raise ValueError("The path to the reads does not exist.")
+
+        if not ref_genome_path.endswith(tuple(fasta_endings)) and reads_path.endswith(
+            tuple(fasta_endings)
+        ):
+            raise ValueError("The files must be FASTA-files!")
+
+        stem = reads_path.rsplit(".", 1)[0] + "_mapped"
+        self.ref_genome_path = ref_genome_path
+        self.reads_path = reads_path
+        self.sam = stem + ".sam"
+        self.bam = stem + ".sorted.bam"
+        self.tsv = stem + ".start_coordinates.tsv"
+
+    def map_reads_onto_reference(self) -> None:
+        """
+        A Method that maps reads against the respective reference genome.
+
+        This function creates a SAM file via Mappy and converts it into a BAM file.
+        """
+        # create header (entry = sequences of the reference genome)
+        ref_seq = [
+            {"SN": rec.id, "LN": len(rec.seq)}
+            for rec in SeqIO.parse(self.ref_genome_path, "fasta")
+        ]
+        header = {"HD": {"VN": "1.0"}, "SQ": ref_seq}
+        target_id = {sequence["SN"]: number for number, sequence in enumerate(ref_seq)}
+
+        reads = list(SeqIO.parse(self.reads_path, "fasta"))
+        if not reads:
+            raise ValueError("Reads file is empty.")
+
+        read_length = len(reads[0].seq)
+        preset = "map-ont" if read_length > 150 else "sr"
+        # create SAM-file
+        aln = mappy.Aligner(self.ref_genome_path, preset=preset)
+        with pysam.AlignmentFile(self.sam, "w", header=header) as out:
+            for read in reads:
+                read_seq = str(read.seq)
+                for hit in aln.map(read_seq):
+                    if hit.cigar_str is None:
+                        continue
+                    # add soft-clips so CIGAR length == len(read_seq) IMPORTANT!!
+                    leftS = hit.q_st
+                    rightS = len(read_seq) - hit.q_en
+                    cigar = (
+                        (f"{leftS}S" if leftS > 0 else "")
+                        + hit.cigar_str
+                        + (f"{rightS}S" if rightS > 0 else "")
+                    )
+
+                    mapped_region = pysam.AlignedSegment()
+                    mapped_region.query_name = read.id
+                    mapped_region.query_sequence = read_seq
+                    mapped_region.flag = 16 if hit.strand == -1 else 0
+                    mapped_region.reference_id = target_id[hit.ctg]
+                    mapped_region.reference_start = hit.r_st
+                    mapped_region.mapping_quality = (
+                        hit.mapq or 255
+                    )  # 0-60 (255 means unavailable)
+                    mapped_region.cigarstring = cigar
+                    out.write(mapped_region)
+                    break  # keep only primary
+
+        # create BAM-file
+        pysam.sort("-o", self.bam, self.sam)
+        pysam.index(self.bam)
+
+    def get_total_genome_length(self) -> int:
+        """
+        Get the genome length from a BAM-file.
+
+        This function opens a BAM-file and extracts the genome length information.
+
+        Returns:
+            int: The genome length.
+        """
+        with pysam.AlignmentFile(self.bam, "rb") as bam:
+            return sum(bam.lengths)
+
+    def extract_starting_coordinates(self) -> None:
+        """
+        Extract starting coordinates of mapped regions from a BAM-file.
+
+        This function scans through a BAM-file and creates a TSV-file.
+        The information that is extracted is the starting coordinate for each mapped read.
+        """
+        # create tsv-file with all start positions
+        with open(self.tsv, "w") as tsv:
+            tsv.write("reference_genome\tread\tmapped_starting_coordinate\n")
+            try:
+                with pysam.AlignmentFile(self.bam, "rb") as bam:
+                    entry = {
+                        i: seq["SN"] for i, seq in enumerate(bam.header.to_dict()["SQ"])
+                    }
+                    seen = set()
+                    for ref_seq in bam.references:
+                        for hit in bam.fetch(ref_seq):
+                            if (
+                                hit.is_unmapped
+                                or hit.is_secondary
+                                or hit.is_supplementary
+                            ):
+                                continue
+                            key = (hit.reference_id, hit.reference_start)
+                            if key in seen:
+                                continue
+                            seen.add(key)
+                            tsv.write(
+                                f"{entry[hit.reference_id]}\t{hit.query_name}\t{hit.reference_start}\n"
+                            )
+            except ValueError:
+                tsv.write("dummy_reference\tdummy_read\t1000\n")
+
+    def get_start_coordinates(self) -> list[int]:
+        """
+        Get the coordinates of a TSV-file.
+
+        This function opens a TSV-file and saves all starting coordinates in a list.
+
+        Returns:
+            list[int]: The list containing all starting coordinates.
+
+        Raises:
+            ValueError: If no column with starting coordinates is found.
+        """
+        coordinates = []
+        with open(self.tsv, "r", newline="") as f:
+            reader = csv.DictReader(f, delimiter="\t")
+            for row in reader:
+                val = row.get("mapped_starting_coordinate")
+                if val is None:
+                    raise ValueError("Column with starting coordinates not found.")
+                coordinates.append(int(val))
+        return coordinates
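
Taken together, the handler is driven in four steps: map, sort and index, extract start coordinates, then read them back. A minimal usage sketch with placeholder file names (the output names follow the stem logic in __init__, i.e. <reads>_mapped.*):

from xspect.misclassification_detection.mapping import MappingHandler

# Both paths are placeholders and must point to existing FASTA files.
handler = MappingHandler("reference.fasta", "reads.fasta")
handler.map_reads_onto_reference()      # writes reads_mapped.sam and reads_mapped.sorted.bam
handler.extract_starting_coordinates()  # writes reads_mapped.start_coordinates.tsv
starts = handler.get_start_coordinates()
genome_length = handler.get_total_genome_length()
print(f"{len(starts)} unique mapped start positions over {genome_length} bp of reference")
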