birdnet-analyzer 2.0.1__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. birdnet_analyzer/__init__.py +9 -9
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -3
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +268 -241
  6. birdnet_analyzer/analyze/utils.py +700 -692
  7. birdnet_analyzer/audio.py +368 -368
  8. birdnet_analyzer/cli.py +732 -709
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -3
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -12
  14. birdnet_analyzer/embeddings/core.py +70 -69
  15. birdnet_analyzer/embeddings/utils.py +173 -179
  16. birdnet_analyzer/evaluation/__init__.py +189 -196
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/metrics.py +388 -388
  19. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -409
  20. birdnet_analyzer/evaluation/assessment/plotting.py +378 -379
  21. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -631
  22. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -98
  23. birdnet_analyzer/gui/__init__.py +19 -19
  24. birdnet_analyzer/gui/__main__.py +3 -3
  25. birdnet_analyzer/gui/analysis.py +179 -175
  26. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  27. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  28. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  30. birdnet_analyzer/gui/assets/gui.css +36 -28
  31. birdnet_analyzer/gui/assets/gui.js +89 -93
  32. birdnet_analyzer/gui/embeddings.py +638 -619
  33. birdnet_analyzer/gui/evaluation.py +801 -795
  34. birdnet_analyzer/gui/localization.py +75 -75
  35. birdnet_analyzer/gui/multi_file.py +265 -245
  36. birdnet_analyzer/gui/review.py +472 -519
  37. birdnet_analyzer/gui/segments.py +191 -191
  38. birdnet_analyzer/gui/settings.py +149 -128
  39. birdnet_analyzer/gui/single_file.py +264 -267
  40. birdnet_analyzer/gui/species.py +95 -95
  41. birdnet_analyzer/gui/train.py +687 -696
  42. birdnet_analyzer/gui/utils.py +803 -810
  43. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  44. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  80. birdnet_analyzer/lang/de.json +342 -334
  81. birdnet_analyzer/lang/en.json +342 -334
  82. birdnet_analyzer/lang/fi.json +342 -334
  83. birdnet_analyzer/lang/fr.json +342 -334
  84. birdnet_analyzer/lang/id.json +342 -334
  85. birdnet_analyzer/lang/pt-br.json +342 -334
  86. birdnet_analyzer/lang/ru.json +342 -334
  87. birdnet_analyzer/lang/se.json +342 -334
  88. birdnet_analyzer/lang/tlh.json +342 -334
  89. birdnet_analyzer/lang/zh_TW.json +342 -334
  90. birdnet_analyzer/model.py +1213 -1212
  91. birdnet_analyzer/search/__init__.py +3 -3
  92. birdnet_analyzer/search/__main__.py +3 -3
  93. birdnet_analyzer/search/cli.py +11 -11
  94. birdnet_analyzer/search/core.py +78 -78
  95. birdnet_analyzer/search/utils.py +104 -107
  96. birdnet_analyzer/segments/__init__.py +3 -3
  97. birdnet_analyzer/segments/__main__.py +3 -3
  98. birdnet_analyzer/segments/cli.py +13 -13
  99. birdnet_analyzer/segments/core.py +81 -81
  100. birdnet_analyzer/segments/utils.py +383 -383
  101. birdnet_analyzer/species/__init__.py +3 -3
  102. birdnet_analyzer/species/__main__.py +3 -3
  103. birdnet_analyzer/species/cli.py +13 -13
  104. birdnet_analyzer/species/core.py +35 -35
  105. birdnet_analyzer/species/utils.py +73 -74
  106. birdnet_analyzer/train/__init__.py +3 -3
  107. birdnet_analyzer/train/__main__.py +3 -3
  108. birdnet_analyzer/train/cli.py +13 -13
  109. birdnet_analyzer/train/core.py +113 -113
  110. birdnet_analyzer/train/utils.py +878 -877
  111. birdnet_analyzer/translate.py +132 -133
  112. birdnet_analyzer/utils.py +425 -426
  113. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/METADATA +147 -137
  114. birdnet_analyzer-2.1.1.dist-info/RECORD +124 -0
  115. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/WHEEL +1 -1
  116. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/licenses/LICENSE +18 -18
  117. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  118. birdnet_analyzer/playground.py +0 -5
  119. birdnet_analyzer-2.0.1.dist-info/RECORD +0 -125
  120. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/entry_points.txt +0 -0
  121. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/top_level.txt +0 -0
birdnet_analyzer/evaluation/assessment/performance_assessor.py
@@ -1,409 +1,364 @@
- """
- PerformanceAssessor Module
-
- This module defines the `PerformanceAssessor` class to evaluate classification model performance.
- It includes methods to compute metrics like precision, recall, F1 score, AUROC, and accuracy,
- as well as utilities for generating related plots.
- """
-
- from typing import Literal
-
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
-
- from birdnet_analyzer.evaluation.assessment import metrics, plotting
-
-
- class PerformanceAssessor:
-     """
-     A class to assess the performance of classification models by computing metrics
-     and generating visualizations for binary and multilabel classification tasks.
-     """
-
-     def __init__(
-         self,
-         num_classes: int,
-         threshold: float = 0.5,
-         classes: tuple[str, ...] | None = None,
-         task: Literal["binary", "multilabel"] = "multilabel",
-         metrics_list: tuple[str, ...] = (
-             "recall",
-             "precision",
-             "f1",
-             "ap",
-             "auroc",
-             "accuracy",
-         ),
-     ) -> None:
-         """
-         Initialize the PerformanceAssessor.
-
-         Args:
-             num_classes (int): The number of classes in the classification problem.
-             threshold (float): The threshold for binarizing probabilities into class labels.
-             classes (Optional[Tuple[str, ...]]): Optional tuple of class names.
-             task (Literal["binary", "multilabel"]): The classification task type.
-             metrics_list (Tuple[str, ...]): A tuple of metrics to compute.
-
-         Raises:
-             ValueError: If any of the inputs are invalid.
-         """
-         # Validate the number of classes
-         if not isinstance(num_classes, int) or num_classes <= 0:
-             raise ValueError("num_classes must be a positive integer.")
-
-         # Validate the threshold value
-         if not isinstance(threshold, float) or not 0 < threshold < 1:
-             raise ValueError("threshold must be a float between 0 and 1 (exclusive).")
-
-         # Validate class names
-         if classes is not None:
-             if not isinstance(classes, tuple):
-                 raise ValueError("classes must be a tuple of strings.")
-             if len(classes) != num_classes:
-                 raise ValueError(f"Length of classes ({len(classes)}) must match num_classes ({num_classes}).")
-             if not all(isinstance(class_name, str) for class_name in classes):
-                 raise ValueError("All elements in classes must be strings.")
-
-         # Validate the task type
-         if task not in {"binary", "multilabel"}:
-             raise ValueError("task must be 'binary' or 'multilabel'.")
-
-         # Validate the metrics list
-         valid_metrics = ["accuracy", "recall", "precision", "f1", "ap", "auroc"]
-         if not metrics_list:
-             raise ValueError("metrics_list cannot be empty.")
-         if not all(metric in valid_metrics for metric in metrics_list):
-             raise ValueError(f"Invalid metrics in {metrics_list}. Valid options are {valid_metrics}.")
-
-         # Assign instance variables
-         self.num_classes = num_classes
-         self.threshold = threshold
-         self.classes = classes
-         self.task = task
-         self.metrics_list = metrics_list
-
-         # Set default colors for plotting
-         self.colors = ["#3A50B1", "#61A83E", "#D74C4C", "#A13FA1", "#D9A544", "#F3A6E0"]
-
-     def calculate_metrics(
-         self,
-         predictions: np.ndarray,
-         labels: np.ndarray,
-         per_class_metrics: bool = False,
-     ) -> pd.DataFrame:
-         """
-         Calculate multiple performance metrics for the given predictions and labels.
-
-         Args:
-             predictions (np.ndarray): Model predictions as a 2D NumPy array (probabilities or logits).
-             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
-             per_class_metrics (bool): If True, compute metrics for each class individually.
-
-         Returns:
-             pd.DataFrame: A DataFrame containing the computed metrics.
-
-         Raises:
-             TypeError: If predictions or labels are not NumPy arrays.
-             ValueError: If predictions and labels have mismatched dimensions or invalid shapes.
-         """
-         # Validate that predictions and labels are NumPy arrays
-         if not isinstance(predictions, np.ndarray):
-             raise TypeError("predictions must be a NumPy array.")
-         if not isinstance(labels, np.ndarray):
-             raise TypeError("labels must be a NumPy array.")
-
-         # Ensure predictions and labels have the same shape
-         if predictions.shape != labels.shape:
-             raise ValueError("predictions and labels must have the same shape.")
-         if predictions.ndim != 2:
-             raise ValueError("predictions and labels must be 2-dimensional arrays.")
-         if predictions.shape[1] != self.num_classes:
-             raise ValueError(
-                 f"The number of columns in predictions ({predictions.shape[1]}) "
-                 + f"must match num_classes ({self.num_classes})."
-             )
-
-         # Determine the averaging method for metrics
-         if per_class_metrics and self.num_classes == 1:
-             averaging_method = "macro"
-         else:
-             averaging_method = None if per_class_metrics else "macro"
-
-         # Dictionary to store the results of each metric
-         metrics_results = {}
-
-         # Compute each metric in the metrics list
-         for metric_name in self.metrics_list:
-             if metric_name == "recall":
-                 result = metrics.calculate_recall(
-                     predictions=predictions,
-                     labels=labels,
-                     task=self.task,
-                     threshold=self.threshold,
-                     averaging_method=averaging_method,
-                 )
-                 metrics_results["Recall"] = np.atleast_1d(result)
-             elif metric_name == "precision":
-                 result = metrics.calculate_precision(
-                     predictions=predictions,
-                     labels=labels,
-                     task=self.task,
-                     threshold=self.threshold,
-                     averaging_method=averaging_method,
-                 )
-                 metrics_results["Precision"] = np.atleast_1d(result)
-             elif metric_name == "f1":
-                 result = metrics.calculate_f1_score(
-                     predictions=predictions,
-                     labels=labels,
-                     task=self.task,
-                     threshold=self.threshold,
-                     averaging_method=averaging_method,
-                 )
-                 metrics_results["F1"] = np.atleast_1d(result)
-             elif metric_name == "ap":
-                 result = metrics.calculate_average_precision(
-                     predictions=predictions,
-                     labels=labels,
-                     task=self.task,
-                     averaging_method=averaging_method,
-                 )
-                 metrics_results["AP"] = np.atleast_1d(result)
-             elif metric_name == "auroc":
-                 result = metrics.calculate_auroc(
-                     predictions=predictions,
-                     labels=labels,
-                     task=self.task,
-                     averaging_method=averaging_method,
-                 )
-                 metrics_results["AUROC"] = np.atleast_1d(result)
-             elif metric_name == "accuracy":
-                 result = metrics.calculate_accuracy(
-                     predictions=predictions,
-                     labels=labels,
-                     task=self.task,
-                     num_classes=self.num_classes,
-                     threshold=self.threshold,
-                     averaging_method=averaging_method,
-                 )
-                 metrics_results["Accuracy"] = np.atleast_1d(result)
-
-         # Define column names for the DataFrame
-         columns = (
-             (self.classes if self.classes else [f"Class {i}" for i in range(self.num_classes)])
-             if per_class_metrics
-             else ["Overall"]
-         )
-
-         # Create a DataFrame to organize metric results
-         metrics_data = {key: np.atleast_1d(value) for key, value in metrics_results.items()}
-         return pd.DataFrame.from_dict(metrics_data, orient="index", columns=columns)
-
-     def plot_metrics(
-         self,
-         predictions: np.ndarray,
-         labels: np.ndarray,
-         per_class_metrics: bool = False,
-     ) -> None:
-         """
-         Plot performance metrics for the given predictions and labels.
-
-         Args:
-             predictions (np.ndarray): Model output predictions as a 2D NumPy array (probabilities or logits).
-             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
-             per_class_metrics (bool): If True, plots metrics for each class individually.
-
-         Raises:
-             ValueError: If the metrics cannot be calculated or plotting fails.
-
-         Returns:
-             None
-         """
-         # Calculate metrics using the provided predictions and labels
-         metrics_df = self.calculate_metrics(predictions, labels, per_class_metrics)
-
-         # Choose the plotting method based on whether per-class metrics are required
-         return (
-             plotting.plot_metrics_per_class(metrics_df, self.colors)
-             if per_class_metrics
-             else plotting.plot_overall_metrics(metrics_df, self.colors)
-         )
-
-     def plot_metrics_all_thresholds(
-         self,
-         predictions: np.ndarray,
-         labels: np.ndarray,
-         per_class_metrics: bool = False,
-     ) -> None:
-         """
-         Plot performance metrics across thresholds for the given predictions and labels.
-
-         Args:
-             predictions (np.ndarray): Model output predictions as a 2D NumPy array (probabilities or logits).
-             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
-             per_class_metrics (bool): If True, plots metrics for each class individually.
-
-         Raises:
-             ValueError: If metrics calculation or plotting fails.
-
-         Returns:
-             None
-         """
-         # Save the original threshold value to restore it later
-         original_threshold = self.threshold
-
-         # Define a range of thresholds for analysis
-         thresholds = np.arange(0.05, 1.0, 0.05)
-
-         # Exclude metrics that are not threshold-dependent
-         metrics_to_plot = [m for m in self.metrics_list if m not in ["auroc", "ap"]]
-
-         if per_class_metrics:
-             # Define class names for plotting
-             class_names = list(self.classes) if self.classes else [f"Class {i}" for i in range(self.num_classes)]
-
-             # Initialize a dictionary to store metric values per class
-             metric_values_dict_per_class = {
-                 class_name: {metric: [] for metric in metrics_to_plot} for class_name in class_names
-             }
-
-             # Compute metrics for each threshold
-             for thresh in thresholds:
-                 self.threshold = thresh
-                 metrics_df = self.calculate_metrics(predictions, labels, per_class_metrics=True)
-                 for metric_name in metrics_to_plot:
-                     metric_label = metric_name.capitalize() if metric_name != "f1" else "F1"
-                     for class_name in class_names:
-                         value = metrics_df.loc[metric_label, class_name]
-                         metric_values_dict_per_class[class_name][metric_name].append(value)
-
-             # Restore the original threshold
-             self.threshold = original_threshold
-
-             # Plot metrics across thresholds per class
-             fig = plotting.plot_metrics_across_thresholds_per_class(
-                 thresholds,
-                 metric_values_dict_per_class,
-                 metrics_to_plot,
-                 class_names,
-                 self.colors,
-             )
-         else:
-             # Initialize a dictionary to store overall metric values
-             metric_values_dict = {metric_name: [] for metric_name in metrics_to_plot}
-
-             # Compute metrics for each threshold
-             for thresh in thresholds:
-                 self.threshold = thresh
-                 metrics_df = self.calculate_metrics(predictions, labels, per_class_metrics=False)
-                 for metric_name in metrics_to_plot:
-                     metric_label = metric_name.capitalize() if metric_name != "f1" else "F1"
-                     value = metrics_df.loc[metric_label, "Overall"]
-                     metric_values_dict[metric_name].append(value)
-
-             # Restore the original threshold
-             self.threshold = original_threshold
-
-             # Plot metrics across thresholds
-             fig = plotting.plot_metrics_across_thresholds(
-                 thresholds,
-                 metric_values_dict,
-                 metrics_to_plot,
-                 self.colors,
-             )
-
-         return fig
-
-     def plot_confusion_matrix(
-         self,
-         predictions: np.ndarray,
-         labels: np.ndarray,
-     ) -> None:
-         """
-         Plot confusion matrices for each class using scikit-learn's ConfusionMatrixDisplay.
-
-         Args:
-             predictions (np.ndarray): Model output predictions as a 2D NumPy array (probabilities or logits).
-             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
-
-         Raises:
-             TypeError: If predictions or labels are not NumPy arrays.
-             ValueError: If predictions and labels have mismatched shapes or invalid dimensions.
-
-         Returns:
-             None
-         """
-         # Validate that predictions and labels are NumPy arrays and match in shape
-         if not isinstance(predictions, np.ndarray):
-             raise TypeError("predictions must be a NumPy array.")
-         if not isinstance(labels, np.ndarray):
-             raise TypeError("labels must be a NumPy array.")
-         if predictions.shape != labels.shape:
-             raise ValueError("predictions and labels must have the same shape.")
-         if predictions.ndim != 2:
-             raise ValueError("predictions and labels must be 2-dimensional arrays.")
-         if predictions.shape[1] != self.num_classes:
-             raise ValueError(
-                 f"The number of columns in predictions ({predictions.shape[1]}) "
-                 + f"must match num_classes ({self.num_classes})."
-             )
-
-         if self.task == "binary":
-             # Binarize predictions using the threshold
-             y_pred = (predictions >= self.threshold).astype(int).flatten()
-             y_true = labels.astype(int).flatten()
-
-             # Compute and normalize the confusion matrix
-             conf_mat = confusion_matrix(y_true, y_pred, normalize="true")
-             conf_mat = np.round(conf_mat, 2)
-
-             # Plot the confusion matrix
-             disp = ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=["Negative", "Positive"])
-             fig, ax = plt.subplots(figsize=(6, 6))
-             disp.plot(cmap="Reds", ax=ax, colorbar=False, values_format=".2f")
-             ax.set_title("Confusion Matrix")
-
-             return fig
-
-         if self.task == "multilabel":
-             # Binarize predictions for multilabel classification
-             y_pred = (predictions >= self.threshold).astype(int)
-             y_true = labels.astype(int)
-
-             # Compute confusion matrices for each class
-             conf_mats = []
-             class_names = self.classes if self.classes else [f"Class {i}" for i in range(self.num_classes)]
-             for i in range(self.num_classes):
-                 conf_mat = confusion_matrix(y_true[:, i], y_pred[:, i], normalize="true")
-                 conf_mat = np.round(conf_mat, 2)
-                 conf_mats.append(conf_mat)
-
-             # Determine grid size for subplots
-             num_matrices = self.num_classes
-             n_cols = int(np.ceil(np.sqrt(num_matrices)))
-             n_rows = int(np.ceil(num_matrices / n_cols))
-
-             # Create subplots for each confusion matrix
-             fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows))
-             axes = axes.flatten()
-
-             # Plot each confusion matrix
-             for idx, (conf_mat, class_name) in enumerate(zip(conf_mats, class_names, strict=True)):
-                 disp = ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=["Negative", "Positive"])
-                 disp.plot(cmap="Reds", ax=axes[idx], colorbar=False, values_format=".2f")
-                 axes[idx].set_title(f"{class_name}")
-                 axes[idx].set_xlabel("Predicted class")
-                 axes[idx].set_ylabel("True class")
-
-             # Remove unused subplot axes
-             for ax in axes[num_matrices:]:
-                 fig.delaxes(ax)
-
-             plt.tight_layout()
-
-             return fig
-
-         raise ValueError(f"Unsupported task type: {self.task}")
+ """
+ PerformanceAssessor Module
+
+ This module defines the `PerformanceAssessor` class to evaluate classification model performance.
+ It includes methods to compute metrics like precision, recall, F1 score, AUROC, and accuracy,
+ as well as utilities for generating related plots.
+ """
+
+ from typing import Literal
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics import confusion_matrix
+
+ from birdnet_analyzer.evaluation.assessment import metrics, plotting
+
+
+ class PerformanceAssessor:
+     """
+     A class to assess the performance of classification models by computing metrics
+     and generating visualizations for binary and multilabel classification tasks.
+     """
+
+     def __init__(
+         self,
+         num_classes: int,
+         threshold: float = 0.5,
+         classes: tuple[str, ...] | None = None,
+         task: Literal["binary", "multilabel"] = "multilabel",
+         metrics_list: tuple[str, ...] = (
+             "recall",
+             "precision",
+             "f1",
+             "ap",
+             "auroc",
+             "accuracy",
+         ),
+     ) -> None:
+         """
+         Initialize the PerformanceAssessor.
+
+         Args:
+             num_classes (int): The number of classes in the classification problem.
+             threshold (float): The threshold for binarizing probabilities into class labels.
+             classes (Optional[Tuple[str, ...]]): Optional tuple of class names.
+             task (Literal["binary", "multilabel"]): The classification task type.
+             metrics_list (Tuple[str, ...]): A tuple of metrics to compute.
+
+         Raises:
+             ValueError: If any of the inputs are invalid.
+         """
+         # Validate the number of classes
+         if not isinstance(num_classes, int) or num_classes <= 0:
+             raise ValueError("num_classes must be a positive integer.")
+
+         # Validate the threshold value
+         if not isinstance(threshold, float) or not 0 < threshold < 1:
+             raise ValueError("threshold must be a float between 0 and 1 (exclusive).")
+
+         # Validate class names
+         if classes is not None:
+             if not isinstance(classes, tuple):
+                 raise ValueError("classes must be a tuple of strings.")
+             if len(classes) != num_classes:
+                 raise ValueError(f"Length of classes ({len(classes)}) must match num_classes ({num_classes}).")
+             if not all(isinstance(class_name, str) for class_name in classes):
+                 raise ValueError("All elements in classes must be strings.")
+
+         # Validate the task type
+         if task not in {"binary", "multilabel"}:
+             raise ValueError("task must be 'binary' or 'multilabel'.")
+
+         # Validate the metrics list
+         valid_metrics = ["accuracy", "recall", "precision", "f1", "ap", "auroc"]
+         if not metrics_list:
+             raise ValueError("metrics_list cannot be empty.")
+         if not all(metric in valid_metrics for metric in metrics_list):
+             raise ValueError(f"Invalid metrics in {metrics_list}. Valid options are {valid_metrics}.")
+
+         # Assign instance variables
+         self.num_classes = num_classes
+         self.threshold = threshold
+         self.classes = classes
+         self.task = task
+         self.metrics_list = metrics_list
+
+         # Set default colors for plotting
+         self.colors = ["#3A50B1", "#61A83E", "#D74C4C", "#A13FA1", "#D9A544", "#F3A6E0"]
+
+     def calculate_metrics(
+         self,
+         predictions: np.ndarray,
+         labels: np.ndarray,
+         per_class_metrics: bool = False,
+     ) -> pd.DataFrame:
+         """
+         Calculate multiple performance metrics for the given predictions and labels.
+
+         Args:
+             predictions (np.ndarray): Model predictions as a 2D NumPy array (probabilities or logits).
+             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
+             per_class_metrics (bool): If True, compute metrics for each class individually.
+
+         Returns:
+             pd.DataFrame: A DataFrame containing the computed metrics.
+
+         Raises:
+             TypeError: If predictions or labels are not NumPy arrays.
+             ValueError: If predictions and labels have mismatched dimensions or invalid shapes.
+         """
+         # Validate that predictions and labels are NumPy arrays
+         if not isinstance(predictions, np.ndarray):
+             raise TypeError("predictions must be a NumPy array.")
+         if not isinstance(labels, np.ndarray):
+             raise TypeError("labels must be a NumPy array.")
+
+         # Ensure predictions and labels have the same shape
+         if predictions.shape != labels.shape:
+             raise ValueError("predictions and labels must have the same shape.")
+         if predictions.ndim != 2:
+             raise ValueError("predictions and labels must be 2-dimensional arrays.")
+         if predictions.shape[1] != self.num_classes:
+             raise ValueError(f"The number of columns in predictions ({predictions.shape[1]}) " + f"must match num_classes ({self.num_classes}).")
+
+         # Determine the averaging method for metrics
+         if per_class_metrics and self.num_classes == 1:
+             averaging_method = "macro"
+         else:
+             averaging_method = None if per_class_metrics else "macro"
+
+         # Dictionary to store the results of each metric
+         metrics_results = {}
+
+         # Compute each metric in the metrics list
+         for metric_name in self.metrics_list:
+             if metric_name == "recall":
+                 result = metrics.calculate_recall(
+                     predictions=predictions,
+                     labels=labels,
+                     task=self.task,
+                     threshold=self.threshold,
+                     averaging_method=averaging_method,
+                 )
+                 metrics_results["Recall"] = np.atleast_1d(result)
+             elif metric_name == "precision":
+                 result = metrics.calculate_precision(
+                     predictions=predictions,
+                     labels=labels,
+                     task=self.task,
+                     threshold=self.threshold,
+                     averaging_method=averaging_method,
+                 )
+                 metrics_results["Precision"] = np.atleast_1d(result)
+             elif metric_name == "f1":
+                 result = metrics.calculate_f1_score(
+                     predictions=predictions,
+                     labels=labels,
+                     task=self.task,
+                     threshold=self.threshold,
+                     averaging_method=averaging_method,
+                 )
+                 metrics_results["F1"] = np.atleast_1d(result)
+             elif metric_name == "ap":
+                 result = metrics.calculate_average_precision(
+                     predictions=predictions,
+                     labels=labels,
+                     task=self.task,
+                     averaging_method=averaging_method,
+                 )
+                 metrics_results["AP"] = np.atleast_1d(result)
+             elif metric_name == "auroc":
+                 result = metrics.calculate_auroc(
+                     predictions=predictions,
+                     labels=labels,
+                     task=self.task,
+                     averaging_method=averaging_method,
+                 )
+                 metrics_results["AUROC"] = np.atleast_1d(result)
+             elif metric_name == "accuracy":
+                 result = metrics.calculate_accuracy(
+                     predictions=predictions,
+                     labels=labels,
+                     task=self.task,
+                     num_classes=self.num_classes,
+                     threshold=self.threshold,
+                     averaging_method=averaging_method,
+                 )
+                 metrics_results["Accuracy"] = np.atleast_1d(result)
+
+         # Define column names for the DataFrame
+         columns = (self.classes if self.classes else [f"Class {i}" for i in range(self.num_classes)]) if per_class_metrics else ["Overall"]
+
+         # Create a DataFrame to organize metric results
+         metrics_data = {key: np.atleast_1d(value) for key, value in metrics_results.items()}
+         return pd.DataFrame.from_dict(metrics_data, orient="index", columns=columns)
+
+     def plot_metrics(
+         self,
+         predictions: np.ndarray,
+         labels: np.ndarray,
+         per_class_metrics: bool = False,
+     ):
+         """
+         Plot performance metrics for the given predictions and labels.
+
+         Args:
+             predictions (np.ndarray): Model output predictions as a 2D NumPy array (probabilities or logits).
+             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
+             per_class_metrics (bool): If True, plots metrics for each class individually.
+
+         Raises:
+             ValueError: If the metrics cannot be calculated or plotting fails.
+
+         Returns:
+             None
+         """
+         # Calculate metrics using the provided predictions and labels
+         metrics_df = self.calculate_metrics(predictions, labels, per_class_metrics)
+
+         # Choose the plotting method based on whether per-class metrics are required
+         return plotting.plot_metrics_per_class(metrics_df, self.colors) if per_class_metrics else plotting.plot_overall_metrics(metrics_df, self.colors)
+
+     def plot_metrics_all_thresholds(
+         self,
+         predictions: np.ndarray,
+         labels: np.ndarray,
+         per_class_metrics: bool = False,
+     ):
+         """
+         Plot performance metrics across thresholds for the given predictions and labels.
+
+         Args:
+             predictions (np.ndarray): Model output predictions as a 2D NumPy array (probabilities or logits).
+             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
+             per_class_metrics (bool): If True, plots metrics for each class individually.
+
+         Raises:
+             ValueError: If metrics calculation or plotting fails.
+
+         Returns:
+             None
+         """
+         # Save the original threshold value to restore it later
+         original_threshold = self.threshold
+
+         # Define a range of thresholds for analysis
+         thresholds = np.arange(0.05, 1.0, 0.05)
+
+         # Exclude metrics that are not threshold-dependent
+         metrics_to_plot = [m for m in self.metrics_list if m not in ["auroc", "ap"]]
+
+         if per_class_metrics:
+             # Define class names for plotting
+             class_names = list(self.classes) if self.classes else [f"Class {i}" for i in range(self.num_classes)]
+
+             # Initialize a dictionary to store metric values per class
+             metric_values_dict_per_class = {class_name: {metric: [] for metric in metrics_to_plot} for class_name in class_names}
+
+             # Compute metrics for each threshold
+             for thresh in thresholds:
+                 self.threshold = thresh
+                 metrics_df = self.calculate_metrics(predictions, labels, per_class_metrics=True)
+                 for metric_name in metrics_to_plot:
+                     metric_label = metric_name.capitalize() if metric_name != "f1" else "F1"
+                     for class_name in class_names:
+                         value = metrics_df.loc[metric_label, class_name]
+                         metric_values_dict_per_class[class_name][metric_name].append(value)
+
+             # Restore the original threshold
+             self.threshold = original_threshold
+
+             # Plot metrics across thresholds per class
+             fig = plotting.plot_metrics_across_thresholds_per_class(
+                 thresholds,
+                 metric_values_dict_per_class,
+                 metrics_to_plot,
+                 class_names,
+                 self.colors,
+             )
+         else:
+             # Initialize a dictionary to store overall metric values
+             metric_values_dict = {metric_name: [] for metric_name in metrics_to_plot}
+
+             # Compute metrics for each threshold
+             for thresh in thresholds:
+                 self.threshold = thresh
+                 metrics_df = self.calculate_metrics(predictions, labels, per_class_metrics=False)
+                 for metric_name in metrics_to_plot:
+                     metric_label = metric_name.capitalize() if metric_name != "f1" else "F1"
+                     value = metrics_df.loc[metric_label, "Overall"]
+                     metric_values_dict[metric_name].append(value)
+
+             # Restore the original threshold
+             self.threshold = original_threshold
+
+             # Plot metrics across thresholds
+             fig = plotting.plot_metrics_across_thresholds(
+                 thresholds,
+                 metric_values_dict,
+                 metrics_to_plot,
+                 self.colors,
+             )
+
+         return fig
+
+     def plot_confusion_matrix(
+         self,
+         predictions: np.ndarray,
+         labels: np.ndarray,
+     ):
+         """
+         Plot confusion matrices for each class using scikit-learn's ConfusionMatrixDisplay.
+
+         Args:
+             predictions (np.ndarray): Model output predictions as a 2D NumPy array (probabilities or logits).
+             labels (np.ndarray): Ground truth labels as a 2D NumPy array.
+
+         Raises:
+             TypeError: If predictions or labels are not NumPy arrays.
+             ValueError: If predictions and labels have mismatched shapes or invalid dimensions.
+
+         Returns:
+             None
+         """
+         # Validate that predictions and labels are NumPy arrays and match in shape
+         if not isinstance(predictions, np.ndarray):
+             raise TypeError("predictions must be a NumPy array.")
+         if not isinstance(labels, np.ndarray):
+             raise TypeError("labels must be a NumPy array.")
+         if predictions.shape != labels.shape:
+             raise ValueError("predictions and labels must have the same shape.")
+         if predictions.ndim != 2:
+             raise ValueError("predictions and labels must be 2-dimensional arrays.")
+         if predictions.shape[1] != self.num_classes:
+             raise ValueError(f"The number of columns in predictions ({predictions.shape[1]}) " + f"must match num_classes ({self.num_classes}).")
+
+         if self.task == "binary":
+             # Binarize predictions using the threshold
+             y_pred = (predictions >= self.threshold).astype(int).flatten()
+             y_true = labels.astype(int).flatten()
+
+             # Compute and normalize the confusion matrix
+             conf_mat = confusion_matrix(y_true, y_pred, normalize="true")
+             conf_mat = np.round(conf_mat, 2)
+
+             return plotting.plot_confusion_matrices(conf_mat, self.task, self.classes)
+
+         if self.task == "multilabel":
+             # Binarize predictions for multilabel classification
+             y_pred = (predictions >= self.threshold).astype(int)
+             y_true = labels.astype(int)
+
+             # Compute confusion matrices for each class
+             conf_mats = []
+             class_names = self.classes if self.classes else [f"Class {i}" for i in range(self.num_classes)]
+
+             for i in range(self.num_classes):
+                 conf_mat = confusion_matrix(y_true[:, i], y_pred[:, i], normalize="true")
+                 conf_mat = np.round(conf_mat, 2)
+                 conf_mats.append(conf_mat)
+
+             return plotting.plot_confusion_matrices(np.array(conf_mats), self.task, class_names)
+
+         raise ValueError(f"Unsupported task type: {self.task}")
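For orientation, here is a rough usage sketch of the refactored PerformanceAssessor (not part of the diff itself): only the constructor and method signatures come from the 2.1.1 file above; the synthetic scores, the example class names, and the print-based inspection are illustrative assumptions.

    # Usage sketch only: synthetic scores/labels stand in for real BirdNET evaluation data.
    import numpy as np

    from birdnet_analyzer.evaluation.assessment.performance_assessor import PerformanceAssessor

    rng = np.random.default_rng(0)
    num_samples, num_classes = 50, 3

    # 2D arrays of shape (samples, classes): scores in [0, 1] and binary ground truth.
    predictions = rng.random((num_samples, num_classes))
    labels = (rng.random((num_samples, num_classes)) > 0.5).astype(int)

    assessor = PerformanceAssessor(
        num_classes=num_classes,
        threshold=0.5,
        classes=("Robin", "Wren", "Owl"),  # hypothetical class names
        task="multilabel",
    )

    # Metric tables come back as pandas DataFrames (rows = metrics, columns = "Overall" or the classes).
    print(assessor.calculate_metrics(predictions, labels))
    print(assessor.calculate_metrics(predictions, labels, per_class_metrics=True))

    # In 2.1.1 the confusion-matrix figure is built by plotting.plot_confusion_matrices and returned.
    fig = assessor.plot_confusion_matrix(predictions, labels)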