pheval 0.4.6__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- pheval/analyse/benchmark.py +156 -0
- pheval/analyse/benchmark_db_manager.py +16 -134
- pheval/analyse/benchmark_output_type.py +43 -0
- pheval/analyse/binary_classification_curves.py +132 -0
- pheval/analyse/binary_classification_stats.py +164 -307
- pheval/analyse/generate_plots.py +210 -395
- pheval/analyse/generate_rank_comparisons.py +44 -0
- pheval/analyse/rank_stats.py +190 -382
- pheval/analyse/run_data_parser.py +21 -39
- pheval/cli.py +28 -25
- pheval/cli_pheval_utils.py +7 -8
- pheval/post_processing/phenopacket_truth_set.py +235 -0
- pheval/post_processing/post_processing.py +183 -303
- pheval/post_processing/validate_result_format.py +92 -0
- pheval/prepare/update_phenopacket.py +11 -9
- pheval/utils/logger.py +35 -0
- pheval/utils/phenopacket_utils.py +85 -91
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/METADATA +4 -4
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/RECORD +22 -26
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/WHEEL +1 -1
- pheval/analyse/analysis.py +0 -104
- pheval/analyse/assess_prioritisation_base.py +0 -108
- pheval/analyse/benchmark_generator.py +0 -126
- pheval/analyse/benchmarking_data.py +0 -25
- pheval/analyse/disease_prioritisation_analysis.py +0 -152
- pheval/analyse/gene_prioritisation_analysis.py +0 -147
- pheval/analyse/generate_summary_outputs.py +0 -105
- pheval/analyse/parse_benchmark_summary.py +0 -81
- pheval/analyse/parse_corpus.py +0 -219
- pheval/analyse/prioritisation_result_types.py +0 -52
- pheval/analyse/variant_prioritisation_analysis.py +0 -159
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/LICENSE +0 -0
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/entry_points.txt +0 -0
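The headline change in this diff is a rewrite of the analysis layer: the DuckDB-backed RankStats/RankStatsWriter machinery in pheval/analyse/rank_stats.py gives way to polars expressions evaluated over lazy result tables. As orientation before the full diff, here is a minimal sketch of the new style; the toy data is invented, while the column names (file_path, rank, true_positive) and the expressions mirror the Ranks constants shown below:

import polars as pl

# One row per result; rank 0 means the known entity was not found.
results = pl.LazyFrame(
    {
        "file_path": ["a.json", "a.json", "b.json", "c.json"],
        "rank": [1, 4, 2, 0],
        "true_positive": [True, True, True, True],
    }
)

summary = results.filter(pl.col("true_positive")).select(
    pl.col("rank").eq(1).sum().alias("top1"),
    pl.col("rank").is_between(1, 3, closed="both").sum().alias("top3"),
    pl.col("rank").gt(0).sum().alias("found"),
    pl.len().alias("total"),
    # Dividing by the full row count means unfound cases contribute 0 to MRR.
    ((1 / pl.col("rank").filter(pl.col("rank") > 0)).sum() / pl.len()).alias("mrr"),
)
print(summary.collect())  # top1=1, top3=2, found=3, total=4, mrr=0.4375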
pheval/analyse/rank_stats.py
CHANGED
@@ -1,447 +1,255 @@
-from dataclasses import dataclass
-from statistics import mean
+from dataclasses import dataclass
 from typing import List

 import numpy as np
-
+import polars as pl
 from sklearn.metrics import ndcg_score

-from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
-from pheval.analyse.binary_classification_stats import BinaryClassificationStats
+from pheval.utils.logger import get_logger


-@dataclass
-class RankStats:
-    """
-
-    Attributes:
-        top (int): Count of top-ranked matches.
-        top3 (int): Count of matches within the top 3 ranks.
-        top5 (int): Count of matches within the top 5 ranks.
-        top10 (int): Count of matches within the top 10 ranks.
-        found (int): Count of found matches.
-        total (int): Total count of matches.
-        reciprocal_ranks (List[float]): List of reciprocal ranks.
-        relevant_ranks List[List[int]]: Nested list of ranks for the known entities for all cases in a run.
-        mrr (float): Mean Reciprocal Rank (MRR). Defaults to None.
+@dataclass(frozen=True)
+class Ranks:
+    """
+    Class for calculating ranking statistics.
     """

-
-
-
-
-
-
-
-
-    mrr: float = None
-
-    def add_ranks(self, benchmark_name: str, table_name: str, column_name: str) -> None:
-        """
-        Add ranks to RankStats instance from table.
-        Args:
-            table_name (str): Name of the table to add ranks from.
-            column_name (str): Name of the column to add ranks from.:
-        """
-        conn = BenchmarkDBManager(benchmark_name).conn
-        self.top = self._execute_count_query(conn, table_name, column_name, " = 1")
-        self.top3 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 3")
-        self.top5 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 5")
-        self.top10 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 10")
-        self.found = self._execute_count_query(conn, table_name, column_name, " > 0")
-        self.total = self._execute_count_query(conn, table_name, column_name, " >= 0")
-        self.reciprocal_ranks = self._fetch_reciprocal_ranks(conn, table_name, column_name)
-        self.relevant_result_ranks = self._fetch_relevant_ranks(conn, table_name, column_name)
-        conn.close()
-
-    @staticmethod
-    def _execute_count_query(
-        conn: DuckDBPyConnection, table_name: str, column_name: str, condition: str
-    ) -> int:
-        """
-        Execute count query on table.
-        Args:
-            conn (DuckDBPyConnection): Connection to the database.
-            table_name (str): Name of the table to execute count query on.
-            column_name (str): Name of the column to execute count query on.
-            condition (str): Condition to execute count query.
-        Returns:
-            int: Count query result.
-        """
-        query = f'SELECT COUNT(*) FROM "{table_name}" WHERE "{column_name}" {condition}'
-        return conn.execute(query).fetchone()[0]
-
-    @staticmethod
-    def _fetch_reciprocal_ranks(
-        conn: DuckDBPyConnection, table_name: str, column_name: str
-    ) -> List[float]:
-        """
-        Fetch reciprocal ranks from table.
-        Args:
-            conn (DuckDBPyConnection): Connection to the database.
-            table_name (str): Name of the table to fetch reciprocal ranks from.
-            column_name (str): Name of the column to fetch reciprocal ranks from.
-
-        Returns:
-            List[float]: List of reciprocal ranks.
-        """
-        query = f'SELECT "{column_name}" FROM "{table_name}"'
-        return [1 / rank[0] if rank[0] > 0 else 0 for rank in conn.execute(query).fetchall()]
+    TOP_1 = pl.col("rank").eq(1).sum().alias("top1")
+    TOP_3 = pl.col("rank").is_between(1, 3, closed="both").sum().alias("top3")
+    TOP_5 = pl.col("rank").is_between(1, 5, closed="both").sum().alias("top5")
+    TOP_10 = pl.col("rank").is_between(1, 10, closed="both").sum().alias("top10")
+    FOUND = pl.col("rank").gt(0).sum().alias("found")
+    TOTAL = pl.len().alias("total")
+    NUMBER_OF_SAMPLES = pl.col("file_path").n_unique().alias("number_of_samples")
+    MRR = ((1 / pl.col("rank").filter(pl.col("rank") > 0)).sum() / pl.len()).alias("mrr")

-    @staticmethod
-    def _fetch_relevant_ranks(
-        conn: DuckDBPyConnection, table_name: str, column_name: str
-    ) -> List[List[int]]:
+    @classmethod
+    def _filter_results(cls, df: pl.LazyFrame, k: int) -> pl.LazyFrame:
         """
-
+        Filter for ranks within k.
         Args:
-
-
-            column_name (str): Name of the column to fetch relevant ranks from.
+            df (pl.LazyFrame): The dataframe to filter.
+            k (int): The number upper rank limit.

         Returns:
-
+            pl.LazyFrame: The filtered dataframe.
         """
-
-
+        df = df.filter(pl.col("rank").is_between(1, k, closed="both"))
+        return df.group_by("file_path").agg(
+            pl.col("rank").sort().alias("ranks"),
         )
-        return [rank[0] for rank in conn.execute(query).fetchall()]

-
+    @classmethod
+    def percentage_at_k(cls, k: int) -> pl.Expr:
         """
-
-
+        Compute percentage at k dynamically.
         Args:
-
-
-        Returns:
-            float: The calculated percentage rank based on the provided value and the total count.
-        """
-        return 100 * value / self.total
-
-    def percentage_top(self) -> float:
-        """
-        Calculate the percentage of top matches.
-
-        Returns:
-            float: The percentage of top matches compared to the total count.
-        """
-        return self.percentage_rank(self.top)
-
-    def percentage_top3(self) -> float:
-        """
-        Calculate the percentage of matches within the top 3.
-
-        Returns:
-            float: The percentage of matches within the top 3 compared to the total count.
-        """
-        return self.percentage_rank(self.top3)
-
-    def percentage_top5(self) -> float:
-        """
-        Calculate the percentage of matches within the top 5.
-
-        Returns:
-            float: The percentage of matches within the top 5 compared to the total count.
-        """
-        return self.percentage_rank(self.top5)
-
-    def percentage_top10(self) -> float:
-        """
-        Calculate the percentage of matches within the top 10.
-
+            k (int): The upper rank limit.
         Returns:
-
+            pl.Expr: The expression for calculating percentage at k.
         """
-        return self.percentage_rank(self.top10)
+        return (100 * pl.col(f"top{k}") / pl.col("total")).alias(f"percentage@{k}")

-
+    @classmethod
+    def percentage_found(cls) -> pl.Expr:
         """
-
-
+        Compute the percentage of found items.
         Returns:
-
+            pl.Expr: The expression for calculating percentage of found items.
         """
-        return self.percentage_rank(self.found)
+        return (100 * pl.col("found") / pl.col("total")).alias("percentage_found")

-    @
-    def
+    @classmethod
+    def precision_at_k(cls, k: int) -> pl.Expr:
         """
-
-
+        Compute precision at k dynamically.
         Args:
-
-            percentage_value_2 (float): The second percentage value.
-
+            k (int): The upper rank limit.
         Returns:
-
+            pl.Expr: The expression for calculating precision at k.
         """
-        return
+        return (pl.col(f"top{k}") / (pl.col("number_of_samples") * k)).alias(f"precision@{k}")

-
+    @classmethod
+    def f_beta_score_at_k(cls, k: int) -> pl.Expr:
         """
-
-
-        The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks
-        for the found cases.
-
-        If the total number of cases differs from the number of found cases,
-        this method extends the reciprocal ranks list with zeroes for missing cases.
-
-        Returns:
-            float: The calculated Mean Reciprocal Rank.
-        """
-        if len(self.reciprocal_ranks) != self.total:
-            missing_cases = self.total - self.found
-            self.reciprocal_ranks.extend([0] * missing_cases)
-            return mean(self.reciprocal_ranks)
-        return mean(self.reciprocal_ranks)
-
-    def return_mean_reciprocal_rank(self) -> float:
-        """
-        Retrieve or calculate the Mean Reciprocal Rank (MRR).
-
-        If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value.
-        Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method.
-
-        Returns:
-            float: The Mean Reciprocal Rank value.
-        """
-        if self.mrr is not None:
-            return self.mrr
-        else:
-            return self.mean_reciprocal_rank()
-
-    def precision_at_k(self, k: int) -> float:
-        """
-        Calculate the precision at k.
-        Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions.
-        It measures the accuracy of the top-k predictions made by a model.
-
+        Compute f_beta_score at k.
         Args:
-            k (int): The
-
+            k (int): The upper rank limit.
         Returns:
-
-            A higher precision indicates a better performance in identifying relevant items in the top-k predictions.
+            pl.Expr: The expression for calculating f_beta_score at k.
         """
-
-
+        precision_expr = pl.col(f"top{k}") / (pl.col("number_of_samples") * k)
+        recall_expr = pl.col(f"top{k}") / pl.col("total")
+        return (
+            ((2 * precision_expr * recall_expr) / (precision_expr + recall_expr))
+            .fill_nan(0)
+            .alias(f"f_beta@{k}")
+        )

-    @staticmethod
-    def _average_precision_at_k(
-        number_of_relevant_entities_at_k: int, precision_at_k: float
-    ) -> float:
+    @classmethod
+    def _average_precision_at_k(cls, df: pl.LazyFrame, k: int) -> pl.LazyFrame:
         """
-
+        Compute Average Precision at K (AP@K) for each query.

-
-        It measures the precision at each relevant position up to k and takes the average.
+        AP@K = (1 / min(k, R)) * sum(P(i) * rel(i)) for i ≤ k

         Args:
-
-
-
-
-
-
-
-
-            (
-
-
+            df (pl.LazyFrame): The dataframe calculate AP@K for each query.
+            k (int): The upper rank limit.
+        Returns:
+            pl.LazyFrame: The dataframe with AP@K for each query.
+        """
+        filtered_df = cls._filter_results(df, k)
+        df_grouped = filtered_df.with_columns(
+            pl.struct("ranks")
+            .map_elements(
+                lambda row: cls._compute_ap_k(np.array(row["ranks"])), return_dtype=pl.Float64
+            )
+            .alias(f"ap@{k}")
         )
+        return df_grouped.select(["file_path", f"ap@{k}"])

-
+    @staticmethod
+    def _compute_ap_k(ranks: np.array) -> np.floating:
         """
-
-
-        Mean Average Precision at k (MAP@k) is a performance metric for ranked data.
-        It calculates the average precision at k for each result rank and then takes the mean across all queries.
-
+        Helper function to compute AP@K for a single query.
         Args:
-
-
+            ranks (np.array): The ranks to compute AP@K.
         Returns:
-            float: The
-            A higher value indicates better performance in ranking relevant entities higher in the predictions.
+            float: The AP@K.
         """
-
-
-
-        for rank in result_ranks:
-            if 0 < rank <= k:
-                number_of_relevant_entities_at_k += 1
-                precision_at_k += number_of_relevant_entities_at_k / rank
-        cumulative_average_precision_scores += self._average_precision_at_k(
-            number_of_relevant_entities_at_k, precision_at_k
-        )
-        return (1 / self.total) * cumulative_average_precision_scores
+        num_relevant = np.arange(1, len(ranks) + 1)
+        precision_at_k = num_relevant / ranks
+        return np.mean(precision_at_k)

-
+    @classmethod
+    def mean_average_precision_at_k(cls, df: pl.LazyFrame, k: int) -> pl.LazyFrame:
         """
-
-
-        The F-beta score is a metric that combines precision and recall,
-        with beta controlling the emphasis on precision.
-        The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall.
-        This method computes the F-beta score at a specific percentage threshold within the top-k predictions.
-
+        Compute Mean Average Precision at K (MAP@K) by averaging AP@K scores.
         Args:
-
-            k (int): The
-
+            df (pl.LazyFrame): The dataframe calculate MAP@K for each query.
+            k (int): The upper rank limit.
         Returns:
-
-            A higher score indicates better trade-off between precision and recall.
+            pl.LazyFrame: The dataframe with MAP@K for each query.
         """
-
-        recall_at_k = percentage_at_k / 100
+        ap_at_k_df = cls._average_precision_at_k(df, k)
         return (
-            (
-
-
+            ap_at_k_df.select(
+                pl.col(f"ap@{k}").sum() / df.select(Ranks.NUMBER_OF_SAMPLES).collect()
+            )
+            .fill_null(0.0)
+            .collect()
+            .item()
         )

-
+    @classmethod
+    def _calculate_ndcg_at_k(cls, ranks: List[int], k: int) -> float:
         """
-
-
-        NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items.
-
+        Compute NDCG@K for a single query.
         Args:
-
-
+            ranks (List[int]): The ranks to compute NDCG@K.
+            k (int): The upper rank limit.
         Returns:
-            float: The
-        """
-        ndcg_scores = []
-        for result_ranks in self.relevant_result_ranks:
-            result_ranks = [rank for rank in result_ranks if rank <= k]
-            result_ranks = [3 if i in result_ranks else 0 for i in range(k)]
-            ideal_ranking = sorted(result_ranks, reverse=True)
-            ndcg_scores.append(ndcg_score(np.asarray([ideal_ranking]), np.asarray([result_ranks])))
-        return np.mean(ndcg_scores)
-
-
-class RankStatsWriter:
-    """Class for writing the rank stats to a file."""
-
-    def __init__(self, benchmark_name: str, table_name: str):
+            float: The NDCG@K.
         """
-
-
-
-
-
-
-
-
-
-            f'CREATE TABLE IF NOT EXISTS "{self.table_name}" ('
-            f"results_directory_path VARCHAR,"
-            f"top INT,"
-            f"top3 INT,"
-            f"top5 INT,"
-            f"top10 INT,"
-            f'"found" INT,'
-            f"total INT,"
-            f"mean_reciprocal_rank FLOAT,"
-            f"percentage_top FLOAT,"
-            f"percentage_top3 FLOAT,"
-            f"percentage_top5 FLOAT,"
-            f"percentage_top10 FLOAT,"
-            f"percentage_found FLOAT,"
-            f'"precision@1" FLOAT,'
-            f'"precision@3" FLOAT,'
-            f'"precision@5" FLOAT,'
-            f'"precision@10" FLOAT,'
-            f'"MAP@1" FLOAT,'
-            f'"MAP@3" FLOAT,'
-            f'"MAP@5" FLOAT,'
-            f'"MAP@10" FLOAT,'
-            f'"f_beta_score@1" FLOAT,'
-            f'"f_beta_score@3"FLOAT,'
-            f'"f_beta_score@5" FLOAT,'
-            f'"f_beta_score@10" FLOAT,'
-            f'"NDCG@3" FLOAT,'
-            f'"NDCG@5" FLOAT,'
-            f'"NDCG@10" FLOAT,'
-            f"true_positives INT,"
-            f"false_positives INT,"
-            f"true_negatives INT,"
-            f"false_negatives INT,"
-            f"sensitivity FLOAT,"
-            f"specificity FLOAT,"
-            f'"precision" FLOAT,'
-            f"negative_predictive_value FLOAT,"
-            f"false_positive_rate FLOAT,"
-            f"false_discovery_rate FLOAT,"
-            f"false_negative_rate FLOAT,"
-            f"accuracy FLOAT,"
-            f"f1_score FLOAT,"
-            f"matthews_correlation_coefficient FLOAT, )"
+        result_ranks = np.zeros(k, dtype=int)
+        indices = np.array(ranks) - 1
+        valid_indices = indices[(indices >= 0) & (indices < k)]
+        result_ranks[valid_indices] = 3
+        ideal_ranking = np.sort(result_ranks)[::-1]
+        return (
+            ndcg_score(result_ranks.reshape(1, -1), ideal_ranking.reshape(1, -1))
+            if np.sum(result_ranks) > 0
+            else 0.0
         )
-        conn.close()

-
-
-        run_identifier: str,
-        rank_stats: RankStats,
-        binary_classification: BinaryClassificationStats,
-    ):
+    @classmethod
+    def mean_normalised_discounted_cumulative_gain(cls, df: pl.LazyFrame, k: int) -> pl.Float64:
         """
-
+        Compute mean normalised discounted cumulative gain.
         Args:
-
-
-
+            df (pl.LazyFrame): The dataframe to calculate mean normalised cumulative gain.
+            k (int): The upper rank limit.
+        Returns:
+            pl.LazyFrame: The dataframe with mean normalised cumulative gain.
         """
-
-
-
-
-
-
-
-
-
-            f"{
-
-
-
-            f"{rank_stats.percentage_top5()},"
-            f"{rank_stats.percentage_top10()},"
-            f"{rank_stats.percentage_found()},"
-            f"{rank_stats.precision_at_k(1)},"
-            f"{rank_stats.precision_at_k(3)},"
-            f"{rank_stats.precision_at_k(5)},"
-            f"{rank_stats.precision_at_k(10)},"
-            f"{rank_stats.mean_average_precision_at_k(1)},"
-            f"{rank_stats.mean_average_precision_at_k(3)},"
-            f"{rank_stats.mean_average_precision_at_k(5)},"
-            f"{rank_stats.mean_average_precision_at_k(10)},"
-            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 1)},"
-            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 3)},"
-            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 5)},"
-            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 10)},"
-            f"{rank_stats.mean_normalised_discounted_cumulative_gain(3)},"
-            f"{rank_stats.mean_normalised_discounted_cumulative_gain(5)},"
-            f"{rank_stats.mean_normalised_discounted_cumulative_gain(10)},"
-            f"{binary_classification.true_positives},"
-            f"{binary_classification.false_positives},"
-            f"{binary_classification.true_negatives},"
-            f"{binary_classification.false_negatives},"
-            f"{binary_classification.sensitivity()},"
-            f"{binary_classification.specificity()},"
-            f"{binary_classification.precision()},"
-            f"{binary_classification.negative_predictive_value()},"
-            f"{binary_classification.false_positive_rate()},"
-            f"{binary_classification.false_discovery_rate()},"
-            f"{binary_classification.false_negative_rate()},"
-            f"{binary_classification.accuracy()},"
-            f"{binary_classification.f1_score()},"
-            f"{binary_classification.matthews_correlation_coefficient()})"
+        filtered_df = cls._filter_results(df, k)
+        return (
+            filtered_df.with_columns(
+                pl.struct("ranks")
+                .map_elements(
+                    lambda row: cls._calculate_ndcg_at_k(row["ranks"], k), return_dtype=pl.Float64
+                )
+                .alias(f"NDCG@{k}")
+            )
+            .select(pl.col(f"NDCG@{k}").sum() / df.select(Ranks.NUMBER_OF_SAMPLES).collect())
+            .fill_null(0.0)
+            .collect()
+            .item()
         )

-
+
+def compute_rank_stats(run_identifier: str, result_scan: pl.LazyFrame) -> pl.LazyFrame:
+    """
+    Computes ranking statistics for a given benchmarking run.
+    Args:
+        run_identifier (str): The identifier of the benchmarking run.
+        result_scan (pl.LazyFrame): The scan of the directory to compute ranking statistics for.
+    """
+    logger = get_logger()
+    logger.info(f"Generating ranking statistics for {run_identifier}...")
+    true_positive_scan = result_scan.filter(pl.col("true_positive"))
+    rankings = true_positive_scan.select(
+        [
+            pl.lit(run_identifier).alias("run_identifier"),
+            Ranks.TOP_1.alias("top1"),
+            Ranks.TOP_3.alias("top3"),
+            Ranks.TOP_5.alias("top5"),
+            Ranks.TOP_10.alias("top10"),
+            Ranks.FOUND.alias("found"),
+            Ranks.TOTAL.alias("total"),
+            Ranks.NUMBER_OF_SAMPLES.alias("number_of_samples"),
+            Ranks.MRR.alias("mrr"),
+        ]
+    )
+
+    return rankings.select(
+        [
+            pl.col("run_identifier"),
+            pl.col("top1"),
+            pl.col("top3"),
+            pl.col("top5"),
+            pl.col("top10"),
+            pl.col("found"),
+            pl.col("total"),
+            pl.col("number_of_samples"),
+            pl.col("mrr"),
+            Ranks.percentage_at_k(1),
+            Ranks.percentage_at_k(3),
+            Ranks.percentage_at_k(5),
+            Ranks.percentage_at_k(10),
+            Ranks.percentage_found(),
+            Ranks.precision_at_k(1),
+            Ranks.precision_at_k(3),
+            Ranks.precision_at_k(5),
+            Ranks.precision_at_k(10),
+            Ranks.f_beta_score_at_k(1),
+            Ranks.f_beta_score_at_k(3),
+            Ranks.f_beta_score_at_k(5),
+            Ranks.f_beta_score_at_k(10),
+            pl.lit(Ranks.mean_average_precision_at_k(true_positive_scan, 1)).alias("MAP@1"),
+            pl.lit(Ranks.mean_average_precision_at_k(true_positive_scan, 3)).alias("MAP@3"),
+            pl.lit(Ranks.mean_average_precision_at_k(true_positive_scan, 5)).alias("MAP@5"),
+            pl.lit(Ranks.mean_average_precision_at_k(true_positive_scan, 10)).alias("MAP@10"),
+            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 3)).alias(
+                "NDCG@3"
+            ),
+            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 5)).alias(
+                "NDCG@5"
+            ),
+            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 10)).alias(
+                "NDCG@10"
+            ),
+        ]
+    )