pheval 0.3.9__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pheval might be problematic.
- pheval/analyse/analysis.py +61 -150
- pheval/analyse/assess_prioritisation_base.py +108 -0
- pheval/analyse/benchmark_db_manager.py +140 -0
- pheval/analyse/benchmark_generator.py +47 -50
- pheval/analyse/benchmarking_data.py +3 -2
- pheval/analyse/disease_prioritisation_analysis.py +70 -219
- pheval/analyse/gene_prioritisation_analysis.py +66 -242
- pheval/analyse/generate_plots.py +81 -79
- pheval/analyse/generate_summary_outputs.py +64 -134
- pheval/analyse/parse_benchmark_summary.py +50 -37
- pheval/analyse/parse_corpus.py +219 -0
- pheval/analyse/rank_stats.py +177 -144
- pheval/analyse/run_data_parser.py +108 -27
- pheval/analyse/variant_prioritisation_analysis.py +78 -212
- pheval/cli.py +2 -4
- pheval/cli_pheval_utils.py +34 -245
- pheval/prepare/create_noisy_phenopackets.py +78 -67
- pheval-0.4.1.dist-info/METADATA +113 -0
- {pheval-0.3.9.dist-info → pheval-0.4.1.dist-info}/RECORD +22 -22
- {pheval-0.3.9.dist-info → pheval-0.4.1.dist-info}/WHEEL +1 -1
- pheval/analyse/parse_pheval_result.py +0 -43
- pheval/analyse/prioritisation_rank_recorder.py +0 -83
- pheval/constants.py +0 -8
- pheval-0.3.9.dist-info/METADATA +0 -35
- {pheval-0.3.9.dist-info → pheval-0.4.1.dist-info}/LICENSE +0 -0
- {pheval-0.3.9.dist-info → pheval-0.4.1.dist-info}/entry_points.txt +0 -0
pheval/analyse/generate_plots.py
CHANGED
@@ -14,11 +14,8 @@ from pheval.analyse.benchmark_generator import (
     VariantBenchmarkRunOutputGenerator,
 )
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
-from pheval.analyse.parse_benchmark_summary import
-
-    read_benchmark_tsv_result_summary,
-)
-from pheval.constants import PHEVAL_RESULTS_DIRECTORY_SUFFIX
+from pheval.analyse.parse_benchmark_summary import parse_benchmark_db
+from pheval.analyse.run_data_parser import parse_run_config


 def trim_corpus_results_directory_suffix(corpus_results_directory: Path) -> Path:
@@ -31,7 +28,7 @@ def trim_corpus_results_directory_suffix(corpus_results_directory: Path) -> Path
     Returns:
         Path: The Path object with the suffix removed from the directory name.
     """
-    return Path(str(corpus_results_directory).replace(
+    return Path(str(corpus_results_directory).replace("_results", ""))


 class PlotGenerator:
@@ -50,9 +47,7 @@ class PlotGenerator:
         "#1b9e77",
     ]

-    def __init__(
-        self,
-    ):
+    def __init__(self, benchmark_name: str):
         """
         Initialise the PlotGenerator class.
         Note:
@@ -61,6 +56,7 @@ class PlotGenerator:
             Matplotlib settings are configured to remove the right and top axes spines
             for generated plots.
         """
+        self.benchmark_name = benchmark_name
         self.stats, self.mrr = [], []
         matplotlib.rcParams["axes.spines.right"] = False
         matplotlib.rcParams["axes.spines.top"] = False
@@ -145,7 +141,6 @@ class PlotGenerator:
         self,
         benchmarking_results: List[BenchmarkRunResults],
         benchmark_generator: BenchmarkRunOutputGenerator,
-        title: str = None,
     ) -> None:
         """
         Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot.
@@ -153,12 +148,12 @@ class PlotGenerator:
         Args:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-            title (str, optional): Title for the generated plot. Defaults to None.
         """
         for benchmark_result in benchmarking_results:
             self._generate_stacked_bar_plot_data(benchmark_result)
             self._generate_stats_mrr_bar_plot_data(benchmark_result)
         stats_df = pd.DataFrame(self.stats)
+        plt.clf()
         stats_df.set_index("Run").plot(
             kind="bar",
             stacked=True,
@@ -166,15 +161,15 @@ class PlotGenerator:
             ylabel=benchmark_generator.y_label,
             edgecolor="white",
         ).legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
-        if
+        if benchmark_generator.plot_customisation.rank_plot_title is None:
+            plt.title(f"{benchmark_generator.prioritisation_type_string.capitalize()} Rank Stats")
+        else:
             plt.title(
-
+                benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15
             )
-        else:
-            plt.title(title, loc="center", fontsize=15)
         plt.ylim(0, 100)
         plt.savefig(
-            f"{benchmark_generator.
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -183,16 +178,16 @@ class PlotGenerator:
         mrr_df.set_index("Run").plot(
             kind="bar",
             color=self.palette_hex_codes,
-            ylabel=f"{benchmark_generator.
+            ylabel=f"{benchmark_generator.prioritisation_type_string.capitalize()} mean reciprocal rank",
             legend=False,
             edgecolor="white",
         )
         plt.title(
-            f"{benchmark_generator.
+            f"{benchmark_generator.prioritisation_type_string.capitalize()} results - mean reciprocal rank"
         )
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_mrr.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -254,7 +249,6 @@ class PlotGenerator:
         self,
         benchmarking_results: List[BenchmarkRunResults],
         benchmark_generator: BenchmarkRunOutputGenerator,
-        title: str = None,
     ) -> None:
         """
         Generate a cumulative bar plot.
@@ -262,11 +256,11 @@ class PlotGenerator:
         Args:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-            title (str, optional): Title for the generated plot. Defaults to None.
         """
         for benchmark_result in benchmarking_results:
             self._generate_cumulative_bar_plot_data(benchmark_result)
         stats_df = pd.DataFrame(self.stats)
+        plt.clf()
         sns.catplot(
             data=stats_df,
             kind="bar",
@@ -278,15 +272,17 @@ class PlotGenerator:
             legend=False,
         ).set(xlabel="Rank", ylabel=benchmark_generator.y_label)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=3, title="Run")
-        if
+        if benchmark_generator.plot_customisation.rank_plot_title is None:
             plt.title(
-                f"{benchmark_generator.
+                f"{benchmark_generator.prioritisation_type_string.capitalize()} Cumulative Rank Stats"
             )
         else:
-            plt.title(
+            plt.title(
+                benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15
+            )
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -370,6 +366,7 @@ class PlotGenerator:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
         """
+        plt.clf()
         for i, benchmark_result in enumerate(benchmarking_results):
             fpr, tpr, thresh = roc_curve(
                 benchmark_result.binary_classification_stats.labels,
@@ -388,10 +385,13 @@ class PlotGenerator:
         plt.plot(linestyle="--", color="gray")
         plt.xlabel("False Positive Rate")
         plt.ylabel("True Positive Rate")
-
+        if benchmark_generator.plot_customisation.roc_curve_title is None:
+            plt.title("Receiver Operating Characteristic (ROC) Curve")
+        else:
+            plt.title(benchmark_generator.plot_customisation.roc_curve_title)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
-            f"{benchmark_generator.
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_roc_curve.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -408,6 +408,7 @@ class PlotGenerator:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
         """
+        plt.clf()
         plt.figure()
         for i, benchmark_result in enumerate(benchmarking_results):
             precision, recall, thresh = precision_recall_curve(
@@ -426,10 +427,13 @@ class PlotGenerator:
         plt.plot(linestyle="--", color="gray")
         plt.xlabel("Recall")
         plt.ylabel("Precision")
-
+        if benchmark_generator.plot_customisation.precision_recall_title is None:
+            plt.title("Precision-Recall Curve")
+        else:
+            plt.title(benchmark_generator.plot_customisation.precision_recall_title)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
-            f"{benchmark_generator.
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_pr_curve.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -438,7 +442,6 @@ class PlotGenerator:
         self,
         benchmarking_results: List[BenchmarkRunResults],
         benchmark_generator: BenchmarkRunOutputGenerator,
-        title: str = None,
    ) -> None:
         """
         Generate a non-cumulative bar plot.
@@ -446,8 +449,8 @@ class PlotGenerator:
         Args:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-            title (str, optional): Title for the generated plot. Defaults to None.
         """
+        plt.clf()
         for benchmark_result in benchmarking_results:
             self._generate_non_cumulative_bar_plot_data(benchmark_result)

@@ -463,26 +466,27 @@ class PlotGenerator:
             legend=False,
         ).set(xlabel="Rank", ylabel=benchmark_generator.y_label)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=3, title="Run")
-        if
+        if benchmark_generator.plot_customisation.rank_plot_title is None:
             plt.title(
-                f"{benchmark_generator.
+                f"{benchmark_generator.prioritisation_type_string.capitalize()} Non-Cumulative Rank Stats"
             )
         else:
-            plt.title(
+            plt.title(
+                benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15
+            )
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )


 def generate_plots(
+    benchmark_name: str,
     benchmarking_results: List[BenchmarkRunResults],
     benchmark_generator: BenchmarkRunOutputGenerator,
-
-    title: str = None,
-    generate_from_tsv: bool = False,
+    generate_from_db: bool = False,
 ) -> None:
     """
     Generate summary statistics bar plots for prioritisation.
@@ -492,56 +496,54 @@ def generate_plots(
     Args:
         benchmarking_results (list[BenchmarkRunResults]): List of benchmarking results for multiple runs.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-
-        title (str, optional): Title for the generated plot. Defaults to None.
-        generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False.
+        generate_from_db (bool): Specify whether to generate plots from the db file. Defaults to False.
     """
-    plot_generator = PlotGenerator()
-    if not
+    plot_generator = PlotGenerator(benchmark_name)
+    if not generate_from_db:
         plot_generator.generate_roc_curve(benchmarking_results, benchmark_generator)
         plot_generator.generate_precision_recall(benchmarking_results, benchmark_generator)
-    if plot_type == "bar_stacked":
-        plot_generator.generate_stacked_bar_plot(benchmarking_results, benchmark_generator
-    elif plot_type == "bar_cumulative":
-        plot_generator.generate_cumulative_bar(benchmarking_results, benchmark_generator
-    elif plot_type == "bar_non_cumulative":
-        plot_generator.generate_non_cumulative_bar(benchmarking_results, benchmark_generator
-
-
-def
-
-
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
-    title: str,
+    if benchmark_generator.plot_customisation.plot_type == "bar_stacked":
+        plot_generator.generate_stacked_bar_plot(benchmarking_results, benchmark_generator)
+    elif benchmark_generator.plot_customisation.plot_type == "bar_cumulative":
+        plot_generator.generate_cumulative_bar(benchmarking_results, benchmark_generator)
+    elif benchmark_generator.plot_customisation.plot_type == "bar_non_cumulative":
+        plot_generator.generate_non_cumulative_bar(benchmarking_results, benchmark_generator)
+
+
+def generate_plots_from_benchmark_summary_db(
+    benchmark_db: Path,
+    run_data: Path,
 ):
     """
     Generate bar plot from summary benchmark results.

-    Reads a summary of benchmark results from a
+    Reads a summary of benchmark results from a benchmark db and generates a bar plot
     based on the analysis type and plot type.

     Args:
-
-
-        variant_analysis (bool): Flag indicating whether to analyse variant prioritisation.
-        disease_analysis (bool): Flag indicating whether to analyse disease prioritisation.
-        plot_type (str): Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").
-        title (str): Title for the generated plot.
-    Raises:
-        ValueError: If an unsupported plot type is specified.
+        benchmark_db (Path): Path to the summary TSV file containing benchmark results.
+        run_data (Path): Path to YAML benchmarking configuration file.
     """
-    benchmark_stats_summary =
-
-    if
-
-
-
-
-
-
-
-
+    benchmark_stats_summary = parse_benchmark_db(benchmark_db)
+    config = parse_run_config(run_data)
+    if benchmark_stats_summary.gene_results:
+        generate_plots(
+            config.benchmark_name,
+            benchmark_stats_summary.gene_results,
+            GeneBenchmarkRunOutputGenerator(config.plot_customisation.gene_plots),
+            True,
+        )
+    if benchmark_stats_summary.variant_results:
+        generate_plots(
+            config.benchmark_name,
+            benchmark_stats_summary.variant_results,
+            VariantBenchmarkRunOutputGenerator(config.plot_customisation.variant_plots),
+            True,
+        )
+    elif benchmark_stats_summary.disease_results:
+        generate_plots(
+            config.benchmark_name,
+            benchmark_stats_summary.disease_results,
+            DiseaseBenchmarkRunOutputGenerator(config.plot_customisation.disease_plots),
+            True,
         )
-    generate_plots(benchmarking_results, benchmark_generator, plot_type, title, True)
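In short, plotting is now driven by the run configuration rather than explicit title/plot-type arguments: generate_plots takes a benchmark_name, plot type and titles come from benchmark_generator.plot_customisation, and the new generate_plots_from_benchmark_summary_db entry point rebuilds plots from a saved benchmark database. A minimal usage sketch, assuming hypothetical file paths (only the function name and signature are taken from the diff above):

from pathlib import Path

from pheval.analyse.generate_plots import generate_plots_from_benchmark_summary_db

# Hypothetical inputs: the benchmark database written by a previous pheval
# benchmark run and the YAML run configuration describing that benchmark.
benchmark_db = Path("my_benchmark.db")
run_config = Path("benchmark_config.yaml")

# Parses the database and config, then regenerates the configured plot type
# for the stored gene/variant/disease results; outputs are written as
# "{benchmark_name}_{prioritisation_type}_rank_stats.svg" and similar files.
generate_plots_from_benchmark_summary_db(benchmark_db, run_config)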
pheval/analyse/generate_summary_outputs.py
CHANGED
@@ -1,143 +1,68 @@
 import itertools
-from collections import defaultdict
-from copy import deepcopy
 from typing import List

-
-import pandas as pd
-
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.generate_plots import generate_plots
-from pheval.constants import RANK_COMPARISON_FILE_SUFFIX
-
-
-class RankComparisonGenerator:
-    """Class for writing the run comparison of rank assignment for prioritisation."""
-
-    def __init__(self, run_comparison: defaultdict):
-        """
-        Initialise the RankComparisonGenerator class.
-
-        Args:
-            run_comparison (defaultdict): A nested dictionary containing the run comparison data.
-        """
-        self.run_comparison = run_comparison

-    def _generate_dataframe(self) -> pd.DataFrame:
-        """
-        Generate a Pandas DataFrame based on the run comparison data.

-
-            pd.DataFrame: DataFrame containing the run comparison data.
-        """
-        return pd.DataFrame.from_dict(self.run_comparison, orient="index")
-
-    def _calculate_rank_difference(self) -> pd.DataFrame:
-        """
-        Calculate the rank decrease for runs, taking the first directory as a baseline.
-
-        Returns:
-            pd.DataFrame: DataFrame containing the calculated rank differences.
-        """
-        comparison_df = self._generate_dataframe()
-        comparison_df["rank_change"] = comparison_df.iloc[:, 2] - comparison_df.iloc[:, 3]
-        comparison_df["rank_change"] = np.where(
-            (comparison_df.iloc[:, 2] == 0) & (comparison_df.iloc[:, 3] != 0),
-            "GAINED",
-            np.where(
-                (comparison_df.iloc[:, 3] == 0) & (comparison_df.iloc[:, 2] != 0),
-                "LOST",
-                comparison_df["rank_change"],
-            ),
-        )
-        comparison_df["rank_change"] = comparison_df["rank_change"].apply(
-            lambda x: int(x) if str(x).lstrip("-").isdigit() else x
-        )
-        return comparison_df
-
-    def generate_output(self, prefix: str, suffix: str) -> None:
-        """
-        Generate output file from the run comparison data.
-
-        Args:
-            prefix (str): Prefix for the output file name.
-            suffix (str): Suffix for the output file name.
-        """
-        self._generate_dataframe().to_csv(prefix + suffix, sep="\t")
-
-    def generate_comparison_output(self, prefix: str, suffix: str) -> None:
-        """
-        Generate output file with calculated rank differences.
-
-        Args:
-            prefix (str): Prefix for the output file name.
-            suffix (str): Suffix for the output file name.
-        """
-        self._calculate_rank_difference().to_csv(prefix + suffix, sep="\t")
-
-
-def generate_benchmark_output(
-    benchmarking_results: BenchmarkRunResults,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
-) -> None:
+def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_prefix: str) -> str:
     """
-
-
+    Get the new table name for rank comparison tables.
     Args:
-
-
-
+        run_identifier_1: The first run identifier.
+        run_identifier_2: The second run identifier.
+        output_prefix: The output prefix of the table
+    Returns:
+        The new table name.
     """
-
-    results_dir_name = benchmarking_results.results_dir.name
-    RankComparisonGenerator(rank_comparison_data).generate_output(
-        f"{results_dir_name}",
-        f"-{benchmark_generator.prioritisation_type_file_prefix}{RANK_COMPARISON_FILE_SUFFIX}",
-    )
-    generate_plots(
-        [benchmarking_results],
-        benchmark_generator,
-        plot_type,
-    )
+    return f"{run_identifier_1}_vs_" f"{run_identifier_2}_" f"{output_prefix}_rank_comparison"


-def
+def create_comparison_table(
+    comparison_table_name: str,
+    connector: BenchmarkDBManager,
+    drop_columns: List[str],
+    run_identifier_1: str,
+    run_identifier_2: str,
+    table_name: str,
+) -> None:
     """
-
-
-    This function merges two dictionaries, `result1` and `result2`, containing nested structures.
-    It traverses the dictionaries recursively and merges their contents based on common keys.
-    If a key is present in both dictionaries and points to another dictionary, the function
-    will further merge their nested contents. If a key exists in `result2` but not in `result1`,
-    it will be added to `result1`.
-
+    Create rank comparison tables.
     Args:
-
-
-
-
-
+        comparison_table_name (str): Name of the comparison table to create.
+        connector (BenchmarkDBManager): DBConnector instance.
+        drop_columns (List[str]): List of columns to drop.
+        run_identifier_1 (str): The first run identifier.
+        run_identifier_2 (str): The second run identifier.
+        table_name (str): Name of the table to extract ranks from
     """
-
-
-
-
-
-
-                result1[key] = result2[key]
+    connector.drop_table(comparison_table_name)
+    excluded_columns = tuple(drop_columns + ["identifier"]) if drop_columns else ("identifier",)
+    connector.conn.execute(
+        f'CREATE TABLE "{comparison_table_name}" AS SELECT * '
+        f"EXCLUDE {excluded_columns} FROM {table_name}"
+    )

-
-
-
-
+    connector.conn.execute(
+        f"""ALTER TABLE "{comparison_table_name}" ADD COLUMN rank_change VARCHAR;"""
+    )
+    connector.conn.execute(
+        f'UPDATE "{comparison_table_name}" SET rank_change = CASE WHEN "{run_identifier_1}" = 0 '
+        f'AND "{run_identifier_2}" != 0 '
+        f"THEN 'GAINED' WHEN \"{run_identifier_1}\" != 0 AND \"{run_identifier_2}\" = 0 THEN 'LOST' ELSE "
+        f'CAST ("{run_identifier_1}" - "{run_identifier_2}" AS VARCHAR) END;'
+    )
+    connector.conn.commit()


 def generate_benchmark_comparison_output(
+    benchmark_name: str,
     benchmarking_results: List[BenchmarkRunResults],
-
+    run_identifiers: List[str],
     benchmark_generator: BenchmarkRunOutputGenerator,
+    table_name: str,
 ) -> None:
     """
     Generate prioritisation outputs for benchmarking multiple runs.
@@ -147,29 +72,34 @@ def generate_benchmark_comparison_output(
     comparison outputs using `RankComparisonGenerator` for each pair.

     Args:
+        benchmark_name (str): Name of the benchmark.
         benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances
             representing the benchmarking results of multiple runs.
-
+        run_identifiers (List[str]): A list of run identifiers.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
+        table_name (str): The name of the table where ranks are stored.
     """
-    output_prefix = benchmark_generator.
-
-
-
-
-
-
+    output_prefix = benchmark_generator.prioritisation_type_string
+    connector = BenchmarkDBManager(benchmark_name)
+    for pair in itertools.combinations(
+        [str(result.benchmark_name) for result in benchmarking_results], 2
+    ):
+        run_identifier_1 = pair[0]
+        run_identifier_2 = pair[1]
+        drop_columns = [run for run in run_identifiers if run not in pair]
+        comparison_table_name = get_new_table_name(
+            run_identifier_1, run_identifier_2, output_prefix
         )
-
-
-
-
-
-
+        create_comparison_table(
+            comparison_table_name,
+            connector,
+            drop_columns,
+            run_identifier_1,
+            run_identifier_2,
+            table_name,
+        )
-
     generate_plots(
+        benchmark_name,
         benchmarking_results,
         benchmark_generator,
-        plot_type,
     )
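For orientation, the replacement for the old pandas-based RankComparisonGenerator stores pairwise run comparisons as database tables: get_new_table_name builds a "<run1>_vs_<run2>_<prefix>_rank_comparison" table name, and create_comparison_table copies the rank table for the pair (dropping the other runs' columns) and adds a rank_change column via a SQL CASE expression. A plain-Python sketch of that rank_change logic, using hypothetical rank values where 0 means the entity was not ranked in a run (this helper is illustrative and not part of pheval):

def rank_change(rank_in_run_1: int, rank_in_run_2: int) -> str:
    # Mirrors the CASE expression that create_comparison_table writes into the
    # comparison table.
    if rank_in_run_1 == 0 and rank_in_run_2 != 0:
        return "GAINED"  # unranked in run 1, ranked in run 2
    if rank_in_run_1 != 0 and rank_in_run_2 == 0:
        return "LOST"  # ranked in run 1, unranked in run 2
    return str(rank_in_run_1 - rank_in_run_2)  # run 1 rank minus run 2 rank, as a string


print(rank_change(0, 3))   # GAINED
print(rank_change(5, 0))   # LOST
print(rank_change(10, 4))  # 6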