pheval 0.4.7__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Files changed (33)
  1. pheval/analyse/benchmark.py +156 -0
  2. pheval/analyse/benchmark_db_manager.py +16 -134
  3. pheval/analyse/benchmark_output_type.py +43 -0
  4. pheval/analyse/binary_classification_curves.py +132 -0
  5. pheval/analyse/binary_classification_stats.py +164 -307
  6. pheval/analyse/generate_plots.py +210 -395
  7. pheval/analyse/generate_rank_comparisons.py +44 -0
  8. pheval/analyse/rank_stats.py +190 -382
  9. pheval/analyse/run_data_parser.py +21 -39
  10. pheval/cli.py +27 -24
  11. pheval/cli_pheval_utils.py +7 -8
  12. pheval/post_processing/phenopacket_truth_set.py +250 -0
  13. pheval/post_processing/post_processing.py +179 -345
  14. pheval/post_processing/validate_result_format.py +91 -0
  15. pheval/prepare/update_phenopacket.py +11 -9
  16. pheval/utils/logger.py +35 -0
  17. pheval/utils/phenopacket_utils.py +85 -91
  18. {pheval-0.4.7.dist-info → pheval-0.5.1.dist-info}/METADATA +4 -4
  19. {pheval-0.4.7.dist-info → pheval-0.5.1.dist-info}/RECORD +22 -26
  20. pheval/analyse/analysis.py +0 -104
  21. pheval/analyse/assess_prioritisation_base.py +0 -108
  22. pheval/analyse/benchmark_generator.py +0 -126
  23. pheval/analyse/benchmarking_data.py +0 -25
  24. pheval/analyse/disease_prioritisation_analysis.py +0 -152
  25. pheval/analyse/gene_prioritisation_analysis.py +0 -147
  26. pheval/analyse/generate_summary_outputs.py +0 -105
  27. pheval/analyse/parse_benchmark_summary.py +0 -81
  28. pheval/analyse/parse_corpus.py +0 -219
  29. pheval/analyse/prioritisation_result_types.py +0 -52
  30. pheval/analyse/variant_prioritisation_analysis.py +0 -159
  31. {pheval-0.4.7.dist-info → pheval-0.5.1.dist-info}/LICENSE +0 -0
  32. {pheval-0.4.7.dist-info → pheval-0.5.1.dist-info}/WHEEL +0 -0
  33. {pheval-0.4.7.dist-info → pheval-0.5.1.dist-info}/entry_points.txt +0 -0
--- a/pheval/analyse/assess_prioritisation_base.py
+++ /dev/null
@@ -1,108 +0,0 @@
- from typing import Union
-
- from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
- from pheval.post_processing.post_processing import (
-     RankedPhEvalDiseaseResult,
-     RankedPhEvalGeneResult,
-     RankedPhEvalVariantResult,
- )
-
-
- class AssessPrioritisationBase:
-     def __init__(
-         self,
-         db_connection: BenchmarkDBManager,
-         table_name: str,
-         column: str,
-         threshold: float,
-         score_order: str,
-     ):
-         """
-         Initialise AssessPrioritisationBase class
-
-         Args:
-             db_connection (BenchmarkDBManager): DB connection.
-             table_name (str): Table name.
-             column (str): Column name.
-             threshold (float): Threshold for scores
-             score_order (str): Score order for results, either ascending or descending
-
-         """
-         self.threshold = threshold
-         self.score_order = score_order
-         self.db_connection = db_connection
-         self.conn = db_connection.conn
-         self.column = column
-         self.table_name = table_name
-         db_connection.add_column_integer_default(
-             table_name=table_name, column=self.column, default=0
-         )
-
-     def _assess_with_threshold_ascending_order(
-         self,
-         result_entry: Union[
-             RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
-         ],
-     ) -> int:
-         """
-         Record the prioritisation rank if it meets the ascending order threshold.
-
-
-         Args:
-             result_entry (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
-                 Ranked PhEval result entry
-
-         Returns:
-             int: Recorded prioritisation rank
-         """
-         if float(self.threshold) > float(result_entry.score):
-             return result_entry.rank
-         else:
-             return 0
-
-     def _assess_with_threshold(
-         self,
-         result_entry: Union[
-             RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
-         ],
-     ) -> int:
-         """
-         Record the prioritisation rank if it meets the score threshold.
-
-         Args:
-             result_entry (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
-                 Ranked PhEval result entry
-
-         Returns:
-             int: Recorded prioritisation rank
-         """
-         if float(self.threshold) < float(result_entry.score):
-             return result_entry.rank
-         else:
-             return 0
-
-     def _record_matched_entity(
-         self,
-         standardised_result: Union[
-             RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
-         ],
-     ) -> int:
-         """
-         Return the rank result - handling the specification of a threshold.
-         Args:
-             standardised_result (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
-                 Ranked PhEval disease result entry
-
-         Returns:
-             int: Recorded entity prioritisation rank
-         """
-         if float(self.threshold) == 0.0:
-             return standardised_result.rank
-         else:
-             return (
-                 self._assess_with_threshold(standardised_result)
-                 if self.score_order != "ascending"
-                 else self._assess_with_threshold_ascending_order(
-                     standardised_result,
-                 )
-             )
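
The threshold handling deleted here is the core decision of whether a ranked hit counts: a threshold of 0.0 disables filtering, and otherwise the comparison direction follows score_order. A minimal, self-contained sketch of that logic, under the same semantics as the removed methods (record_rank and SimpleResult are illustrative names, not part of pheval's API):

from typing import NamedTuple

class SimpleResult(NamedTuple):
    # Illustrative stand-in for the Ranked* result classes: just rank and score.
    rank: int
    score: float

def record_rank(result: SimpleResult, threshold: float, score_order: str) -> int:
    # A threshold of 0.0 disables filtering; otherwise a descending-order score
    # must exceed the threshold, and an ascending-order score must fall below it.
    if threshold == 0.0:
        return result.rank
    if score_order == "ascending":
        return result.rank if threshold > result.score else 0
    return result.rank if threshold < result.score else 0

# A hit ranked 3rd with score 0.8 passes a 0.5 threshold under descending order...
assert record_rank(SimpleResult(rank=3, score=0.8), 0.5, "descending") == 3
# ...but is recorded as unranked (0) under ascending order.
assert record_rank(SimpleResult(rank=3, score=0.8), 0.5, "ascending") == 0
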
--- a/pheval/analyse/benchmark_generator.py
+++ /dev/null
@@ -1,126 +0,0 @@
- from dataclasses import dataclass
- from typing import Callable
-
- from pheval.analyse.benchmarking_data import BenchmarkRunResults
- from pheval.analyse.disease_prioritisation_analysis import benchmark_disease_prioritisation
- from pheval.analyse.gene_prioritisation_analysis import benchmark_gene_prioritisation
- from pheval.analyse.run_data_parser import RunConfig, SinglePlotCustomisation
- from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
-
-
- @dataclass
- class BenchmarkRunOutputGenerator:
-     """Base class for recording data required for generating benchmarking outputs.
-
-     Attributes:
-         plot_customisation (SinglePlotCustomisation): Customisation for plot.
-         prioritisation_type_string (str): Prioritisation type string.
-         y_label (str): Label for the y-axis in benchmarking outputs.
-         generate_benchmark_run_results (Callable): Callable to generate benchmark run results.
-             Takes parameters: input and results directory, score order, threshold, rank comparison,
-             and returns BenchmarkRunResults.
-         stats_comparison_file (str): Suffix for the rank comparison file.
-     """
-
-     plot_customisation: SinglePlotCustomisation
-     prioritisation_type_string: str
-     y_label: str
-     generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults]
-     stats_comparison_file: str
-
-
- @dataclass
- class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
-     """
-     Subclass of BenchmarkRunOutputGenerator specialised
-     for producing gene prioritisation benchmarking outputs.
-
-     This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes
-     specifically for gene prioritisation benchmarking.
-
-     Attributes:
-         plot_customisation (SinglePlotCustomisation): Customisation for plot.
-         prioritisation_type_string (str): Prioritisation type string.
-             Defaults to GENE_PRIORITISATION_TYPE_STR.
-         y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs.
-             Defaults to GENE_PLOT_Y_LABEL.
-         generate_benchmark_run_results (Callable): Callable to generate gene prioritisation
-             benchmark run results. Defaults to benchmark_gene_prioritisation.
-             Takes parameters: run configuration, score order, threshold, rank comparison,
-             and returns BenchmarkRunResults.
-         stats_comparison_file (str): Suffix for the gene rank comparison file.
-             Defaults to "-gene_summary".
-     """
-
-     plot_customisation: SinglePlotCustomisation = None
-     prioritisation_type_string: str = "gene"
-     y_label: str = "Disease-causing genes (%)"
-     generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
-         benchmark_gene_prioritisation
-     )
-     stats_comparison_file: str = "gene_summary"
-
-
- @dataclass
- class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
-     """
-     Subclass of BenchmarkRunOutputGenerator specialised
-     for producing variant prioritisation benchmarking outputs.
-
-     This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes
-     specifically for variant prioritisation benchmarking.
-
-     Attributes:
-         plot_customisation (SinglePlotCustomisation): Customisation for plot.
-         prioritisation_type_string (str): Prioritisation type string.
-             Defaults to VARIANT_PRIORITISATION_TYPE_STR.
-         y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs.
-             Defaults to VARIANT_PLOT_Y_LABEL.
-         generate_benchmark_run_results (Callable): Callable to generate variant prioritisation
-             benchmark run results. Defaults to benchmark_variant_prioritisation.
-             Takes parameters: run configuration, score order, threshold, rank comparison,
-             and returns BenchmarkRunResults.
-         stats_comparison_file (str): Suffix for the variant rank comparison file.
-             Defaults to "-variant_summary".
-
-     """
-
-     plot_customisation: SinglePlotCustomisation = None
-     prioritisation_type_string: str = "variant"
-     y_label: str = "Disease-causing variants (%)"
-     generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
-         benchmark_variant_prioritisation
-     )
-     stats_comparison_file: str = "variant_summary"
-
-
- @dataclass
- class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
-     """
-     Subclass of BenchmarkRunOutputGenerator specialised
-     for producing disease prioritisation benchmarking outputs.
-
-     This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes
-     specifically for disease prioritisation benchmarking.
-
-     Attributes:
-         plot_customisation (SinglePlotCustomisation): Customisation for plot.
-         prioritisation_type_string (str): Prioritisation type string.
-             Defaults to DISEASE_PRIORITISATION_TYPE_STR.
-         y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs.
-             Defaults to DISEASE_PLOT_Y_LABEL.
-         generate_benchmark_run_results (Callable): Callable to generate disease prioritisation
-             benchmark run results. Defaults to benchmark_disease_prioritisation.
-             Takes parameters: run configuration, score order, threshold, rank comparison,
-             and returns BenchmarkRunResults.
-         stats_comparison_file (str): Suffix for the disease rank comparison file.
-             Defaults to "-disease_summary".
-     """
-
-     plot_customisation: SinglePlotCustomisation = None
-     prioritisation_type_string: str = "disease"
-     y_label: str = "Known diseases (%)"
-     generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
-         benchmark_disease_prioritisation
-     )
-     stats_comparison_file: str = "disease_summary"
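
These deleted generator dataclasses existed so callers could dispatch benchmarking generically: each subclass bundles labels with a generate_benchmark_run_results callable, and downstream code never inspects the prioritisation type. A rough standalone sketch of that pattern, with all names illustrative rather than pheval API:

from dataclasses import dataclass
from typing import Callable

def benchmark_genes(run_id: str) -> str:
    # Placeholder for a real benchmarking routine.
    return f"gene results for {run_id}"

@dataclass
class OutputGenerator:
    # Analogue of the deleted base class: labels bundled with a callable.
    type_string: str
    y_label: str
    run_benchmark: Callable[[str], str]

@dataclass
class GeneOutputGenerator(OutputGenerator):
    # Subclasses only supply defaults, mirroring GeneBenchmarkRunOutputGenerator.
    type_string: str = "gene"
    y_label: str = "Disease-causing genes (%)"
    run_benchmark: Callable[[str], str] = benchmark_genes

# Callers hold the base type and never inspect the concrete prioritisation type:
generator: OutputGenerator = GeneOutputGenerator()
print(generator.run_benchmark("run_1"))  # gene results for run_1
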
--- a/pheval/analyse/benchmarking_data.py
+++ /dev/null
@@ -1,25 +0,0 @@
- from dataclasses import dataclass
- from pathlib import Path
-
- from pheval.analyse.binary_classification_stats import BinaryClassificationStats
- from pheval.analyse.rank_stats import RankStats
-
-
- @dataclass
- class BenchmarkRunResults:
-     """
-     Benchmarking results for a run.
-
-     Attributes:
-         rank_stats (RankStats): Statistics related to benchmark.
-         binary_classification_stats (BinaryClassificationStats): Binary statistics related to benchmark.
-         results_dir (Path, optional): Path to the result directory. Defaults to None.
-         benchmark_name (str, optional): Name of the benchmark run. Defaults to None.
-         phenopacket_dir (Path, optional): Path to the phenopacket directory. Defaults to None.
-     """
-
-     rank_stats: RankStats
-     binary_classification_stats: BinaryClassificationStats
-     results_dir: Path = None
-     benchmark_name: str = None
-     phenopacket_dir: Path = None
--- a/pheval/analyse/disease_prioritisation_analysis.py
+++ /dev/null
@@ -1,152 +0,0 @@
- from pathlib import Path
-
- from pheval.analyse.assess_prioritisation_base import AssessPrioritisationBase
- from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
- from pheval.analyse.benchmarking_data import BenchmarkRunResults
- from pheval.analyse.binary_classification_stats import BinaryClassificationStats
- from pheval.analyse.rank_stats import RankStats
- from pheval.analyse.run_data_parser import RunConfig
- from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
- from pheval.utils.file_utils import all_files
-
-
- class AssessDiseasePrioritisation(AssessPrioritisationBase):
-     """Class for assessing disease prioritisation based on thresholds and scoring orders."""
-
-     def assess_disease_prioritisation(
-         self,
-         standardised_disease_result_path: Path,
-         phenopacket_path: Path,
-         binary_classification_stats: BinaryClassificationStats,
-     ) -> None:
-         """
-         Assess disease prioritisation.
-
-         This method assesses the prioritisation of diseases based on the provided criteria
-         and records ranks using a PrioritisationRankRecorder.
-
-         Args:
-             standardised_disease_result_path (Path): Path to the standardised disease TSV result.
-             phenopacket_path (Path): Path to the phenopacket.
-             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
-         """
-         relevant_ranks = []
-         df = self.conn.execute(
-             f"SELECT * FROM '{self.table_name}' WHERE phenopacket = ? ",
-             (phenopacket_path.name,),
-         ).fetchdf()
-         for _i, row in df.iterrows():
-             result = (
-                 self.conn.execute(
-                     (
-                         f"SELECT * FROM '{standardised_disease_result_path}' "
-                         f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
-                         f" '{row['disease_identifier']}') "
-                         f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
-                         f"'{row['disease_name']}')"
-                     )
-                     if standardised_disease_result_path.exists()
-                     and standardised_disease_result_path.stat().st_size > 0
-                     else "SELECT NULL WHERE FALSE"
-                 )
-                 .fetchdf()
-                 .to_dict(orient="records")
-             )
-
-             if len(result) > 0:
-                 disease_match = self._record_matched_entity(RankedPhEvalDiseaseResult(**result[0]))
-                 relevant_ranks.append(disease_match)
-                 primary_key = f"{phenopacket_path.name}-{row['disease_identifier']}"
-                 self.conn.execute(
-                     f'UPDATE "{self.table_name}" SET "{self.column}" = ? WHERE identifier = ?',
-                     (disease_match, primary_key),
-                 )
-             elif len(result) == 0:
-                 relevant_ranks.append(0)
-         binary_classification_stats.add_classification(
-             (
-                 self.db_connection.parse_table_into_dataclass(
-                     str(standardised_disease_result_path), RankedPhEvalDiseaseResult
-                 )
-                 if standardised_disease_result_path.exists()
-                 else []
-             ),
-             relevant_ranks,
-         )
-
-
- def assess_phenopacket_disease_prioritisation(
-     phenopacket_path: Path,
-     run: RunConfig,
-     disease_binary_classification_stats: BinaryClassificationStats,
-     disease_benchmarker: AssessDiseasePrioritisation,
- ) -> None:
-     """
-     Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results
-     against the recorded causative diseases for a proband in the Phenopacket.
-
-     Args:
-         phenopacket_path (Path): Path to the Phenopacket.
-         run (RunConfig): Run configuration.
-         disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
-         disease_benchmarker (AssessDiseasePrioritisation): AssessDiseasePrioritisation class instance.
-     """
-     standardised_disease_result_path = run.results_dir.joinpath(
-         f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
-     )
-     disease_benchmarker.assess_disease_prioritisation(
-         standardised_disease_result_path,
-         phenopacket_path,
-         disease_binary_classification_stats,
-     )
-
-
- def benchmark_disease_prioritisation(
-     benchmark_name: str,
-     run: RunConfig,
-     score_order: str,
-     threshold: float,
- ):
-     """
-     Benchmark a directory based on disease prioritisation results.
-
-     Args:
-         benchmark_name (str): Name of the benchmark.
-         run (RunConfig): Run configuration.
-         score_order (str): The order in which scores are arranged.
-         threshold (float): Threshold for assessment.
-
-     Returns:
-         BenchmarkRunResults: An object containing benchmarking results for disease prioritisation,
-         including ranks and rank statistics for the benchmarked directory.
-     """
-     disease_binary_classification_stats = BinaryClassificationStats()
-     db_connection = BenchmarkDBManager(benchmark_name)
-     db_connection.initialise()
-     disease_benchmarker = AssessDiseasePrioritisation(
-         db_connection,
-         f"{run.phenopacket_dir.parents[0].name}_disease",
-         run.run_identifier,
-         threshold,
-         score_order,
-     )
-     for phenopacket_path in all_files(run.phenopacket_dir):
-         assess_phenopacket_disease_prioritisation(
-             phenopacket_path,
-             run,
-             disease_binary_classification_stats,
-             disease_benchmarker,
-         )
-     db_connection.close()
-     disease_rank_stats = RankStats()
-     disease_rank_stats.add_ranks(
-         benchmark_name=benchmark_name,
-         table_name=f"{run.phenopacket_dir.parents[0].name}_disease",
-         column_name=str(run.run_identifier),
-     )
-     return BenchmarkRunResults(
-         rank_stats=disease_rank_stats,
-         benchmark_name=run.run_identifier,
-         binary_classification_stats=disease_binary_classification_stats,
-         phenopacket_dir=run.phenopacket_dir,
-     )
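
The deleted assessment loop relies on DuckDB's ability to query a result TSV directly by path, matching the expected disease via contains_entity_function, a custom UDF registered by BenchmarkDBManager. A minimal sketch of the underlying query pattern, substituting an in-memory table and a plain equality match for the UDF (the rows and identifiers are made up for illustration):

import duckdb

# The deleted code queried each standardised result TSV by file path; this
# sketch swaps in an in-memory table with the same column layout and replaces
# the contains_entity_function UDF with a simple equality predicate.
conn = duckdb.connect()
conn.execute(
    """CREATE TABLE results AS SELECT * FROM (VALUES
        (1, 0.9, 'OMIM:101600', 'Pfeiffer syndrome'),
        (2, 0.4, 'OMIM:613603', 'Another disease')
    ) AS t("rank", score, disease_identifier, disease_name)"""
)
match = conn.execute(
    'SELECT "rank", score FROM results WHERE disease_identifier = ?',
    ("OMIM:101600",),
).fetchall()
print(match)  # [(1, 0.9)]
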
--- a/pheval/analyse/gene_prioritisation_analysis.py
+++ /dev/null
@@ -1,147 +0,0 @@
- from pathlib import Path
-
- from pheval.analyse.assess_prioritisation_base import AssessPrioritisationBase
- from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
- from pheval.analyse.benchmarking_data import BenchmarkRunResults
- from pheval.analyse.binary_classification_stats import BinaryClassificationStats
- from pheval.analyse.rank_stats import RankStats
- from pheval.analyse.run_data_parser import RunConfig
- from pheval.post_processing.post_processing import RankedPhEvalGeneResult
- from pheval.utils.file_utils import all_files
-
-
- class AssessGenePrioritisation(AssessPrioritisationBase):
-     """Class for assessing gene prioritisation based on thresholds and scoring orders."""
-
-     def assess_gene_prioritisation(
-         self,
-         standardised_gene_result_path: Path,
-         phenopacket_path: Path,
-         binary_classification_stats: BinaryClassificationStats,
-     ) -> None:
-         """
-         Assess gene prioritisation.
-         This method assesses the prioritisation of genes based on the provided criteria
-         and records ranks using a PrioritisationRankRecorder.
-
-         Args:
-             standardised_gene_result_path (Path): Path to the standardised gene TSV result.
-             phenopacket_path (Path): Path to the Phenopacket.
-             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
-         """
-         relevant_ranks = []
-         df = self.conn.execute(
-             f"""SELECT * FROM "{self.table_name}" WHERE phenopacket = '{phenopacket_path.name}'"""
-         ).fetchdf()
-         for _i, row in df.iterrows():
-             result = (
-                 self.conn.execute(
-                     (
-                         f"SELECT * FROM '{standardised_gene_result_path}' "
-                         f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR), "
-                         f"'{row['gene_identifier']}') "
-                         f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
-                         f"'{row['gene_symbol']}')"
-                     )
-                     if standardised_gene_result_path.exists()
-                     and standardised_gene_result_path.stat().st_size > 0
-                     else "SELECT NULL WHERE FALSE"
-                 )
-                 .fetchdf()
-                 .to_dict(orient="records")
-             )
-             if len(result) > 0:
-                 gene_match = self._record_matched_entity(RankedPhEvalGeneResult(**result[0]))
-                 relevant_ranks.append(gene_match)
-                 primary_key = f"{phenopacket_path.name}-{row['gene_symbol']}"
-                 self.conn.execute(
-                     f'UPDATE "{self.table_name}" SET "{self.column}" = ? WHERE identifier = ?',
-                     (gene_match, primary_key),
-                 )
-             if not result:
-                 relevant_ranks.append(0)
-         binary_classification_stats.add_classification(
-             (
-                 self.db_connection.parse_table_into_dataclass(
-                     str(standardised_gene_result_path), RankedPhEvalGeneResult
-                 )
-                 if standardised_gene_result_path.exists()
-                 else []
-             ),
-             relevant_ranks,
-         )
-
-
- def assess_phenopacket_gene_prioritisation(
-     phenopacket_path: Path,
-     run: RunConfig,
-     gene_binary_classification_stats: BinaryClassificationStats,
-     gene_benchmarker: AssessGenePrioritisation,
- ) -> None:
-     """
-     Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results
-     against the recorded causative genes for a proband in the Phenopacket.
-
-     Args:
-         phenopacket_path (Path): Path to the Phenopacket.
-         run (RunConfig): Run configuration.
-         gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
-         gene_benchmarker (AssessGenePrioritisation): AssessGenePrioritisation class instance.
-     """
-     standardised_gene_result_path = run.results_dir.joinpath(
-         f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
-     )
-     gene_benchmarker.assess_gene_prioritisation(
-         standardised_gene_result_path,
-         phenopacket_path,
-         gene_binary_classification_stats,
-     )
-
-
- def benchmark_gene_prioritisation(
-     benchmark_name: str,
-     run: RunConfig,
-     score_order: str,
-     threshold: float,
- ) -> BenchmarkRunResults:
-     """
-     Benchmark a directory based on gene prioritisation results.
-     Args:
-         benchmark_name (str): Name of the benchmark.
-         run (RunConfig): Run configuration.
-         score_order (str): The order in which scores are arranged.
-         threshold (float): Threshold for assessment.
-     Returns:
-         BenchmarkRunResults: An object containing benchmarking results for gene prioritisation,
-         including ranks and rank statistics for the benchmarked directory.
-     """
-     gene_binary_classification_stats = BinaryClassificationStats()
-     db_connection = BenchmarkDBManager(benchmark_name)
-     db_connection.initialise()
-     gene_benchmarker = AssessGenePrioritisation(
-         db_connection,
-         f"{run.phenopacket_dir.parents[0].name}" f"_gene",
-         run.run_identifier,
-         threshold,
-         score_order,
-     )
-     for phenopacket_path in all_files(run.phenopacket_dir):
-         assess_phenopacket_gene_prioritisation(
-             phenopacket_path,
-             run,
-             gene_binary_classification_stats,
-             gene_benchmarker,
-         )
-     db_connection.close()
-     gene_rank_stats = RankStats()
-     gene_rank_stats.add_ranks(
-         benchmark_name=benchmark_name,
-         table_name=f"{run.phenopacket_dir.parents[0].name}_gene",
-         column_name=str(run.run_identifier),
-     )
-     return BenchmarkRunResults(
-         rank_stats=gene_rank_stats,
-         benchmark_name=run.run_identifier,
-         binary_classification_stats=gene_binary_classification_stats,
-         phenopacket_dir=run.phenopacket_dir,
-     )
--- a/pheval/analyse/generate_summary_outputs.py
+++ /dev/null
@@ -1,105 +0,0 @@
- import itertools
- from typing import List
-
- from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
- from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
- from pheval.analyse.benchmarking_data import BenchmarkRunResults
- from pheval.analyse.generate_plots import generate_plots
-
-
- def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_prefix: str) -> str:
-     """
-     Get the new table name for rank comparison tables.
-     Args:
-         run_identifier_1: The first run identifier.
-         run_identifier_2: The second run identifier.
-         output_prefix: The output prefix of the table
-     Returns:
-         The new table name.
-     """
-     return f"{run_identifier_1}_vs_" f"{run_identifier_2}_" f"{output_prefix}_rank_comparison"
-
-
- def create_comparison_table(
-     comparison_table_name: str,
-     connector: BenchmarkDBManager,
-     drop_columns: List[str],
-     run_identifier_1: str,
-     run_identifier_2: str,
-     table_name: str,
- ) -> None:
-     """
-     Create rank comparison tables.
-     Args:
-         comparison_table_name (str): Name of the comparison table to create.
-         connector (BenchmarkDBManager): DBConnector instance.
-         drop_columns (List[str]): List of columns to drop.
-         run_identifier_1 (str): The first run identifier.
-         run_identifier_2 (str): The second run identifier.
-         table_name (str): Name of the table to extract ranks from
-     """
-     connector.drop_table(comparison_table_name)
-     excluded_columns = tuple(drop_columns + ["identifier"]) if drop_columns else ("identifier",)
-     connector.conn.execute(
-         f'CREATE TABLE "{comparison_table_name}" AS SELECT * '
-         f"EXCLUDE {excluded_columns} FROM {table_name}"
-     )
-
-     connector.conn.execute(
-         f"""ALTER TABLE "{comparison_table_name}" ADD COLUMN rank_change VARCHAR;"""
-     )
-     connector.conn.execute(
-         f'UPDATE "{comparison_table_name}" SET rank_change = CASE WHEN "{run_identifier_1}" = 0 '
-         f'AND "{run_identifier_2}" != 0 '
-         f"THEN 'GAINED' WHEN \"{run_identifier_1}\" != 0 AND \"{run_identifier_2}\" = 0 THEN 'LOST' ELSE "
-         f'CAST ("{run_identifier_1}" - "{run_identifier_2}" AS VARCHAR) END;'
-     )
-     connector.conn.commit()
-
-
- def generate_benchmark_comparison_output(
-     benchmark_name: str,
-     benchmarking_results: List[BenchmarkRunResults],
-     run_identifiers: List[str],
-     benchmark_generator: BenchmarkRunOutputGenerator,
-     table_name: str,
- ) -> None:
-     """
-     Generate prioritisation outputs for benchmarking multiple runs.
-
-     This function generates comparison outputs for benchmarking multiple runs. It compares the results
-     between pairs of `BenchmarkRunResults` instances in `benchmarking_results` and generates rank
-     comparison outputs using `RankComparisonGenerator` for each pair.
-
-     Args:
-         benchmark_name (str): Name of the benchmark.
-         benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances
-             representing the benchmarking results of multiple runs.
-         run_identifiers (List[str]): A list of run identifiers.
-         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-         table_name (str): The name of the table where ranks are stored.
-     """
-     output_prefix = benchmark_generator.prioritisation_type_string
-     connector = BenchmarkDBManager(benchmark_name)
-     for pair in itertools.combinations(
-         [str(result.benchmark_name) for result in benchmarking_results], 2
-     ):
-         run_identifier_1 = pair[0]
-         run_identifier_2 = pair[1]
-         drop_columns = [run for run in run_identifiers if run not in pair]
-         comparison_table_name = get_new_table_name(
-             run_identifier_1, run_identifier_2, output_prefix
-         )
-         create_comparison_table(
-             comparison_table_name,
-             connector,
-             drop_columns,
-             run_identifier_1,
-             run_identifier_2,
-             table_name,
-         )
-     generate_plots(
-         benchmark_name,
-         benchmarking_results,
-         benchmark_generator,
-     )
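
The rank_change column deleted above encodes the pairwise run comparison: a rank of 0 means an entity was not ranked at all, so 0 to non-zero is 'GAINED', non-zero to 0 is 'LOST', and anything else becomes the signed rank difference. A minimal recreation of that CASE expression in DuckDB (run_1 and run_2 stand in for the per-run rank columns; the rows are made up):

import duckdb

# Same classification as the deleted create_comparison_table, on toy data.
conn = duckdb.connect()
conn.execute(
    """CREATE TABLE comparison AS SELECT * FROM (VALUES
        (0, 4), (2, 0), (5, 1)
    ) AS t(run_1, run_2)"""
)
conn.execute("ALTER TABLE comparison ADD COLUMN rank_change VARCHAR;")
conn.execute(
    """UPDATE comparison SET rank_change = CASE
        WHEN run_1 = 0 AND run_2 != 0 THEN 'GAINED'
        WHEN run_1 != 0 AND run_2 = 0 THEN 'LOST'
        ELSE CAST(run_1 - run_2 AS VARCHAR) END;"""
)
print(conn.execute("SELECT * FROM comparison ORDER BY run_1").fetchall())
# [(0, 4, 'GAINED'), (2, 0, 'LOST'), (5, 1, '4')]
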