pheval 0.3.8__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pheval might be problematic.
- pheval/analyse/analysis.py +61 -150
- pheval/analyse/assess_prioritisation_base.py +108 -0
- pheval/analyse/benchmark_db_manager.py +140 -0
- pheval/analyse/benchmark_generator.py +47 -50
- pheval/analyse/benchmarking_data.py +3 -2
- pheval/analyse/disease_prioritisation_analysis.py +70 -219
- pheval/analyse/gene_prioritisation_analysis.py +66 -242
- pheval/analyse/generate_plots.py +81 -79
- pheval/analyse/generate_summary_outputs.py +64 -134
- pheval/analyse/parse_benchmark_summary.py +50 -37
- pheval/analyse/parse_corpus.py +219 -0
- pheval/analyse/rank_stats.py +177 -144
- pheval/analyse/run_data_parser.py +108 -27
- pheval/analyse/variant_prioritisation_analysis.py +78 -212
- pheval/cli.py +2 -4
- pheval/cli_pheval_utils.py +34 -245
- pheval/prepare/create_noisy_phenopackets.py +78 -67
- pheval-0.4.0.dist-info/METADATA +112 -0
- {pheval-0.3.8.dist-info → pheval-0.4.0.dist-info}/RECORD +22 -22
- pheval/analyse/parse_pheval_result.py +0 -43
- pheval/analyse/prioritisation_rank_recorder.py +0 -83
- pheval/constants.py +0 -8
- pheval-0.3.8.dist-info/METADATA +0 -35
- {pheval-0.3.8.dist-info → pheval-0.4.0.dist-info}/LICENSE +0 -0
- {pheval-0.3.8.dist-info → pheval-0.4.0.dist-info}/WHEEL +0 -0
- {pheval-0.3.8.dist-info → pheval-0.4.0.dist-info}/entry_points.txt +0 -0
pheval/analyse/analysis.py
CHANGED
@@ -1,193 +1,104 @@
-from collections import defaultdict
-from pathlib import Path
-from typing import List
-
 from pheval.analyse.benchmark_generator import (
     BenchmarkRunOutputGenerator,
     DiseaseBenchmarkRunOutputGenerator,
     GeneBenchmarkRunOutputGenerator,
     VariantBenchmarkRunOutputGenerator,
 )
-from pheval.analyse.generate_summary_outputs import
-
-    generate_benchmark_output,
-)
+from pheval.analyse.generate_summary_outputs import generate_benchmark_comparison_output
+from pheval.analyse.parse_corpus import CorpusParser
 from pheval.analyse.rank_stats import RankStatsWriter
-from pheval.analyse.run_data_parser import
-
-
-def _run_benchmark(
-    results_dir_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
-) -> None:
-    """Run a benchmark on a result directory.
-
-    Args:
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
-        benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
-    """
-    stats_writer = RankStatsWriter(
-        Path(output_prefix + benchmark_generator.stats_comparison_file_suffix)
-    )
-    rank_comparison = defaultdict(dict)
-    benchmark_result = benchmark_generator.generate_benchmark_run_results(
-        results_dir_and_input, score_order, threshold, rank_comparison
-    )
-    stats_writer.write_row(
-        results_dir_and_input.results_dir,
-        benchmark_result.rank_stats,
-        benchmark_result.binary_classification_stats,
-    )
-    generate_benchmark_output(benchmark_result, plot_type, benchmark_generator)
-    stats_writer.close()
-
-
-def benchmark_directory(
-    results_dir_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
-) -> None:
-    """
-    Benchmark prioritisation performance for a single run.
-
-    Args:
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        gene_analysis (bool): Boolean flag indicating whether to benchmark gene results.
-        variant_analysis (bool): Boolean flag indicating whether to benchmark variant results.
-        disease_analysis (bool): Boolean flag indicating whether to benchmark disease results.
-        plot_type (str): Type of plot for benchmark visualisation.
-    """
-    if gene_analysis:
-        _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=GeneBenchmarkRunOutputGenerator(),
-        )
-    if variant_analysis:
-        _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=VariantBenchmarkRunOutputGenerator(),
-        )
-    if disease_analysis:
-        _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(),
-        )
+from pheval.analyse.run_data_parser import Config
 
 
 def _run_benchmark_comparison(
-
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
+    run_config: Config,
     benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Run a benchmark on several result directories.
 
     Args:
-
+        run_config (List[TrackInputOutputDirectories]): List of input and output directories
            for tracking results across multiple directories.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
     stats_writer = RankStatsWriter(
-
+        run_config.benchmark_name, benchmark_generator.stats_comparison_file
     )
+    unique_test_corpora_directories = set([result.phenopacket_dir for result in run_config.runs])
+    [
+        CorpusParser(run_config.benchmark_name, test_corpora_directory).parse_corpus(
+            benchmark_generator
+        )
+        for test_corpora_directory in unique_test_corpora_directories
+    ]
     benchmarking_results = []
-    for
-        rank_comparison = defaultdict(dict)
+    for run in run_config.runs:
         benchmark_result = benchmark_generator.generate_benchmark_run_results(
-
+            run_config.benchmark_name, run, run.score_order, run.threshold
         )
-        stats_writer.
-
+        stats_writer.add_statistics_entry(
+            run.run_identifier,
             benchmark_result.rank_stats,
             benchmark_result.binary_classification_stats,
         )
         benchmarking_results.append(benchmark_result)
-
-
+    run_identifiers = [run.run_identifier for run in run_config.runs]
+    [
+        generate_benchmark_comparison_output(
+            run_config.benchmark_name,
+            benchmarking_results,
+            run_identifiers,
+            benchmark_generator,
+            f"{unique_test_corpora_directory.parents[0].name}_"
+            f"{benchmark_generator.prioritisation_type_string}",
+        )
+        for unique_test_corpora_directory in unique_test_corpora_directories
+    ]
 
 
 def benchmark_run_comparisons(
-
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
+    run_config: Config,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.
 
     Args:
-
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        gene_analysis (bool): Boolean flag indicating whether to benchmark gene results.
-        variant_analysis (bool): Boolean flag indicating whether to benchmark variant results.
-        disease_analysis (bool): Boolean flag indicating whether to benchmark disease results.
-        plot_type (str): Type of plot for benchmark visualisation.
+        run_config (Config): Run configurations.
     """
-
+    gene_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.gene_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    variant_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.variant_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    disease_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.disease_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    if gene_analysis_runs.runs:
         _run_benchmark_comparison(
-
-
-
-
-            plot_type=plot_type,
-            benchmark_generator=GeneBenchmarkRunOutputGenerator(),
+            run_config=gene_analysis_runs,
+            benchmark_generator=GeneBenchmarkRunOutputGenerator(
+                plot_customisation=gene_analysis_runs.plot_customisation.gene_plots
+            ),
         )
-    if
+    if variant_analysis_runs.runs:
         _run_benchmark_comparison(
-
-
-
-
-            plot_type=plot_type,
-            benchmark_generator=VariantBenchmarkRunOutputGenerator(),
+            run_config=variant_analysis_runs,
+            benchmark_generator=VariantBenchmarkRunOutputGenerator(
+                plot_customisation=variant_analysis_runs.plot_customisation.variant_plots
+            ),
         )
-    if
+    if disease_analysis_runs.runs:
         _run_benchmark_comparison(
-
-
-
-
-            plot_type=plot_type,
-            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(),
+            run_config=disease_analysis_runs,
+            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(
+                plot_customisation=disease_analysis_runs.plot_customisation.disease_plots
+            ),
         )
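The analysis.py change above replaces the per-flag arguments (score_order, output_prefix, gene_analysis, ...) with a single Config object, and benchmark_run_comparisons now partitions the configured runs into one sub-config per prioritisation type, benchmarking only the non-empty groups. The sketch below illustrates that partitioning logic in isolation; ConfigSketch and RunConfigSketch are hypothetical simplifications of the Config/RunConfig classes in pheval.analyse.run_data_parser, using only the fields visible in this diff.

    from dataclasses import dataclass, field
    from typing import List, Optional


    @dataclass
    class RunConfigSketch:
        """Hypothetical stand-in for pheval.analyse.run_data_parser.RunConfig."""
        run_identifier: str
        gene_analysis: bool = False
        variant_analysis: bool = False
        disease_analysis: bool = False
        score_order: str = "descending"
        threshold: float = 0.0


    @dataclass
    class ConfigSketch:
        """Hypothetical stand-in for pheval.analyse.run_data_parser.Config."""
        benchmark_name: str
        runs: List[RunConfigSketch] = field(default_factory=list)
        plot_customisation: Optional[object] = None


    def split_runs_by_analysis_type(run_config: ConfigSketch) -> dict:
        """Mirror the partitioning done by benchmark_run_comparisons(): one
        sub-config per prioritisation type, keeping only runs that enabled it."""
        return {
            analysis_type: ConfigSketch(
                benchmark_name=run_config.benchmark_name,
                runs=[
                    run
                    for run in run_config.runs
                    if getattr(run, f"{analysis_type}_analysis")
                ],
                plot_customisation=run_config.plot_customisation,
            )
            for analysis_type in ("gene", "variant", "disease")
        }


    config = ConfigSketch(
        benchmark_name="example_benchmark",
        runs=[
            RunConfigSketch("run_1", gene_analysis=True, variant_analysis=True),
            RunConfigSketch("run_2", gene_analysis=True),
        ],
    )
    for analysis_type, sub_config in split_runs_by_analysis_type(config).items():
        if sub_config.runs:  # only non-empty groups are benchmarked, as in the new code
            print(analysis_type, [run.run_identifier for run in sub_config.runs])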
pheval/analyse/assess_prioritisation_base.py
ADDED
@@ -0,0 +1,108 @@
+from typing import Union
+
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
+from pheval.post_processing.post_processing import (
+    RankedPhEvalDiseaseResult,
+    RankedPhEvalGeneResult,
+    RankedPhEvalVariantResult,
+)
+
+
+class AssessPrioritisationBase:
+    def __init__(
+        self,
+        db_connection: BenchmarkDBManager,
+        table_name: str,
+        column: str,
+        threshold: float,
+        score_order: str,
+    ):
+        """
+        Initialise AssessPrioritisationBase class
+
+        Args:
+            db_connection (BenchmarkDBManager): DB connection.
+            table_name (str): Table name.
+            column (str): Column name.
+            threshold (float): Threshold for scores
+            score_order (str): Score order for results, either ascending or descending
+
+        """
+        self.threshold = threshold
+        self.score_order = score_order
+        self.db_connection = db_connection
+        self.conn = db_connection.conn
+        self.column = column
+        self.table_name = table_name
+        db_connection.add_column_integer_default(
+            table_name=table_name, column=self.column, default=0
+        )
+
+    def _assess_with_threshold_ascending_order(
+        self,
+        result_entry: Union[
+            RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
+        ],
+    ) -> int:
+        """
+        Record the prioritisation rank if it meets the ascending order threshold.
+
+
+        Args:
+            result_entry (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
+                Ranked PhEval result entry
+
+        Returns:
+            int: Recorded prioritisation rank
+        """
+        if float(self.threshold) > float(result_entry.score):
+            return result_entry.rank
+        else:
+            return 0
+
+    def _assess_with_threshold(
+        self,
+        result_entry: Union[
+            RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
+        ],
+    ) -> int:
+        """
+        Record the prioritisation rank if it meets the score threshold.
+
+        Args:
+            result_entry (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
+                Ranked PhEval result entry
+
+        Returns:
+            int: Recorded prioritisation rank
+        """
+        if float(self.threshold) < float(result_entry.score):
+            return result_entry.rank
+        else:
+            return 0
+
+    def _record_matched_entity(
+        self,
+        standardised_result: Union[
+            RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
+        ],
+    ) -> int:
+        """
+        Return the rank result - handling the specification of a threshold.
+        Args:
+            standardised_result (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
+                Ranked PhEval disease result entry
+
+        Returns:
+            int: Recorded entity prioritisation rank
+        """
+        if float(self.threshold) == 0.0:
+            return standardised_result.rank
+        else:
+            return (
+                self._assess_with_threshold(standardised_result)
+                if self.score_order != "ascending"
+                else self._assess_with_threshold_ascending_order(
+                    standardised_result,
+                )
+            )
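The new AssessPrioritisationBase above centralises the threshold handling that gene, variant and disease analyses previously duplicated: with a threshold of 0.0 the rank is always recorded; otherwise the rank is kept only when the score beats the threshold in the configured direction (strictly greater for descending score orders, strictly lower for ascending ones), and 0 is recorded otherwise. A small standalone sketch of that decision logic, using a hypothetical Result tuple in place of the RankedPhEval*Result classes:

    from typing import NamedTuple


    class Result(NamedTuple):
        # Hypothetical stand-in exposing only the two fields the threshold logic reads.
        rank: int
        score: float


    def record_matched_entity(result: Result, threshold: float, score_order: str) -> int:
        """Replicates AssessPrioritisationBase._record_matched_entity():
        return the rank when it passes the threshold check, else 0."""
        if float(threshold) == 0.0:
            return result.rank  # no threshold configured: always record the rank
        if score_order == "ascending":
            # lower scores are better, so the score must fall below the threshold
            return result.rank if float(threshold) > float(result.score) else 0
        # descending: higher scores are better, so the score must exceed the threshold
        return result.rank if float(threshold) < float(result.score) else 0


    assert record_matched_entity(Result(rank=1, score=0.95), 0.0, "descending") == 1
    assert record_matched_entity(Result(rank=1, score=0.40), 0.5, "descending") == 0
    assert record_matched_entity(Result(rank=3, score=0.01), 0.05, "ascending") == 3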
pheval/analyse/benchmark_db_manager.py
ADDED
@@ -0,0 +1,140 @@
+import ast
+import re
+from typing import List, Type, Union
+
+import duckdb
+from duckdb import DuckDBPyConnection
+
+from pheval.post_processing.post_processing import (
+    RankedPhEvalDiseaseResult,
+    RankedPhEvalGeneResult,
+    RankedPhEvalVariantResult,
+)
+
+
+class BenchmarkDBManager:
+    """
+    Class to connect to database.
+    """
+
+    def __init__(self, benchmark_name: str):
+        """Initialise the BenchmarkDBManager class."""
+        self.conn = self.get_connection(
+            f"{benchmark_name}" if str(benchmark_name).endswith(".db") else f"{benchmark_name}.db"
+        )
+
+    def initialise(self):
+        """Initialise the duckdb connection."""
+        self.add_contains_function()
+
+    @staticmethod
+    def get_connection(db_name: str) -> DuckDBPyConnection:
+        """
+        Get a connection to the database.
+        Returns:
+            DuckDBPyConnection: Connection to the database.
+        """
+        conn = duckdb.connect(db_name)
+        return conn
+
+    def add_column_integer_default(self, table_name: str, column: str, default: int = 0) -> None:
+        """
+        Add a column to an existing table with an integer default value.
+        Args:
+            table_name (str): Name of the table.
+            column (str): Name of the column to add.
+            default (int): Default integer value to add.
+        """
+        try:
+            self.conn.execute(
+                f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}'
+            )
+            self.conn.execute(f'UPDATE {table_name} SET "{column}" = ?', (default,))
+            self.conn.commit()
+        except duckdb.CatalogException:
+            pass
+
+    def drop_table(self, table_name: str) -> None:
+        """
+        Drop a table from the database.
+        Args:
+            table_name: Name of the table to drop from the database
+        """
+        self.conn.execute(f"""DROP TABLE IF EXISTS "{table_name}";""")
+
+    @staticmethod
+    def contains_entity_function(entity: str, known_causative_entity: str) -> bool:
+        """
+        Determines if a known causative entity is present within an entity or list of entities.
+        Args:
+            entity (str): The entity to be checked. It can be a single entity or a string representation of a list.
+            known_causative_entity (str): The entity to search for within the `entity`.
+
+        Returns:
+            bool: `True` if `known_causative_entity` is found in `entity` (or its list representation),
+            `False` otherwise.
+        """
+        list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
+        if list_pattern.match(str(entity)):
+            list_representation = ast.literal_eval(entity)
+            if isinstance(list_representation, list):
+                return known_causative_entity in list_representation
+        return known_causative_entity == entity
+
+    def add_contains_function(self) -> None:
+        """
+        Adds a custom `contains_entity_function` to the DuckDB connection if it does not already exist.
+        """
+        result = self.conn.execute(
+            "SELECT * FROM duckdb_functions() WHERE function_name = ?", ["contains_entity_function"]
+        ).fetchall()
+        if not result:
+            self.conn.create_function("contains_entity_function", self.contains_entity_function)
+
+    def parse_table_into_dataclass(
+        self,
+        table_name: str,
+        dataclass: Union[
+            Type[RankedPhEvalGeneResult],
+            Type[RankedPhEvalVariantResult],
+            Type[RankedPhEvalDiseaseResult],
+        ],
+    ) -> Union[
+        List[RankedPhEvalGeneResult],
+        List[RankedPhEvalVariantResult],
+        List[RankedPhEvalDiseaseResult],
+    ]:
+        """
+        Parses a DuckDB table into a list of dataclass instances.
+        Args:
+            table_name (str): The name of the DuckDB table to be parsed.
+            dataclass (Union[Type[RankedPhEvalGeneResult], Type[RankedPhEvalVariantResult],
+            Type[RankedPhEvalDiseaseResult]]):
+                The dataclass type to which each row in the table should be mapped.
+
+        Returns:
+            List[dataclass]: A list of instances of the provided dataclass, each representing a row from the table.
+        """
+        result = (
+            self.conn.execute(f"SELECT * FROM '{table_name}'").fetchdf().to_dict(orient="records")
+        )
+        return [dataclass(**row) for row in result]
+
+    def check_table_exists(self, table_name: str) -> bool:
+        """
+        Check if a table exists in the connected DuckDB database.
+        Args:
+            table_name (str): The name of the table to check for existence.
+        Returns:
+            bool: Returns `True` if the table exists in the database, `False` otherwise.
+        """
+        result = self.conn.execute(
+            f"SELECT * FROM information_schema.tables WHERE table_name = '{table_name}'"
+        ).fetchall()
+        if result:
+            return True
+        return False
+
+    def close(self):
+        """Close the connection to the database."""
+        self.conn.close()
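BenchmarkDBManager above wraps a DuckDB database named after the benchmark and registers contains_entity_function as a SQL-callable UDF, so queries can match a known causative entity against result cells that hold either a single identifier or a stringified list. A minimal sketch of that pattern against an in-memory database; it assumes a duckdb Python package recent enough to support create_function (roughly 0.8+), and the table name and values are made up for illustration:

    import ast
    import re

    import duckdb


    def contains_entity_function(entity: str, known_causative_entity: str) -> bool:
        # Same matching logic as BenchmarkDBManager.contains_entity_function:
        # treat "[...]"-style strings as lists, otherwise compare directly.
        list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
        if list_pattern.match(str(entity)):
            parsed = ast.literal_eval(entity)
            if isinstance(parsed, list):
                return known_causative_entity in parsed
        return known_causative_entity == entity


    conn = duckdb.connect(":memory:")  # the real class connects to "<benchmark_name>.db"
    conn.create_function("contains_entity_function", contains_entity_function)

    # Hypothetical results table: one row holds a single gene, one a stringified list.
    conn.execute("CREATE TABLE example_results (gene_symbol VARCHAR, rank INTEGER)")
    conn.execute("INSERT INTO example_results VALUES ('BRCA1', 1), ('[''TP53'', ''BRCA2'']', 2)")

    matches = conn.execute(
        "SELECT rank FROM example_results WHERE contains_entity_function(gene_symbol, ?)",
        ["BRCA2"],
    ).fetchall()
    print(matches)  # [(2,)] - the causative gene is found inside the list-valued cell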
pheval/analyse/benchmark_generator.py
CHANGED
@@ -1,20 +1,11 @@
-from collections import defaultdict
 from dataclasses import dataclass
 from typing import Callable
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.disease_prioritisation_analysis import benchmark_disease_prioritisation
 from pheval.analyse.gene_prioritisation_analysis import benchmark_gene_prioritisation
-from pheval.analyse.run_data_parser import
+from pheval.analyse.run_data_parser import RunConfig, SinglePlotCustomisation
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
-from pheval.constants import (
-    DISEASE_PLOT_FILE_PREFIX,
-    DISEASE_PLOT_Y_LABEL,
-    GENE_PLOT_FILE_PREFIX,
-    GENE_PLOT_Y_LABEL,
-    VARIANT_PLOT_FILE_PREFIX,
-    VARIANT_PLOT_Y_LABEL,
-)
 
 
 @dataclass
@@ -22,20 +13,20 @@ class BenchmarkRunOutputGenerator:
     """Base class for recording data required for generating benchmarking outputs.
 
     Attributes:
-
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
         y_label (str): Label for the y-axis in benchmarking outputs.
         generate_benchmark_run_results (Callable): Callable to generate benchmark run results.
             Takes parameters: input and results directory, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-
+        stats_comparison_file (str): Suffix for the rank comparison file.
     """
 
-
+    plot_customisation: SinglePlotCustomisation
+    prioritisation_type_string: str
     y_label: str
-    generate_benchmark_run_results: Callable[
-
-    ]
-    stats_comparison_file_suffix: str
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults]
+    stats_comparison_file: str
 
 
 @dataclass
@@ -48,24 +39,26 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for gene prioritisation benchmarking.
 
     Attributes:
-
-
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to GENE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs.
             Defaults to GENE_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate gene prioritisation
             benchmark run results. Defaults to benchmark_gene_prioritisation.
-            Takes parameters:
+            Takes parameters: run configuration, score order, threshold, rank comparison,
            and returns BenchmarkRunResults.
-
-            Defaults to "-gene_summary
+        stats_comparison_file (str): Suffix for the gene rank comparison file.
+            Defaults to "-gene_summary".
     """
 
-
-
-
-
-
-
+    plot_customisation: SinglePlotCustomisation = None
+    prioritisation_type_string: str = "gene"
+    y_label: str = "Disease-causing genes (%)"
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_gene_prioritisation
+    )
+    stats_comparison_file: str = "gene_summary"
 
 
 @dataclass
@@ -78,25 +71,27 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for variant prioritisation benchmarking.
 
     Attributes:
-
-
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to VARIANT_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs.
            Defaults to VARIANT_PLOT_Y_LABEL.
        generate_benchmark_run_results (Callable): Callable to generate variant prioritisation
            benchmark run results. Defaults to benchmark_variant_prioritisation.
-            Takes parameters:
+            Takes parameters: run configuration, score order, threshold, rank comparison,
            and returns BenchmarkRunResults.
-
-            Defaults to "-variant_summary
+        stats_comparison_file (str): Suffix for the variant rank comparison file.
+            Defaults to "-variant_summary".
 
     """
 
-
-
-
-
-
-
+    plot_customisation: SinglePlotCustomisation = None
+    prioritisation_type_string: str = "variant"
+    y_label: str = "Disease-causing variants (%)"
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_variant_prioritisation
+    )
+    stats_comparison_file: str = "variant_summary"
 
 
 @dataclass
@@ -109,21 +104,23 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for disease prioritisation benchmarking.
 
    Attributes:
-
-
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to DISEASE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs.
            Defaults to DISEASE_PLOT_Y_LABEL.
        generate_benchmark_run_results (Callable): Callable to generate disease prioritisation
            benchmark run results. Defaults to benchmark_disease_prioritisation.
-            Takes parameters:
+            Takes parameters: run configuration, score order, threshold, rank comparison,
            and returns BenchmarkRunResults.
-
-            Defaults to "-disease_summary
+        stats_comparison_file (str): Suffix for the disease rank comparison file.
+            Defaults to "-disease_summary".
     """
 
-
-
-
-
-
-
+    plot_customisation: SinglePlotCustomisation = None
+    prioritisation_type_string: str = "disease"
+    y_label: str = "Known diseases (%)"
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_disease_prioritisation
+    )
+    stats_comparison_file: str = "disease_summary"