pheval 0.3.9__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.


@@ -1,193 +1,104 @@
-from collections import defaultdict
-from pathlib import Path
-from typing import List
-
 from pheval.analyse.benchmark_generator import (
     BenchmarkRunOutputGenerator,
     DiseaseBenchmarkRunOutputGenerator,
     GeneBenchmarkRunOutputGenerator,
     VariantBenchmarkRunOutputGenerator,
 )
-from pheval.analyse.generate_summary_outputs import (
-    generate_benchmark_comparison_output,
-    generate_benchmark_output,
-)
+from pheval.analyse.generate_summary_outputs import generate_benchmark_comparison_output
+from pheval.analyse.parse_corpus import CorpusParser
 from pheval.analyse.rank_stats import RankStatsWriter
-from pheval.analyse.run_data_parser import TrackInputOutputDirectories
-
-
-def _run_benchmark(
-    results_dir_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
-) -> None:
-    """Run a benchmark on a result directory.
-
-    Args:
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
-        benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
-    """
-    stats_writer = RankStatsWriter(
-        Path(output_prefix + benchmark_generator.stats_comparison_file_suffix)
-    )
-    rank_comparison = defaultdict(dict)
-    benchmark_result = benchmark_generator.generate_benchmark_run_results(
-        results_dir_and_input, score_order, threshold, rank_comparison
-    )
-    stats_writer.write_row(
-        results_dir_and_input.results_dir,
-        benchmark_result.rank_stats,
-        benchmark_result.binary_classification_stats,
-    )
-    generate_benchmark_output(benchmark_result, plot_type, benchmark_generator)
-    stats_writer.close()
-
-
-def benchmark_directory(
-    results_dir_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
-) -> None:
-    """
-    Benchmark prioritisation performance for a single run.
-
-    Args:
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        gene_analysis (bool): Boolean flag indicating whether to benchmark gene results.
-        variant_analysis (bool): Boolean flag indicating whether to benchmark variant results.
-        disease_analysis (bool): Boolean flag indicating whether to benchmark disease results.
-        plot_type (str): Type of plot for benchmark visualisation.
-    """
-    if gene_analysis:
-        _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=GeneBenchmarkRunOutputGenerator(),
-        )
-    if variant_analysis:
-        _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=VariantBenchmarkRunOutputGenerator(),
-        )
-    if disease_analysis:
-        _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(),
-        )
+from pheval.analyse.run_data_parser import Config
 
 
 def _run_benchmark_comparison(
-    results_directories: List[TrackInputOutputDirectories],
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
+    run_config: Config,
     benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Run a benchmark on several result directories.
 
     Args:
-        results_directories (List[TrackInputOutputDirectories]): List of input and output directories
+        run_config (List[TrackInputOutputDirectories]): List of input and output directories
             for tracking results across multiple directories.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
     stats_writer = RankStatsWriter(
-        Path(output_prefix + benchmark_generator.stats_comparison_file_suffix)
+        run_config.benchmark_name, benchmark_generator.stats_comparison_file
     )
+    unique_test_corpora_directories = set([result.phenopacket_dir for result in run_config.runs])
+    [
+        CorpusParser(run_config.benchmark_name, test_corpora_directory).parse_corpus(
+            benchmark_generator
+        )
+        for test_corpora_directory in unique_test_corpora_directories
+    ]
     benchmarking_results = []
-    for results_dir_and_input in results_directories:
-        rank_comparison = defaultdict(dict)
+    for run in run_config.runs:
         benchmark_result = benchmark_generator.generate_benchmark_run_results(
-            results_dir_and_input, score_order, threshold, rank_comparison
+            run_config.benchmark_name, run, run.score_order, run.threshold
         )
-        stats_writer.write_row(
-            results_dir_and_input.results_dir,
+        stats_writer.add_statistics_entry(
+            run.run_identifier,
             benchmark_result.rank_stats,
             benchmark_result.binary_classification_stats,
         )
         benchmarking_results.append(benchmark_result)
-    generate_benchmark_comparison_output(benchmarking_results, plot_type, benchmark_generator)
-    stats_writer.close()
+    run_identifiers = [run.run_identifier for run in run_config.runs]
+    [
+        generate_benchmark_comparison_output(
+            run_config.benchmark_name,
+            benchmarking_results,
+            run_identifiers,
+            benchmark_generator,
+            f"{unique_test_corpora_directory.parents[0].name}_"
+            f"{benchmark_generator.prioritisation_type_string}",
+        )
+        for unique_test_corpora_directory in unique_test_corpora_directories
+    ]
 
 
 def benchmark_run_comparisons(
-    results_directories: List[TrackInputOutputDirectories],
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
+    run_config: Config,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.
 
     Args:
-        results_directories (List[TrackInputOutputDirectories]): Input and output directories for tracking results.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        gene_analysis (bool): Boolean flag indicating whether to benchmark gene results.
-        variant_analysis (bool): Boolean flag indicating whether to benchmark variant results.
-        disease_analysis (bool): Boolean flag indicating whether to benchmark disease results.
-        plot_type (str): Type of plot for benchmark visualisation.
+        run_config (Config): Run configurations.
     """
-    if gene_analysis:
+    gene_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.gene_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    variant_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.variant_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    disease_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.disease_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    if gene_analysis_runs.runs:
         _run_benchmark_comparison(
-            results_directories=results_directories,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=GeneBenchmarkRunOutputGenerator(),
+            run_config=gene_analysis_runs,
+            benchmark_generator=GeneBenchmarkRunOutputGenerator(
+                plot_customisation=gene_analysis_runs.plot_customisation.gene_plots
+            ),
         )
-    if variant_analysis:
+    if variant_analysis_runs.runs:
         _run_benchmark_comparison(
-            results_directories=results_directories,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=VariantBenchmarkRunOutputGenerator(),
+            run_config=variant_analysis_runs,
+            benchmark_generator=VariantBenchmarkRunOutputGenerator(
+                plot_customisation=variant_analysis_runs.plot_customisation.variant_plots
+            ),
         )
-    if disease_analysis:
+    if disease_analysis_runs.runs:
         _run_benchmark_comparison(
-            results_directories=results_directories,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(),
+            run_config=disease_analysis_runs,
+            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(
+                plot_customisation=disease_analysis_runs.plot_customisation.disease_plots
+            ),
         )
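
For orientation: the hunk above replaces the per-flag arguments of benchmark_run_comparisons with a single Config object, and each analysis type is benchmarked only when at least one run enables it. A minimal, self-contained sketch of that filtering pattern follows; it uses stand-in dataclasses rather than pheval's real Config/RunConfig models, whose full field sets are not shown in this diff.

from dataclasses import dataclass
from typing import Dict, List


@dataclass
class Run:  # stand-in for pheval's RunConfig; only the fields used by the filter are modelled
    run_identifier: str
    gene_analysis: bool
    variant_analysis: bool
    disease_analysis: bool


@dataclass
class Config:  # stand-in for pheval's Config
    benchmark_name: str
    runs: List[Run]


def split_runs_by_analysis(config: Config) -> Dict[str, List[Run]]:
    """Group runs by the analysis types they enable, mirroring benchmark_run_comparisons."""
    return {
        "gene": [r for r in config.runs if r.gene_analysis],
        "variant": [r for r in config.runs if r.variant_analysis],
        "disease": [r for r in config.runs if r.disease_analysis],
    }


config = Config(
    benchmark_name="exomiser_benchmark",
    runs=[
        Run("run_1", gene_analysis=True, variant_analysis=True, disease_analysis=False),
        Run("run_2", gene_analysis=True, variant_analysis=False, disease_analysis=False),
    ],
)
for analysis, runs in split_runs_by_analysis(config).items():
    if runs:  # only non-empty subsets are benchmarked
        print(analysis, [r.run_identifier for r in runs])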
@@ -0,0 +1,108 @@
+from typing import Union
+
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
+from pheval.post_processing.post_processing import (
+    RankedPhEvalDiseaseResult,
+    RankedPhEvalGeneResult,
+    RankedPhEvalVariantResult,
+)
+
+
+class AssessPrioritisationBase:
+    def __init__(
+        self,
+        db_connection: BenchmarkDBManager,
+        table_name: str,
+        column: str,
+        threshold: float,
+        score_order: str,
+    ):
+        """
+        Initialise AssessPrioritisationBase class
+
+        Args:
+            db_connection (BenchmarkDBManager): DB connection.
+            table_name (str): Table name.
+            column (str): Column name.
+            threshold (float): Threshold for scores
+            score_order (str): Score order for results, either ascending or descending
+
+        """
+        self.threshold = threshold
+        self.score_order = score_order
+        self.db_connection = db_connection
+        self.conn = db_connection.conn
+        self.column = column
+        self.table_name = table_name
+        db_connection.add_column_integer_default(
+            table_name=table_name, column=self.column, default=0
+        )
+
+    def _assess_with_threshold_ascending_order(
+        self,
+        result_entry: Union[
+            RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
+        ],
+    ) -> int:
+        """
+        Record the prioritisation rank if it meets the ascending order threshold.
+
+
+        Args:
+            result_entry (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
+                Ranked PhEval result entry
+
+        Returns:
+            int: Recorded prioritisation rank
+        """
+        if float(self.threshold) > float(result_entry.score):
+            return result_entry.rank
+        else:
+            return 0
+
+    def _assess_with_threshold(
+        self,
+        result_entry: Union[
+            RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
+        ],
+    ) -> int:
+        """
+        Record the prioritisation rank if it meets the score threshold.
+
+        Args:
+            result_entry (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
+                Ranked PhEval result entry
+
+        Returns:
+            int: Recorded prioritisation rank
+        """
+        if float(self.threshold) < float(result_entry.score):
+            return result_entry.rank
+        else:
+            return 0
+
+    def _record_matched_entity(
+        self,
+        standardised_result: Union[
+            RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult
+        ],
+    ) -> int:
+        """
+        Return the rank result - handling the specification of a threshold.
+        Args:
+            standardised_result (Union[RankedPhEvalGeneResult, RankedPhEvalDiseaseResult, RankedPhEvalVariantResult]):
+                Ranked PhEval disease result entry
+
+        Returns:
+            int: Recorded entity prioritisation rank
+        """
+        if float(self.threshold) == 0.0:
+            return standardised_result.rank
+        else:
+            return (
+                self._assess_with_threshold(standardised_result)
+                if self.score_order != "ascending"
+                else self._assess_with_threshold_ascending_order(
+                    standardised_result,
+                )
+            )
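
The AssessPrioritisationBase helpers above record a result's rank only when its score passes the configured threshold in the configured direction (a score below the threshold for ascending order, above it for descending order), and always record the rank when the threshold is 0. A small standalone sketch of that decision, mirroring the logic without importing pheval:

def recorded_rank(rank: int, score: float, threshold: float, score_order: str) -> int:
    """Mirror of AssessPrioritisationBase._record_matched_entity: return the rank when the
    score passes the threshold in the configured direction, otherwise 0."""
    if threshold == 0.0:
        return rank  # no threshold set: always record the rank
    if score_order == "ascending":
        return rank if threshold > score else 0  # lower scores are better
    return rank if threshold < score else 0  # higher scores are better


assert recorded_rank(rank=3, score=0.9, threshold=0.5, score_order="descending") == 3
assert recorded_rank(rank=3, score=0.2, threshold=0.5, score_order="descending") == 0
assert recorded_rank(rank=1, score=0.01, threshold=0.05, score_order="ascending") == 1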
@@ -0,0 +1,140 @@
+import ast
+import re
+from typing import List, Type, Union
+
+import duckdb
+from duckdb import DuckDBPyConnection
+
+from pheval.post_processing.post_processing import (
+    RankedPhEvalDiseaseResult,
+    RankedPhEvalGeneResult,
+    RankedPhEvalVariantResult,
+)
+
+
+class BenchmarkDBManager:
+    """
+    Class to connect to database.
+    """
+
+    def __init__(self, benchmark_name: str):
+        """Initialise the BenchmarkDBManager class."""
+        self.conn = self.get_connection(
+            f"{benchmark_name}" if str(benchmark_name).endswith(".db") else f"{benchmark_name}.db"
+        )
+
+    def initialise(self):
+        """Initialise the duckdb connection."""
+        self.add_contains_function()
+
+    @staticmethod
+    def get_connection(db_name: str) -> DuckDBPyConnection:
+        """
+        Get a connection to the database.
+        Returns:
+            DuckDBPyConnection: Connection to the database.
+        """
+        conn = duckdb.connect(db_name)
+        return conn
+
+    def add_column_integer_default(self, table_name: str, column: str, default: int = 0) -> None:
+        """
+        Add a column to an existing table with an integer default value.
+        Args:
+            table_name (str): Name of the table.
+            column (str): Name of the column to add.
+            default (int): Default integer value to add.
+        """
+        try:
+            self.conn.execute(
+                f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}'
+            )
+            self.conn.execute(f'UPDATE {table_name} SET "{column}" = ?', (default,))
+            self.conn.commit()
+        except duckdb.CatalogException:
+            pass
+
+    def drop_table(self, table_name: str) -> None:
+        """
+        Drop a table from the database.
+        Args:
+            table_name: Name of the table to drop from the database
+        """
+        self.conn.execute(f"""DROP TABLE IF EXISTS "{table_name}";""")
+
+    @staticmethod
+    def contains_entity_function(entity: str, known_causative_entity: str) -> bool:
+        """
+        Determines if a known causative entity is present within an entity or list of entities.
+        Args:
+            entity (str): The entity to be checked. It can be a single entity or a string representation of a list.
+            known_causative_entity (str): The entity to search for within the `entity`.
+
+        Returns:
+            bool: `True` if `known_causative_entity` is found in `entity` (or its list representation),
+            `False` otherwise.
+        """
+        list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
+        if list_pattern.match(str(entity)):
+            list_representation = ast.literal_eval(entity)
+            if isinstance(list_representation, list):
+                return known_causative_entity in list_representation
+        return known_causative_entity == entity
+
+    def add_contains_function(self) -> None:
+        """
+        Adds a custom `contains_entity_function` to the DuckDB connection if it does not already exist.
+        """
+        result = self.conn.execute(
+            "SELECT * FROM duckdb_functions() WHERE function_name = ?", ["contains_entity_function"]
+        ).fetchall()
+        if not result:
+            self.conn.create_function("contains_entity_function", self.contains_entity_function)
+
+    def parse_table_into_dataclass(
+        self,
+        table_name: str,
+        dataclass: Union[
+            Type[RankedPhEvalGeneResult],
+            Type[RankedPhEvalVariantResult],
+            Type[RankedPhEvalDiseaseResult],
+        ],
+    ) -> Union[
+        List[RankedPhEvalGeneResult],
+        List[RankedPhEvalVariantResult],
+        List[RankedPhEvalDiseaseResult],
+    ]:
+        """
+        Parses a DuckDB table into a list of dataclass instances.
+        Args:
+            table_name (str): The name of the DuckDB table to be parsed.
+            dataclass (Union[Type[RankedPhEvalGeneResult], Type[RankedPhEvalVariantResult],
+                Type[RankedPhEvalDiseaseResult]]):
+                The dataclass type to which each row in the table should be mapped.
+
+        Returns:
+            List[dataclass]: A list of instances of the provided dataclass, each representing a row from the table.
+        """
+        result = (
+            self.conn.execute(f"SELECT * FROM '{table_name}'").fetchdf().to_dict(orient="records")
+        )
+        return [dataclass(**row) for row in result]
+
+    def check_table_exists(self, table_name: str) -> bool:
+        """
+        Check if a table exists in the connected DuckDB database.
+        Args:
+            table_name (str): The name of the table to check for existence.
+        Returns:
+            bool: Returns `True` if the table exists in the database, `False` otherwise.
+        """
+        result = self.conn.execute(
+            f"SELECT * FROM information_schema.tables WHERE table_name = '{table_name}'"
+        ).fetchall()
+        if result:
+            return True
+        return False
+
+    def close(self):
+        """Close the connection to the database."""
+        self.conn.close()
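
A brief usage sketch of the new BenchmarkDBManager defined above: it opens (or creates) <benchmark_name>.db in the working directory, and contains_entity_function accepts either a single identifier or a string-encoded list of identifiers. The gene symbols below are illustrative only.

from pheval.analyse.benchmark_db_manager import BenchmarkDBManager

db = BenchmarkDBManager("example_benchmark")  # opens or creates example_benchmark.db
db.initialise()  # registers contains_entity_function as a DuckDB UDF

# Matches a plain identifier or a string representation of a list of identifiers.
assert db.contains_entity_function("BRCA2", "BRCA2")
assert db.contains_entity_function("['BRCA1', 'BRCA2']", "BRCA2")

print(db.check_table_exists("results"))  # False until a results table has been created
db.close()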
@@ -1,20 +1,11 @@
-from collections import defaultdict
 from dataclasses import dataclass
 from typing import Callable
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.disease_prioritisation_analysis import benchmark_disease_prioritisation
 from pheval.analyse.gene_prioritisation_analysis import benchmark_gene_prioritisation
-from pheval.analyse.run_data_parser import TrackInputOutputDirectories
+from pheval.analyse.run_data_parser import RunConfig, SinglePlotCustomisation
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
-from pheval.constants import (
-    DISEASE_PLOT_FILE_PREFIX,
-    DISEASE_PLOT_Y_LABEL,
-    GENE_PLOT_FILE_PREFIX,
-    GENE_PLOT_Y_LABEL,
-    VARIANT_PLOT_FILE_PREFIX,
-    VARIANT_PLOT_Y_LABEL,
-)
 
 
 @dataclass
@@ -22,20 +13,20 @@ class BenchmarkRunOutputGenerator:
     """Base class for recording data required for generating benchmarking outputs.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the prioritisation type output file.
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
         y_label (str): Label for the y-axis in benchmarking outputs.
         generate_benchmark_run_results (Callable): Callable to generate benchmark run results.
             Takes parameters: input and results directory, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the rank comparison file.
+        stats_comparison_file (str): Suffix for the rank comparison file.
     """
 
-    prioritisation_type_file_prefix: str
+    plot_customisation: SinglePlotCustomisation
+    prioritisation_type_string: str
     y_label: str
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
-    ]
-    stats_comparison_file_suffix: str
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults]
+    stats_comparison_file: str
 
 
 @dataclass
@@ -48,24 +39,26 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for gene prioritisation benchmarking.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the gene prioritisation type file.
-            Defaults to GENE_PLOT_FILE_PREFIX.
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to GENE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs.
             Defaults to GENE_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate gene prioritisation
             benchmark run results. Defaults to benchmark_gene_prioritisation.
-            Takes parameters: input and results directory, score order, threshold, rank comparison,
+            Takes parameters: run configuration, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the gene rank comparison file.
-            Defaults to "-gene_summary.tsv".
+        stats_comparison_file (str): Suffix for the gene rank comparison file.
+            Defaults to "-gene_summary".
     """
 
-    prioritisation_type_file_prefix: str = GENE_PLOT_FILE_PREFIX
-    y_label: str = GENE_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
-    ] = benchmark_gene_prioritisation
-    stats_comparison_file_suffix: str = "-gene_summary.tsv"
+    plot_customisation: SinglePlotCustomisation = None
+    prioritisation_type_string: str = "gene"
+    y_label: str = "Disease-causing genes (%)"
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_gene_prioritisation
+    )
+    stats_comparison_file: str = "gene_summary"
 
 
 @dataclass
@@ -78,25 +71,27 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for variant prioritisation benchmarking.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the variant prioritisation type file.
-            Defaults to VARIANT_PLOT_FILE_PREFIX.
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to VARIANT_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs.
             Defaults to VARIANT_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate variant prioritisation
             benchmark run results. Defaults to benchmark_variant_prioritisation.
-            Takes parameters: input and results directory, score order, threshold, rank comparison,
+            Takes parameters: run configuration, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the variant rank comparison file.
-            Defaults to "-variant_summary.tsv".
+        stats_comparison_file (str): Suffix for the variant rank comparison file.
+            Defaults to "-variant_summary".
 
     """
 
-    prioritisation_type_file_prefix: str = VARIANT_PLOT_FILE_PREFIX
-    y_label: str = VARIANT_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
-    ] = benchmark_variant_prioritisation
-    stats_comparison_file_suffix: str = "-variant_summary.tsv"
+    plot_customisation: SinglePlotCustomisation = None
+    prioritisation_type_string: str = "variant"
+    y_label: str = "Disease-causing variants (%)"
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_variant_prioritisation
+    )
+    stats_comparison_file: str = "variant_summary"
 
 
 @dataclass
@@ -109,21 +104,23 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for disease prioritisation benchmarking.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the disease prioritisation type file.
-            Defaults to DISEASE_PLOT_FILE_PREFIX.
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to DISEASE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs.
             Defaults to DISEASE_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate disease prioritisation
             benchmark run results. Defaults to benchmark_disease_prioritisation.
-            Takes parameters: input and results directory, score order, threshold, rank comparison,
+            Takes parameters: run configuration, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the disease rank comparison file.
-            Defaults to "-disease_summary.tsv".
+        stats_comparison_file (str): Suffix for the disease rank comparison file.
+            Defaults to "-disease_summary".
     """
 
-    prioritisation_type_file_prefix: str = DISEASE_PLOT_FILE_PREFIX
-    y_label: str = DISEASE_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
-    ] = benchmark_disease_prioritisation
-    stats_comparison_file_suffix: str = "-disease_summary.tsv"
+    plot_customisation: SinglePlotCustomisation = None
+    prioritisation_type_string: str = "disease"
+    y_label: str = "Known diseases (%)"
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_disease_prioritisation
+    )
+    stats_comparison_file: str = "disease_summary"
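
To see what the reworked dataclass fields resolve to, the three generators can be instantiated with their defaults (plot_customisation is normally supplied by benchmark_run_comparisons from the run configuration; here it stays at its None default):

from pheval.analyse.benchmark_generator import (
    DiseaseBenchmarkRunOutputGenerator,
    GeneBenchmarkRunOutputGenerator,
    VariantBenchmarkRunOutputGenerator,
)

for generator in (
    GeneBenchmarkRunOutputGenerator(),
    VariantBenchmarkRunOutputGenerator(),
    DiseaseBenchmarkRunOutputGenerator(),
):
    # e.g. "gene", "gene_summary", "Disease-causing genes (%)"
    print(
        generator.prioritisation_type_string,
        generator.stats_comparison_file,
        generator.y_label,
    )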