pheval 0.4.6__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/analyse/benchmark.py +156 -0
- pheval/analyse/benchmark_db_manager.py +16 -134
- pheval/analyse/benchmark_output_type.py +43 -0
- pheval/analyse/binary_classification_curves.py +132 -0
- pheval/analyse/binary_classification_stats.py +164 -307
- pheval/analyse/generate_plots.py +210 -395
- pheval/analyse/generate_rank_comparisons.py +44 -0
- pheval/analyse/rank_stats.py +190 -382
- pheval/analyse/run_data_parser.py +21 -39
- pheval/cli.py +28 -25
- pheval/cli_pheval_utils.py +7 -8
- pheval/post_processing/phenopacket_truth_set.py +235 -0
- pheval/post_processing/post_processing.py +183 -303
- pheval/post_processing/validate_result_format.py +92 -0
- pheval/prepare/update_phenopacket.py +11 -9
- pheval/utils/logger.py +35 -0
- pheval/utils/phenopacket_utils.py +85 -91
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/METADATA +4 -4
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/RECORD +22 -26
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/WHEEL +1 -1
- pheval/analyse/analysis.py +0 -104
- pheval/analyse/assess_prioritisation_base.py +0 -108
- pheval/analyse/benchmark_generator.py +0 -126
- pheval/analyse/benchmarking_data.py +0 -25
- pheval/analyse/disease_prioritisation_analysis.py +0 -152
- pheval/analyse/gene_prioritisation_analysis.py +0 -147
- pheval/analyse/generate_summary_outputs.py +0 -105
- pheval/analyse/parse_benchmark_summary.py +0 -81
- pheval/analyse/parse_corpus.py +0 -219
- pheval/analyse/prioritisation_result_types.py +0 -52
- pheval/analyse/variant_prioritisation_analysis.py +0 -159
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/LICENSE +0 -0
- {pheval-0.4.6.dist-info → pheval-0.5.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from collections import defaultdict
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
from typing import Union
|
|
4
3
|
|
|
4
|
+
import polars as pl
|
|
5
5
|
from phenopackets import Family, Phenopacket
|
|
6
6
|
|
|
7
7
|
from pheval.utils.file_utils import all_files
|
|
@@ -9,14 +9,14 @@ from pheval.utils.phenopacket_utils import (
|
|
|
9
9
|
GeneIdentifierUpdater,
|
|
10
10
|
PhenopacketRebuilder,
|
|
11
11
|
PhenopacketUtil,
|
|
12
|
-
|
|
12
|
+
create_gene_identifier_map,
|
|
13
13
|
phenopacket_reader,
|
|
14
14
|
write_phenopacket,
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def update_outdated_gene_context(
|
|
19
|
-
phenopacket_path: Path, gene_identifier: str,
|
|
19
|
+
phenopacket_path: Path, gene_identifier: str, identifier_map: pl.DataFrame
|
|
20
20
|
) -> Union[Phenopacket, Family]:
|
|
21
21
|
"""
|
|
22
22
|
Update the gene context of the Phenopacket.
|
|
@@ -24,7 +24,7 @@ def update_outdated_gene_context(
|
|
|
24
24
|
Args:
|
|
25
25
|
phenopacket_path (Path): The path to the Phenopacket file.
|
|
26
26
|
gene_identifier (str): Identifier to update the gene context.
|
|
27
|
-
|
|
27
|
+
identifier_map (pl.DataFrame): The gene identifier map used for updating.
|
|
28
28
|
|
|
29
29
|
Returns:
|
|
30
30
|
Union[Phenopacket, Family]: The updated Phenopacket or Family.
|
|
@@ -37,7 +37,7 @@ def update_outdated_gene_context(
|
|
|
37
37
|
phenopacket = phenopacket_reader(phenopacket_path)
|
|
38
38
|
interpretations = PhenopacketUtil(phenopacket).interpretations()
|
|
39
39
|
updated_interpretations = GeneIdentifierUpdater(
|
|
40
|
-
|
|
40
|
+
identifier_map=identifier_map, gene_identifier=gene_identifier
|
|
41
41
|
).update_genomic_interpretations_gene_identifier(interpretations, phenopacket_path)
|
|
42
42
|
return PhenopacketRebuilder(phenopacket).update_interpretations(updated_interpretations)
|
|
43
43
|
|
|
@@ -57,8 +57,10 @@ def create_updated_phenopacket(
|
|
|
57
57
|
to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
|
|
58
58
|
to describe the gene identifiers.
|
|
59
59
|
"""
|
|
60
|
-
|
|
61
|
-
updated_phenopacket = update_outdated_gene_context(
|
|
60
|
+
identifier_map = create_gene_identifier_map()
|
|
61
|
+
updated_phenopacket = update_outdated_gene_context(
|
|
62
|
+
phenopacket_path, gene_identifier, identifier_map
|
|
63
|
+
)
|
|
62
64
|
write_phenopacket(updated_phenopacket, output_dir.joinpath(phenopacket_path.name))
|
|
63
65
|
|
|
64
66
|
|
|
@@ -78,10 +80,10 @@ def create_updated_phenopackets(
|
|
|
78
80
|
to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
|
|
79
81
|
to describe the gene identifiers.
|
|
80
82
|
"""
|
|
81
|
-
|
|
83
|
+
identifier_map = create_gene_identifier_map()
|
|
82
84
|
for phenopacket_path in all_files(phenopacket_dir):
|
|
83
85
|
updated_phenopacket = update_outdated_gene_context(
|
|
84
|
-
phenopacket_path, gene_identifier,
|
|
86
|
+
phenopacket_path, gene_identifier, identifier_map
|
|
85
87
|
)
|
|
86
88
|
write_phenopacket(updated_phenopacket, output_dir.joinpath(phenopacket_path.name))
|
|
87
89
|
|
pheval/utils/logger.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
logging.basicConfig(
|
|
4
|
+
level=logging.INFO,
|
|
5
|
+
format="[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)d] - %(message)s",
|
|
6
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_logger(name="PHEVAL"):
|
|
11
|
+
return logging.getLogger(name)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def print_ascii_banner():
|
|
15
|
+
"""Prints ASCII banner only once when the script starts."""
|
|
16
|
+
if not getattr(logging, "_ascii_printed", False):
|
|
17
|
+
logging._ascii_printed = True
|
|
18
|
+
pheval_banner = """
|
|
19
|
+
Welcome to:
|
|
20
|
+
██████╗ ██╗ ██╗███████╗██╗ ██╗ █████╗ ██╗
|
|
21
|
+
██╔══██╗██║ ██║██╔════╝██║ ██║██╔══██╗██║
|
|
22
|
+
██████╔╝███████║█████╗ ██║ ██║███████║██║
|
|
23
|
+
██╔═══╝ ██╔══██║██╔══╝ ╚██╗ ██╔╝██╔══██║██║
|
|
24
|
+
██║ ██║ ██║███████╗ ╚████╔╝ ██║ ██║███████╗
|
|
25
|
+
╚═╝ ╚═╝ ╚═╝╚══════╝ ╚═══╝ ╚═╝ ╚═╝╚══════╝
|
|
26
|
+
A framework for the empirical evaluation of phenotype-driven prioritisation tools.
|
|
27
|
+
"""
|
|
28
|
+
print(pheval_banner)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def initialise_context(ctx):
|
|
32
|
+
ctx.ensure_object(dict)
|
|
33
|
+
if not getattr(ctx, "ascii_printed", False):
|
|
34
|
+
ctx.ascii_printed = True
|
|
35
|
+
print_ascii_banner()
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
-
from collections import defaultdict
|
|
5
4
|
from copy import copy
|
|
6
5
|
from dataclasses import dataclass
|
|
7
6
|
from pathlib import Path
|
|
8
7
|
from typing import List, Union
|
|
9
8
|
|
|
10
|
-
import
|
|
9
|
+
import polars as pl
|
|
11
10
|
from google.protobuf.json_format import MessageToJson, Parse
|
|
12
11
|
from phenopackets import (
|
|
13
12
|
Disease,
|
|
@@ -122,79 +121,65 @@ class ProbandDisease:
|
|
|
122
121
|
disease_identifier: str
|
|
123
122
|
|
|
124
123
|
|
|
125
|
-
def
|
|
124
|
+
def parse_hgnc_data() -> pl.DataFrame:
|
|
126
125
|
"""
|
|
127
|
-
Read HGNC data from a file and return it as a
|
|
126
|
+
Read HGNC data from a file and return it as a Polars DataFrame.
|
|
128
127
|
|
|
129
128
|
Returns:
|
|
130
|
-
|
|
129
|
+
pl.DataFrame: DataFrame containing the HGNC data.
|
|
131
130
|
"""
|
|
132
|
-
return
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
131
|
+
return (
|
|
132
|
+
pl.read_csv(
|
|
133
|
+
os.path.dirname(__file__).replace("utils", "resources/hgnc_complete_set.txt"),
|
|
134
|
+
separator="\t",
|
|
135
|
+
infer_schema=10000000000,
|
|
136
|
+
dtypes={"omim_id": pl.Utf8},
|
|
137
|
+
)
|
|
138
|
+
.select(
|
|
139
|
+
[
|
|
140
|
+
pl.col("hgnc_id").alias("hgnc_id"),
|
|
141
|
+
pl.col("symbol").alias("gene_symbol"),
|
|
142
|
+
pl.col("ensembl_gene_id").alias("ensembl_id"),
|
|
143
|
+
pl.col("entrez_id").alias("entrez_id"),
|
|
144
|
+
pl.col("refseq_accession").alias("refseq_accession"),
|
|
145
|
+
pl.col("prev_symbol").alias("previous_symbol_raw"),
|
|
146
|
+
]
|
|
147
|
+
)
|
|
148
|
+
.with_columns(
|
|
149
|
+
pl.col("previous_symbol_raw")
|
|
150
|
+
.str.split("|")
|
|
151
|
+
.list.eval(pl.element().str.strip_chars('"'))
|
|
152
|
+
.alias("prev_symbols")
|
|
153
|
+
)
|
|
136
154
|
)
|
|
137
155
|
|
|
138
156
|
|
|
139
|
-
def
|
|
140
|
-
"""
|
|
141
|
-
Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data.
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
Returns:
|
|
145
|
-
defaultdict: A dictionary containing gene symbols as keys and their associated gene information.
|
|
146
|
-
|
|
147
|
-
Notes:
|
|
148
|
-
The dictionary structure:
|
|
149
|
-
{
|
|
150
|
-
'gene_symbol': {
|
|
151
|
-
'ensembl_id': str,
|
|
152
|
-
'hgnc_id': str,
|
|
153
|
-
'entrez_id': str,
|
|
154
|
-
'refseq_accession': str,
|
|
155
|
-
'previous_symbol': [str, ...]
|
|
156
|
-
},
|
|
157
|
-
...
|
|
158
|
-
}
|
|
159
|
-
"""
|
|
160
|
-
hgnc_df = read_hgnc_data()
|
|
161
|
-
hgnc_data = defaultdict(dict)
|
|
162
|
-
for _index, row in hgnc_df.iterrows():
|
|
163
|
-
previous_names = []
|
|
164
|
-
hgnc_data[row["symbol"]]["ensembl_id"] = row["ensembl_gene_id"]
|
|
165
|
-
hgnc_data[row["symbol"]]["hgnc_id"] = row["hgnc_id"]
|
|
166
|
-
hgnc_data[row["symbol"]]["entrez_id"] = row["entrez_id"]
|
|
167
|
-
hgnc_data[row["symbol"]]["refseq_accession"] = row["refseq_accession"]
|
|
168
|
-
previous = str(row["prev_symbol"]).split("|")
|
|
169
|
-
for p in previous:
|
|
170
|
-
previous_names.append(p.strip('"'))
|
|
171
|
-
hgnc_data[row["symbol"]]["previous_symbol"] = previous_names
|
|
172
|
-
|
|
173
|
-
return hgnc_data
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
def create_gene_identifier_map() -> dict:
|
|
157
|
+
def create_gene_identifier_map() -> pl.DataFrame:
|
|
177
158
|
"""
|
|
178
159
|
Create a mapping of gene identifiers to gene symbols using HGNC data.
|
|
179
160
|
|
|
180
161
|
Returns:
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
Notes:
|
|
184
|
-
The dictionary structure:
|
|
185
|
-
{
|
|
186
|
-
'identifier': 'gene_symbol',
|
|
187
|
-
...
|
|
188
|
-
}
|
|
162
|
+
pl.DataFrame: A mapping of gene identifiers to gene symbols.
|
|
189
163
|
"""
|
|
190
|
-
hgnc_df =
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
164
|
+
hgnc_df = parse_hgnc_data()
|
|
165
|
+
return hgnc_df.melt(
|
|
166
|
+
id_vars=["gene_symbol", "prev_symbols"],
|
|
167
|
+
value_vars=["ensembl_id", "hgnc_id", "entrez_id", "refseq_accession"],
|
|
168
|
+
variable_name="identifier_type",
|
|
169
|
+
value_name="identifier",
|
|
170
|
+
).with_columns(
|
|
171
|
+
pl.col("identifier_type")
|
|
172
|
+
.replace(
|
|
173
|
+
{
|
|
174
|
+
"ensembl_id": "ensembl:",
|
|
175
|
+
"hgnc_id": "",
|
|
176
|
+
"entrez_id": "ncbigene:",
|
|
177
|
+
"refseq_accession": "",
|
|
178
|
+
},
|
|
179
|
+
default="",
|
|
180
|
+
)
|
|
181
|
+
.alias("prefix")
|
|
182
|
+
)
|
|
198
183
|
|
|
199
184
|
|
|
200
185
|
def phenopacket_reader(file: Path) -> Union[Phenopacket, Family]:
|
|
@@ -651,17 +636,19 @@ def write_phenopacket(phenopacket: Union[Phenopacket, Family], output_file: Path
|
|
|
651
636
|
class GeneIdentifierUpdater:
|
|
652
637
|
"""Class for updating gene identifiers within genomic interpretations."""
|
|
653
638
|
|
|
654
|
-
def __init__(
|
|
639
|
+
def __init__(
|
|
640
|
+
self,
|
|
641
|
+
gene_identifier: str,
|
|
642
|
+
identifier_map: pl.DataFrame = None,
|
|
643
|
+
):
|
|
655
644
|
"""
|
|
656
645
|
Initialise the GeneIdentifierUpdater.
|
|
657
646
|
|
|
658
647
|
Args:
|
|
659
648
|
gene_identifier (str): The gene identifier to update to.
|
|
660
|
-
|
|
661
|
-
identifier_map (dict): A dictionary mapping gene identifiers (default: None).
|
|
649
|
+
identifier_map (dict): A polars dataframe mapping gene identifiers (default: None).
|
|
662
650
|
"""
|
|
663
651
|
|
|
664
|
-
self.hgnc_data = hgnc_data
|
|
665
652
|
self.gene_identifier = gene_identifier
|
|
666
653
|
self.identifier_map = identifier_map
|
|
667
654
|
|
|
@@ -675,13 +662,20 @@ class GeneIdentifierUpdater:
|
|
|
675
662
|
Returns:
|
|
676
663
|
str: The identified gene identifier.
|
|
677
664
|
"""
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
665
|
+
matches = self.identifier_map.filter(
|
|
666
|
+
(pl.col("gene_symbol") == gene_symbol)
|
|
667
|
+
& (pl.col("identifier_type") == self.gene_identifier)
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
if matches.height > 0:
|
|
671
|
+
return matches["identifier"][0]
|
|
672
|
+
prev_symbol_matches = self.identifier_map.filter(
|
|
673
|
+
(pl.col("identifier_type") == self.gene_identifier)
|
|
674
|
+
& (pl.col("prev_symbols").list.contains(gene_symbol))
|
|
675
|
+
)
|
|
676
|
+
if prev_symbol_matches.height > 0:
|
|
677
|
+
return prev_symbol_matches["identifier"][0]
|
|
678
|
+
return None
|
|
685
679
|
|
|
686
680
|
def obtain_gene_symbol_from_identifier(self, query_gene_identifier: str) -> str:
|
|
687
681
|
"""
|
|
@@ -693,7 +687,9 @@ class GeneIdentifierUpdater:
|
|
|
693
687
|
Returns:
|
|
694
688
|
str: The gene symbol corresponding to the identifier.
|
|
695
689
|
"""
|
|
696
|
-
return self.identifier_map[
|
|
690
|
+
return self.identifier_map.filter(pl.col("identifier") == query_gene_identifier)[
|
|
691
|
+
"gene_symbol"
|
|
692
|
+
][0]
|
|
697
693
|
|
|
698
694
|
def _find_alternate_ids(self, gene_symbol: str) -> List[str]:
|
|
699
695
|
"""
|
|
@@ -705,23 +701,20 @@ class GeneIdentifierUpdater:
|
|
|
705
701
|
Returns:
|
|
706
702
|
List[str]: List of alternate IDs for the gene symbol.
|
|
707
703
|
"""
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
"
|
|
712
|
-
"ensembl:" + self.hgnc_data[gene_symbol]["ensembl_id"],
|
|
713
|
-
"symbol:" + gene_symbol,
|
|
704
|
+
matches = self.identifier_map.filter((pl.col("gene_symbol") == gene_symbol))
|
|
705
|
+
if matches.height > 0:
|
|
706
|
+
return [f"{row['prefix']}{row['identifier']}" for row in matches.rows(named=True)] + [
|
|
707
|
+
f"symbol:{gene_symbol}"
|
|
714
708
|
]
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
]
|
|
709
|
+
prev_symbol_matches = self.identifier_map.filter(
|
|
710
|
+
(pl.col("prev_symbols").list.contains(gene_symbol))
|
|
711
|
+
)
|
|
712
|
+
if prev_symbol_matches.height > 0:
|
|
713
|
+
return [
|
|
714
|
+
f"{row['prefix']}{row['identifier']}"
|
|
715
|
+
for row in prev_symbol_matches.rows(named=True)
|
|
716
|
+
] + [f"symbol:{gene_symbol}"]
|
|
717
|
+
return None
|
|
725
718
|
|
|
726
719
|
def update_genomic_interpretations_gene_identifier(
|
|
727
720
|
self, interpretations: List[Interpretation], phenopacket_path: Path
|
|
@@ -731,6 +724,7 @@ class GeneIdentifierUpdater:
|
|
|
731
724
|
|
|
732
725
|
Args:
|
|
733
726
|
interpretations (List[Interpretation]): List of Interpretation objects.
|
|
727
|
+
phenopacket_path (Path): The Path to the Phenopacket.
|
|
734
728
|
|
|
735
729
|
Returns:
|
|
736
730
|
List[Interpretation]: Updated list of Interpretation objects.
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pheval
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary:
|
|
5
5
|
Author: Yasemin Bridges
|
|
6
6
|
Author-email: y.bridges@qmul.ac.uk
|
|
7
|
-
Requires-Python: >=3.
|
|
7
|
+
Requires-Python: >=3.10,<4.0.0
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
10
9
|
Classifier: Programming Language :: Python :: 3.10
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -22,8 +21,9 @@ Requires-Dist: oaklib (>=0.5.6)
|
|
|
22
21
|
Requires-Dist: pandas (>=1.5.1)
|
|
23
22
|
Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
|
|
24
23
|
Requires-Dist: plotly (>=5.13.0,<6.0.0)
|
|
25
|
-
Requires-Dist: polars (>=
|
|
24
|
+
Requires-Dist: polars (>=1.23,<2.0)
|
|
26
25
|
Requires-Dist: pyaml (>=21.10.1,<22.0.0)
|
|
26
|
+
Requires-Dist: pyarrow (>=19.0.1,<20.0.0)
|
|
27
27
|
Requires-Dist: pyserde (>=0.9.8,<0.10.0)
|
|
28
28
|
Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
|
|
29
29
|
Requires-Dist: seaborn (>=0.12.2,<0.13.0)
|
|
@@ -1,36 +1,31 @@
|
|
|
1
1
|
pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
pheval/analyse/
|
|
4
|
-
pheval/analyse/
|
|
5
|
-
pheval/analyse/
|
|
6
|
-
pheval/analyse/
|
|
7
|
-
pheval/analyse/
|
|
8
|
-
pheval/analyse/
|
|
9
|
-
pheval/analyse/
|
|
10
|
-
pheval/analyse/
|
|
11
|
-
pheval/analyse/
|
|
12
|
-
pheval/
|
|
13
|
-
pheval/analyse/parse_benchmark_summary.py,sha256=vyAOIdIWF4rZjGTPFE69ajhEC9AkkN3QBVqSe_uYZsg,2946
|
|
14
|
-
pheval/analyse/parse_corpus.py,sha256=pxhoKTgd-DnwAMP081UMG-NKbj89qAYBQhHve8aphfI,8698
|
|
15
|
-
pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
|
|
16
|
-
pheval/analyse/rank_stats.py,sha256=vNLVuG_NzhKDXxKmklYNPz44MczlyKUqcuHqbiuOXwI,17993
|
|
17
|
-
pheval/analyse/run_data_parser.py,sha256=VQBUoOIRYRWc5uqURUvaWdaW3E3C7Su0JvLavQLHQaY,4105
|
|
18
|
-
pheval/analyse/variant_prioritisation_analysis.py,sha256=HhDeczF7wmJjXt0ejAtF0qdczyMe25glqiS6uX_TFl8,6408
|
|
19
|
-
pheval/cli.py,sha256=EBGh6TIxAiWs0eDdQiefq6YuD0mb93siGsNmsVO1j7c,1527
|
|
3
|
+
pheval/analyse/benchmark.py,sha256=1ysz1peGb21DhgNpEam9NgUOS5eGv7K0CI3RNjy0crQ,6275
|
|
4
|
+
pheval/analyse/benchmark_db_manager.py,sha256=zS1TI76YuV2_YXLipHLSyh-XDR5kTxyOwhRhHRFHfjQ,764
|
|
5
|
+
pheval/analyse/benchmark_output_type.py,sha256=bh-qQvV4AF7BHQyr_bdY8HTTzYZVe7KvoIoUF0D9k-g,1468
|
|
6
|
+
pheval/analyse/binary_classification_curves.py,sha256=Crb45rJWc5rxDdx82sgoHRvYHE2D5pus91fgl39FyRw,5007
|
|
7
|
+
pheval/analyse/binary_classification_stats.py,sha256=sOuEp6IxZ6SVp-KC6MJkZNTkZucZTNK25xApP5tU6Mk,6944
|
|
8
|
+
pheval/analyse/generate_plots.py,sha256=g98DxhTw1dPRfRRYoKBmt51XfIa2KzlL_Z7weFSoBUg,14550
|
|
9
|
+
pheval/analyse/generate_rank_comparisons.py,sha256=KcQJ9rm1nvvTcqLNuxAkXRXuV18vEsiP0giQ-ryHyYc,1684
|
|
10
|
+
pheval/analyse/rank_stats.py,sha256=qHrqlIsZVSV2ASc5cZ6TsmKaMq3bZtCzS1ZURjL8mks,9211
|
|
11
|
+
pheval/analyse/run_data_parser.py,sha256=Lr0ao_Mlp8EYLaM4XmiEjo7P7jt_rCBR2y2hb_D3c70,3366
|
|
12
|
+
pheval/cli.py,sha256=rpvTTCKAvH75XkZUh0xaKv7Ftl9zIt2RncsMGIlrq9U,1556
|
|
20
13
|
pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
|
|
21
|
-
pheval/cli_pheval_utils.py,sha256=
|
|
14
|
+
pheval/cli_pheval_utils.py,sha256=sh6kx36jYfuSIWBMlrdW3g-LPftxBy-xw4b7hg8bdj4,16545
|
|
22
15
|
pheval/config_parser.py,sha256=lh-Dy_FflXJUnRC3HYaEdSvPAsNZWQZlEr1hHQigrTM,1227
|
|
23
16
|
pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r299Zo,1228
|
|
24
17
|
pheval/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
18
|
pheval/infra/exomiserdb.py,sha256=pM9-TfjrgurtH4OtM1Enk5oVhIxGQN3rKRlrxHuObTM,5080
|
|
26
19
|
pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
pheval/post_processing/
|
|
20
|
+
pheval/post_processing/phenopacket_truth_set.py,sha256=ue3pNeg_GZiGyuKrm6_4MsJWpW0LWtfG9wja2Cc8SLg,8873
|
|
21
|
+
pheval/post_processing/post_processing.py,sha256=4xP-gjZ3VoXydU9ClPvmRtuDaSMUeJImgLugurOS5_k,9480
|
|
22
|
+
pheval/post_processing/validate_result_format.py,sha256=4U6AfHt01EexwU_OnpmytQAhGVS6ZWF1S-5NVBx1oaM,2916
|
|
28
23
|
pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
24
|
pheval/prepare/create_noisy_phenopackets.py,sha256=ydhA4mpqKTDc4hBu8YfvNW2nMubHK3dbO-cv0lA4JFQ,11504
|
|
30
25
|
pheval/prepare/create_spiked_vcf.py,sha256=90A-Mi8QKhvN036vtFEVWAHgzHO37itiLYrqYlG4LiA,23953
|
|
31
26
|
pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
|
|
32
27
|
pheval/prepare/prepare_corpus.py,sha256=YFnklpeVXeqeme9DVmd_jfsK04ytIe9cH5uXYcgK5cY,4650
|
|
33
|
-
pheval/prepare/update_phenopacket.py,sha256=
|
|
28
|
+
pheval/prepare/update_phenopacket.py,sha256=Bjru0ptNKyzLaYElouKZe2GYRQbETTC0FMiMojrP8Lg,4850
|
|
34
29
|
pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
|
|
35
30
|
pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
|
|
36
31
|
pheval/resources/alternate_ouputs/OVA_results.txt,sha256=_5XFCR4W04D-W7DObpALLsa0-693g2kiIUB_uo79aHk,9845
|
|
@@ -47,11 +42,12 @@ pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
|
|
|
47
42
|
pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
|
|
48
43
|
pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
|
|
49
44
|
pheval/utils/file_utils.py,sha256=m21cz-qjDYqnI8ClUv3J9fKizex98a-9bSEerQ75i_c,3576
|
|
50
|
-
pheval/utils/
|
|
45
|
+
pheval/utils/logger.py,sha256=5DZl5uMltUDQorhkvg_B7_ZhFwApAmEkWneFIOKfRGQ,1566
|
|
46
|
+
pheval/utils/phenopacket_utils.py,sha256=AfV_mWac6n5HCc5zjfH6CGP8T0qI0LR0VBrooaKmgdY,26978
|
|
51
47
|
pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
|
|
52
48
|
pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
|
|
53
|
-
pheval-0.
|
|
54
|
-
pheval-0.
|
|
55
|
-
pheval-0.
|
|
56
|
-
pheval-0.
|
|
57
|
-
pheval-0.
|
|
49
|
+
pheval-0.5.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
50
|
+
pheval-0.5.0.dist-info/METADATA,sha256=v7UNSBKUzJQAs8oBSq8XScwKnDiNXlzWZV0A70xR3M8,6456
|
|
51
|
+
pheval-0.5.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
52
|
+
pheval-0.5.0.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
|
|
53
|
+
pheval-0.5.0.dist-info/RECORD,,
|
pheval/analyse/analysis.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
from pheval.analyse.benchmark_generator import (
|
|
2
|
-
BenchmarkRunOutputGenerator,
|
|
3
|
-
DiseaseBenchmarkRunOutputGenerator,
|
|
4
|
-
GeneBenchmarkRunOutputGenerator,
|
|
5
|
-
VariantBenchmarkRunOutputGenerator,
|
|
6
|
-
)
|
|
7
|
-
from pheval.analyse.generate_summary_outputs import generate_benchmark_comparison_output
|
|
8
|
-
from pheval.analyse.parse_corpus import CorpusParser
|
|
9
|
-
from pheval.analyse.rank_stats import RankStatsWriter
|
|
10
|
-
from pheval.analyse.run_data_parser import Config
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def _run_benchmark_comparison(
|
|
14
|
-
run_config: Config,
|
|
15
|
-
benchmark_generator: BenchmarkRunOutputGenerator,
|
|
16
|
-
) -> None:
|
|
17
|
-
"""
|
|
18
|
-
Run a benchmark on several result directories.
|
|
19
|
-
|
|
20
|
-
Args:
|
|
21
|
-
run_config (List[TrackInputOutputDirectories]): List of input and output directories
|
|
22
|
-
for tracking results across multiple directories.
|
|
23
|
-
benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
|
|
24
|
-
"""
|
|
25
|
-
stats_writer = RankStatsWriter(
|
|
26
|
-
run_config.benchmark_name, benchmark_generator.stats_comparison_file
|
|
27
|
-
)
|
|
28
|
-
unique_test_corpora_directories = set([result.phenopacket_dir for result in run_config.runs])
|
|
29
|
-
[
|
|
30
|
-
CorpusParser(run_config.benchmark_name, test_corpora_directory).parse_corpus(
|
|
31
|
-
benchmark_generator
|
|
32
|
-
)
|
|
33
|
-
for test_corpora_directory in unique_test_corpora_directories
|
|
34
|
-
]
|
|
35
|
-
benchmarking_results = []
|
|
36
|
-
for run in run_config.runs:
|
|
37
|
-
benchmark_result = benchmark_generator.generate_benchmark_run_results(
|
|
38
|
-
run_config.benchmark_name, run, run.score_order, run.threshold
|
|
39
|
-
)
|
|
40
|
-
stats_writer.add_statistics_entry(
|
|
41
|
-
run.run_identifier,
|
|
42
|
-
benchmark_result.rank_stats,
|
|
43
|
-
benchmark_result.binary_classification_stats,
|
|
44
|
-
)
|
|
45
|
-
benchmarking_results.append(benchmark_result)
|
|
46
|
-
run_identifiers = [run.run_identifier for run in run_config.runs]
|
|
47
|
-
[
|
|
48
|
-
generate_benchmark_comparison_output(
|
|
49
|
-
run_config.benchmark_name,
|
|
50
|
-
benchmarking_results,
|
|
51
|
-
run_identifiers,
|
|
52
|
-
benchmark_generator,
|
|
53
|
-
f"{unique_test_corpora_directory.parents[0].name}_"
|
|
54
|
-
f"{benchmark_generator.prioritisation_type_string}",
|
|
55
|
-
)
|
|
56
|
-
for unique_test_corpora_directory in unique_test_corpora_directories
|
|
57
|
-
]
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def benchmark_run_comparisons(
|
|
61
|
-
run_config: Config,
|
|
62
|
-
) -> None:
|
|
63
|
-
"""
|
|
64
|
-
Benchmark prioritisation performance for several runs.
|
|
65
|
-
|
|
66
|
-
Args:
|
|
67
|
-
run_config (Config): Run configurations.
|
|
68
|
-
"""
|
|
69
|
-
gene_analysis_runs = Config(
|
|
70
|
-
benchmark_name=run_config.benchmark_name,
|
|
71
|
-
runs=[run for run in run_config.runs if run.gene_analysis],
|
|
72
|
-
plot_customisation=run_config.plot_customisation,
|
|
73
|
-
)
|
|
74
|
-
variant_analysis_runs = Config(
|
|
75
|
-
benchmark_name=run_config.benchmark_name,
|
|
76
|
-
runs=[run for run in run_config.runs if run.variant_analysis],
|
|
77
|
-
plot_customisation=run_config.plot_customisation,
|
|
78
|
-
)
|
|
79
|
-
disease_analysis_runs = Config(
|
|
80
|
-
benchmark_name=run_config.benchmark_name,
|
|
81
|
-
runs=[run for run in run_config.runs if run.disease_analysis],
|
|
82
|
-
plot_customisation=run_config.plot_customisation,
|
|
83
|
-
)
|
|
84
|
-
if gene_analysis_runs.runs:
|
|
85
|
-
_run_benchmark_comparison(
|
|
86
|
-
run_config=gene_analysis_runs,
|
|
87
|
-
benchmark_generator=GeneBenchmarkRunOutputGenerator(
|
|
88
|
-
plot_customisation=gene_analysis_runs.plot_customisation.gene_plots
|
|
89
|
-
),
|
|
90
|
-
)
|
|
91
|
-
if variant_analysis_runs.runs:
|
|
92
|
-
_run_benchmark_comparison(
|
|
93
|
-
run_config=variant_analysis_runs,
|
|
94
|
-
benchmark_generator=VariantBenchmarkRunOutputGenerator(
|
|
95
|
-
plot_customisation=variant_analysis_runs.plot_customisation.variant_plots
|
|
96
|
-
),
|
|
97
|
-
)
|
|
98
|
-
if disease_analysis_runs.runs:
|
|
99
|
-
_run_benchmark_comparison(
|
|
100
|
-
run_config=disease_analysis_runs,
|
|
101
|
-
benchmark_generator=DiseaseBenchmarkRunOutputGenerator(
|
|
102
|
-
plot_customisation=disease_analysis_runs.plot_customisation.disease_plots
|
|
103
|
-
),
|
|
104
|
-
)
|