pheval 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/analyse/analysis.py +61 -150
- pheval/analyse/assess_prioritisation_base.py +108 -0
- pheval/analyse/benchmark_db_manager.py +140 -0
- pheval/analyse/benchmark_generator.py +47 -50
- pheval/analyse/benchmarking_data.py +3 -2
- pheval/analyse/disease_prioritisation_analysis.py +70 -219
- pheval/analyse/gene_prioritisation_analysis.py +66 -242
- pheval/analyse/generate_plots.py +81 -79
- pheval/analyse/generate_summary_outputs.py +64 -134
- pheval/analyse/parse_benchmark_summary.py +50 -37
- pheval/analyse/parse_corpus.py +219 -0
- pheval/analyse/rank_stats.py +177 -144
- pheval/analyse/run_data_parser.py +108 -27
- pheval/analyse/variant_prioritisation_analysis.py +78 -212
- pheval/cli.py +2 -4
- pheval/cli_pheval_utils.py +34 -245
- pheval/prepare/create_noisy_phenopackets.py +78 -67
- pheval-0.4.0.dist-info/METADATA +112 -0
- {pheval-0.3.9.dist-info → pheval-0.4.0.dist-info}/RECORD +22 -22
- pheval/analyse/parse_pheval_result.py +0 -43
- pheval/analyse/prioritisation_rank_recorder.py +0 -83
- pheval/constants.py +0 -8
- pheval-0.3.9.dist-info/METADATA +0 -35
- {pheval-0.3.9.dist-info → pheval-0.4.0.dist-info}/LICENSE +0 -0
- {pheval-0.3.9.dist-info → pheval-0.4.0.dist-info}/WHEEL +0 -0
- {pheval-0.3.9.dist-info → pheval-0.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,32 +1,32 @@
|
|
|
1
1
|
pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
pheval/analyse/analysis.py,sha256=
|
|
4
|
-
pheval/analyse/
|
|
5
|
-
pheval/analyse/
|
|
3
|
+
pheval/analyse/analysis.py,sha256=Yt2xH0WS_2NO13-wYvywzmCRCj8RinQ1MeozJQuGe3o,4009
|
|
4
|
+
pheval/analyse/assess_prioritisation_base.py,sha256=znBscRTqIKxxZMHR-H6KrjFJ6Uv5P5HzwTQUWS6Eoos,3434
|
|
5
|
+
pheval/analyse/benchmark_db_manager.py,sha256=RaAnvq5Tfvsw8161iQUe_P146X98BckMRhMq6BibwNI,5111
|
|
6
|
+
pheval/analyse/benchmark_generator.py,sha256=-LljszuKAT3oJfGQn7JHAILCGg5QXYny4nPPf273g_E,5896
|
|
7
|
+
pheval/analyse/benchmarking_data.py,sha256=aRvDmwqjFGKvWDRGjMwaQxfDZscptRBwI-rcSqY-X5s,913
|
|
6
8
|
pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
|
|
7
|
-
pheval/analyse/disease_prioritisation_analysis.py,sha256=
|
|
8
|
-
pheval/analyse/gene_prioritisation_analysis.py,sha256=
|
|
9
|
-
pheval/analyse/generate_plots.py,sha256=
|
|
10
|
-
pheval/analyse/generate_summary_outputs.py,sha256=
|
|
11
|
-
pheval/analyse/parse_benchmark_summary.py,sha256=
|
|
12
|
-
pheval/analyse/
|
|
13
|
-
pheval/analyse/prioritisation_rank_recorder.py,sha256=EVe8DoEvvp0_WMAcjfVxmDGGRFPEELi7hEVjH3sIpLY,3223
|
|
9
|
+
pheval/analyse/disease_prioritisation_analysis.py,sha256=1Ut4u6p9mDGbGBoXrVxTuUgv_nrqxzBhq5N9jbGWRWs,5725
|
|
10
|
+
pheval/analyse/gene_prioritisation_analysis.py,sha256=_7r16BC0S2H9rOY016mLLUg6PcxaCQOh5FQcNKFTy-4,5582
|
|
11
|
+
pheval/analyse/generate_plots.py,sha256=rMSdgawGYYc7BAAolqz73TAnanKtrxCC48bI6WQq6xc,21455
|
|
12
|
+
pheval/analyse/generate_summary_outputs.py,sha256=9v9w7le_wLxyjgUgxl6f0RXb2Qt-B3XrKdlmfT6DSjo,4160
|
|
13
|
+
pheval/analyse/parse_benchmark_summary.py,sha256=vyAOIdIWF4rZjGTPFE69ajhEC9AkkN3QBVqSe_uYZsg,2946
|
|
14
|
+
pheval/analyse/parse_corpus.py,sha256=N88enptR4qG6cmqXU_TKg8DMmCeFog37eeK5nFEMQOQ,8678
|
|
14
15
|
pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
|
|
15
|
-
pheval/analyse/rank_stats.py,sha256=
|
|
16
|
-
pheval/analyse/run_data_parser.py,sha256=
|
|
17
|
-
pheval/analyse/variant_prioritisation_analysis.py,sha256=
|
|
18
|
-
pheval/cli.py,sha256=
|
|
16
|
+
pheval/analyse/rank_stats.py,sha256=53ZickUtQlctYsorAIUwlCX7M6UC-wCxoV1MbL6F9gc,17987
|
|
17
|
+
pheval/analyse/run_data_parser.py,sha256=VQBUoOIRYRWc5uqURUvaWdaW3E3C7Su0JvLavQLHQaY,4105
|
|
18
|
+
pheval/analyse/variant_prioritisation_analysis.py,sha256=LPEZDhFfzx-sQl9g8pM700l4Zzpy6qdOS68vOV2mZA0,6020
|
|
19
|
+
pheval/cli.py,sha256=EBGh6TIxAiWs0eDdQiefq6YuD0mb93siGsNmsVO1j7c,1527
|
|
19
20
|
pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
|
|
20
|
-
pheval/cli_pheval_utils.py,sha256=
|
|
21
|
+
pheval/cli_pheval_utils.py,sha256=O6tWnE85QQHGNcP08OwJGANMfXJPsZtFEu-D6ATld00,16700
|
|
21
22
|
pheval/config_parser.py,sha256=lh-Dy_FflXJUnRC3HYaEdSvPAsNZWQZlEr1hHQigrTM,1227
|
|
22
|
-
pheval/constants.py,sha256=TWBgWOc05FGXFu63fs-hEHS2IJkLLAPHtMppiWBfBOg,349
|
|
23
23
|
pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r299Zo,1228
|
|
24
24
|
pheval/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
pheval/infra/exomiserdb.py,sha256=pM9-TfjrgurtH4OtM1Enk5oVhIxGQN3rKRlrxHuObTM,5080
|
|
26
26
|
pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
pheval/post_processing/post_processing.py,sha256=tqeVRWF6PMHpOe681ONeGaqxdviLgVJgze3o6qSpXEg,13438
|
|
28
28
|
pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
pheval/prepare/create_noisy_phenopackets.py,sha256=
|
|
29
|
+
pheval/prepare/create_noisy_phenopackets.py,sha256=ydhA4mpqKTDc4hBu8YfvNW2nMubHK3dbO-cv0lA4JFQ,11504
|
|
30
30
|
pheval/prepare/create_spiked_vcf.py,sha256=90A-Mi8QKhvN036vtFEVWAHgzHO37itiLYrqYlG4LiA,23953
|
|
31
31
|
pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
|
|
32
32
|
pheval/prepare/prepare_corpus.py,sha256=eRvozzezIgAqHAumtqul0WfXfBO1iOBaSlN8fPSn0Nw,4223
|
|
@@ -50,8 +50,8 @@ pheval/utils/file_utils.py,sha256=m21cz-qjDYqnI8ClUv3J9fKizex98a-9bSEerQ75i_c,35
|
|
|
50
50
|
pheval/utils/phenopacket_utils.py,sha256=W9T_X48EJ-xn5GghzbZlt-lI-DxWoSm7_SHr8DCJg2Q,26856
|
|
51
51
|
pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
|
|
52
52
|
pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
|
|
53
|
-
pheval-0.
|
|
54
|
-
pheval-0.
|
|
55
|
-
pheval-0.
|
|
56
|
-
pheval-0.
|
|
57
|
-
pheval-0.
|
|
53
|
+
pheval-0.4.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
54
|
+
pheval-0.4.0.dist-info/METADATA,sha256=JXpNQPIx-5qBaP3ZDR96_hiasIdBHhaESiHUOJvT1s0,6418
|
|
55
|
+
pheval-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
56
|
+
pheval-0.4.0.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
|
|
57
|
+
pheval-0.4.0.dist-info/RECORD,,
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from typing import List
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
|
|
7
|
-
from pheval.post_processing.post_processing import PhEvalResult
|
|
8
|
-
|
|
9
|
-
info_log = logging.getLogger("info")
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def read_standardised_result(standardised_result_path: Path) -> List[dict]:
|
|
13
|
-
"""
|
|
14
|
-
Read the standardised result output and return a list of dictionaries.
|
|
15
|
-
|
|
16
|
-
Args:
|
|
17
|
-
standardised_result_path (Path): The path to the file containing the standardised result output.
|
|
18
|
-
|
|
19
|
-
Returns:
|
|
20
|
-
List[dict]: A list of dictionaries representing the content of the standardised result file.
|
|
21
|
-
"""
|
|
22
|
-
if standardised_result_path.is_file():
|
|
23
|
-
return pd.read_csv(standardised_result_path, delimiter="\t").to_dict("records")
|
|
24
|
-
else:
|
|
25
|
-
info_log.info(f"Could not find {standardised_result_path}")
|
|
26
|
-
return pd.DataFrame().to_dict("records")
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def parse_pheval_result(
|
|
30
|
-
data_class_type: PhEvalResult, pheval_result: List[dict]
|
|
31
|
-
) -> List[PhEvalResult]:
|
|
32
|
-
"""
|
|
33
|
-
Parse PhEval result into specified dataclass type.
|
|
34
|
-
|
|
35
|
-
Args:
|
|
36
|
-
data_class_type (PhEvalResult): The data class type to parse the result into.
|
|
37
|
-
pheval_result (List[dict]): A list of dictionaries representing the PhEval result.
|
|
38
|
-
|
|
39
|
-
Returns:
|
|
40
|
-
List[PhEvalResult]: A list of instances of the specified data class type,
|
|
41
|
-
each instance representing a row in the PhEval result.
|
|
42
|
-
"""
|
|
43
|
-
return [data_class_type(**row) for row in pheval_result]
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
from collections import defaultdict
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Union
|
|
5
|
-
|
|
6
|
-
from pheval.analyse.prioritisation_result_types import (
|
|
7
|
-
DiseasePrioritisationResult,
|
|
8
|
-
GenePrioritisationResult,
|
|
9
|
-
VariantPrioritisationResult,
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class PrioritisationRankRecorder:
|
|
15
|
-
"""
|
|
16
|
-
Record ranks for different types of prioritisation results.
|
|
17
|
-
|
|
18
|
-
Attributes:
|
|
19
|
-
index (int): The index representing the run.
|
|
20
|
-
directory (Path): The result directory path.
|
|
21
|
-
prioritisation_result (Union[GenePrioritisationResult, VariantPrioritisationResult,
|
|
22
|
-
DiseasePrioritisationResult]): The prioritisation result object.
|
|
23
|
-
run_comparison (defaultdict): The comparison dictionary to record ranks.
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
index: int
|
|
27
|
-
directory: Path
|
|
28
|
-
prioritisation_result: Union[
|
|
29
|
-
GenePrioritisationResult, VariantPrioritisationResult, DiseasePrioritisationResult
|
|
30
|
-
]
|
|
31
|
-
run_comparison: defaultdict
|
|
32
|
-
|
|
33
|
-
def _record_gene_rank(self) -> None:
|
|
34
|
-
"""
|
|
35
|
-
Record gene prioritisation rank.
|
|
36
|
-
|
|
37
|
-
This method updates the 'Gene' key in the run comparison dictionary with the gene
|
|
38
|
-
information extracted from the correct prioritisation result.
|
|
39
|
-
"""
|
|
40
|
-
self.run_comparison[self.index]["Gene"] = self.prioritisation_result.gene
|
|
41
|
-
|
|
42
|
-
def _record_variant_rank(self) -> None:
|
|
43
|
-
"""
|
|
44
|
-
Record variant prioritisation rank.
|
|
45
|
-
|
|
46
|
-
This method updates the 'Variant' key in the run comparison dictionary with the variant
|
|
47
|
-
information extracted from the correct prioritisation result.
|
|
48
|
-
"""
|
|
49
|
-
variant = self.prioritisation_result.variant
|
|
50
|
-
self.run_comparison[self.index]["Variant"] = "-".join(
|
|
51
|
-
[variant.chrom, str(variant.pos), variant.ref, variant.alt]
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
def _record_disease_rank(self) -> None:
|
|
55
|
-
"""
|
|
56
|
-
Record disease prioritisation rank.
|
|
57
|
-
|
|
58
|
-
This method updates the 'Disease' key in the run comparison dictionary with the disease
|
|
59
|
-
information extracted from the correct prioritisation result.
|
|
60
|
-
"""
|
|
61
|
-
self.run_comparison[self.index][
|
|
62
|
-
"Disease"
|
|
63
|
-
] = self.prioritisation_result.disease.disease_identifier
|
|
64
|
-
|
|
65
|
-
def record_rank(self) -> None:
|
|
66
|
-
"""
|
|
67
|
-
Record the prioritisation ranks for different runs.
|
|
68
|
-
|
|
69
|
-
It assigns the prioritisation rank and associated details such as phenopacket name
|
|
70
|
-
and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison
|
|
71
|
-
dictionary for each respective run, allowing comparison and analysis of the ranks of correct results
|
|
72
|
-
across different runs.
|
|
73
|
-
"""
|
|
74
|
-
self.run_comparison[self.index][
|
|
75
|
-
"Phenopacket"
|
|
76
|
-
] = self.prioritisation_result.phenopacket_path.name
|
|
77
|
-
if type(self.prioritisation_result) is GenePrioritisationResult:
|
|
78
|
-
self._record_gene_rank()
|
|
79
|
-
elif type(self.prioritisation_result) is VariantPrioritisationResult:
|
|
80
|
-
self._record_variant_rank()
|
|
81
|
-
elif type(self.prioritisation_result) is DiseasePrioritisationResult:
|
|
82
|
-
self._record_disease_rank()
|
|
83
|
-
self.run_comparison[self.index][self.directory] = self.prioritisation_result.rank
|
pheval/constants.py
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
PHEVAL_RESULTS_DIRECTORY_SUFFIX = "_results"
|
|
2
|
-
GENE_PLOT_FILE_PREFIX = "gene"
|
|
3
|
-
GENE_PLOT_Y_LABEL = "Disease-causing genes (%)"
|
|
4
|
-
VARIANT_PLOT_FILE_PREFIX = "variant"
|
|
5
|
-
VARIANT_PLOT_Y_LABEL = "Disease-causing variants (%)"
|
|
6
|
-
DISEASE_PLOT_FILE_PREFIX = "disease"
|
|
7
|
-
DISEASE_PLOT_Y_LABEL = "Known diseases (%)"
|
|
8
|
-
RANK_COMPARISON_FILE_SUFFIX = "_rank_comparison.tsv"
|
pheval-0.3.9.dist-info/METADATA
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: pheval
|
|
3
|
-
Version: 0.3.9
|
|
4
|
-
Summary:
|
|
5
|
-
Author: Yasemin Bridges
|
|
6
|
-
Author-email: y.bridges@qmul.ac.uk
|
|
7
|
-
Requires-Python: >=3.9,<4.0.0
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
-
Requires-Dist: class-resolver (>=0.4.2)
|
|
14
|
-
Requires-Dist: click (>=8.1.3)
|
|
15
|
-
Requires-Dist: deprecation (>=2.1.0)
|
|
16
|
-
Requires-Dist: google (>=3.0.0,<4.0.0)
|
|
17
|
-
Requires-Dist: jaydebeapi (>=1.2.3)
|
|
18
|
-
Requires-Dist: matplotlib (>=3.7.0,<4.0.0)
|
|
19
|
-
Requires-Dist: oaklib (>=0.5.6)
|
|
20
|
-
Requires-Dist: pandas (>=1.5.1)
|
|
21
|
-
Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
|
|
22
|
-
Requires-Dist: plotly (>=5.13.0,<6.0.0)
|
|
23
|
-
Requires-Dist: polars (>=0.19.15,<0.20.0)
|
|
24
|
-
Requires-Dist: pyaml (>=21.10.1,<22.0.0)
|
|
25
|
-
Requires-Dist: pyserde (>=0.9.8,<0.10.0)
|
|
26
|
-
Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
|
|
27
|
-
Requires-Dist: seaborn (>=0.12.2,<0.13.0)
|
|
28
|
-
Requires-Dist: tqdm (>=4.64.1)
|
|
29
|
-
Description-Content-Type: text/markdown
|
|
30
|
-
|
|
31
|
-
# PhEval - Phenotypic Inference Evaluation Framework
|
|
32
|
-
|
|
33
|
-
There is currently no empirical framework to evaluate the performance of phenotype matching and prioritization tools, much needed to guide tuning for cross species inference. Many algorithms are evaluated using simulations, which may fail to capture real-world scenarios. This gap presents a number of problems: it is difficult to optimize algorithms if we do not know which choices lead to better results; performance may be sensitive to factors that are subject to change, such as ontology structure or annotation completeness. We will develop a modular Phenotypic Inference Evaluation Framework, PhEval and use it to optimize our own algorithms, as well as deliver it as a community resource.
|
|
34
|
-
|
|
35
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|