pheval 0.3.1-py3-none-any.whl → 0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


pheval/analyse/binary_classification_stats.py

@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from math import sqrt
 from typing import List, Union
 
@@ -29,6 +29,8 @@ class BinaryClassificationStats:
     true_negatives: int = 0
     false_positives: int = 0
     false_negatives: int = 0
+    labels: List = field(default_factory=list)
+    scores: List = field(default_factory=list)
 
     @staticmethod
     def remove_relevant_ranks(
@@ -84,6 +86,31 @@ class BinaryClassificationStats:
             elif rank != 1:
                 self.true_negatives += 1
 
+    def add_labels_and_scores(
+        self,
+        pheval_results: Union[
+            List[RankedPhEvalGeneResult],
+            List[RankedPhEvalVariantResult],
+            List[RankedPhEvalDiseaseResult],
+        ],
+        relevant_ranks: List[int],
+    ):
+        """
+        Adds scores and labels from the PhEval results.
+
+        Args:
+            pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult],
+            List[RankedPhEvalDiseaseResult]]):
+                List of all PhEval results
+            relevant_ranks (List[int]): A list of the ranks associated with the known entities.
+        """
+        relevant_ranks_copy = relevant_ranks.copy()
+        for result in pheval_results:
+            self.scores.append(result.score)
+            label = 1 if result.rank in relevant_ranks_copy else 0
+            self.labels.append(label)
+            relevant_ranks_copy.remove(result.rank) if label == 1 else None
+
     def add_classification(
         self,
         pheval_results: Union[
@@ -105,6 +132,7 @@ class BinaryClassificationStats:
         self.add_classification_for_other_entities(
             self.remove_relevant_ranks(pheval_results, relevant_ranks)
         )
+        self.add_labels_and_scores(pheval_results, relevant_ranks)
 
     def sensitivity(self) -> float:
         """

pheval/analyse/disease_prioritisation_analysis.py

@@ -10,7 +10,11 @@ from pheval.analyse.prioritisation_result_types import DiseasePrioritisationResu
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
-from pheval.utils.file_utils import all_files, files_with_suffix, obtain_closest_file_name
+from pheval.utils.file_utils import (
+    all_files,
+    files_with_suffix,
+    obtain_phenopacket_path_from_pheval_result,
+)
 from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandDisease, phenopacket_reader
 
 
@@ -234,7 +238,7 @@ def assess_phenopacket_disease_prioritisation(
         disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
         disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
     """
-    phenopacket_path = obtain_closest_file_name(
+    phenopacket_path = obtain_phenopacket_path_from_pheval_result(
         standardised_disease_result, all_files(results_dir_and_input.phenopacket_dir)
     )
     pheval_disease_result = read_standardised_result(standardised_disease_result)

pheval/analyse/gene_prioritisation_analysis.py

@@ -10,7 +10,11 @@ from pheval.analyse.prioritisation_result_types import GenePrioritisationResult
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
-from pheval.utils.file_utils import all_files, files_with_suffix, obtain_closest_file_name
+from pheval.utils.file_utils import (
+    all_files,
+    files_with_suffix,
+    obtain_phenopacket_path_from_pheval_result,
+)
 from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandCausativeGene, phenopacket_reader
 
 
@@ -226,7 +230,7 @@ def assess_phenopacket_gene_prioritisation(
         gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.
         gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
     """
-    phenopacket_path = obtain_closest_file_name(
+    phenopacket_path = obtain_phenopacket_path_from_pheval_result(
         standardised_gene_result, all_files(results_dir_and_input.phenopacket_dir)
     )
     pheval_gene_result = read_standardised_result(standardised_gene_result)

pheval/analyse/generate_plots.py

@@ -5,6 +5,7 @@ import matplotlib
 import pandas as pd
 import seaborn as sns
 from matplotlib import pyplot as plt
+from sklearn.metrics import auc, precision_recall_curve, roc_curve
 
 from pheval.analyse.benchmark_generator import (
     BenchmarkRunOutputGenerator,
@@ -357,6 +358,82 @@ class PlotGenerator:
             ]
         )
 
+    def generate_roc_curve(
+        self,
+        benchmarking_results: List[BenchmarkRunResults],
+        benchmark_generator: BenchmarkRunOutputGenerator,
+    ):
+        """
+        Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results.
+
+        Args:
+            benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
+            benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
+        """
+        for i, benchmark_result in enumerate(benchmarking_results):
+            fpr, tpr, thresh = roc_curve(
+                benchmark_result.binary_classification_stats.labels,
+                benchmark_result.binary_classification_stats.scores,
+                pos_label=1,
+            )
+            roc_auc = auc(fpr, tpr)
+
+            plt.plot(
+                fpr,
+                tpr,
+                label=f"{self.return_benchmark_name(benchmark_result)} ROC Curve (AUC = {roc_auc:.2f})",
+                color=self.palette_hex_codes[i],
+            )
+
+        plt.plot(linestyle="--", color="gray")
+        plt.xlabel("False Positive Rate")
+        plt.ylabel("True Positive Rate")
+        plt.title("Receiver Operating Characteristic (ROC) Curve")
+        plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
+        plt.savefig(
+            f"{benchmark_generator.prioritisation_type_file_prefix}_roc_curve.svg",
+            format="svg",
+            bbox_inches="tight",
+        )
+
+    def generate_precision_recall(
+        self,
+        benchmarking_results: List[BenchmarkRunResults],
+        benchmark_generator: BenchmarkRunOutputGenerator,
+    ):
+        """
+        Generate and plot Precision-Recall curves for binary classification benchmark results.
+
+        Args:
+            benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
+            benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
+        """
+        plt.figure()
+        for i, benchmark_result in enumerate(benchmarking_results):
+            precision, recall, thresh = precision_recall_curve(
+                benchmark_result.binary_classification_stats.labels,
+                benchmark_result.binary_classification_stats.scores,
+            )
+            precision_recall_auc = auc(recall, precision)
+            plt.plot(
+                recall,
+                precision,
+                label=f"{self.return_benchmark_name(benchmark_result)} Precision-Recall Curve "
+                f"(AUC = {precision_recall_auc:.2f})",
+                color=self.palette_hex_codes[i],
+            )
+
+        plt.plot(linestyle="--", color="gray")
+        plt.xlabel("Recall")
+        plt.ylabel("Precision")
+        plt.title("Precision-Recall Curve")
+        plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
+        plt.savefig(
+            f"{benchmark_generator.prioritisation_type_file_prefix}_precision_recall_curve.svg",
+            format="svg",
+            bbox_inches="tight",
+        )
+
     def generate_non_cumulative_bar(
         self,
         benchmarking_results: List[BenchmarkRunResults],
@@ -418,6 +495,8 @@ def generate_plots(
         title (str, optional): Title for the generated plot. Defaults to None.
     """
     plot_generator = PlotGenerator()
+    plot_generator.generate_roc_curve(benchmarking_results, benchmark_generator)
+    plot_generator.generate_precision_recall(benchmarking_results, benchmark_generator)
     if plot_type == "bar_stacked":
         plot_generator.generate_stacked_bar_plot(benchmarking_results, benchmark_generator, title)
     elif plot_type == "bar_cumulative":
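
The AUC values printed in the plot legends come directly from scikit-learn. A self-contained sketch of the same computation, using made-up labels and scores rather than a real benchmark run:

from sklearn.metrics import auc, precision_recall_curve, roc_curve

# Toy data: a label of 1 marks a known (relevant) entity, scores are the tool's prediction scores.
labels = [1, 0, 1, 0, 0]
scores = [0.9, 0.8, 0.7, 0.4, 0.1]

fpr, tpr, _ = roc_curve(labels, scores, pos_label=1)
roc_auc = auc(fpr, tpr)

precision, recall, _ = precision_recall_curve(labels, scores)
pr_auc = auc(recall, precision)

print(f"ROC AUC = {roc_auc:.2f}, PR AUC = {pr_auc:.2f}")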

pheval/analyse/generate_summary_outputs.py

@@ -3,6 +3,7 @@ from collections import defaultdict
 from copy import deepcopy
 from typing import List
 
+import numpy as np
 import pandas as pd
 
 from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
@@ -40,7 +41,19 @@ class RankComparisonGenerator:
             pd.DataFrame: DataFrame containing the calculated rank differences.
         """
         comparison_df = self._generate_dataframe()
-        comparison_df["rank_decrease"] = comparison_df.iloc[:, 3] - comparison_df.iloc[:, 2]
+        comparison_df["rank_change"] = comparison_df.iloc[:, 2] - comparison_df.iloc[:, 3]
+        comparison_df["rank_change"] = np.where(
+            (comparison_df.iloc[:, 2] == 0) & (comparison_df.iloc[:, 3] != 0),
+            "GAINED",
+            np.where(
+                (comparison_df.iloc[:, 3] == 0) & (comparison_df.iloc[:, 2] != 0),
+                "LOST",
+                comparison_df["rank_change"],
+            ),
+        )
+        comparison_df["rank_change"] = comparison_df["rank_change"].apply(
+            lambda x: int(x) if str(x).lstrip("-").isdigit() else x
+        )
         return comparison_df
 
     def generate_output(self, prefix: str, suffix: str) -> None:
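
The renamed rank_change column is derived from the two rank columns of the comparison DataFrame (the third and fourth columns via iloc), where a rank of 0 means the entity was not ranked in that run: a positive number means the rank improved between runs, "GAINED" means the entity was unranked in the first run but ranked in the second, and "LOST" means the reverse. Because np.where coerces the numeric differences to strings, the final apply() converts digit strings back to integers. A toy reproduction of the logic (the column layout is assumed for illustration):

import numpy as np
import pandas as pd

# Third and fourth columns (iloc[:, 2] and iloc[:, 3]) hold the ranks from the two compared runs.
df = pd.DataFrame(
    {
        "entity": ["GENE_A", "GENE_B", "GENE_C"],
        "identifier": ["a", "b", "c"],
        "run_1": [3, 0, 5],
        "run_2": [1, 5, 0],
    }
)
df["rank_change"] = df.iloc[:, 2] - df.iloc[:, 3]
df["rank_change"] = np.where(
    (df.iloc[:, 2] == 0) & (df.iloc[:, 3] != 0),
    "GAINED",
    np.where((df.iloc[:, 3] == 0) & (df.iloc[:, 2] != 0), "LOST", df["rank_change"]),
)
df["rank_change"] = df["rank_change"].apply(lambda x: int(x) if str(x).lstrip("-").isdigit() else x)
print(df["rank_change"].tolist())  # [2, 'GAINED', 'LOST']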

pheval/analyse/variant_prioritisation_analysis.py

@@ -10,7 +10,11 @@ from pheval.analyse.prioritisation_result_types import VariantPrioritisationResu
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalVariantResult
-from pheval.utils.file_utils import all_files, files_with_suffix, obtain_closest_file_name
+from pheval.utils.file_utils import (
+    all_files,
+    files_with_suffix,
+    obtain_phenopacket_path_from_pheval_result,
+)
 from pheval.utils.phenopacket_utils import GenomicVariant, PhenopacketUtil, phenopacket_reader
 
 
@@ -228,7 +232,7 @@ def assess_phenopacket_variant_prioritisation(
         variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
         variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
     """
-    phenopacket_path = obtain_closest_file_name(
+    phenopacket_path = obtain_phenopacket_path_from_pheval_result(
         standardised_variant_result, all_files(results_dir_and_input.phenopacket_dir)
     )
     proband_causative_variants = _obtain_causative_variants(phenopacket_path)

pheval/utils/file_utils.py

@@ -1,4 +1,3 @@
-import difflib
 import itertools
 import re
 import unicodedata
@@ -71,23 +70,33 @@ def normalise_file_name(file_path: Path) -> str:
     return re.sub("[\u0300-\u036f]", "", normalised_file_name)
 
 
-def obtain_closest_file_name(file_to_be_queried: Path, file_paths: list[Path]) -> Path:
+def obtain_phenopacket_path_from_pheval_result(
+    pheval_result_path: Path, phenopacket_paths: list[Path]
+) -> Path:
     """
-    Obtains the closest file name when given a template file name
-    and a list of full paths of files to be queried.
+    Obtains the phenopacket file name when given a pheval result file name
+    and a list of full paths of phenopackets to be queried.
 
     Args:
-        file_to_be_queried (Path): The template file name to find the closest match.
-        file_paths (list[Path]): List of full paths of files to be queried.
+        pheval_result_path (Path): The PhEval result.
+        phenopacket_paths (list[Path]): List of full paths of phenopackets to be queried.
 
     Returns:
-        Path: The closest matching file path from the provided list.
+        Path: The matching phenopacket file path from the provided list.
     """
-    stems = [Path(file_path).stem for file_path in file_paths]
-    closest_file_match = difflib.get_close_matches(
-        str(Path(file_to_be_queried).stem), stems, cutoff=0.1, n=1
-    )[0]
-    return [file_path for file_path in file_paths if closest_file_match == str(file_path.stem)][0]
+    pheval_result_path_stem_stripped = pheval_result_path.stem.split("-pheval_")[0]
+    matching_phenopacket_paths = [
+        phenopacket_path
+        for phenopacket_path in phenopacket_paths
+        if phenopacket_path.stem == pheval_result_path_stem_stripped
+    ]
+    if matching_phenopacket_paths:
+        return matching_phenopacket_paths[0]
+    else:
+        raise FileNotFoundError(
+            f"Unable to find matching phenopacket file named "
+            f"{pheval_result_path_stem_stripped}.json for {pheval_result_path.name}"
+        )
 
 
 def ensure_file_exists(*files: str):
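
The fuzzy difflib lookup is replaced by an exact match on the part of the result file stem before "-pheval_", and a missing match now raises FileNotFoundError instead of silently returning the closest name. A minimal illustration, assuming result files follow a <phenopacket-stem>-pheval_<type>_result naming pattern (the file names below are hypothetical):

from pathlib import Path

from pheval.utils.file_utils import obtain_phenopacket_path_from_pheval_result

result = Path("results/Patient_1-pheval_gene_result.tsv")  # hypothetical result file
phenopackets = [Path("phenopackets/Patient_1.json"), Path("phenopackets/Patient_2.json")]

# "Patient_1-pheval_gene_result" is stripped to "Patient_1" and matched against the phenopacket stems.
print(obtain_phenopacket_path_from_pheval_result(result, phenopackets))  # phenopackets/Patient_1.json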

pheval-0.3.1.dist-info/METADATA → pheval-0.3.2.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pheval
-Version: 0.3.1
+Version: 0.3.2
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk

pheval-0.3.1.dist-info/RECORD → pheval-0.3.2.dist-info/RECORD

@@ -3,18 +3,18 @@ pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/analyse/analysis.py,sha256=ponm3P8nvzJNmcrNZ2_KudEhWSaWshd_Gd30D-aau8s,7743
 pheval/analyse/benchmark_generator.py,sha256=AeuwbaPb4j_dyBGPRgEBxQk2NahDb5u4xHyFiqp5Fes,5943
 pheval/analyse/benchmarking_data.py,sha256=aNZkWdmWemlnC1Tg35MtR60S9YC71QWS2rMuzkUc3w0,768
-pheval/analyse/binary_classification_stats.py,sha256=ZBAvhMVPYSFg3asONUG1w24JhYTjG03RG_C9uohQntI,11373
-pheval/analyse/disease_prioritisation_analysis.py,sha256=ttdgUX5ZKT74gKgsRrnyH8zKFxhcJxVOtZTsAdheGxU,12596
-pheval/analyse/gene_prioritisation_analysis.py,sha256=raEjzJFvAvS3wE0yrYcSIQzBe6s_lOgJMqe_p_AFgZY,12320
-pheval/analyse/generate_plots.py,sha256=gU7NYr1zgnXEXAZR-nHLql3farQEaUN5gkgu2ywTJho,17779
-pheval/analyse/generate_summary_outputs.py,sha256=tpHjbyme3FlkflGcTIgQ4H4xyN6FZ5Jmm-ImjAbSpYU,6071
+pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
+pheval/analyse/disease_prioritisation_analysis.py,sha256=qadEVhBMtBgtjGCJLhNQA510F8Pd0Ll4NAQXoT23BYs,12649
+pheval/analyse/gene_prioritisation_analysis.py,sha256=lAN171xfXqweK8ie6191s_6WPPGjZKJXL1Z0dIqp54k,12373
+pheval/analyse/generate_plots.py,sha256=zjsVzf-WsMG7jb5Y_FVYeOHQwu9lz_V90a9LApUlsDo,21163
+pheval/analyse/generate_summary_outputs.py,sha256=s9pXMSW6xm4ZBe1aCd0UJSaFiKBvpUfPwJ2BI4qfTas,6591
 pheval/analyse/parse_benchmark_summary.py,sha256=Y8uPTlHTEiaeVBOqxMcdOqjY3ZBtOS3DoRycL78Dzxg,2384
 pheval/analyse/parse_pheval_result.py,sha256=j8YFVA0YXfySOkm8gMwrfIuV45DI9AX3ETn7h-r8ayE,1211
 pheval/analyse/prioritisation_rank_recorder.py,sha256=EVe8DoEvvp0_WMAcjfVxmDGGRFPEELi7hEVjH3sIpLY,3223
 pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
 pheval/analyse/rank_stats.py,sha256=knj1tsKrly17QgtOUVpqA14UjbO99N3ydkWN4xU6c2k,15785
 pheval/analyse/run_data_parser.py,sha256=HzBKsJL2skjmrRZdrF3VYzswtKNgbX6U5qhY_kqq9mA,1552
-pheval/analyse/variant_prioritisation_analysis.py,sha256=_yYgknFHqL0_nlpBeQdo9D1Jnd99BcUkA733uxTPpcg,12331
+pheval/analyse/variant_prioritisation_analysis.py,sha256=ApmUeTW0cl_BPh7LusbApxtgjEXEkhuNFyh0DxKKpgU,12384
 pheval/cli.py,sha256=4l9xZfxBfLCcm7PDdhMWgTvTKbQt5sJ2bYHf7kU1dO4,1493
 pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
 pheval/cli_pheval_utils.py,sha256=wVLH0Bk2WrvTBkH-G5wC3Xgo6KftX9zSwonC2DVBpP8,16929
@@ -45,12 +45,12 @@ pheval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
 pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
 pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
-pheval/utils/file_utils.py,sha256=ESAXWtfpCAZX6T6nU6vb1x0of5S-eYhu639geJBu1es,4361
+pheval/utils/file_utils.py,sha256=9HoCmtF73D3wY6bBhFLefMBI5uhvCe_meZeHXQzF_ts,4640
 pheval/utils/phenopacket_utils.py,sha256=hBEWl9mOP9D7odSaL6lIY__dbXn7Sc3TZX0Si-nPYaE,24379
 pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
 pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
-pheval-0.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-pheval-0.3.1.dist-info/METADATA,sha256=wVyoDa-Xs4ztciDaO56ogC3rjhukYhCe3HFqmqEtClA,1810
-pheval-0.3.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-pheval-0.3.1.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
-pheval-0.3.1.dist-info/RECORD,,
+pheval-0.3.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+pheval-0.3.2.dist-info/METADATA,sha256=8dvmkrDAkmmwiOvMWltnb_oXGo8IQTC1-iJImlsW-m8,1810
+pheval-0.3.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+pheval-0.3.2.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
+pheval-0.3.2.dist-info/RECORD,,

pheval-0.3.1.dist-info/WHEEL → pheval-0.3.2.dist-info/WHEEL

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 1.8.1
+Generator: poetry-core 1.9.0
 Root-Is-Purelib: true
 Tag: py3-none-any