PyPI - pheval - Versions diffs - 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl - Mend

pheval 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pheval might be problematic. Click here for more details.

Files changed (9) hide show

pheval/analyse/benchmark.py CHANGED Viewed

@@ -23,27 +23,32 @@ def scan_directory(run: RunConfig, benchmark_type: BenchmarkOutputType) -> pl.La
         run (RunConfig): RunConfig object.
         benchmark_type (BenchmarkOutputTypeEnum): Benchmark output type.
     Returns:
-        pl.LazyFrame: LazyFrame object containing all the results in the directory..
+        pl.LazyFrame: LazyFrame object containing all the results in the directory.
     """
     logger = get_logger()
     logger.info(f"Analysing results in {run.results_dir.joinpath(benchmark_type.result_directory)}")
     return (
-        pl.scan_parquet(
-            run.results_dir.joinpath(benchmark_type.result_directory),
-            include_file_paths="file_path",
-        ).with_columns(
-            pl.col("rank").cast(pl.Int64),
-            pl.col("file_path").str.extract(r"([^/\\]+)$").alias("result_file"),
-            pl.col("true_positive").fill_null(False),
-        )
-    ).filter(
         (
-            pl.col("score") >= run.threshold
-            if run.score_order.lower() == "descending"
-            else pl.col("score") <= run.threshold
+            pl.scan_parquet(
+                run.results_dir.joinpath(benchmark_type.result_directory),
+                include_file_paths="file_path",
+            ).with_columns(
+                pl.col("rank").cast(pl.Int64),
+                pl.col("file_path").str.extract(r"([^/\\]+)$").alias("result_file"),
+                pl.col("true_positive").fill_null(False),
+            )
+        )
+        .filter(
+            (
+                pl.col("score") >= run.threshold
+                if run.score_order.lower() == "descending"
+                else pl.col("score") <= run.threshold
+            )
+            if run.threshold is not None
+            else True
         )
-        if run.threshold is not None
-        else True
+        .sort("rank")
+        .unique(subset=["file_path", *benchmark_type.columns], keep="first")
     )
@@ -68,14 +73,29 @@ def process_stats(
         )
         curve_results.append(compute_curves(run.run_identifier, result_scan))
         true_positive_cases.append(
-            result_scan.filter(pl.col("true_positive")).select(
+            result_scan.filter(pl.col("true_positive"))
+            .select(
                 ["result_file", *benchmark_type.columns, pl.col("rank").alias(run.run_identifier)]
             )
+            .sort(["result_file", *benchmark_type.columns])
         )
     return (
         pl.concat(stats, how="vertical").collect(),
         pl.concat(curve_results, how="vertical").collect(),
-        pl.concat(true_positive_cases, how="align_inner").collect(),
+        pl.concat(
+            [true_positive_cases[0]]
+            + [
+                df.select(
+                    [
+                        col
+                        for col in df.collect_schema().keys()
+                        if col not in ["result_file", *benchmark_type.columns]
+                    ]
+                )
+                for df in true_positive_cases[1:]
+            ],
+            how="horizontal",
+        ).collect(),
     )

pheval/post_processing/phenopacket_truth_set.py CHANGED Viewed

@@ -12,6 +12,18 @@ from pheval.utils.phenopacket_utils import (
 )
+def calculate_end_pos(variant_start: int, variant_ref: str) -> int:
+    """Calculate the end position for a variant
+    Args:
+        variant_start (int): The start position of the variant
+        variant_ref (str): The reference allele of the variant
+    Returns:
+        int: The end position of the variant
+    """
+    return variant_start + len(variant_ref) - 1
 class PhenopacketTruthSet:
     """Class for finding the causative gene/disease/variant from a phenopacket"""
@@ -139,13 +151,14 @@ class PhenopacketTruthSet:
         return pl.DataFrame(
             {
                 "chrom": [v.chrom for v in variants],
-                "pos": [v.pos for v in variants],
+                "start": [v.pos for v in variants],
+                "end": [calculate_end_pos(v.pos, v.ref) for v in variants],
                 "ref": [v.ref for v in variants],
                 "alt": [v.alt for v in variants],
             }
         ).with_columns(
             [
-                pl.concat_str(["chrom", "pos", "ref", "alt"], separator="-").alias("variant_id"),
+                pl.concat_str(["chrom", "start", "ref", "alt"], separator="-").alias("variant_id"),
                 pl.lit(0.0).cast(pl.Float64).alias("score"),
                 pl.lit(0).cast(pl.Int64).alias("rank"),
                 pl.lit(True).alias("true_positive"),
@@ -166,10 +179,10 @@ class PhenopacketTruthSet:
         return (
             ranked_results.with_columns(
                 [
-                    pl.struct(["chrom", "pos", "ref", "alt"])
+                    pl.struct(["chrom", "start", "end", "ref", "alt"])
                     .is_in(
                         classified_results.select(
-                            pl.struct(["chrom", "pos", "ref", "alt"])
+                            pl.struct(["chrom", "start", "end", "ref", "alt"])
                         ).to_series()
                     )
                     .alias("true_positive")
@@ -179,8 +192,10 @@ class PhenopacketTruthSet:
             .select(classified_results.columns)
             .vstack(
                 classified_results.filter(
-                    ~pl.struct(["chrom", "pos", "ref", "alt"]).is_in(
-                        ranked_results.select(pl.struct(["chrom", "pos", "ref", "alt"])).to_series()
+                    ~pl.struct(["chrom", "start", "end", "ref", "alt"]).is_in(
+                        ranked_results.select(
+                            pl.struct(["chrom", "start", "end", "ref", "alt"])
+                        ).to_series()
                     )
                 )
             )

pheval/post_processing/post_processing.py CHANGED Viewed

@@ -22,18 +22,6 @@ class ResultType(Enum):
     VARIANT = "variant"
-def calculate_end_pos(variant_start: int, variant_ref: str) -> int:
-    """Calculate the end position for a variant
-    Args:
-        variant_start (int): The start position of the variant
-        variant_ref (str): The reference allele of the variant
-    Returns:
-        int: The end position of the variant
-    """
-    return variant_start + len(variant_ref) - 1
 class SortOrder(Enum):
     """Enumeration representing sorting orders."""
@@ -106,7 +94,7 @@ def _write_variant_result(ranked_results: pl.DataFrame, output_file: Path) -> No
         output_file (Path): Path to the output file.
     """
     variant_output = ranked_results.select(
-        ["rank", "score", "chromosome", "start", "end", "ref", "alt", "variant_id", "true_positive"]
+        ["rank", "score", "chrom", "start", "end", "ref", "alt", "variant_id", "true_positive"]
     )
     _write_results_file(output_file, variant_output)
@@ -119,9 +107,7 @@ def _write_disease_result(ranked_results: pl.DataFrame, output_file: Path) -> No
         ranked_results ([PhEvalResult]): List of ranked PhEval disease results.
         output_file (Path): Path to the output file.
     """
-    disease_output = ranked_results.select(
-        ["rank", "score", "disease_name", "disease_identifier", "true_positive"]
-    )
+    disease_output = ranked_results.select(["rank", "score", "disease_identifier", "true_positive"])
     _write_results_file(output_file, disease_output)
@@ -228,7 +214,7 @@ def generate_variant_result(
         phenopacket_dir, output_dir.joinpath("pheval_variant_results"), ResultType.VARIANT
     )
     ranked_results = _rank_results(results, sort_order).with_columns(
-        pl.concat_str(["chrom", "pos", "ref", "alt"], separator="-").alias("variant_id")
+        pl.concat_str(["chrom", "start", "ref", "alt"], separator="-").alias("variant_id")
     )
     classified_results = PhenopacketTruthSet(phenopacket_dir).merge_variant_results(
         ranked_results, output_file

pheval/post_processing/validate_result_format.py CHANGED Viewed

@@ -35,18 +35,17 @@ class ResultSchema(Enum):
     )
     DISEASE_RESULT_SCHEMA = pl.Schema(
         {
-            "disease_name": pl.String,
             "disease_identifier": pl.String,
             "score": pl.Float64,
             "grouping_id": pl.Utf8,
         }
     )
-    def validate(self, df: pl.DataFrame) -> bool:
+    def validate(self, results: pl.DataFrame) -> bool:
         """
         Validate that a DataFrame follows the expected schema.
         Args:
-            df (pl.DataFrame): The DataFrame to validate.
+            results (pl.DataFrame): The DataFrame to validate.
         Raises:
             ValueError: If a required column is missing or the grouping_id column contains a null value.
             TypeError: If a column exists but has an incorrect data type.
@@ -55,18 +54,18 @@ class ResultSchema(Enum):
         """
         expected_schema = self.value
-        if "grouping_id" in df.columns and df["grouping_id"].null_count() > 0:
+        if "grouping_id" in results.columns and results["grouping_id"].null_count() > 0:
             raise ValueError("'grouping_id' column should not contain null values if provided.")
         for col_name, expected_type in expected_schema.items():
-            if col_name not in df.schema:
+            if col_name not in results.schema:
                 if col_name == "grouping_id":
                     continue
                 raise ValueError(f"Missing required column: {col_name}")
-            if df.schema[col_name] != expected_type:
+            if results.schema[col_name] != expected_type:
                 raise TypeError(
-                    f"Column '{col_name}' has type {df.schema[col_name]}, expected {expected_type}"
+                    f"Column '{col_name}' has type {results.schema[col_name]}, expected {expected_type}"
                 )
         return True
@@ -83,9 +82,9 @@ def validate_dataframe(schema: ResultSchema) -> Callable:
     def decorator(func: Callable) -> Callable:
         @wraps(func)
-        def wrapper(df: pl.DataFrame, *args, **kwargs):
-            schema.validate(df)
-            return func(df, *args, **kwargs)
+        def wrapper(results: pl.DataFrame, *args, **kwargs):
+            schema.validate(results)
+            return func(results, *args, **kwargs)
         return wrapper

{pheval-0.5.0.dist-info → pheval-0.5.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pheval
-Version: 0.5.0
+Version: 0.5.2
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk
@@ -32,10 +32,10 @@ Description-Content-Type: text/markdown
 # PhEval - Phenotypic Inference Evaluation Framework
-![PyPI](https://img.shields.io/pypi/v/pheval)
+[![PyPI](https://img.shields.io/pypi/v/pheval)](https://pypi.org/project/pheval/)
 ![Build Status](https://img.shields.io/github/actions/workflow/status/monarch-initiative/pheval/pypi-publish.yml?branch=main)
 ![License](https://img.shields.io/github/license/monarch-initiative/pheval)
-![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
+![Python Version](https://img.shields.io/badge/python-3.10%2B-blue)
 ![Issues](https://img.shields.io/github/issues/monarch-initiative/pheval)
 ## Overview
@@ -53,7 +53,7 @@ For more information please see the full [documentation](https://monarch-initiat
 ## Download and Installation
-1. Ensure you have Python 3.8 or greater installed.
+1. Ensure you have Python 3.10 or greater installed.
 2. Install with `pip`:
 ```bash
 pip install pheval

{pheval-0.5.0.dist-info → pheval-0.5.2.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pheval/analyse/benchmark.py,sha256=1ysz1peGb21DhgNpEam9NgUOS5eGv7K0CI3RNjy0crQ,6275
+pheval/analyse/benchmark.py,sha256=sfjReLmodXwCT9ZyZDE6Oli0j1S5ygicJshb7n4-x4U,6916
 pheval/analyse/benchmark_db_manager.py,sha256=zS1TI76YuV2_YXLipHLSyh-XDR5kTxyOwhRhHRFHfjQ,764
 pheval/analyse/benchmark_output_type.py,sha256=bh-qQvV4AF7BHQyr_bdY8HTTzYZVe7KvoIoUF0D9k-g,1468
 pheval/analyse/binary_classification_curves.py,sha256=Crb45rJWc5rxDdx82sgoHRvYHE2D5pus91fgl39FyRw,5007
@@ -17,9 +17,9 @@ pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r
 pheval/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/infra/exomiserdb.py,sha256=pM9-TfjrgurtH4OtM1Enk5oVhIxGQN3rKRlrxHuObTM,5080
 pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pheval/post_processing/phenopacket_truth_set.py,sha256=ue3pNeg_GZiGyuKrm6_4MsJWpW0LWtfG9wja2Cc8SLg,8873
-pheval/post_processing/post_processing.py,sha256=4xP-gjZ3VoXydU9ClPvmRtuDaSMUeJImgLugurOS5_k,9480
-pheval/post_processing/validate_result_format.py,sha256=4U6AfHt01EexwU_OnpmytQAhGVS6ZWF1S-5NVBx1oaM,2916
+pheval/post_processing/phenopacket_truth_set.py,sha256=EvpfS0NJpcipI1muCtB0PBUghXtktln9vF5PUk57wSM,9412
+pheval/post_processing/post_processing.py,sha256=VadU-tjToEa2auvNpmbIzKuGtRvN4E89pH_GH1RiHm0,9078
+pheval/post_processing/validate_result_format.py,sha256=rRlVVIT5ZtdD_Qi0tQVqRSghCrxEDZCKImtw1ygcbtA,2927
 pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/prepare/create_noisy_phenopackets.py,sha256=ydhA4mpqKTDc4hBu8YfvNW2nMubHK3dbO-cv0lA4JFQ,11504
 pheval/prepare/create_spiked_vcf.py,sha256=90A-Mi8QKhvN036vtFEVWAHgzHO37itiLYrqYlG4LiA,23953
@@ -46,8 +46,8 @@ pheval/utils/logger.py,sha256=5DZl5uMltUDQorhkvg_B7_ZhFwApAmEkWneFIOKfRGQ,1566
 pheval/utils/phenopacket_utils.py,sha256=AfV_mWac6n5HCc5zjfH6CGP8T0qI0LR0VBrooaKmgdY,26978
 pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
 pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
-pheval-0.5.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-pheval-0.5.0.dist-info/METADATA,sha256=v7UNSBKUzJQAs8oBSq8XScwKnDiNXlzWZV0A70xR3M8,6456
-pheval-0.5.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-pheval-0.5.0.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
-pheval-0.5.0.dist-info/RECORD,,
+pheval-0.5.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+pheval-0.5.2.dist-info/METADATA,sha256=do8ya_Tw3VD-md2rPf83DBmVTyytUrH7tEhQenjN-6o,6494
+pheval-0.5.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+pheval-0.5.2.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
+pheval-0.5.2.dist-info/RECORD,,

{pheval-0.5.0.dist-info → pheval-0.5.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{pheval-0.5.0.dist-info → pheval-0.5.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{pheval-0.5.0.dist-info → pheval-0.5.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

pheval 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

Potentially problematic release.

pheval 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl