PyPI - pheval - Versions diffs - 0.5.0__tar.gz → 0.5.1__tar.gz - Mend

pheval 0.5.0 (tar.gz) → 0.5.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pheval might be problematic. Click here for more details.

Files changed (52)

{pheval-0.5.0 → pheval-0.5.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pheval
-Version: 0.5.0
+Version: 0.5.1
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk

{pheval-0.5.0 → pheval-0.5.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pheval"
-version = "0.5.0"
+version = "0.5.1"
 description = ""
 authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
   "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",

{pheval-0.5.0 → pheval-0.5.1}/src/pheval/post_processing/phenopacket_truth_set.py RENAMED Viewed

@@ -12,6 +12,18 @@ from pheval.utils.phenopacket_utils import (
 )
+def calculate_end_pos(variant_start: int, variant_ref: str) -> int:
+    """Calculate the end position for a variant
+    Args:
+        variant_start (int): The start position of the variant
+        variant_ref (str): The reference allele of the variant
+    Returns:
+        int: The end position of the variant
+    """
+    return variant_start + len(variant_ref) - 1
 class PhenopacketTruthSet:
     """Class for finding the causative gene/disease/variant from a phenopacket"""
@@ -139,13 +151,14 @@ class PhenopacketTruthSet:
         return pl.DataFrame(
             {
                 "chrom": [v.chrom for v in variants],
-                "pos": [v.pos for v in variants],
+                "start": [v.pos for v in variants],
+                "end": [calculate_end_pos(v.pos, v.ref) for v in variants],
                 "ref": [v.ref for v in variants],
                 "alt": [v.alt for v in variants],
             }
         ).with_columns(
             [
-                pl.concat_str(["chrom", "pos", "ref", "alt"], separator="-").alias("variant_id"),
+                pl.concat_str(["chrom", "start", "ref", "alt"], separator="-").alias("variant_id"),
                 pl.lit(0.0).cast(pl.Float64).alias("score"),
                 pl.lit(0).cast(pl.Int64).alias("rank"),
                 pl.lit(True).alias("true_positive"),
@@ -166,10 +179,10 @@ class PhenopacketTruthSet:
         return (
             ranked_results.with_columns(
                 [
-                    pl.struct(["chrom", "pos", "ref", "alt"])
+                    pl.struct(["chrom", "start", "end", "ref", "alt"])
                     .is_in(
                         classified_results.select(
-                            pl.struct(["chrom", "pos", "ref", "alt"])
+                            pl.struct(["chrom", "start", "end", "ref", "alt"])
                         ).to_series()
                     )
                     .alias("true_positive")
@@ -179,8 +192,10 @@ class PhenopacketTruthSet:
             .select(classified_results.columns)
             .vstack(
                 classified_results.filter(
-                    ~pl.struct(["chrom", "pos", "ref", "alt"]).is_in(
-                        ranked_results.select(pl.struct(["chrom", "pos", "ref", "alt"])).to_series()
+                    ~pl.struct(["chrom", "start", "end", "ref", "alt"]).is_in(
+                        ranked_results.select(
+                            pl.struct(["chrom", "start", "end", "ref", "alt"])
+                        ).to_series()
                     )
                 )
             )

{pheval-0.5.0 → pheval-0.5.1}/src/pheval/post_processing/post_processing.py RENAMED Viewed

@@ -22,18 +22,6 @@ class ResultType(Enum):
     VARIANT = "variant"
-def calculate_end_pos(variant_start: int, variant_ref: str) -> int:
-    """Calculate the end position for a variant
-    Args:
-        variant_start (int): The start position of the variant
-        variant_ref (str): The reference allele of the variant
-    Returns:
-        int: The end position of the variant
-    """
-    return variant_start + len(variant_ref) - 1
 class SortOrder(Enum):
     """Enumeration representing sorting orders."""
@@ -106,7 +94,7 @@ def _write_variant_result(ranked_results: pl.DataFrame, output_file: Path) -> No
         output_file (Path): Path to the output file.
     """
     variant_output = ranked_results.select(
-        ["rank", "score", "chromosome", "start", "end", "ref", "alt", "variant_id", "true_positive"]
+        ["rank", "score", "chrom", "start", "end", "ref", "alt", "variant_id", "true_positive"]
     )
     _write_results_file(output_file, variant_output)
@@ -119,9 +107,7 @@ def _write_disease_result(ranked_results: pl.DataFrame, output_file: Path) -> No
         ranked_results ([PhEvalResult]): List of ranked PhEval disease results.
         output_file (Path): Path to the output file.
     """
-    disease_output = ranked_results.select(
-        ["rank", "score", "disease_name", "disease_identifier", "true_positive"]
-    )
+    disease_output = ranked_results.select(["rank", "score", "disease_identifier", "true_positive"])
     _write_results_file(output_file, disease_output)
@@ -228,7 +214,7 @@ def generate_variant_result(
         phenopacket_dir, output_dir.joinpath("pheval_variant_results"), ResultType.VARIANT
     )
     ranked_results = _rank_results(results, sort_order).with_columns(
-        pl.concat_str(["chrom", "pos", "ref", "alt"], separator="-").alias("variant_id")
+        pl.concat_str(["chrom", "start", "ref", "alt"], separator="-").alias("variant_id")
     )
     classified_results = PhenopacketTruthSet(phenopacket_dir).merge_variant_results(
         ranked_results, output_file

{pheval-0.5.0 → pheval-0.5.1}/src/pheval/post_processing/validate_result_format.py RENAMED Viewed

@@ -35,18 +35,17 @@ class ResultSchema(Enum):
     )
     DISEASE_RESULT_SCHEMA = pl.Schema(
         {
-            "disease_name": pl.String,
             "disease_identifier": pl.String,
             "score": pl.Float64,
             "grouping_id": pl.Utf8,
         }
     )
-    def validate(self, df: pl.DataFrame) -> bool:
+    def validate(self, results: pl.DataFrame) -> bool:
         """
         Validate that a DataFrame follows the expected schema.
         Args:
-            df (pl.DataFrame): The DataFrame to validate.
+            results (pl.DataFrame): The DataFrame to validate.
         Raises:
             ValueError: If a required column is missing or the grouping_id column contains a null value.
             TypeError: If a column exists but has an incorrect data type.
@@ -55,18 +54,18 @@ class ResultSchema(Enum):
         """
         expected_schema = self.value
-        if "grouping_id" in df.columns and df["grouping_id"].null_count() > 0:
+        if "grouping_id" in results.columns and results["grouping_id"].null_count() > 0:
             raise ValueError("'grouping_id' column should not contain null values if provided.")
         for col_name, expected_type in expected_schema.items():
-            if col_name not in df.schema:
+            if col_name not in results.schema:
                 if col_name == "grouping_id":
                     continue
                 raise ValueError(f"Missing required column: {col_name}")
-            if df.schema[col_name] != expected_type:
+            if results.schema[col_name] != expected_type:
                 raise TypeError(
-                    f"Column '{col_name}' has type {df.schema[col_name]}, expected {expected_type}"
+                    f"Column '{col_name}' has type {results.schema[col_name]}, expected {expected_type}"
                 )
         return True
@@ -83,9 +82,9 @@ def validate_dataframe(schema: ResultSchema) -> Callable:
     def decorator(func: Callable) -> Callable:
         @wraps(func)
-        def wrapper(df: pl.DataFrame, *args, **kwargs):
-            schema.validate(df)
-            return func(df, *args, **kwargs)
+        def wrapper(results: pl.DataFrame, *args, **kwargs):
+            schema.validate(results)
+            return func(results, *args, **kwargs)
         return wrapper