PyPI - pheval - Versions diffs - 0.5.3__tar.gz → 0.5.5__tar.gz - Mend

pheval 0.5.3.tar.gz → 0.5.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pheval might be problematic. Click here for more details.

Files changed (52) hide show

{pheval-0.5.3 → pheval-0.5.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pheval
-Version: 0.5.3
+Version: 0.5.5
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk

{pheval-0.5.3 → pheval-0.5.5}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pheval"
-version = "0.5.3"
+version = "0.5.5"
 description = ""
 authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
   "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",

{pheval-0.5.3 → pheval-0.5.5}/src/pheval/analyse/rank_stats.py RENAMED Viewed

@@ -124,24 +124,11 @@ class Ranks:
         return np.mean(precision_at_k)
     @classmethod
-    def mean_average_precision_at_k(cls, df: pl.LazyFrame, k: int) -> pl.LazyFrame:
-        """
-        Compute Mean Average Precision at K (MAP@K) by averaging AP@K scores.
-        Args:
-            df (pl.LazyFrame): The dataframe calculate MAP@K for each query.
-            k (int): The upper rank limit.
-        Returns:
-            pl.LazyFrame: The dataframe with MAP@K for each query.
-        """
+    def mean_average_precision_at_k(cls, df: pl.LazyFrame, k: int) -> float:
         ap_at_k_df = cls._average_precision_at_k(df, k)
-        return (
-            ap_at_k_df.select(
-                pl.col(f"ap@{k}").sum() / df.select(Ranks.NUMBER_OF_SAMPLES).collect()
-            )
-            .fill_null(0.0)
-            .collect()
-            .item()
-        )
+        ap_sum = ap_at_k_df.select(pl.col(f"ap@{k}").sum()).collect().item()
+        num_samples = df.select(Ranks.NUMBER_OF_SAMPLES).collect().item()
+        return ap_sum / num_samples
     @classmethod
     def _calculate_ndcg_at_k(cls, ranks: List[int], k: int) -> float:
@@ -165,29 +152,18 @@ class Ranks:
         )
     @classmethod
-    def mean_normalised_discounted_cumulative_gain(cls, df: pl.LazyFrame, k: int) -> pl.Float64:
-        """
-        Compute mean normalised discounted cumulative gain.
-        Args:
-            df (pl.LazyFrame): The dataframe to calculate mean normalised cumulative gain.
-            k (int): The upper rank limit.
-        Returns:
-            pl.LazyFrame: The dataframe with mean normalised cumulative gain.
-        """
+    def mean_normalised_discounted_cumulative_gain(cls, df: pl.LazyFrame, k: int) -> float:
         filtered_df = cls._filter_results(df, k)
-        return (
-            filtered_df.with_columns(
-                pl.struct("ranks")
-                .map_elements(
-                    lambda row: cls._calculate_ndcg_at_k(row["ranks"], k), return_dtype=pl.Float64
-                )
-                .alias(f"NDCG@{k}")
+        ndcg_df = filtered_df.with_columns(
+            pl.struct("ranks")
+            .map_elements(
+                lambda row: cls._calculate_ndcg_at_k(row["ranks"], k), return_dtype=pl.Float64
             )
-            .select(pl.col(f"NDCG@{k}").sum() / df.select(Ranks.NUMBER_OF_SAMPLES).collect())
-            .fill_null(0.0)
-            .collect()
-            .item()
+            .alias(f"NDCG@{k}")
         )
+        ndcg_sum = ndcg_df.select(pl.col(f"NDCG@{k}").sum()).collect().item()
+        num_samples = df.select(Ranks.NUMBER_OF_SAMPLES).collect().item()
+        return ndcg_sum / num_samples
 def compute_rank_stats(run_identifier: str, result_scan: pl.LazyFrame) -> pl.LazyFrame:

{pheval-0.5.3 → pheval-0.5.5}/src/pheval/post_processing/post_processing.py RENAMED Viewed

@@ -151,6 +151,10 @@ def create_empty_pheval_result(
     """
     if result_type in executed_results:
         return
+    logger.info(
+        f"Writing classified results for {len(all_files(phenopacket_dir))} "
+        f"phenopackets to {output_dir}"
+    )
     executed_results.add(result_type)
     phenopacket_truth_set = PhenopacketTruthSet(phenopacket_dir)
     classify_method, write_method = _get_result_type(result_type, phenopacket_truth_set)
@@ -180,10 +184,6 @@ def generate_gene_result(
         phenopacket_dir (Path): Path to the Phenopacket directory
     """
     output_file = output_dir.joinpath(f"pheval_gene_results/{result_path.stem}-gene_result.parquet")
-    logger.info(
-        f"Writing classified results for {len(all_files(phenopacket_dir))} "
-        f"phenopackets to {output_dir.joinpath('pheval_gene_results')}"
-    )
     create_empty_pheval_result(
         phenopacket_dir, output_dir.joinpath("pheval_gene_results"), ResultType.GENE
     )
@@ -214,10 +214,6 @@ def generate_variant_result(
     output_file = output_dir.joinpath(
         f"pheval_variant_results/{result_path.stem}-variant_result.parquet"
     )
-    logger.info(
-        f"Writing classified results for {len(all_files(phenopacket_dir))} "
-        f"phenopackets to {output_dir.joinpath('pheval_variant_results')}"
-    )
     create_empty_pheval_result(
         phenopacket_dir, output_dir.joinpath("pheval_variant_results"), ResultType.VARIANT
     )
@@ -250,10 +246,6 @@ def generate_disease_result(
     output_file = output_dir.joinpath(
         f"pheval_disease_results/{result_path.stem}-disease_result.parquet"
     )
-    logger.info(
-        f"Writing classified results for {len(all_files(phenopacket_dir))} "
-        f"phenopackets to {output_dir.joinpath('pheval_disease_results')}"
-    )
     create_empty_pheval_result(
         phenopacket_dir, output_dir.joinpath("pheval_disease_results"), ResultType.DISEASE
     )