PyPI - pheval - Versions diffs - 0.4.2__tar.gz → 0.4.4__tar.gz - Mend

pheval 0.4.2.tar.gz → 0.4.4.tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pheval might be problematic. Click here for more details.

Files changed (56) [hide / show]

{pheval-0.4.2 → pheval-0.4.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: pheval
-Version: 0.4.2
+Version: 0.4.4
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk

{pheval-0.4.2 → pheval-0.4.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pheval"
-version = "0.4.2"
+version = "0.4.4"
 description = ""
 authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
   "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",

{pheval-0.4.2 → pheval-0.4.4}/src/pheval/analyse/benchmark_db_manager.py RENAMED Viewed

@@ -75,6 +75,7 @@ class BenchmarkDBManager:
                 `False` otherwise.
         """
         list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
+        entity = entity.replace("nan", "None").replace("NaN", "None")
         if list_pattern.match(str(entity)):
             list_representation = ast.literal_eval(entity)
             if isinstance(list_representation, list):

{pheval-0.4.2 → pheval-0.4.4}/src/pheval/analyse/disease_prioritisation_analysis.py RENAMED Viewed

@@ -38,11 +38,16 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
         for _i, row in df.iterrows():
             result = (
                 self.conn.execute(
-                    f"SELECT * FROM '{standardised_disease_result_path}' "
-                    f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
-                    f" '{row['disease_identifier']}') "
-                    f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
-                    f"'{row['disease_name']}')"
+                    (
+                        f"SELECT * FROM '{standardised_disease_result_path}' "
+                        f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
+                        f" '{row['disease_identifier']}') "
+                        f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
+                        f"'{row['disease_name']}')"
+                    )
+                    if standardised_disease_result_path.exists()
+                    and standardised_disease_result_path.stat().st_size > 0
+                    else "SELECT NULL WHERE FALSE"
                 )
                 .fetchdf()
                 .to_dict(orient="records")
@@ -56,9 +61,15 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (disease_match, primary_key),
                 )
+            elif len(result) == 0:
+                relevant_ranks.append(0)
         binary_classification_stats.add_classification(
-            self.db_connection.parse_table_into_dataclass(
-                str(standardised_disease_result_path), RankedPhEvalDiseaseResult
+            (
+                self.db_connection.parse_table_into_dataclass(
+                    str(standardised_disease_result_path), RankedPhEvalDiseaseResult
+                )
+                if standardised_disease_result_path.exists()
+                else []
             ),
             relevant_ranks,
         )

{pheval-0.4.2 → pheval-0.4.4}/src/pheval/analyse/gene_prioritisation_analysis.py RENAMED Viewed

@@ -36,11 +36,16 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
         for _i, row in df.iterrows():
             result = (
                 self.conn.execute(
-                    f"SELECT * FROM '{standardised_gene_result_path}' "
-                    f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR),"
-                    f" '{row['gene_identifier']}') "
-                    f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
-                    f"'{row['gene_symbol']}')"
+                    (
+                        f"SELECT * FROM '{standardised_gene_result_path}' "
+                        f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR), "
+                        f"'{row['gene_identifier']}') "
+                        f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
+                        f"'{row['gene_symbol']}')"
+                    )
+                    if standardised_gene_result_path.exists()
+                    and standardised_gene_result_path.stat().st_size > 0
+                    else "SELECT NULL WHERE FALSE"
                 )
                 .fetchdf()
                 .to_dict(orient="records")
@@ -53,9 +58,15 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (gene_match, primary_key),
                 )
+            if not result:
+                relevant_ranks.append(0)
         binary_classification_stats.add_classification(
-            self.db_connection.parse_table_into_dataclass(
-                str(standardised_gene_result_path), RankedPhEvalGeneResult
+            (
+                self.db_connection.parse_table_into_dataclass(
+                    str(standardised_gene_result_path), RankedPhEvalGeneResult
+                )
+                if standardised_gene_result_path.exists()
+                else []
             ),
             relevant_ranks,
         )

{pheval-0.4.2 → pheval-0.4.4}/src/pheval/analyse/generate_plots.py RENAMED Viewed

@@ -2,6 +2,7 @@ from pathlib import Path
 from typing import List
 import matplotlib
+import numpy as np
 import pandas as pd
 import seaborn as sns
 from matplotlib import pyplot as plt
@@ -368,9 +369,16 @@ class PlotGenerator:
         """
         plt.clf()
         for i, benchmark_result in enumerate(benchmarking_results):
+            y_score = np.array(benchmark_result.binary_classification_stats.scores)
+            y_score = np.nan_to_num(
+                y_score,
+                nan=0.0,
+                posinf=max(y_score[np.isfinite(y_score)]),
+                neginf=min(y_score[np.isfinite(y_score)]),
+            )
             fpr, tpr, thresh = roc_curve(
                 benchmark_result.binary_classification_stats.labels,
-                benchmark_result.binary_classification_stats.scores,
+                y_score,
                 pos_label=1,
             )
             roc_auc = auc(fpr, tpr)
@@ -411,9 +419,16 @@ class PlotGenerator:
         plt.clf()
         plt.figure()
         for i, benchmark_result in enumerate(benchmarking_results):
+            y_score = np.array(benchmark_result.binary_classification_stats.scores)
+            y_score = np.nan_to_num(
+                y_score,
+                nan=0.0,
+                posinf=max(y_score[np.isfinite(y_score)]),
+                neginf=min(y_score[np.isfinite(y_score)]),
+            )
             precision, recall, thresh = precision_recall_curve(
                 benchmark_result.binary_classification_stats.labels,
-                benchmark_result.binary_classification_stats.scores,
+                y_score,
             )
             precision_recall_auc = auc(recall, precision)
             plt.plot(

{pheval-0.4.2 → pheval-0.4.4}/src/pheval/analyse/variant_prioritisation_analysis.py RENAMED Viewed

@@ -44,12 +44,16 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
             )
             result = (
                 self.conn.execute(
-                    f"SELECT * FROM '{standardised_variant_result_path}' "
-                    f"WHERE "
-                    f"chromosome == '{causative_variant.chrom}' AND "
-                    f"start == {causative_variant.pos} AND "
-                    f"ref == '{causative_variant.ref}' AND "
-                    f"alt == '{causative_variant.alt}'"
+                    (
+                        f"SELECT * FROM '{standardised_variant_result_path}' "
+                        f"WHERE "
+                        f"chromosome == '{causative_variant.chrom}' AND "
+                        f"start == {causative_variant.pos} AND "
+                        f"ref == '{causative_variant.ref}' AND "
+                        f"alt == '{causative_variant.alt}'"
+                    )
+                    if standardised_variant_result_path.exists()
+                    else "SELECT NULL WHERE FALSE"
                 )
                 .fetchdf()
                 .to_dict(orient="records")
@@ -66,10 +70,15 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (variant_match, primary_key),
                 )
+            elif len(result) == 0:
+                relevant_ranks.append(0)
         binary_classification_stats.add_classification(
-            self.db_connection.parse_table_into_dataclass(
-                str(standardised_variant_result_path), RankedPhEvalVariantResult
+            (
+                self.db_connection.parse_table_into_dataclass(
+                    str(standardised_variant_result_path), RankedPhEvalVariantResult
+                )
+                if standardised_variant_result_path.exists()
+                else []
             ),
             relevant_ranks,
         )

{pheval-0.4.2 → pheval-0.4.4}/src/pheval/prepare/prepare_corpus.py RENAMED Viewed

@@ -90,6 +90,10 @@ def prepare_corpus(
             )
         else:
             # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory
-            shutil.copy(
-                phenopacket_path, output_dir.joinpath(f"phenopackets/{phenopacket_path.name}")
+            (
+                shutil.copy(
+                    phenopacket_path, output_dir.joinpath(f"phenopackets/{phenopacket_path.name}")
+                )
+                if phenopacket_path != output_dir.joinpath(f"phenopackets/{phenopacket_path.name}")
+                else None
             )