PyPI - pheval - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

pheval 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pheval might be problematic. Click here for more details.

Files changed (12) hide show

pheval/analyse/benchmark_db_manager.py CHANGED Viewed

@@ -75,6 +75,7 @@ class BenchmarkDBManager:
                 `False` otherwise.
         """
         list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
+        entity = entity.replace("nan", "None").replace("NaN", "None")
         if list_pattern.match(str(entity)):
             list_representation = ast.literal_eval(entity)
             if isinstance(list_representation, list):

pheval/analyse/disease_prioritisation_analysis.py CHANGED Viewed

@@ -38,11 +38,16 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
         for _i, row in df.iterrows():
             result = (
                 self.conn.execute(
-                    f"SELECT * FROM '{standardised_disease_result_path}' "
-                    f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
-                    f" '{row['disease_identifier']}') "
-                    f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
-                    f"'{row['disease_name']}')"
+                    (
+                        f"SELECT * FROM '{standardised_disease_result_path}' "
+                        f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
+                        f" '{row['disease_identifier']}') "
+                        f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
+                        f"'{row['disease_name']}')"
+                    )
+                    if standardised_disease_result_path.exists()
+                    and standardised_disease_result_path.stat().st_size > 0
+                    else "SELECT NULL WHERE FALSE"
                 )
                 .fetchdf()
                 .to_dict(orient="records")
@@ -56,9 +61,15 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (disease_match, primary_key),
                 )
+            elif len(result) == 0:
+                relevant_ranks.append(0)
         binary_classification_stats.add_classification(
-            self.db_connection.parse_table_into_dataclass(
-                str(standardised_disease_result_path), RankedPhEvalDiseaseResult
+            (
+                self.db_connection.parse_table_into_dataclass(
+                    str(standardised_disease_result_path), RankedPhEvalDiseaseResult
+                )
+                if standardised_disease_result_path.exists()
+                else []
             ),
             relevant_ranks,
         )

pheval/analyse/gene_prioritisation_analysis.py CHANGED Viewed

@@ -36,11 +36,16 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
         for _i, row in df.iterrows():
             result = (
                 self.conn.execute(
-                    f"SELECT * FROM '{standardised_gene_result_path}' "
-                    f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR),"
-                    f" '{row['gene_identifier']}') "
-                    f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
-                    f"'{row['gene_symbol']}')"
+                    (
+                        f"SELECT * FROM '{standardised_gene_result_path}' "
+                        f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR), "
+                        f"'{row['gene_identifier']}') "
+                        f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
+                        f"'{row['gene_symbol']}')"
+                    )
+                    if standardised_gene_result_path.exists()
+                    and standardised_gene_result_path.stat().st_size > 0
+                    else "SELECT NULL WHERE FALSE"
                 )
                 .fetchdf()
                 .to_dict(orient="records")
@@ -53,9 +58,15 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (gene_match, primary_key),
                 )
+            if not result:
+                relevant_ranks.append(0)
         binary_classification_stats.add_classification(
-            self.db_connection.parse_table_into_dataclass(
-                str(standardised_gene_result_path), RankedPhEvalGeneResult
+            (
+                self.db_connection.parse_table_into_dataclass(
+                    str(standardised_gene_result_path), RankedPhEvalGeneResult
+                )
+                if standardised_gene_result_path.exists()
+                else []
             ),
             relevant_ranks,
         )

pheval/analyse/generate_plots.py CHANGED Viewed

@@ -2,6 +2,7 @@ from pathlib import Path
 from typing import List
 import matplotlib
+import numpy as np
 import pandas as pd
 import seaborn as sns
 from matplotlib import pyplot as plt
@@ -368,9 +369,16 @@ class PlotGenerator:
         """
         plt.clf()
         for i, benchmark_result in enumerate(benchmarking_results):
+            y_score = np.array(benchmark_result.binary_classification_stats.scores)
+            y_score = np.nan_to_num(
+                y_score,
+                nan=0.0,
+                posinf=max(y_score[np.isfinite(y_score)]),
+                neginf=min(y_score[np.isfinite(y_score)]),
+            )
             fpr, tpr, thresh = roc_curve(
                 benchmark_result.binary_classification_stats.labels,
-                benchmark_result.binary_classification_stats.scores,
+                y_score,
                 pos_label=1,
             )
             roc_auc = auc(fpr, tpr)
@@ -411,9 +419,16 @@ class PlotGenerator:
         plt.clf()
         plt.figure()
         for i, benchmark_result in enumerate(benchmarking_results):
+            y_score = np.array(benchmark_result.binary_classification_stats.scores)
+            y_score = np.nan_to_num(
+                y_score,
+                nan=0.0,
+                posinf=max(y_score[np.isfinite(y_score)]),
+                neginf=min(y_score[np.isfinite(y_score)]),
+            )
             precision, recall, thresh = precision_recall_curve(
                 benchmark_result.binary_classification_stats.labels,
-                benchmark_result.binary_classification_stats.scores,
+                y_score,
             )
             precision_recall_auc = auc(recall, precision)
             plt.plot(

pheval/analyse/variant_prioritisation_analysis.py CHANGED Viewed

@@ -44,12 +44,16 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
             )
             result = (
                 self.conn.execute(
-                    f"SELECT * FROM '{standardised_variant_result_path}' "
-                    f"WHERE "
-                    f"chromosome == '{causative_variant.chrom}' AND "
-                    f"start == {causative_variant.pos} AND "
-                    f"ref == '{causative_variant.ref}' AND "
-                    f"alt == '{causative_variant.alt}'"
+                    (
+                        f"SELECT * FROM '{standardised_variant_result_path}' "
+                        f"WHERE "
+                        f"chromosome == '{causative_variant.chrom}' AND "
+                        f"start == {causative_variant.pos} AND "
+                        f"ref == '{causative_variant.ref}' AND "
+                        f"alt == '{causative_variant.alt}'"
+                    )
+                    if standardised_variant_result_path.exists()
+                    else "SELECT NULL WHERE FALSE"
                 )
                 .fetchdf()
                 .to_dict(orient="records")
@@ -66,10 +70,15 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (variant_match, primary_key),
                 )
+            elif len(result) == 0:
+                relevant_ranks.append(0)
         binary_classification_stats.add_classification(
-            self.db_connection.parse_table_into_dataclass(
-                str(standardised_variant_result_path), RankedPhEvalVariantResult
+            (
+                self.db_connection.parse_table_into_dataclass(
+                    str(standardised_variant_result_path), RankedPhEvalVariantResult
+                )
+                if standardised_variant_result_path.exists()
+                else []
             ),
             relevant_ranks,
         )

pheval/prepare/prepare_corpus.py CHANGED Viewed

@@ -57,6 +57,11 @@ def prepare_corpus(
                     f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
                 )
                 continue
+            elif phenopacket_util.check_variant_alleles():
+                info_log.warning(
+                    f"Removed {phenopacket_path.name} from the corpus due to identical "
+                    "reference and alternate allele fields."
+                )
         if gene_analysis:
             if phenopacket_util.check_incomplete_gene_record():
                 info_log.warning(

pheval/utils/phenopacket_utils.py CHANGED Viewed

@@ -503,6 +503,19 @@ class PhenopacketUtil:
                 return True
         return False
+    def check_variant_alleles(self) -> bool:
+        """
+        Check if any variant record in the phenopacket has identical reference and alternate alleles.
+        Returns:
+            bool: True if the reference and alternate alleles are identical, False otherwise.
+        """
+        variants = self.diagnosed_variants()
+        for variant in variants:
+            if variant.ref == variant.alt:
+                return True
+        return False
     def check_incomplete_gene_record(self) -> bool:
         """
         Check if any gene record in the phenopacket has incomplete information.

{pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: pheval
-Version: 0.4.1
+Version: 0.4.3
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk

{pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/RECORD RENAMED Viewed

@@ -2,20 +2,20 @@ pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/analyse/analysis.py,sha256=Yt2xH0WS_2NO13-wYvywzmCRCj8RinQ1MeozJQuGe3o,4009
 pheval/analyse/assess_prioritisation_base.py,sha256=znBscRTqIKxxZMHR-H6KrjFJ6Uv5P5HzwTQUWS6Eoos,3434
-pheval/analyse/benchmark_db_manager.py,sha256=RaAnvq5Tfvsw8161iQUe_P146X98BckMRhMq6BibwNI,5111
+pheval/analyse/benchmark_db_manager.py,sha256=R1GstKKoh4PIPupyAarm7DhZyOdVDVtyY4A1nPNTkDs,5181
 pheval/analyse/benchmark_generator.py,sha256=-LljszuKAT3oJfGQn7JHAILCGg5QXYny4nPPf273g_E,5896
 pheval/analyse/benchmarking_data.py,sha256=aRvDmwqjFGKvWDRGjMwaQxfDZscptRBwI-rcSqY-X5s,913
 pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
-pheval/analyse/disease_prioritisation_analysis.py,sha256=1Ut4u6p9mDGbGBoXrVxTuUgv_nrqxzBhq5N9jbGWRWs,5725
-pheval/analyse/gene_prioritisation_analysis.py,sha256=_7r16BC0S2H9rOY016mLLUg6PcxaCQOh5FQcNKFTy-4,5582
-pheval/analyse/generate_plots.py,sha256=rMSdgawGYYc7BAAolqz73TAnanKtrxCC48bI6WQq6xc,21455
+pheval/analyse/disease_prioritisation_analysis.py,sha256=mOIAiz_WzWQBP-3g8Twv-ii8ZxbXktAmgi6zl4sJaC8,6182
+pheval/analyse/gene_prioritisation_analysis.py,sha256=wPH-mCW_KY3XxrwAvw-ucxLbWzUKBucO4zSo3XqBsaY,6022
+pheval/analyse/generate_plots.py,sha256=5oxsdnAbbVgQj8ZrWTLs12rSM24EXp-IdLCjy5QB1_g,21992
 pheval/analyse/generate_summary_outputs.py,sha256=nKqwbpA-9bbL5mCySiuyV_AUDIokmCg3vD8_JAsg1ls,4157
 pheval/analyse/parse_benchmark_summary.py,sha256=vyAOIdIWF4rZjGTPFE69ajhEC9AkkN3QBVqSe_uYZsg,2946
 pheval/analyse/parse_corpus.py,sha256=N88enptR4qG6cmqXU_TKg8DMmCeFog37eeK5nFEMQOQ,8678
 pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
 pheval/analyse/rank_stats.py,sha256=53ZickUtQlctYsorAIUwlCX7M6UC-wCxoV1MbL6F9gc,17987
 pheval/analyse/run_data_parser.py,sha256=VQBUoOIRYRWc5uqURUvaWdaW3E3C7Su0JvLavQLHQaY,4105
-pheval/analyse/variant_prioritisation_analysis.py,sha256=LPEZDhFfzx-sQl9g8pM700l4Zzpy6qdOS68vOV2mZA0,6020
+pheval/analyse/variant_prioritisation_analysis.py,sha256=8ntbALzAAfJ4ijviQKvLiwKBeRvTLWI0Tb9dpnCYxhs,6404
 pheval/cli.py,sha256=EBGh6TIxAiWs0eDdQiefq6YuD0mb93siGsNmsVO1j7c,1527
 pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
 pheval/cli_pheval_utils.py,sha256=O6tWnE85QQHGNcP08OwJGANMfXJPsZtFEu-D6ATld00,16700
@@ -29,7 +29,7 @@ pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pheval/prepare/create_noisy_phenopackets.py,sha256=ydhA4mpqKTDc4hBu8YfvNW2nMubHK3dbO-cv0lA4JFQ,11504
 pheval/prepare/create_spiked_vcf.py,sha256=90A-Mi8QKhvN036vtFEVWAHgzHO37itiLYrqYlG4LiA,23953
 pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
-pheval/prepare/prepare_corpus.py,sha256=eRvozzezIgAqHAumtqul0WfXfBO1iOBaSlN8fPSn0Nw,4223
+pheval/prepare/prepare_corpus.py,sha256=mwI3FJJ6GGSbmbvz4enrVP86MBoHSssMIRebkapmu5Y,4484
 pheval/prepare/update_phenopacket.py,sha256=21fzUPbwKN6Ey5TSh9PFzjT2x86U19RAE6WmkjG8u28,4770
 pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
 pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
@@ -47,11 +47,11 @@ pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
 pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
 pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
 pheval/utils/file_utils.py,sha256=m21cz-qjDYqnI8ClUv3J9fKizex98a-9bSEerQ75i_c,3576
-pheval/utils/phenopacket_utils.py,sha256=W9T_X48EJ-xn5GghzbZlt-lI-DxWoSm7_SHr8DCJg2Q,26856
+pheval/utils/phenopacket_utils.py,sha256=6xQ8WCLdR1VhiU3nCDzaqEVKjGvDWrzvPA50_6ZAHXM,27310
 pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
 pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
-pheval-0.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-pheval-0.4.1.dist-info/METADATA,sha256=H14Zz0k7MLDs1eEryeaXjCfNUm1FqAyv0M1mXhXq740,6469
-pheval-0.4.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-pheval-0.4.1.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
-pheval-0.4.1.dist-info/RECORD,,
+pheval-0.4.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+pheval-0.4.3.dist-info/METADATA,sha256=Y6dy74TNnyirSwcpgG9ktJopwoVEZWlixSR4CJOZfDo,6469
+pheval-0.4.3.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
+pheval-0.4.3.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
+pheval-0.4.3.dist-info/RECORD,,

{pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 1.9.1
+Generator: poetry-core 2.0.0
 Root-Is-Purelib: true
 Tag: py3-none-any

{pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

pheval 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

Potentially problematic release.

pheval 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl