pheval 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Note: this release of pheval has been flagged as potentially problematic; see its registry listing for details.
- pheval/analyse/benchmark.py +12 -23
- pheval/analyse/benchmark_output_type.py +3 -5
- pheval/analyse/binary_classification_curves.py +3 -9
- pheval/analyse/binary_classification_stats.py +1 -4
- pheval/analyse/generate_plots.py +8 -18
- pheval/analyse/generate_rank_comparisons.py +1 -2
- pheval/analyse/rank_stats.py +8 -25
- pheval/analyse/run_data_parser.py +15 -9
- pheval/cli.py +1 -1
- pheval/cli_pheval_utils.py +10 -23
- pheval/config_parser.py +1 -1
- pheval/implementations/__init__.py +3 -5
- pheval/infra/exomiserdb.py +7 -15
- pheval/post_processing/phenopacket_truth_set.py +10 -31
- pheval/post_processing/post_processing.py +12 -33
- pheval/post_processing/validate_result_format.py +2 -4
- pheval/prepare/create_noisy_phenopackets.py +18 -29
- pheval/prepare/create_spiked_vcf.py +25 -56
- pheval/prepare/custom_exceptions.py +6 -7
- pheval/prepare/prepare_corpus.py +6 -17
- pheval/prepare/update_phenopacket.py +6 -17
- pheval/utils/docs_gen.py +3 -3
- pheval/utils/file_utils.py +1 -2
- pheval/utils/phenopacket_utils.py +41 -73
- pheval/utils/semsim_utils.py +6 -10
- pheval/utils/utils.py +3 -4
- {pheval-0.6.2.dist-info → pheval-0.6.4.dist-info}/METADATA +1 -1
- pheval-0.6.4.dist-info/RECORD +57 -0
- pheval-0.6.2.dist-info/RECORD +0 -57
- {pheval-0.6.2.dist-info → pheval-0.6.4.dist-info}/LICENSE +0 -0
- {pheval-0.6.2.dist-info → pheval-0.6.4.dist-info}/WHEEL +0 -0
- {pheval-0.6.2.dist-info → pheval-0.6.4.dist-info}/entry_points.txt +0 -0
pheval/analyse/benchmark.py
CHANGED
@@ -1,6 +1,6 @@
+import sys
 import time
 from pathlib import Path
-from typing import List, Tuple

 import duckdb
 import polars as pl
@@ -53,8 +53,8 @@ def scan_directory(run: RunConfig, benchmark_type: BenchmarkOutputType) -> pl.LazyFrame:


 def process_stats(
-    runs: List[RunConfig], benchmark_type: BenchmarkOutputType
-) -> Tuple[pl.DataFrame, pl.DataFrame, pl.DataFrame]:
+    runs: list[RunConfig], benchmark_type: BenchmarkOutputType
+) -> tuple[pl.DataFrame, pl.DataFrame, pl.DataFrame]:
     """
     Processes stats outputs for specified runs to compare.
     Args:
@@ -74,9 +74,7 @@ def process_stats(
         curve_results.append(compute_curves(run.run_identifier, result_scan))
         true_positive_cases.append(
             result_scan.filter(pl.col("true_positive"))
-            .select(
-                ["result_file", *benchmark_type.columns, pl.col("rank").alias(run.run_identifier)]
-            )
+            .select(["result_file", *benchmark_type.columns, pl.col("rank").alias(run.run_identifier)])
             .sort(["result_file", *benchmark_type.columns])
         )
     return (
@@ -86,11 +84,7 @@ def process_stats(
         [true_positive_cases[0]]
         + [
             df.select(
-                [
-                    col
-                    for col in df.collect_schema().keys()
-                    if col not in ["result_file", *benchmark_type.columns]
-                ]
+                [col for col in df.collect_schema().keys() if col not in ["result_file", *benchmark_type.columns]]
             )
             for df in true_positive_cases[1:]
         ],
@@ -108,20 +102,14 @@ def benchmark(config: Config, benchmark_type: BenchmarkOutputType) -> None:
     """
     conn = duckdb.connect(f"{config.benchmark_name}.duckdb")
     stats, curve_results, true_positive_cases = process_stats(config.runs, benchmark_type)
-    write_table(
-        conn, stats, f"{config.benchmark_name}_{benchmark_type.prioritisation_type_string}_summary"
-    )
+    write_table(conn, stats, f"{config.benchmark_name}_{benchmark_type.prioritisation_type_string}_summary")
     write_table(
         conn,
         curve_results,
         f"{config.benchmark_name}_{benchmark_type.prioritisation_type_string}_binary_classification_curves",
     )
-    calculate_rank_changes(
-        conn, [run.run_identifier for run in config.runs], true_positive_cases, benchmark_type
-    )
-    generate_plots(
-        config.benchmark_name, stats, curve_results, benchmark_type, config.plot_customisation
-    )
+    calculate_rank_changes(conn, [run.run_identifier for run in config.runs], true_positive_cases, benchmark_type)
+    generate_plots(config.benchmark_name, stats, curve_results, benchmark_type, config.plot_customisation)
     conn.close()


@@ -135,6 +123,9 @@ def benchmark_runs(benchmark_config_file: Path) -> None:
     start_time = time.perf_counter()
     logger.info("Initiated benchmarking process.")
     config = parse_run_config(benchmark_config_file)
+    if Path(f"{config.benchmark_name}.duckdb").exists():
+        logger.error(f"{config.benchmark_name}.duckdb already exists! Exiting.")
+        sys.exit(1)
     gene_analysis_runs = [run for run in config.runs if run.gene_analysis]
     variant_analysis_runs = [run for run in config.runs if run.variant_analysis]
     disease_analysis_runs = [run for run in config.runs if run.disease_analysis]
@@ -171,6 +162,4 @@ def benchmark_runs(benchmark_config_file: Path) -> None:
         BenchmarkOutputTypeEnum.DISEASE.value,
     )
     logger.info("Finished benchmarking for disease results.")
-    logger.info(
-        f"Finished benchmarking! Total time: {time.perf_counter() - start_time:.2f} seconds."
-    )
+    logger.info(f"Finished benchmarking! Total time: {time.perf_counter() - start_time:.2f} seconds.")
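The one behavioural change in this file is the new guard in benchmark_runs: benchmarking now refuses to start if a DuckDB results database with the configured benchmark name already exists. A minimal sketch of that behaviour, using a hypothetical standalone helper rather than pheval's own function:

import sys
from pathlib import Path


def ensure_fresh_benchmark_db(benchmark_name: str) -> None:
    # Mirrors the guard added to benchmark_runs(): never overwrite an existing
    # <benchmark_name>.duckdb results database; exit with a non-zero status instead.
    db_file = Path(f"{benchmark_name}.duckdb")
    if db_file.exists():
        print(f"{db_file} already exists! Exiting.", file=sys.stderr)
        sys.exit(1)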
pheval/analyse/benchmark_output_type.py
CHANGED
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import List, NamedTuple
+from typing import NamedTuple


 class BenchmarkOutputType(NamedTuple):
@@ -15,7 +15,7 @@ class BenchmarkOutputType(NamedTuple):

     prioritisation_type_string: str
     y_label: str
-    columns: List[str]
+    columns: list[str]
     result_directory: str


@@ -35,9 +35,7 @@ class BenchmarkOutputTypeEnum(Enum):
         ["gene_identifier", "gene_symbol"],
         "pheval_gene_results",
     )
-    VARIANT = BenchmarkOutputType(
-        "variant", "Disease-causing variants (%)", ["variant_id"], "pheval_variant_results"
-    )
+    VARIANT = BenchmarkOutputType("variant", "Disease-causing variants (%)", ["variant_id"], "pheval_variant_results")
     DISEASE = BenchmarkOutputType(
         "disease",
         "Known diseases (%)",
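The edits here are the typing clean-up applied across the release: built-in generics such as list[str] replace typing.List, so only NamedTuple still needs importing. Reassembled from the hunks above (other enum members omitted), the relevant part of the file looks roughly like this:

from enum import Enum
from typing import NamedTuple


class BenchmarkOutputType(NamedTuple):
    prioritisation_type_string: str
    y_label: str
    columns: list[str]  # built-in generic, no typing.List import needed
    result_directory: str


class BenchmarkOutputTypeEnum(Enum):
    VARIANT = BenchmarkOutputType(
        "variant", "Disease-causing variants (%)", ["variant_id"], "pheval_variant_results"
    )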
pheval/analyse/binary_classification_curves.py
CHANGED
@@ -1,5 +1,3 @@
-from typing import Tuple
-
 import numpy as np
 import polars as pl
 from sklearn.metrics import precision_recall_curve, roc_curve
@@ -11,7 +9,7 @@ class BinaryClassificationCurves:
     """Class for computing and storing ROC & Precision-Recall curves in Polars."""

     @staticmethod
-    def _compute_finite_bounds(result_scan: pl.LazyFrame) -> Tuple[float, float]:
+    def _compute_finite_bounds(result_scan: pl.LazyFrame) -> tuple[float, float]:
         """
         Compute min and max finite values in the 'score' column to handle NaN and Inf values.
         Args:
@@ -32,9 +30,7 @@ class BinaryClassificationCurves:
         )

     @staticmethod
-    def _clean_and_extract_data(
-        result_scan: pl.LazyFrame, max_finite: float, min_finite: float
-    ) -> pl.LazyFrame:
+    def _clean_and_extract_data(result_scan: pl.LazyFrame, max_finite: float, min_finite: float) -> pl.LazyFrame:
         """
         Normalise the 'score' column (handling NaNs and Inf values) and extract 'true_positive' labels.

@@ -64,9 +60,7 @@ class BinaryClassificationCurves:
         )

     @staticmethod
-    def _compute_roc_pr_curves(
-        run_identifier: str, labels: np.ndarray, scores: np.ndarray
-    ) -> pl.LazyFrame:
+    def _compute_roc_pr_curves(run_identifier: str, labels: np.ndarray, scores: np.ndarray) -> pl.LazyFrame:
         """
         Compute ROC and Precision-Recall curves.

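The changes above are signature reflows and the Tuple-to-tuple swap; the curve computation itself is unchanged. For readers unfamiliar with the scikit-learn calls this module wraps, a self-contained sketch with synthetic labels and scores (not pheval's own wrapper):

import numpy as np
from sklearn.metrics import precision_recall_curve, roc_curve

labels = np.array([1, 0, 1, 1, 0])             # true_positive flags per candidate
scores = np.array([0.9, 0.4, 0.75, 0.6, 0.2])  # prioritisation scores

fpr, tpr, _ = roc_curve(labels, scores)                        # ROC curve points
precision, recall, _ = precision_recall_curve(labels, scores)  # Precision-Recall curve points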
pheval/analyse/binary_classification_stats.py
CHANGED
@@ -103,10 +103,7 @@ class BinaryClassificationStats:
     )

     F1_SCORE = (
-        pl.when(
-            2 * (pl.col("true_positives") + pl.col("false_positives") + pl.col("false_negatives"))
-            != 0
-        )
+        pl.when(2 * (pl.col("true_positives") + pl.col("false_positives") + pl.col("false_negatives")) != 0)
         .then(
             2
             * pl.col("true_positives")
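The F1 expression is only reflowed; the pl.when(...) guard still protects against a zero denominator (all counts zero). In plain Python the statistic it computes is the standard F1 score; a scalar sketch, noting that the tail of the Polars expression continues beyond this hunk:

def f1_score(true_positives: int, false_positives: int, false_negatives: int) -> float:
    # F1 = 2TP / (2TP + FP + FN), defined as 0.0 when there are no positives at all.
    denominator = 2 * true_positives + false_positives + false_negatives
    return 2 * true_positives / denominator if denominator != 0 else 0.0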
pheval/analyse/generate_plots.py
CHANGED
@@ -1,5 +1,6 @@
 from enum import Enum
 from pathlib import Path
+from typing import ClassVar

 import duckdb
 import matplotlib
@@ -32,7 +33,7 @@ class PlotTypes(Enum):
 class PlotGenerator:
     """Class to generate plots."""

-    palette_hex_codes = [
+    palette_hex_codes: ClassVar[list[str]] = [
         "#f4ae3d",
         "#ee5825",
         "#2b7288",
@@ -91,9 +92,7 @@ class PlotGenerator:
             {"run_identifier": "Run", "mrr": "Percentage"}
         )

-    def _save_fig(
-        self, benchmark_output_type: BenchmarkOutputType, y_lower_limit: int, y_upper_limit: int
-    ) -> None:
+    def _save_fig(self, benchmark_output_type: BenchmarkOutputType, y_lower_limit: int, y_upper_limit: int) -> None:
         """
         Save the generated figure.
         Args:
@@ -140,9 +139,7 @@ class PlotGenerator:
             legend=False,
             edgecolor="white",
         )
-        plt.title(
-            f"{benchmark_output_type.prioritisation_type_string.capitalize()} results - mean reciprocal rank"
-        )
+        plt.title(f"{benchmark_output_type.prioritisation_type_string.capitalize()} results - mean reciprocal rank")
         self._save_fig(benchmark_output_type, 0, 1)

     @staticmethod
@@ -189,17 +186,13 @@ class PlotGenerator:
         plt.title(plot_customisation.rank_plot_title, loc="center", fontsize=15)
         self._save_fig(benchmark_output_type, 0, 1)

-    def _generate_non_cumulative_bar_plot_data(
-        self, benchmarking_results_df: pl.DataFrame
-    ) -> pl.DataFrame:
+    def _generate_non_cumulative_bar_plot_data(self, benchmarking_results_df: pl.DataFrame) -> pl.DataFrame:
         """
         Generate data in the correct format for dataframe creation for a non-cumulative bar plot,
         appending to the self.stats attribute of the class.
         """
         return self._generate_stacked_data(benchmarking_results_df).hstack(
-            self._extract_mrr_data(benchmarking_results_df).select(
-                pl.col("Percentage").alias("MRR")
-            )
+            self._extract_mrr_data(benchmarking_results_df).select(pl.col("Percentage").alias("MRR"))
         )

     def generate_cumulative_bar(
@@ -309,9 +302,7 @@ def generate_plots(
     This method generates summary statistics bar plots based on the provided benchmarking results and plot type.
     """
     plot_generator = PlotGenerator(benchmark_name)
-    plot_customisation_type = getattr(
-        plot_customisation, f"{benchmark_output_type.prioritisation_type_string}_plots"
-    )
+    plot_customisation_type = getattr(plot_customisation, f"{benchmark_output_type.prioritisation_type_string}_plots")
     logger.info("Generating ROC curve visualisations.")
     plot_generator.generate_roc_curve(curves, benchmark_output_type, plot_customisation_type)
     logger.info("Generating Precision-Recall curves visualisations.")
@@ -355,8 +346,7 @@ def generate_plots_from_db(db_path: Path, config: Path) -> None:
         }
     for benchmark_output_type in BenchmarkOutputTypeEnum:
         summary_table = (
-            f"{benchmark_config_file.benchmark_name}_"
-            f"{benchmark_output_type.value.prioritisation_type_string}_summary"
+            f"{benchmark_config_file.benchmark_name}_{benchmark_output_type.value.prioritisation_type_string}_summary"
         )
         curve_table = (
             f"{benchmark_config_file.benchmark_name}_"
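Besides line reflows, the notable change here is the ClassVar annotation on the shared colour palette, which makes explicit that the list is a class-level constant rather than a per-instance attribute (and satisfies linters such as Ruff's mutable-class-default rule). A trimmed illustration:

from typing import ClassVar


class PlotGenerator:
    # One palette shared by every PlotGenerator instance; ClassVar documents
    # (and lets type checkers enforce) that this is not an instance attribute.
    palette_hex_codes: ClassVar[list[str]] = ["#f4ae3d", "#ee5825", "#2b7288"]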
pheval/analyse/generate_rank_comparisons.py
CHANGED
@@ -1,5 +1,4 @@
 from itertools import combinations
-from typing import List

 import polars as pl
 from duckdb.duckdb import DuckDBPyConnection
@@ -11,7 +10,7 @@ from pheval.utils.logger import get_logger

 def calculate_rank_changes(
     conn: DuckDBPyConnection,
-    run_identifiers: List[str],
+    run_identifiers: list[str],
     true_positive_cases: pl.DataFrame,
     benchmark_type: BenchmarkOutputType,
 ) -> None:
pheval/analyse/rank_stats.py
CHANGED
@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import List

 import numpy as np
 import polars as pl
@@ -81,11 +80,7 @@ class Ranks:
         """
         precision_expr = pl.col(f"top{k}") / (pl.col("number_of_samples") * k)
         recall_expr = pl.col(f"top{k}") / pl.col("total")
-        return (
-            ((2 * precision_expr * recall_expr) / (precision_expr + recall_expr))
-            .fill_nan(0)
-            .alias(f"f_beta@{k}")
-        )
+        return ((2 * precision_expr * recall_expr) / (precision_expr + recall_expr)).fill_nan(0).alias(f"f_beta@{k}")

     @classmethod
     def _average_precision_at_k(cls, df: pl.LazyFrame, k: int) -> pl.LazyFrame:
@@ -103,9 +98,7 @@ class Ranks:
         filtered_df = cls._filter_results(df, k)
         df_grouped = filtered_df.with_columns(
             pl.struct("ranks")
-            .map_elements(
-                lambda row: cls._compute_ap_k(np.array(row["ranks"])), return_dtype=pl.Float64
-            )
+            .map_elements(lambda row: cls._compute_ap_k(np.array(row["ranks"])), return_dtype=pl.Float64)
             .alias(f"ap@{k}")
         )
         return df_grouped.select(["file_path", f"ap@{k}"])
@@ -131,7 +124,7 @@ class Ranks:
         return ap_sum / num_samples

     @classmethod
-    def _calculate_ndcg_at_k(cls, ranks: List[int], k: int) -> float:
+    def _calculate_ndcg_at_k(cls, ranks: list[int], k: int) -> float:
         """
         Compute NDCG@K for a single query.
         Args:
@@ -146,9 +139,7 @@ class Ranks:
         result_ranks[valid_indices] = 3
         ideal_ranking = np.sort(result_ranks)[::-1]
         return (
-            ndcg_score(result_ranks.reshape(1, -1), ideal_ranking.reshape(1, -1))
-            if np.sum(result_ranks) > 0
-            else 0.0
+            ndcg_score(result_ranks.reshape(1, -1), ideal_ranking.reshape(1, -1)) if np.sum(result_ranks) > 0 else 0.0
         )

     @classmethod
@@ -156,9 +147,7 @@ class Ranks:
         filtered_df = cls._filter_results(df, k)
         ndcg_df = filtered_df.with_columns(
             pl.struct("ranks")
-            .map_elements(
-                lambda row: cls._calculate_ndcg_at_k(row["ranks"], k), return_dtype=pl.Float64
-            )
+            .map_elements(lambda row: cls._calculate_ndcg_at_k(row["ranks"], k), return_dtype=pl.Float64)
             .alias(f"NDCG@{k}")
         )
         ndcg_sum = ndcg_df.select(pl.col(f"NDCG@{k}").sum()).collect().item()
@@ -218,14 +207,8 @@ def compute_rank_stats(run_identifier: str, result_scan: pl.LazyFrame) -> pl.LazyFrame:
             pl.lit(Ranks.mean_average_precision_at_k(true_positive_scan, 3)).alias("MAP@3"),
             pl.lit(Ranks.mean_average_precision_at_k(true_positive_scan, 5)).alias("MAP@5"),
             pl.lit(Ranks.mean_average_precision_at_k(true_positive_scan, 10)).alias("MAP@10"),
-            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 3)).alias(
-                "NDCG@3"
-            ),
-            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 5)).alias(
-                "NDCG@5"
-            ),
-            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 10)).alias(
-                "NDCG@10"
-            ),
+            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 3)).alias("NDCG@3"),
+            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 5)).alias("NDCG@5"),
+            pl.lit(Ranks.mean_normalised_discounted_cumulative_gain(true_positive_scan, 10)).alias("NDCG@10"),
         ]
     )
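The f_beta@k expression above is the harmonic mean of precision@k and recall@k, with the 0/0 case mapped to 0 via fill_nan(0). A scalar sketch with a worked example (helper name and numbers are illustrative only):

def f_beta_at_k(top_k: int, number_of_samples: int, total: int, k: int) -> float:
    # precision@k and recall@k as in the Polars expression above.
    precision = top_k / (number_of_samples * k)
    recall = top_k / total
    return 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0


# e.g. 30 hits in the top 3 over 50 samples with 60 causal entities in total:
# precision@3 = 30/150 = 0.2, recall@3 = 30/60 = 0.5, f_beta@3 = 2*0.2*0.5/0.7 ≈ 0.286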
pheval/analyse/run_data_parser.py
CHANGED
@@ -1,5 +1,4 @@
 from pathlib import Path
-from typing import List, Optional

 import yaml
 from pydantic import BaseModel, field_validator
@@ -28,8 +27,8 @@ class RunConfig(BaseModel):
     gene_analysis: bool
     variant_analysis: bool
     disease_analysis: bool
-    threshold: Optional[float]
-    score_order: Optional[str]
+    threshold: float | None
+    score_order: str | None

     @field_validator("threshold", mode="before")
     @classmethod
@@ -41,6 +40,13 @@ class RunConfig(BaseModel):
     def set_score_order(cls, score_order):
         return score_order or "descending"

+    @field_validator("results_dir", mode="after")
+    @classmethod
+    def check_results_dir_exists(cls, results_dir: Path):
+        if not results_dir.exists():
+            raise FileNotFoundError(f"The specified results directory does not exist: {results_dir}")
+        return results_dir
+

 class SinglePlotCustomisation(BaseModel):
     """
@@ -53,10 +59,10 @@ class SinglePlotCustomisation(BaseModel):
         precision_recall_title (str): The title for the precision-recall plot.
     """

-    plot_type: Optional[str] = "bar_cumulative"
-    rank_plot_title: Optional[str]
-    roc_curve_title: Optional[str]
-    precision_recall_title: Optional[str]
+    plot_type: str | None = "bar_cumulative"
+    rank_plot_title: str | None
+    roc_curve_title: str | None
+    precision_recall_title: str | None

     @field_validator("plot_type", mode="before")
     @classmethod
@@ -86,7 +92,7 @@ class Config(BaseModel):
     """

     benchmark_name: str
-    runs: List[RunConfig]
+    runs: list[RunConfig]
     plot_customisation: PlotCustomisation


@@ -100,7 +106,7 @@ def parse_run_config(run_config: Path) -> Config:
     """
     logger = get_logger()
     logger.info(f"Loading benchmark configuration from {run_config}")
-    with open(run_config, "r") as f:
+    with open(run_config) as f:
         config_data = yaml.safe_load(f)
         f.close()
     config = Config(**config_data)
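The main addition is the results_dir validator, so a missing results directory now fails at configuration-parsing time rather than partway through benchmarking. A reduced sketch of the pattern, showing only the field the validator needs:

from pathlib import Path

from pydantic import BaseModel, field_validator


class RunConfig(BaseModel):
    results_dir: Path  # other required fields are omitted in this sketch

    @field_validator("results_dir", mode="after")
    @classmethod
    def check_results_dir_exists(cls, results_dir: Path) -> Path:
        # Fail fast: raise as soon as the config is parsed if the directory is missing.
        if not results_dir.exists():
            raise FileNotFoundError(f"The specified results directory does not exist: {results_dir}")
        return results_dir


# Constructing RunConfig(results_dir=Path("missing/")) for a non-existent path
# now raises immediately instead of failing later during benchmarking.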
pheval/cli.py
CHANGED
pheval/cli_pheval_utils.py
CHANGED
@@ -1,7 +1,6 @@
 """PhEval utils Command Line Interface"""

 from pathlib import Path
-from typing import List

 import click

@@ -39,9 +38,7 @@ from pheval.utils.utils import semsim_scramble
     "-c",
     required=True,
     multiple=True,
-    type=click.Choice(
-        ["jaccard_similarity", "dice_similarity", "phenodigm_score"], case_sensitive=False
-    ),
+    type=click.Choice(["jaccard_similarity", "dice_similarity", "phenodigm_score"], case_sensitive=False),
     help="Score column that will be scrambled",
 )
 @click.option(
@@ -54,9 +51,7 @@ from pheval.utils.utils import semsim_scramble
     help="""Scramble Magnitude (noise)
     that will be applied to semantic similarity score column (e.g. jaccard similarity).""",
 )
-def semsim_scramble_command(
-    input: Path, output: Path, score_column: List[str], scramble_factor: float
-):
+def semsim_scramble_command(input: Path, output: Path, score_column: list[str], scramble_factor: float):
     """Scrambles semsim profile multiplying score value by scramble factor
     Args:
         input (Path): Path file that points out to the semsim profile
@@ -125,9 +120,7 @@ def scramble_phenopackets_command(
     if phenopacket_path is None and phenopacket_dir is None:
         raise InputError("Either a phenopacket or phenopacket directory must be specified")
     else:
-        scramble_phenopackets(
-            output_dir, phenopacket_path, phenopacket_dir, scramble_factor, local_ontology_cache
-        )
+        scramble_phenopackets(output_dir, phenopacket_path, phenopacket_dir, scramble_factor, local_ontology_cache)


 @click.command("semsim-comparison")
@@ -149,9 +142,7 @@ def scramble_phenopackets_command(
     "--score-column",
     "-c",
     required=True,
-    type=click.Choice(
-        ["jaccard_similarity", "dice_similarity", "phenodigm_score"], case_sensitive=False
-    ),
+    type=click.Choice(["jaccard_similarity", "dice_similarity", "phenodigm_score"], case_sensitive=False),
     help="Score column that will be used in comparison",
 )
 @click.option(
@@ -232,9 +223,7 @@ def semsim_comparison(
     help="Gene identifier to add to phenopacket",
     type=click.Choice(["ensembl_id", "entrez_id", "hgnc_id"]),
 )
-def update_phenopackets_command(
-    phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path, gene_identifier: str
-):
+def update_phenopackets_command(phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path, gene_identifier: str):
     """Update gene symbols and identifiers for phenopackets."""
     if phenopacket_path is None and phenopacket_dir is None:
         raise InputError("Either a phenopacket or phenopacket directory must be specified")
@@ -313,10 +302,10 @@ def create_spiked_vcfs_command(
     phenopacket_path: Path,
     phenopacket_dir: Path,
     output_dir: Path,
-    hg19_template_vcf: Path = None,
-    hg38_template_vcf: Path = None,
-    hg19_vcf_dir: Path = None,
-    hg38_vcf_dir: Path = None,
+    hg19_template_vcf: Path | None = None,
+    hg38_template_vcf: Path | None = None,
+    hg19_vcf_dir: Path | None = None,
+    hg38_vcf_dir: Path | None = None,
 ):
     """
     Create spiked VCF from either a Phenopacket or a Phenopacket directory.
@@ -394,9 +383,7 @@ def benchmark(
     This is the path where the phenotypic database folder will be written out.""",
     type=Path,
 )
-def semsim_to_exomiserdb_command(
-    input_file: Path, object_prefix: str, subject_prefix: str, db_path: Path
-):
+def semsim_to_exomiserdb_command(input_file: Path, object_prefix: str, subject_prefix: str, db_path: Path):
     """ingests semsim file into exomiser phenotypic database

     Args:
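These hunks are line-length reflows plus explicit Path | None defaults for the optional VCF inputs. As a reminder of how the repeated click.Choice option used above behaves, a cut-down hypothetical command (not the full pheval CLI):

import click


@click.command("semsim-scramble")
@click.option(
    "--score-column",
    "-c",
    required=True,
    multiple=True,  # the flag may be passed several times; click collects the values into a tuple
    type=click.Choice(["jaccard_similarity", "dice_similarity", "phenodigm_score"], case_sensitive=False),
    help="Score column that will be scrambled",
)
def semsim_scramble_demo(score_column: tuple[str, ...]):
    click.echo(f"Scrambling columns: {', '.join(score_column)}")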
pheval/config_parser.py
CHANGED
@@ -39,7 +39,7 @@ class InputDirConfig:
 def parse_input_dir_config(input_dir: Path) -> InputDirConfig:
     """Reads the config file."""
     logger.info(f"Parsing config.yaml located in {input_dir}.")
-    with open(Path(input_dir).joinpath("config.yaml"), "r") as config_file:
+    with open(Path(input_dir).joinpath("config.yaml")) as config_file:
         config = yaml.safe_load(config_file)
         config_file.close()
     return from_yaml(InputDirConfig, yaml.dump(config))
pheval/implementations/__init__.py
CHANGED
@@ -15,11 +15,9 @@ def get_implementation_resolver() -> ClassResolver[PhEvalRunner]:
     Returns:
         ClassResolver[PhEvalRunner]: _description_
     """
-    implementation_resolver: PhevalClassResolver[PhEvalRunner] = (
-        PhevalClassResolver.from_subclasses(
-            PhEvalRunner,
-            suffix="Implementation",
-        )
+    implementation_resolver: PhevalClassResolver[PhEvalRunner] = PhevalClassResolver.from_subclasses(
+        PhEvalRunner,
+        suffix="Implementation",
     )

     # implementation_resolver.synonyms.update(
pheval/infra/exomiserdb.py
CHANGED
@@ -12,9 +12,7 @@ info_debug = log.getLogger("debug")


 class DBConnector:
-    def __init__(
-        self, jar: Path, driver: str, server: str, database: str, user: str, password: str
-    ):
+    def __init__(self, jar: Path, driver: str, server: str, database: str, user: str, password: str):
         self.jar = jar
         self.driver = driver
         self.server = server
@@ -63,7 +61,7 @@ class DBConnection:
 class ExomiserDB:
     def __init__(self, db_path: Path):
         try:
-            self.connector = DBConnector(
+            self.connector = DBConnector(
                 jar=os.path.join(os.path.dirname(__file__), "../../../lib/h2-1.4.199.jar"),
                 driver="org.h2.Driver",
                 server=f"jdbc:h2:{db_path}",
@@ -89,7 +87,7 @@ class ExomiserDB:
         batches = reader.next_batches(batch_length)
         cursor = conn.get_cursor()
         # # TODO: Refactor this
-        with open(input_file, "r") as f:
+        with open(input_file) as f:
             total = sum(1 for line in f)
             pbar = tqdm(total=total - 1)
             mapping_id = 1
@@ -112,12 +110,10 @@ def _format_row(mapping_id, data):
         data (_type_): row data
     """
     # TODO:Improve string escaping. Replace this code with parametrised query
-    return f"""({mapping_id}, '{data[
+    return f"""({mapping_id}, '{data["subject_id"]}', '{data["subject_label"].replace("'", "")}', '{data["object_id"]}', '{data["object_label"].replace("'", "")}', {data["jaccard_similarity"]}, {data["ancestor_information_content"]}, {data["phenodigm_score"]}, '{data["ancestor_id"].split(",")[0]}', '{data["ancestor_label"].replace("'", "")}')"""  # noqa


-def _semsim2h2(
-    input_data: pl.DataFrame, subject_prefix: str, object_prefix: str, mapping_id=1
-) -> None:
+def _semsim2h2(input_data: pl.DataFrame, subject_prefix: str, object_prefix: str, mapping_id=1) -> None:
     """This function is responsible for generate sql insertion query for each semsim profile row

     Args:
@@ -130,12 +126,8 @@ def _semsim2h2(
     if mapping_id == 1:
         sql += f"TRUNCATE TABLE EXOMISER.{subject_prefix}_{object_prefix}_MAPPINGS;\n"

-    object_id = (
-        f"{object_prefix}_ID_HIT" if subject_prefix == object_prefix else f"{object_prefix}_ID"
-    )
-    object_term = (
-        f"{object_prefix}_HIT_TERM" if subject_prefix == object_prefix else f"{object_prefix}_TERM"
-    )
+    object_id = f"{object_prefix}_ID_HIT" if subject_prefix == object_prefix else f"{object_prefix}_ID"
+    object_term = f"{object_prefix}_HIT_TERM" if subject_prefix == object_prefix else f"{object_prefix}_TERM"
     sql += f"""INSERT INTO EXOMISER.{subject_prefix}_{object_prefix}_MAPPINGS
     (MAPPING_ID, {subject_prefix}_ID, {subject_prefix}_TERM, {object_id}, {object_term}, SIMJ, IC, SCORE, LCS_ID, LCS_TERM)
     VALUES"""