pheval 0.5.1__tar.gz → 0.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pheval-0.5.1 → pheval-0.5.3}/PKG-INFO +4 -4
- {pheval-0.5.1 → pheval-0.5.3}/README.md +3 -3
- {pheval-0.5.1 → pheval-0.5.3}/pyproject.toml +1 -1
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/benchmark.py +37 -17
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/cli_pheval.py +11 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/config_parser.py +5 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/post_processing/post_processing.py +12 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/create_noisy_phenopackets.py +17 -1
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/create_spiked_vcf.py +18 -6
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/prepare_corpus.py +29 -9
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/update_phenopacket.py +20 -2
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/runners/runner.py +7 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/phenopacket_utils.py +10 -5
- {pheval-0.5.1 → pheval-0.5.3}/LICENSE +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/benchmark_db_manager.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/benchmark_output_type.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/binary_classification_curves.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/binary_classification_stats.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/generate_plots.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/generate_rank_comparisons.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/rank_stats.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/run_data_parser.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/cli.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/cli_pheval_utils.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/implementations/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/infra/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/infra/exomiserdb.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/post_processing/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/post_processing/phenopacket_truth_set.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/post_processing/validate_result_format.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/custom_exceptions.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/CADA_results.txt +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/DeepPVP_results.txt +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/OVA_results.txt +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/Phen2Gene_results.json +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/Phenolyzer_results.txt +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/lirical_results.tsv +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/svanna_results.tsv +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/resources/hgnc_complete_set.txt +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/run_metadata.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/runners/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/__init__.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/docs_gen.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/docs_gen.sh +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/exomiser.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/file_utils.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/logger.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/semsim_utils.py +0 -0
- {pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/utils.py +0 -0
{pheval-0.5.1 → pheval-0.5.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pheval
-Version: 0.5.1
+Version: 0.5.3
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk
@@ -32,10 +32,10 @@ Description-Content-Type: text/markdown
 
 # PhEval - Phenotypic Inference Evaluation Framework
 
-
+[](https://pypi.org/project/pheval/)
 
 
-
 
 
 ## Overview
@@ -53,7 +53,7 @@ For more information please see the full [documentation](https://monarch-initiat
 
 ## Download and Installation
 
-1. Ensure you have Python 3.
+1. Ensure you have Python 3.10 or greater installed.
 2. Install with `pip`:
    ```bash
    pip install pheval
{pheval-0.5.1 → pheval-0.5.3}/README.md

@@ -1,9 +1,9 @@
 # PhEval - Phenotypic Inference Evaluation Framework
 
-
+[](https://pypi.org/project/pheval/)
 
 
-
 
 
 ## Overview
@@ -21,7 +21,7 @@ For more information please see the full [documentation](https://monarch-initiat
 
 ## Download and Installation
 
-1. Ensure you have Python 3.
+1. Ensure you have Python 3.10 or greater installed.
 2. Install with `pip`:
    ```bash
    pip install pheval
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/analyse/benchmark.py

@@ -23,27 +23,32 @@ def scan_directory(run: RunConfig, benchmark_type: BenchmarkOutputType) -> pl.La
         run (RunConfig): RunConfig object.
         benchmark_type (BenchmarkOutputTypeEnum): Benchmark output type.
     Returns:
-        pl.LazyFrame: LazyFrame object containing all the results in the directory
+        pl.LazyFrame: LazyFrame object containing all the results in the directory.
     """
     logger = get_logger()
     logger.info(f"Analysing results in {run.results_dir.joinpath(benchmark_type.result_directory)}")
     return (
-        pl.scan_parquet(
-            run.results_dir.joinpath(benchmark_type.result_directory),
-            include_file_paths="file_path",
-        ).with_columns(
-            pl.col("rank").cast(pl.Int64),
-            pl.col("file_path").str.extract(r"([^/\\]+)$").alias("result_file"),
-            pl.col("true_positive").fill_null(False),
-        )
-    ).filter(
         (
-            pl.
-
-
+            pl.scan_parquet(
+                run.results_dir.joinpath(benchmark_type.result_directory),
+                include_file_paths="file_path",
+            ).with_columns(
+                pl.col("rank").cast(pl.Int64),
+                pl.col("file_path").str.extract(r"([^/\\]+)$").alias("result_file"),
+                pl.col("true_positive").fill_null(False),
+            )
+        )
+        .filter(
+            (
+                pl.col("score") >= run.threshold
+                if run.score_order.lower() == "descending"
+                else pl.col("score") <= run.threshold
+            )
+            if run.threshold is not None
+            else True
         )
-
-
+        .sort("rank")
+        .unique(subset=["file_path", *benchmark_type.columns], keep="first")
     )
 
 
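The reworked scan_directory above now applies an optional score threshold (direction chosen by run.score_order), sorts on rank, and keeps one row per result file and entity with unique(keep="first"). Below is a minimal sketch of that filter, sort, and deduplicate pattern on toy data; the column names, threshold value, and score order here are illustrative stand-ins rather than PhEval's real configuration. The second benchmark.py hunk after the sketch applies matching handling when the per-run statistics are assembled.

```python
# Sketch of the threshold + best-rank deduplication pattern from scan_directory (toy data).
import polars as pl

threshold = 0.5             # assumed example value; PhEval reads this from run.threshold
score_order = "descending"  # assumed example value; PhEval reads this from run.score_order

lf = pl.DataFrame(
    {
        "result_file": ["a.parquet", "a.parquet", "a.parquet", "b.parquet"],
        "gene_symbol": ["BRCA1", "BRCA1", "TP53", "BRCA1"],
        "rank": [3, 1, 2, 1],
        "score": [0.7, 0.9, 0.4, 0.8],
        "true_positive": [True, None, False, True],
    }
).lazy()

result = (
    lf.with_columns(pl.col("true_positive").fill_null(False))
    .filter(
        # Keep rows past the threshold; the comparison direction follows the score order.
        (
            (pl.col("score") >= threshold)
            if score_order.lower() == "descending"
            else (pl.col("score") <= threshold)
        )
        if threshold is not None
        else True
    )
    # Sort by rank so keep="first" retains the best-ranked row per result file and entity.
    .sort("rank")
    .unique(subset=["result_file", "gene_symbol"], keep="first")
)
print(result.collect())
```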
@@ -68,14 +73,29 @@ def process_stats(
         )
         curve_results.append(compute_curves(run.run_identifier, result_scan))
         true_positive_cases.append(
-            result_scan.filter(pl.col("true_positive"))
+            result_scan.filter(pl.col("true_positive"))
+            .select(
                 ["result_file", *benchmark_type.columns, pl.col("rank").alias(run.run_identifier)]
             )
+            .sort(["result_file", *benchmark_type.columns])
         )
     return (
         pl.concat(stats, how="vertical").collect(),
         pl.concat(curve_results, how="vertical").collect(),
-        pl.concat(
+        pl.concat(
+            [true_positive_cases[0]]
+            + [
+                df.select(
+                    [
+                        col
+                        for col in df.collect_schema().keys()
+                        if col not in ["result_file", *benchmark_type.columns]
+                    ]
+                )
+                for df in true_positive_cases[1:]
+            ],
+            how="horizontal",
+        ).collect(),
     )
 
 
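The process_stats change above sorts each run's true-positive rows on the shared key columns before concatenating the per-run rank columns horizontally, so rows from different runs line up by position. A small sketch of that pattern on eager toy frames follows; it uses df.columns rather than the lazy collect_schema().keys() call in the diff, and the run names and values are invented for illustration.

```python
# Sketch of combining per-run rank columns into one table via horizontal concatenation.
import polars as pl

keys = ["result_file", "gene_symbol"]

# One frame per benchmarked run: true-positive rows plus that run's rank column,
# each sorted on the key columns so the rows align positionally.
run_1 = pl.DataFrame(
    {"result_file": ["a", "b"], "gene_symbol": ["BRCA1", "TP53"], "run_1": [1, 4]}
).sort(keys)
run_2 = pl.DataFrame(
    {"result_file": ["b", "a"], "gene_symbol": ["TP53", "BRCA1"], "run_2": [2, 1]}
).sort(keys)

# Keep the key columns once (from the first run) and append only the rank columns of the rest.
combined = pl.concat(
    [run_1]
    + [df.select([c for c in df.columns if c not in keys]) for df in [run_2]],
    how="horizontal",
)
print(combined)
```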
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/cli_pheval.py

@@ -2,12 +2,16 @@
 Monarch Initiative
 """
 
+import time
 from pathlib import Path
 
 import click
 
 from pheval.implementations import get_implementation_resolver
 from pheval.utils.file_utils import write_metadata
+from pheval.utils.logger import get_logger
+
+logger = get_logger()
 
 
 @click.command()
@@ -84,11 +88,18 @@ def run(
         config (Path): The path of the configuration file (optional e.g., config.yaml)
         version (str): The version of the tool implementation
     """
+    logger.info(f"Executing {runner}.")
+    start_time = time.perf_counter()
     runner_class = get_implementation_resolver().lookup(runner)
     runner_instance = runner_class(input_dir, testdata_dir, tmp_dir, output_dir, config, version)
     runner_instance.build_output_directory_structure()
+    logger.info("Executing prepare phase.")
     runner_instance.prepare()
+    logger.info("Executing run phase.")
     runner_instance.run()
+    logger.info("Executing post-processing phase.")
     runner_instance.post_process()
     run_metadata = runner_instance.construct_meta_data()
+    logger.info(f"Writing metadata for run to {output_dir}.")
     write_metadata(output_dir, run_metadata)
+    logger.info(f"Run completed! Total time: {time.perf_counter() - start_time:.2f} seconds.")
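The run command above now brackets the prepare, run, and post-processing phases with log messages and reports the total wall time measured with time.perf_counter(). A standard-library sketch of that timing pattern follows; run_phases and the plain logging logger are hypothetical stand-ins for PhEval's runner orchestration and its get_logger() helper.

```python
# Sketch of timing a sequence of named phases and logging the elapsed total.
import logging
import time
from typing import Callable

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("pheval.sketch")  # stand-in for pheval.utils.logger.get_logger()


def run_phases(phases: dict[str, Callable[[], None]]) -> None:
    start_time = time.perf_counter()
    for name, phase in phases.items():
        logger.info("Executing %s phase.", name)
        phase()
    logger.info("Run completed! Total time: %.2f seconds.", time.perf_counter() - start_time)


run_phases({"prepare": lambda: None, "run": lambda: None, "post-processing": lambda: None})
```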
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/config_parser.py

@@ -6,6 +6,10 @@ import yaml
 from serde import serde
 from serde.yaml import from_yaml
 
+from pheval.utils.logger import get_logger
+
+logger = get_logger()
+
 
 @serde
 @dataclass
@@ -34,6 +38,7 @@ class InputDirConfig:
 
 def parse_input_dir_config(input_dir: Path) -> InputDirConfig:
     """Reads the config file."""
+    logger.info(f"Parsing config.yaml located in {input_dir}.")
     with open(Path(input_dir).joinpath("config.yaml"), "r") as config_file:
         config = yaml.safe_load(config_file)
         config_file.close()
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/post_processing/post_processing.py

@@ -180,6 +180,10 @@ def generate_gene_result(
         phenopacket_dir (Path): Path to the Phenopacket directory
     """
     output_file = output_dir.joinpath(f"pheval_gene_results/{result_path.stem}-gene_result.parquet")
+    logger.info(
+        f"Writing classified results for {len(all_files(phenopacket_dir))} "
+        f"phenopackets to {output_dir.joinpath('pheval_gene_results')}"
+    )
     create_empty_pheval_result(
         phenopacket_dir, output_dir.joinpath("pheval_gene_results"), ResultType.GENE
     )
@@ -210,6 +214,10 @@ def generate_variant_result(
     output_file = output_dir.joinpath(
         f"pheval_variant_results/{result_path.stem}-variant_result.parquet"
     )
+    logger.info(
+        f"Writing classified results for {len(all_files(phenopacket_dir))} "
+        f"phenopackets to {output_dir.joinpath('pheval_variant_results')}"
+    )
     create_empty_pheval_result(
         phenopacket_dir, output_dir.joinpath("pheval_variant_results"), ResultType.VARIANT
     )
@@ -242,6 +250,10 @@ def generate_disease_result(
     output_file = output_dir.joinpath(
         f"pheval_disease_results/{result_path.stem}-disease_result.parquet"
     )
+    logger.info(
+        f"Writing classified results for {len(all_files(phenopacket_dir))} "
+        f"phenopackets to {output_dir.joinpath('pheval_disease_results')}"
+    )
     create_empty_pheval_result(
         phenopacket_dir, output_dir.joinpath("pheval_disease_results"), ResultType.DISEASE
     )
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/create_noisy_phenopackets.py

@@ -1,4 +1,5 @@
 import random
+import time
 from pathlib import Path
 from typing import List, Union
 
@@ -6,7 +7,8 @@ from oaklib.implementations.pronto.pronto_implementation import ProntoImplementa
 from oaklib.resource import OntologyResource
 from phenopackets import Family, OntologyClass, Phenopacket, PhenotypicFeature
 
-from pheval.utils.file_utils import files_with_suffix
+from pheval.utils.file_utils import all_files, files_with_suffix
+from pheval.utils.logger import get_logger
 from pheval.utils.phenopacket_utils import (
     PhenopacketRebuilder,
     PhenopacketUtil,
@@ -14,6 +16,8 @@ from pheval.utils.phenopacket_utils import (
     write_phenopacket,
 )
 
+logger = get_logger()
+
 
 def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
     """
@@ -24,9 +28,11 @@ def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
         ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO.
     """
     if local_cached_ontology is None:
+        logger.warning("No local cached ontology found, using default ontology.")
         resource = OntologyResource(slug="hp.obo", local=False)
         return ProntoImplementation(resource)
     else:
+        logger.info(f"Loading local ontology from {local_cached_ontology}.")
         resource = OntologyResource(slug=local_cached_ontology, local=True)
         return ProntoImplementation(resource)
 
@@ -241,6 +247,7 @@ class HpoRandomiser:
         """
         phenopacket_files = files_with_suffix(phenopacket_dir, ".json")
         for phenopacket_path in phenopacket_files:
+            logger.info(f"Scrambling {phenopacket_path.name}.")
             phenopacket = phenopacket_reader(phenopacket_path)
             created_noisy_phenopacket = self.add_noise_to_phenotypic_profile(phenopacket)
             write_phenopacket(
@@ -268,14 +275,23 @@ def scramble_phenopackets(
         scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
         local_cached_ontology (Path): The path to the local cached ontology.
     """
+    start_time = time.perf_counter()
+    logger.info("Initiating scrambling.")
+    logger.info(f"Created directory {output_dir}.")
+    logger.info(f"Scramble factor set to {scramble_factor}.")
     output_dir.mkdir(exist_ok=True)
     ontology = load_ontology(local_cached_ontology)
     if phenopacket_path is not None:
+        logger.info(f"Scrambling {phenopacket_path}.")
         HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopacket(
             output_dir, phenopacket_path
         )
     elif phenopacket_dir is not None:
+        logger.info(
+            f"Scrambling {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
+        )
         HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopackets(
             output_dir,
             phenopacket_dir,
         )
+    logger.info(f"Finished scrambling! Total time: {time.perf_counter() - start_time:.2f} seconds.")
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/create_spiked_vcf.py

@@ -1,7 +1,7 @@
 import gzip
-import logging
 import random
 import re
+import time
 import urllib.parse
 from copy import copy
 from dataclasses import dataclass
@@ -12,6 +12,7 @@ from phenopackets import Family, File, Phenopacket
 
 from pheval.prepare.custom_exceptions import InputError
 from pheval.utils.file_utils import all_files, files_with_suffix, is_gzipped
+from pheval.utils.logger import get_logger
 from pheval.utils.phenopacket_utils import (
     IncompatibleGenomeAssemblyError,
     PhenopacketRebuilder,
@@ -21,8 +22,7 @@ from pheval.utils.phenopacket_utils import (
     write_phenopacket,
 )
 
-
-
+logger = get_logger()
 genome_assemblies = {
     "GRCh38": {
         "1": 248956422,
@@ -357,9 +357,13 @@ class VcfSpiker:
                 and int(val.split("\t")[1]) < int(variant_entry[1])
             ]
             if matching_indices:
+                logger.info(
+                    f"Successfully spiked variant {variant.variant.chrom}-{variant.variant.pos}-"
+                    f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}"
+                )
                 variant_entry_position = matching_indices[-1] + 1
             else:
-
+                logger.warning(
                     f"Could not find entry position for {variant.variant.chrom}-{variant.variant.pos}-"
                     f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}, "
                     "inserting at end of VCF contents."
@@ -518,8 +522,6 @@ def generate_spiked_vcf_file(
     Returns:
         File: The generated File object representing the newly created spiked VCF file.
     """
-    output_dir.mkdir(exist_ok=True)
-    info_log.info(f" Created a directory {output_dir}")
     vcf_assembly, spiked_vcf = spike_vcf_contents(
         phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir
     )
@@ -633,6 +635,7 @@ def create_spiked_vcfs(
     hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
     hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
     for phenopacket_path in files_with_suffix(phenopacket_dir, ".json"):
+        logger.info(f"Creating spiked VCF for: {phenopacket_path.name}")
         spike_and_update_phenopacket(
             hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir, output_dir, phenopacket_path
         )
@@ -659,7 +662,12 @@ def spike_vcfs(
         hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional).
         hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional).
     """
+    start_time = time.perf_counter()
+    logger.info("Creating spiked VCFs.")
+    output_dir.mkdir(exist_ok=True)
+    logger.info(f" Created output directory: {output_dir}")
     if phenopacket_path is not None:
+        logger.info(f"Spiking variants from {phenopacket_path}.")
         create_spiked_vcf(
             output_dir,
             phenopacket_path,
@@ -669,6 +677,9 @@
             hg38_vcf_dir,
         )
     elif phenopacket_dir is not None:
+        logger.info(
+            f"Spiking variants from {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
+        )
         create_spiked_vcfs(
             output_dir,
             phenopacket_dir,
@@ -677,3 +688,4 @@
         hg19_vcf_dir,
         hg38_vcf_dir,
     )
+    logger.info(f"Finished spiking! Total time: {time.perf_counter() - start_time:.2f} seconds.")
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/prepare_corpus.py

@@ -1,13 +1,18 @@
-import logging
 import shutil
+import time
 from pathlib import Path
 
 from pheval.prepare.create_spiked_vcf import create_spiked_vcf
 from pheval.prepare.update_phenopacket import create_updated_phenopacket
 from pheval.utils.file_utils import all_files
-from pheval.utils.
+from pheval.utils.logger import get_logger
+from pheval.utils.phenopacket_utils import (
+    PhenopacketUtil,
+    create_gene_identifier_map,
+    phenopacket_reader,
+)
 
-
+logger = get_logger()
 
 
 def prepare_corpus(
@@ -43,39 +48,46 @@ def prepare_corpus(
     To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf,
     hg19_vcf_dir or hg38_vcf_dir is required.
     """
+    start_time = time.perf_counter()
+    logger.info(f"Preparing corpus for {phenopacket_dir}")
     output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
+    logger.info(f" Created output directory: {output_dir.joinpath('phenopackets')}")
+    identifier_map = create_gene_identifier_map()
     for phenopacket_path in all_files(phenopacket_dir):
        phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
        if not phenopacket_util.observed_phenotypic_features():
-
+            logger.warning(
                f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
            )
            continue
        if variant_analysis:
            if phenopacket_util.check_incomplete_variant_record():
-
+                logger.warning(
                    f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
                )
                continue
            elif phenopacket_util.check_variant_alleles():
-
+                logger.warning(
                    f"Removed {phenopacket_path.name} from the corpus due to identical "
                    "reference and alternate allele fields."
                )
        if gene_analysis:
            if phenopacket_util.check_incomplete_gene_record():
-
+                logger.warning(
                    f"Removed {phenopacket_path.name} from the corpus due to missing gene fields."
                )
                continue
        if disease_analysis:
            if phenopacket_util.check_incomplete_disease_record():
-
+                logger.warning(
                    f"Removed {phenopacket_path.name} from the corpus due to missing disease fields."
                )
                continue
+        logger.info(f"{phenopacket_path.name} OK!")
        if hg19_template_vcf or hg38_template_vcf:
            output_dir.joinpath("vcf").mkdir(exist_ok=True)
+            logger.info(f" Created output directory: {output_dir.joinpath('vcf')}")
+            logger.info(f"Spiking VCF for {phenopacket_path}.")
            create_spiked_vcf(
                output_dir.joinpath("vcf"),
                phenopacket_path,
@@ -85,8 +97,12 @@ def prepare_corpus(
                hg38_vcf_dir,
            )
        if gene_identifier:
+            logger.info(f"Updating gene identifiers to {gene_identifier} for {phenopacket_dir}")
            create_updated_phenopacket(
-                gene_identifier,
+                gene_identifier,
+                phenopacket_path,
+                output_dir.joinpath("phenopackets"),
+                identifier_map,
            )
        else:
            # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory
@@ -97,3 +113,7 @@ def prepare_corpus(
            if phenopacket_path != output_dir.joinpath(f"phenopackets/{phenopacket_path.name}")
            else None
        )
+    logger.info(
+        f"Finished preparing corpus for {phenopacket_dir}. "
+        f"Total time: {time.perf_counter() - start_time:.2f} seconds."
+    )
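prepare_corpus above now builds the gene identifier map once, before iterating over the corpus, and hands it to create_updated_phenopacket, which only falls back to rebuilding the map when the caller did not supply one (see the update_phenopacket.py hunks below). A small sketch of that caching pattern follows; build_identifier_map and update_one are hypothetical stand-ins for create_gene_identifier_map and create_updated_phenopacket.

```python
# Sketch: build an expensive lookup table once and pass it into the per-item update.
from typing import Optional

import polars as pl


def build_identifier_map() -> pl.DataFrame:
    # Stand-in for pheval.utils.phenopacket_utils.create_gene_identifier_map(),
    # which derives the mapping from the bundled HGNC complete set.
    return pl.DataFrame({"gene_symbol": ["BRCA1"], "identifier": ["ENSG00000012048"]})


def update_one(path: str, identifier_map: Optional[pl.DataFrame] = None) -> None:
    # Fall back to building the map only when no map was supplied by the caller.
    identifier_map = build_identifier_map() if identifier_map is None else identifier_map
    print(f"updating {path} with {identifier_map.height} mapped symbols")


identifier_map = build_identifier_map()  # built once, outside the loop
for path in ["patient_1.json", "patient_2.json"]:
    update_one(path, identifier_map)
```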
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/prepare/update_phenopacket.py

@@ -1,3 +1,4 @@
+import time
 from pathlib import Path
 from typing import Union
 
@@ -5,6 +6,7 @@ import polars as pl
 from phenopackets import Family, Phenopacket
 
 from pheval.utils.file_utils import all_files
+from pheval.utils.logger import get_logger
 from pheval.utils.phenopacket_utils import (
     GeneIdentifierUpdater,
     PhenopacketRebuilder,
@@ -14,6 +16,8 @@ from pheval.utils.phenopacket_utils import (
     write_phenopacket,
 )
 
+logger = get_logger()
+
 
 def update_outdated_gene_context(
     phenopacket_path: Path, gene_identifier: str, identifier_map: pl.DataFrame
@@ -43,7 +47,10 @@ def update_outdated_gene_context(
 
 
 def create_updated_phenopacket(
-    gene_identifier: str,
+    gene_identifier: str,
+    phenopacket_path: Path,
+    output_dir: Path,
+    identifier_map: pl.DataFrame = None,
 ) -> None:
     """
     Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket.
@@ -52,12 +59,13 @@ def create_updated_phenopacket(
         gene_identifier (str): Identifier used to update the gene context.
         phenopacket_path (Path): The path to the input Phenopacket file.
         output_dir (Path): The directory where the updated Phenopacket will be written.
+        identifier_map (pl.DataFrame): The gene identifier map used for updating.
     Notes:
         The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
         to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
        to describe the gene identifiers.
     """
-    identifier_map = create_gene_identifier_map()
+    identifier_map = create_gene_identifier_map() if identifier_map is None else identifier_map
     updated_phenopacket = update_outdated_gene_context(
         phenopacket_path, gene_identifier, identifier_map
     )
@@ -82,6 +90,7 @@ def create_updated_phenopackets(
     """
     identifier_map = create_gene_identifier_map()
     for phenopacket_path in all_files(phenopacket_dir):
+        logger.info(f"Updating gene context for: {phenopacket_path.name}")
         updated_phenopacket = update_outdated_gene_context(
             phenopacket_path, gene_identifier, identifier_map
         )
@@ -104,8 +113,17 @@ def update_phenopackets(
         to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
         to describe the gene identifiers.
     """
+    start_time = time.perf_counter()
+    logger.info("Updating phenopackets.")
     output_dir.mkdir(exist_ok=True)
+    logger.info(f"Created directory {output_dir}.")
+    logger.info(f"Gene identifier set to: {gene_identifier}.")
     if phenopacket_path is not None:
+        logger.info(f"Updating {phenopacket_path}.")
         create_updated_phenopacket(gene_identifier, phenopacket_path, output_dir)
     elif phenopacket_dir is not None:
+        logger.info(
+            f"Updating {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
+        )
         create_updated_phenopackets(gene_identifier, phenopacket_dir, output_dir)
+    logger.info(f"Updating finished! Total time: {time.perf_counter() - start_time:.2f} seconds.")
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/runners/runner.py

@@ -7,6 +7,9 @@ from pathlib import Path
 
 from pheval.config_parser import parse_input_dir_config
 from pheval.run_metadata import BasicOutputRunMetaData
+from pheval.utils.logger import get_logger
+
+logger = get_logger()
 
 
 @dataclass
@@ -86,6 +89,10 @@ class PhEvalRunner(ABC):
 
     def build_output_directory_structure(self):
         """build output directory structure"""
+        logger.info(
+            f"Building output directory structure for {self.input_dir_config.tool} "
+            f"version {self.input_dir_config.tool_version}"
+        )
         self.tool_input_commands_dir.mkdir(exist_ok=True)
         self.raw_results_dir.mkdir(exist_ok=True)
         if self._get_variant_analysis():
{pheval-0.5.1 → pheval-0.5.3}/src/pheval/utils/phenopacket_utils.py

@@ -1,5 +1,4 @@
 import json
-import logging
 import os
 from copy import copy
 from dataclasses import dataclass
@@ -19,8 +18,9 @@ from phenopackets import (
 )
 
 from pheval.prepare.custom_exceptions import IncorrectFileFormatError
+from pheval.utils.logger import get_logger
 
-
+logger = get_logger()
 
 
 class IncompatibleGenomeAssemblyError(Exception):
@@ -161,6 +161,7 @@ def create_gene_identifier_map() -> pl.DataFrame:
     Returns:
         pl.DataFrame: A mapping of gene identifiers to gene symbols.
     """
+    logger.info("Creating gene identifier map.")
     hgnc_df = parse_hgnc_data()
     return hgnc_df.melt(
         id_vars=["gene_symbol", "prev_symbols"],
@@ -192,6 +193,7 @@ def phenopacket_reader(file: Path) -> Union[Phenopacket, Family]:
     Returns:
         Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object
     """
+    logger.info(f"Parsing Phenopacket: {file.name}")
     file = open(file, "r")
     phenopacket = json.load(file)
     file.close()
@@ -593,6 +595,7 @@ class PhenopacketRebuilder:
         Returns:
             - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path.
         """
+        logger.info(f"Adding spiked VCF path {spiked_vcf_file_data.uri} to phenopacket.")
         phenopacket = copy(self.phenopacket)
         phenopacket_files = [
             file for file in phenopacket.files if file.file_attributes["fileFormat"] != "vcf"
@@ -627,6 +630,7 @@ def write_phenopacket(phenopacket: Union[Phenopacket, Family], output_file: Path
     Returns:
         None
     """
+    logger.info(f"Writing Phenopacket to {output_file}.")
     phenopacket_json = create_json_message(phenopacket)
     with open(output_file, "w") as outfile:
         outfile.write(phenopacket_json)
@@ -675,6 +679,7 @@ class GeneIdentifierUpdater:
         )
         if prev_symbol_matches.height > 0:
             return prev_symbol_matches["identifier"][0]
+        logger.warn(f"Could not find {self.gene_identifier} for {gene_symbol}.")
         return None
 
     def obtain_gene_symbol_from_identifier(self, query_gene_identifier: str) -> str:
@@ -735,10 +740,10 @@ class GeneIdentifierUpdater:
                 updated_gene_identifier = self.find_identifier(
                     g.variant_interpretation.variation_descriptor.gene_context.symbol
                 )
-
-                    f"Updating gene identifier in {phenopacket_path} from "
+                logger.info(
+                    f"Updating gene identifier in {phenopacket_path.name} from "
                     f"{g.variant_interpretation.variation_descriptor.gene_context.value_id}"
-                    f"to {updated_gene_identifier}"
+                    f" to {updated_gene_identifier}"
                 )
                 g.variant_interpretation.variation_descriptor.gene_context.value_id = (
                     updated_gene_identifier