PyPI - pheval - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

pheval 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pheval might be problematic. Click here for more details.

Files changed (42) hide show

pheval/__init__.py +0 -5
pheval/analyse/__init__.py +0 -0
pheval/analyse/analysis.py +703 -0
pheval/analyse/generate_plots.py +312 -0
pheval/analyse/generate_summary_outputs.py +186 -0
pheval/analyse/rank_stats.py +61 -0
pheval/cli.py +22 -7
pheval/cli_pheval.py +37 -12
pheval/cli_pheval_utils.py +225 -8
pheval/config_parser.py +36 -0
pheval/constants.py +1 -0
pheval/implementations/__init__.py +1 -3
pheval/post_processing/__init__.py +0 -0
pheval/post_processing/post_processing.py +210 -0
pheval/prepare/__init__.py +0 -0
pheval/prepare/create_noisy_phenopackets.py +173 -0
pheval/prepare/create_spiked_vcf.py +366 -0
pheval/prepare/custom_exceptions.py +47 -0
pheval/prepare/update_phenopacket.py +53 -0
pheval/resources/alternate_ouputs/CADA_results.txt +11 -0
pheval/resources/alternate_ouputs/DeepPVP_results.txt +22 -0
pheval/resources/alternate_ouputs/OVA_results.txt +11 -0
pheval/resources/alternate_ouputs/Phen2Gene_results.json +814 -0
pheval/resources/alternate_ouputs/Phenolyzer_results.txt +12 -0
pheval/resources/alternate_ouputs/lirical_results.tsv +152 -0
pheval/resources/alternate_ouputs/svanna_results.tsv +9 -0
pheval/resources/hgnc_complete_set_2022-10-01.txt +43222 -0
pheval/run_metadata.py +27 -0
pheval/runners/runner.py +92 -11
pheval/utils/__init__.py +0 -0
pheval/utils/docs_gen.py +105 -0
pheval/utils/docs_gen.sh +18 -0
pheval/utils/file_utils.py +88 -0
pheval/utils/phenopacket_utils.py +356 -0
pheval/utils/semsim_utils.py +156 -0
{pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/METADATA +12 -4
pheval-0.2.0.dist-info/RECORD +41 -0
{pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/WHEEL +1 -1
pheval/utils.py +0 -7
pheval-0.1.0.dist-info/RECORD +0 -13
{pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/LICENSE +0 -0
{pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/entry_points.txt +0 -0

pheval/cli_pheval.py CHANGED Viewed

@@ -1,26 +1,30 @@
 """
 Monarch Initiative
 """
+from pathlib import Path
 import click
 from pheval.implementations import get_implementation_resolver
+from pheval.utils.file_utils import write_metadata
 @click.command()
 @click.option(
-    "--inputdir",
+    "--input-dir",
     "-i",
     metavar="INPUTDIR",
     required=True,
     help="The input directory (relative path: e.g exomiser-13.11)",
+    type=Path,
 )
 @click.option(
-    "--testdatadir",
+    "--testdata-dir",
     "-t",
     metavar="TESTDATA",
     required=True,
     help="The input directory (relative path: e.g ./data)",
+    type=Path,
 )
 @click.option(
     "--runner",
@@ -30,18 +34,20 @@ from pheval.implementations import get_implementation_resolver
     help="Runner implementation (e.g exomiser-13.11)",
 )
 @click.option(
-    "--tmpdir",
+    "--tmp-dir",
     "-m",
     metavar="TMPDIR",
     required=False,
     help="The path of the temporary directory (optional)",
+    type=Path,
 )
 @click.option(
-    "--outputdir",
+    "--output-dir",
     "-o",
     metavar="OUTPUTDIR",
     required=True,
     help="The path of the output directory",
+    type=Path,
 )
 @click.option(
     "--config",
@@ -49,20 +55,39 @@ from pheval.implementations import get_implementation_resolver
     metavar="CONFIG",
     required=False,
     help="The path of the configuration file (optional e.g config.yaml)",
+    type=Path,
 )
-def run(inputdir, testdatadir, runner, tmpdir, outputdir, config) -> None:
+@click.option(
+    "--version",
+    "-v",
+    required=False,
+    help="Version of the tool implementation.",
+    type=str,
+)
+def run(
+    input_dir: Path,
+    testdata_dir: Path,
+    runner: str,
+    tmp_dir: Path,
+    output_dir: Path,
+    config: Path,
+    version: str,
+) -> None:
     """PhEval Runner Command Line Interface
     Args:
-        inputdir (Click.Path): The input directory (relative path: e.g exomiser-13.11)
-        testdatadir (Click.Path): The input directory (relative path: e.g ./data
+        input_dir (Path): The input directory (relative path: e.g exomiser-13.11)
+        testdata_dir (Path): The input directory (relative path: e.g ./data
         runner (str): Runner implementation (e.g exomiser-13.11)
-        tmpdir (Click.Path): The path of the temporary directory (optional)
-        outputdir (Click.Path): The path of the output directory
-        config (Click.Path): The path of the configuration file (optional e.g config.yaml)
+        tmp_dir (Path): The path of the temporary directory (optional)
+        output_dir (Path): The path of the output directory
+        config (Path): The path of the configuration file (optional e.g., config.yaml)
+        version (str): The version of the tool implementation
     """
     runner_class = get_implementation_resolver().lookup(runner)
-    runner_instance = runner_class(inputdir, testdatadir, tmpdir, outputdir, config)
+    runner_instance = runner_class(input_dir, testdata_dir, tmp_dir, output_dir, config, version)
+    runner_instance.build_output_directory_structure()
     runner_instance.prepare()
     runner_instance.run()
     runner_instance.post_process()
+    run_metadata = runner_instance.construct_meta_data()
+    write_metadata(output_dir, run_metadata)

pheval/cli_pheval_utils.py CHANGED Viewed

@@ -1,7 +1,15 @@
 """PhEval utils Command Line Interface"""
+from pathlib import Path
 import click
+from pheval.prepare.create_noisy_phenopackets import scramble_phenopackets
+from pheval.prepare.create_spiked_vcf import spike_vcfs
+from pheval.prepare.custom_exceptions import InputError, MutuallyExclusiveOptionError
+from pheval.prepare.update_phenopacket import update_phenopackets
+from pheval.utils.semsim_utils import percentage_diff, semsim_heatmap_plot
 @click.command()
 @click.option(
@@ -11,19 +19,228 @@ import click
     metavar="FILE",
     help="Path to the semantic similarity profile to be scrambled.",
 )
-def scramble_semsim():
+def scramble_semsim(input: Path):
     """scramble_semsim"""
     print("running pheval_utils::scramble_semsim command")
-@click.command()
+@click.command("scramble-phenopackets")
 @click.option(
-    "--input",
-    "-i",
+    "--phenopacket-path",
+    "-p",
+    metavar="PATH",
+    help="Path to phenopacket.",
+    type=Path,
+    cls=MutuallyExclusiveOptionError,
+    mutually_exclusive=["phenopacket_dir"],
+)
+@click.option(
+    "--phenopacket-dir",
+    "-P",
+    metavar="PATH",
+    help="Path to phenopackets directory.",
+    type=Path,
+    cls=MutuallyExclusiveOptionError,
+    mutually_exclusive=["phenopacket_path"],
+)
+@click.option(
+    "--scramble-factor",
+    "-s",
+    metavar=float,
+    required=True,
+    default=0.5,
+    show_default=True,
+    help="Scramble factor for randomising phenopacket phenotypic profiles.",
+    type=float,
+)
+@click.option(
+    "--output-dir",
+    "-O",
+    metavar="PATH",
+    required=True,
+    help="Path for creation of output directory",
+    default="noisy_phenopackets",
+    type=Path,
+)
+def scramble_phenopackets_command(
+    phenopacket_path: Path,
+    phenopacket_dir: Path,
+    scramble_factor: float,
+    output_dir: Path,
+):
+    """Generate noisy phenopackets from existing ones."""
+    if phenopacket_path is None and phenopacket_dir is None:
+        raise InputError("Either a phenopacket or phenopacket directory must be specified")
+    else:
+        scramble_phenopackets(output_dir, phenopacket_path, phenopacket_dir, scramble_factor)
+@click.command("semsim-comparison")
+@click.option(
+    "--semsim-left",
+    "-L",
+    required=True,
+    metavar="FILE",
+    help="Path to the first semantic similarity profile.",
+)
+@click.option(
+    "--semsim-right",
+    "-R",
     required=True,
     metavar="FILE",
-    help="Path to the phenopacket to be spiked.",
+    help="Path to the second semantic similarity profile.",
+)
+@click.option(
+    "--score-column",
+    "-s",
+    required=True,
+    type=click.Choice(
+        ["jaccard_similarity", "dice_similarity", "phenodigm_score"], case_sensitive=False
+    ),
+    help="Score column that will be used in comparison",
+)
+@click.option(
+    "--analysis",
+    "-a",
+    required=True,
+    type=click.Choice(["heatmap", "percentage_diff"], case_sensitive=False),
+    help="""There are two types of analysis:
+        heatmap - Generates a heatmap plot that shows the differences between the semantic similarity profiles using the
+        score column for this purpose. Defaults to "heatmap".
+        percentage_diff - Calculates the score column percentage difference between the semantic similarity profiles""",
+)
+@click.option(
+    "--output",
+    "-o",
+    metavar="FILE",
+    default="percentage_diff.semsim.tsv",
+    help="Output path for the difference tsv. Defaults to percentage_diff.semsim.tsv",
+)
+def semsim_comparison(
+    semsim_left: Path,
+    semsim_right: Path,
+    score_column: str,
+    analysis: str,
+    output: Path = "percentage_diff.semsim.tsv",
+):
+    """Compares two semantic similarity profiles
+    Args:
+        semsim-left (Path): File path of the first semantic similarity profile
+        semsim-right (Path): File path of the second semantic similarity profile
+        output (Path): Output path for the difference tsv. Defaults to "percentage_diff.semsim.tsv".
+        score_column (str): Score column that will be computed (e.g. jaccard_similarity)
+        analysis (str): There are two types of analysis:
+        heatmap - Generates a heatmap plot that shows the differences between the semantic similarity profiles using the
+        score column for this purpose. Defaults to "heatmap".
+        percentage_diff - Calculates the score column percentage difference between the semantic similarity profiles.
+    """
+    if analysis == "heatmap":
+        return semsim_heatmap_plot(semsim_left, semsim_right, score_column)
+    if analysis == "percentage_diff":
+        percentage_diff(semsim_left, semsim_right, score_column, output)
+@click.command("update-phenopackets")
+@click.option(
+    "--phenopacket-path",
+    "-p",
+    metavar="PATH",
+    help="Path to phenopacket.",
+    type=Path,
+    cls=MutuallyExclusiveOptionError,
+    mutually_exclusive=["phenopacket_dir"],
+)
+@click.option(
+    "--phenopacket-dir",
+    "-P",
+    metavar="PATH",
+    help="Path to phenopacket directory for updating.",
+    type=Path,
+    cls=MutuallyExclusiveOptionError,
+    mutually_exclusive=["phenopacket_path"],
+)
+@click.option(
+    "--output-dir",
+    "-o",
+    metavar="PATH",
+    required=True,
+    help="Path to write phenopacket.",
+    type=Path,
+)
+@click.option(
+    "--gene-identifier",
+    "-g",
+    required=False,
+    default="ensembl_id",
+    show_default=True,
+    help="Gene identifier to add to phenopacket",
+    type=click.Choice(["ensembl_id", "entrez_id", "hgnc_id"]),
+)
+def update_phenopackets_command(
+    phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path, gene_identifier: str
+):
+    """Update gene symbols and identifiers for phenopackets."""
+    if phenopacket_path is None and phenopacket_dir is None:
+        raise InputError("Either a phenopacket or phenopacket directory must be specified")
+    update_phenopackets(gene_identifier, phenopacket_path, phenopacket_dir, output_dir)
+@click.command("create-spiked-vcfs")
+@click.option(
+    "--phenopacket-path",
+    "-p",
+    metavar="PATH",
+    help="Path to phenopacket.",
+    type=Path,
+    cls=MutuallyExclusiveOptionError,
+    mutually_exclusive=["phenopacket_dir"],
+)
+@click.option(
+    "--phenopacket-dir",
+    "-P",
+    metavar="PATH",
+    help="Path to phenopacket directory for updating.",
+    type=Path,
+    cls=MutuallyExclusiveOptionError,
+    mutually_exclusive=["phenopacket_path"],
+)
+@click.option(
+    "--template-vcf-path",
+    "-t",
+    cls=MutuallyExclusiveOptionError,
+    metavar="PATH",
+    required=False,
+    help="Template VCF file",
+    mutually_exclusive=["vcf_dir"],
+    type=Path,
+)
+@click.option(
+    "--vcf-dir",
+    "-v",
+    cls=MutuallyExclusiveOptionError,
+    metavar="PATH",
+    help="Directory containing template VCF files",
+    mutually_exclusive=["template_vcf"],
+    type=Path,
+)
+@click.option(
+    "--output-dir",
+    "-O",
+    metavar="PATH",
+    required=True,
+    help="Path for creation of output directory",
+    default="vcf",
+    type=Path,
 )
-def scramble_phenopacket():
-    """scramble_phenopacket"""
-    print("running pheval_utils::scramble_phenopacket command")
+def create_spiked_vcfs_command(
+    phenopacket_path: Path,
+    phenopacket_dir: Path,
+    output_dir: Path,
+    template_vcf_path: Path = None,
+    vcf_dir: Path = None,
+):
+    """Spikes variants into a template VCF file for a directory of phenopackets."""
+    if phenopacket_path is None and phenopacket_dir is None:
+        raise InputError("Either a phenopacket or phenopacket directory must be specified")
+    spike_vcfs(output_dir, phenopacket_path, phenopacket_dir, template_vcf_path, vcf_dir)

pheval/config_parser.py ADDED Viewed

@@ -0,0 +1,36 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+import yaml
+from serde import serde
+from serde.yaml import from_yaml
+@serde
+@dataclass
+class InputDirConfig:
+    """
+    Class for defining the fields within the input directory config.
+    Args:
+        tool (str): Name of the tool implementation (e.g. exomiser/phen2gene)
+        tool_version (str): Version of the tool implementation
+        phenotype_only (bool): Whether the tool is run with HPO terms only (True) or with variant data (False)
+        tool_specific_configuration_options (Any): Tool specific configurations
+    """
+    tool: str
+    tool_version: str
+    phenotype_only: bool
+    tool_specific_configuration_options: Any
+def parse_input_dir_config(input_dir: Path) -> InputDirConfig:
+    """Reads the config file."""
+    with open(Path(input_dir).joinpath("config.yaml"), "r") as config_file:
+        config = yaml.safe_load(config_file)
+    config_file.close()
+    return from_yaml(InputDirConfig, yaml.dump(config))

pheval/constants.py ADDED Viewed

@@ -0,0 +1 @@
+PHEVAL_RESULTS_DIRECTORY_SUFFIX = "_results"

pheval/implementations/__init__.py CHANGED Viewed

@@ -13,9 +13,7 @@ def get_implementation_resolver() -> ClassResolver[PhEvalRunner]:
     Returns:
         ClassResolver[PhEvalRunner]: _description_
     """
-    implementation_resolver: ClassResolver[
-        PhEvalRunner
-    ] = ClassResolver.from_subclasses(
+    implementation_resolver: ClassResolver[PhEvalRunner] = ClassResolver.from_subclasses(
         PhEvalRunner,
         suffix="Implementation",
     )

pheval/post_processing/__init__.py ADDED Viewed

File without changes

pheval/post_processing/post_processing.py ADDED Viewed

@@ -0,0 +1,210 @@
+import operator
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+import pandas as pd
+def calculate_end_pos(variant_start: int, variant_ref: str) -> int:
+    """Calculate the end position for a variant."""
+    return variant_start + len(variant_ref) - 1
+@dataclass
+class PhEvalGeneResult:
+    """Minimal data required from tool-specific output for gene prioritisation."""
+    gene_symbol: str
+    gene_identifier: str
+    score: float
+@dataclass
+class RankedPhEvalGeneResult:
+    """PhEval gene result with corresponding rank."""
+    pheval_gene_result: PhEvalGeneResult
+    rank: int
+    def as_dict(self):
+        """Return PhEval gene result as dictionary."""
+        return {
+            "gene_symbol": self.pheval_gene_result.gene_symbol,
+            "gene_identifier": self.pheval_gene_result.gene_identifier,
+            "score": self.pheval_gene_result.score,
+            "rank": self.rank,
+        }
+@dataclass
+class PhEvalVariantResult:
+    """Minimal data required from tool-specific output for variant prioritisation."""
+    chromosome: str
+    start: int
+    end: int
+    ref: str
+    alt: str
+    score: float
+@dataclass
+class RankedPhEvalVariantResult:
+    """PhEval variant result with corresponding rank."""
+    pheval_variant_result: PhEvalVariantResult
+    rank: int
+    def as_dict(self):
+        """Return PhEval variant result as dictionary."""
+        return {
+            "chromosome": self.pheval_variant_result.chromosome,
+            "start": self.pheval_variant_result.start,
+            "end": self.pheval_variant_result.end,
+            "ref": self.pheval_variant_result.ref,
+            "alt": self.pheval_variant_result.alt,
+            "score": self.pheval_variant_result.score,
+            "rank": self.rank,
+        }
+class SortOrder(Enum):
+    ASCENDING = 1
+    DESCENDING = 2
+class ResultSorter:
+    def __init__(
+        self, pheval_results: [PhEvalGeneResult] or [PhEvalVariantResult], sort_order: SortOrder
+    ):
+        self.pheval_results = pheval_results
+        self.sort_order = sort_order
+    def _sort_by_decreasing_score(self) -> [PhEvalGeneResult] or [PhEvalVariantResult]:
+        """Sort results in descending order."""
+        return sorted(self.pheval_results, key=operator.attrgetter("score"), reverse=True)
+    def _sort_by_increasing_score(self) -> [PhEvalGeneResult] or [PhEvalVariantResult]:
+        """Sort results in ascending order."""
+        return sorted(self.pheval_results, key=operator.attrgetter("score"), reverse=False)
+    def sort_pheval_results(self) -> [PhEvalGeneResult] or [PhEvalVariantResult]:
+        """Sort results with best score first."""
+        return (
+            self._sort_by_increasing_score()
+            if self.sort_order == SortOrder.ASCENDING
+            else self._sort_by_decreasing_score()
+        )
+class ScoreRanker:
+    rank: int = 0
+    current_score: float = float("inf")
+    count: int = 0
+    def __init__(self, sort_order: SortOrder):
+        self.sort_order = sort_order
+    def _check_rank_order(self, round_score: float) -> None:
+        """Check the results are correctly ordered."""
+        if self.sort_order == SortOrder.ASCENDING and round_score < self.current_score != float(
+            "inf"
+        ):
+            raise ValueError("Results are not correctly sorted!")
+        elif self.sort_order == SortOrder.DESCENDING and round_score > self.current_score != float(
+            "inf"
+        ):
+            raise ValueError("Results are not correctly sorted!")
+    def rank_scores(self, round_score: float) -> int:
+        """Add ranks to a result, equal scores are given the same rank e.g., 1,1,3."""
+        self._check_rank_order(round_score)
+        self.count += 1
+        if self.current_score == round_score:
+            return self.rank
+        self.current_score = round_score
+        self.rank = self.count
+        return self.rank
+def _rank_pheval_result(
+    pheval_result: [PhEvalGeneResult] or [PhEvalVariantResult], sort_order: SortOrder
+) -> [RankedPhEvalGeneResult] or [RankedPhEvalVariantResult]:
+    """Ranks either a PhEval gene or variant result post-processed from a tool specific output.
+    Deals with ex aequo scores"""
+    score_ranker = ScoreRanker(sort_order)
+    ranked_result = []
+    for result in pheval_result:
+        ranked_result.append(
+            RankedPhEvalGeneResult(
+                pheval_gene_result=result, rank=score_ranker.rank_scores(result.score)
+            )
+        ) if type(result) == PhEvalGeneResult else ranked_result.append(
+            RankedPhEvalVariantResult(
+                pheval_variant_result=result, rank=score_ranker.rank_scores(result.score)
+            )
+        )
+    return ranked_result
+def _return_sort_order(sort_order_str: str) -> SortOrder:
+    """Return the SortOrder Enum from string derived from config."""
+    try:
+        return SortOrder[sort_order_str.upper()]
+    except KeyError:
+        raise ValueError("Incompatible ordering method specified.")
+def _create_pheval_result(
+    pheval_result: [PhEvalGeneResult] or [PhEvalVariantResult], sort_order_str: str
+) -> [RankedPhEvalGeneResult] or [RankedPhEvalVariantResult]:
+    """Create PhEval gene/variant result with corresponding ranks."""
+    sort_order = _return_sort_order(sort_order_str)
+    sorted_pheval_result = ResultSorter(pheval_result, sort_order).sort_pheval_results()
+    return _rank_pheval_result(sorted_pheval_result, sort_order)
+def _write_pheval_gene_result(
+    ranked_pheval_result: [RankedPhEvalGeneResult], output_dir: Path, tool_result_path: Path
+) -> None:
+    """Write ranked PhEval gene result to tsv."""
+    ranked_result = pd.DataFrame([x.as_dict() for x in ranked_pheval_result])
+    pheval_gene_output = ranked_result.loc[:, ["rank", "score", "gene_symbol", "gene_identifier"]]
+    pheval_gene_output.to_csv(
+        output_dir.joinpath(
+            "pheval_gene_results/" + tool_result_path.stem + "-pheval_gene_result.tsv"
+        ),
+        sep="\t",
+        index=False,
+    )
+def _write_pheval_variant_result(
+    ranked_pheval_result: [RankedPhEvalVariantResult], output_dir: Path, tool_result_path: Path
+) -> None:
+    """Write ranked PhEval variant result to tsv."""
+    ranked_result = pd.DataFrame([x.as_dict() for x in ranked_pheval_result])
+    pheval_variant_output = ranked_result.loc[
+        :, ["rank", "score", "chromosome", "start", "end", "ref", "alt"]
+    ]
+    pheval_variant_output.to_csv(
+        output_dir.joinpath(
+            "pheval_variant_results/" + tool_result_path.stem + "-pheval_variant_result.tsv"
+        ),
+        sep="\t",
+        index=False,
+    )
+def generate_pheval_result(
+    pheval_result: [PhEvalGeneResult] or [PhEvalVariantResult],
+    sort_order_str: str,
+    output_dir: Path,
+    tool_result_path: Path,
+):
+    """Generate either a PhEval variant or PhEval gene tsv result."""
+    ranked_pheval_result = _create_pheval_result(pheval_result, sort_order_str)
+    _write_pheval_variant_result(ranked_pheval_result, output_dir, tool_result_path) if all(
+        isinstance(result, RankedPhEvalVariantResult) for result in ranked_pheval_result
+    ) else _write_pheval_gene_result(ranked_pheval_result, output_dir, tool_result_path)

pheval/prepare/__init__.py ADDED Viewed

File without changes

pheval 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

pheval 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl