PyPI - pheval - Versions diffs - 0.3.5__tar.gz → 0.3.6__tar.gz - Mend

pheval 0.3.5.tar.gz → 0.3.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pheval might be problematic. Click here for more details.

Files changed (56)

{pheval-0.3.5 → pheval-0.3.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pheval
-Version: 0.3.5
+Version: 0.3.6
 Summary:
 Author: Yasemin Bridges
 Author-email: y.bridges@qmul.ac.uk

{pheval-0.3.5 → pheval-0.3.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pheval"
-version = "0.3.5"
+version = "0.3.6"
 description = ""
 authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
   "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",

{pheval-0.3.5 → pheval-0.3.6}/src/pheval/post_processing/post_processing.py RENAMED Viewed

@@ -227,68 +227,7 @@ class ResultSorter:
         )
-class ScoreRanker:
-    """
-    Class for ranking scores based on a given sort order
-    Attributes:
-       rank (int): Represents the current rank, initialised with 0
-       current_score (float): Represents the current score, initialised with positive infinity (float("inf"))
-       count (int): Used for counting, initialised with 0
-    """
-    rank: int = 0
-    current_score: float = float("inf")
-    count: int = 0
-    def __init__(self, sort_order: SortOrder):
-        """
-        Initialise ScoreRanker
-        Args:
-            sort_order (SortOrder): Sorting order to be applied
-        """
-        self.sort_order = sort_order
-    def _check_rank_order(self, round_score: float) -> None:
-        """
-        Check if the results are correctly ordered
-        Args:
-            round_score (float): Score to be checked against the current score
-        Raises:
-            ValueError: If results are not correctly sorted.
-        """
-        if self.sort_order == SortOrder.ASCENDING and round_score < self.current_score != float(
-            "inf"
-        ):
-            raise ValueError("Results are not correctly sorted!")
-        elif self.sort_order == SortOrder.DESCENDING and round_score > self.current_score != float(
-            "inf"
-        ):
-            raise ValueError("Results are not correctly sorted!")
-    def rank_scores(self, round_score: float) -> int:
-        """
-        Add ranks to a result; equal scores are given the same rank, e.g., 1, 1, 3
-        Args:
-            round_score (float): Score to be ranked
-        Returns:
-            int: Rank assigned to the score
-        """
-        self._check_rank_order(round_score)
-        self.count += 1
-        if self.current_score == round_score:
-            return self.rank
-        self.current_score = round_score
-        self.rank = self.count
-        return self.rank
-def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> [PhEvalResult]:
+def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> pd.DataFrame:
     """
     Rank PhEval results post-processed from tool-specific output, managing tied scores (ex aequo)
@@ -297,35 +236,17 @@ def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) ->
         sort_order (SortOrder): Sorting order based on which ranking is performed
     Returns:
-        List[PhEvalResult]: Ranked PhEval results with tied scores managed
+        pd.DataFrame : Ranked PhEval results with tied scores managed
     Raises:
         ValueError: If an incompatible PhEval result type is encountered
     """
-    score_ranker = ScoreRanker(sort_order)
-    ranked_result = []
-    for result in pheval_result:
-        if type(result) == PhEvalGeneResult:
-            ranked_result.append(
-                RankedPhEvalGeneResult.from_gene_result(
-                    result, score_ranker.rank_scores(result.score)
-                )
-            )
-        elif type(result) == PhEvalVariantResult:
-            ranked_result.append(
-                RankedPhEvalVariantResult.from_variant_result(
-                    result, score_ranker.rank_scores(result.score)
-                )
-            )
-        elif type(result) == PhEvalDiseaseResult:
-            ranked_result.append(
-                RankedPhEvalDiseaseResult.from_disease_result(
-                    result, score_ranker.rank_scores(result.score)
-                )
-            )
-        else:
-            raise ValueError("Incompatible PhEval result type.")
-    return ranked_result
+    pheval_result_df = pd.DataFrame([data.__dict__ for data in pheval_result])
+    if sort_order == SortOrder.ASCENDING:
+        pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=True)
+    elif sort_order == SortOrder.DESCENDING:
+        pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=False)
+    return pheval_result_df
 def _return_sort_order(sort_order_str: str) -> SortOrder:
@@ -347,7 +268,7 @@ def _return_sort_order(sort_order_str: str) -> SortOrder:
         raise ValueError("Incompatible ordering method specified.")
-def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) -> [PhEvalResult]:
+def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) -> pd.DataFrame:
     """
     Create PhEval results with corresponding ranks based on the specified sorting order.
@@ -356,7 +277,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
         sort_order_str (str): String representation of the desired sorting order.
     Returns:
-        List[PhEvalResult]: PhEval results with ranks assigned.
+       pd.DataFrame: PhEval results with ranks assigned.
     """
     sort_order = _return_sort_order(sort_order_str)
     sorted_pheval_result = ResultSorter(pheval_result, sort_order).sort_pheval_results()
@@ -364,7 +285,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
 def _write_pheval_gene_result(
-    ranked_pheval_result: [PhEvalResult], output_dir: Path, tool_result_path: Path
+    ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
 ) -> None:
     """
     Write ranked PhEval gene results to a TSV file
@@ -374,8 +295,9 @@ def _write_pheval_gene_result(
         output_dir (Path): Path to the output directory
         tool_result_path (Path): Path to the tool-specific result file
     """
-    ranked_result = pd.DataFrame([data.__dict__ for data in ranked_pheval_result])
-    pheval_gene_output = ranked_result.loc[:, ["rank", "score", "gene_symbol", "gene_identifier"]]
+    pheval_gene_output = ranked_pheval_result.loc[
+        :, ["rank", "score", "gene_symbol", "gene_identifier"]
+    ]
     pheval_gene_output.to_csv(
         output_dir.joinpath(
             "pheval_gene_results/" + tool_result_path.stem + "-pheval_gene_result.tsv"
@@ -386,7 +308,7 @@ def _write_pheval_gene_result(
 def _write_pheval_variant_result(
-    ranked_pheval_result: [PhEvalResult], output_dir: Path, tool_result_path: Path
+    ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
 ) -> None:
     """
     Write ranked PhEval variant results to a TSV file
@@ -396,8 +318,7 @@ def _write_pheval_variant_result(
         output_dir (Path): Path to the output directory
         tool_result_path (Path): Path to the tool-specific result file
     """
-    ranked_result = pd.DataFrame([data.__dict__ for data in ranked_pheval_result])
-    pheval_variant_output = ranked_result.loc[
+    pheval_variant_output = ranked_pheval_result.loc[
         :, ["rank", "score", "chromosome", "start", "end", "ref", "alt"]
     ]
     pheval_variant_output.to_csv(
@@ -410,7 +331,7 @@ def _write_pheval_variant_result(
 def _write_pheval_disease_result(
-    ranked_pheval_result: [RankedPhEvalDiseaseResult], output_dir: Path, tool_result_path: Path
+    ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
 ) -> None:
     """
     Write ranked PhEval disease results to a TSV file
@@ -420,8 +341,7 @@ def _write_pheval_disease_result(
         output_dir (Path): Path to the output directory
         tool_result_path (Path): Path to the tool-specific result file
     """
-    ranked_result = pd.DataFrame([data.__dict__ for data in ranked_pheval_result])
-    pheval_disease_output = ranked_result.loc[
+    pheval_disease_output = ranked_pheval_result.loc[
         :, ["rank", "score", "disease_name", "disease_identifier"]
     ]
     pheval_disease_output.to_csv(

{pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/create_spiked_vcf.py RENAMED Viewed

@@ -328,22 +328,35 @@ class VcfSpiker:
             genotype_codes[proband_variant_data.genotype.lower()] + "\n",
         ]
-    def construct_vcf_records(self) -> List[str]:
+    def construct_vcf_records(self, template_vcf_name: str) -> List[str]:
         """
         Construct updated VCF records by inserting spiked variants into the correct positions within the VCF.
+        Args:
+            template_vcf_name (str): Name of the template VCF file.
         Returns:
             List[str]: Updated VCF records containing the spiked variants.
         """
         updated_vcf_records = copy(self.vcf_contents)
         for variant in self.proband_causative_variants:
-            variant = self.construct_variant_entry(variant)
-            variant_entry_position = [
+            variant_entry = self.construct_variant_entry(variant)
+            matching_indices = [
                 i
                 for i, val in enumerate(updated_vcf_records)
-                if val.split("\t")[0] == variant[0] and int(val.split("\t")[1]) < int(variant[1])
-            ][-1] + 1
-            updated_vcf_records.insert(variant_entry_position, "\t".join(variant))
+                if val.split("\t")[0] == variant_entry[0]
+                and int(val.split("\t")[1]) < int(variant_entry[1])
+            ]
+            if matching_indices:
+                variant_entry_position = matching_indices[-1] + 1
+            else:
+                info_log.warning(
+                    f"Could not find entry position for {variant.variant.chrom}-{variant.variant.pos}-"
+                    f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}, "
+                    "inserting at end of VCF contents."
+                )
+                variant_entry_position = len(updated_vcf_records)
+            updated_vcf_records.insert(variant_entry_position, "\t".join(variant_entry))
         return updated_vcf_records
     def construct_header(self, updated_vcf_records: List[str]) -> List[str]:
@@ -358,21 +371,27 @@ class VcfSpiker:
         """
         updated_vcf_file = []
         for line in updated_vcf_records:
-            text = line.replace(
-                self.vcf_header.sample_id,
-                self.proband_causative_variants[0].proband_id,
-            )
+            if line.startswith("#"):
+                text = line.replace(
+                    self.vcf_header.sample_id,
+                    self.proband_causative_variants[0].proband_id,
+                )
+            else:
+                text = line
             updated_vcf_file.append(text)
         return updated_vcf_file
-    def construct_vcf(self) -> List[str]:
+    def construct_vcf(self, template_vcf_name: str) -> List[str]:
         """
         Construct the entire spiked VCF file by incorporating the spiked variants into the VCF.
+        Args:
+            template_vcf_name (str): Name of the template VCF file.
         Returns:
             List[str]: The complete spiked VCF file content as a list of strings.
         """
-        return self.construct_header(self.construct_vcf_records())
+        return self.construct_header(self.construct_vcf_records(template_vcf_name))
 class VcfWriter:
@@ -454,7 +473,7 @@ def spike_vcf_contents(
             chosen_template_vcf.vcf_contents,
             phenopacket_causative_variants,
             chosen_template_vcf.vcf_header,
-        ).construct_vcf(),
+        ).construct_vcf(chosen_template_vcf.vcf_file_name),
     )

{pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/prepare_corpus.py RENAMED Viewed

@@ -39,6 +39,11 @@ def prepare_corpus(
     output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
     for phenopacket_path in all_files(phenopacket_dir):
         phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
+        if not phenopacket_util.observed_phenotypic_features():
+            info_log.warning(
+                f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
+            )
+            continue
         if variant_analysis:
             if phenopacket_util.check_incomplete_variant_record():
                 info_log.warning(