pheval 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

Files changed (42) hide show
  1. pheval/__init__.py +0 -5
  2. pheval/analyse/__init__.py +0 -0
  3. pheval/analyse/analysis.py +703 -0
  4. pheval/analyse/generate_plots.py +312 -0
  5. pheval/analyse/generate_summary_outputs.py +186 -0
  6. pheval/analyse/rank_stats.py +61 -0
  7. pheval/cli.py +22 -7
  8. pheval/cli_pheval.py +37 -12
  9. pheval/cli_pheval_utils.py +225 -8
  10. pheval/config_parser.py +36 -0
  11. pheval/constants.py +1 -0
  12. pheval/implementations/__init__.py +1 -3
  13. pheval/post_processing/__init__.py +0 -0
  14. pheval/post_processing/post_processing.py +210 -0
  15. pheval/prepare/__init__.py +0 -0
  16. pheval/prepare/create_noisy_phenopackets.py +173 -0
  17. pheval/prepare/create_spiked_vcf.py +366 -0
  18. pheval/prepare/custom_exceptions.py +47 -0
  19. pheval/prepare/update_phenopacket.py +53 -0
  20. pheval/resources/alternate_ouputs/CADA_results.txt +11 -0
  21. pheval/resources/alternate_ouputs/DeepPVP_results.txt +22 -0
  22. pheval/resources/alternate_ouputs/OVA_results.txt +11 -0
  23. pheval/resources/alternate_ouputs/Phen2Gene_results.json +814 -0
  24. pheval/resources/alternate_ouputs/Phenolyzer_results.txt +12 -0
  25. pheval/resources/alternate_ouputs/lirical_results.tsv +152 -0
  26. pheval/resources/alternate_ouputs/svanna_results.tsv +9 -0
  27. pheval/resources/hgnc_complete_set_2022-10-01.txt +43222 -0
  28. pheval/run_metadata.py +27 -0
  29. pheval/runners/runner.py +92 -11
  30. pheval/utils/__init__.py +0 -0
  31. pheval/utils/docs_gen.py +105 -0
  32. pheval/utils/docs_gen.sh +18 -0
  33. pheval/utils/file_utils.py +88 -0
  34. pheval/utils/phenopacket_utils.py +356 -0
  35. pheval/utils/semsim_utils.py +156 -0
  36. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/METADATA +12 -4
  37. pheval-0.2.0.dist-info/RECORD +41 -0
  38. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/WHEEL +1 -1
  39. pheval/utils.py +0 -7
  40. pheval-0.1.0.dist-info/RECORD +0 -13
  41. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/LICENSE +0 -0
  42. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/entry_points.txt +0 -0
pheval/run_metadata.py ADDED
@@ -0,0 +1,27 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+ from serde import serde
6
+
7
+
8
@serde
@dataclass
class BasicOutputRunMetaData:
    """Class for defining variables for the run metadata.

    Args:
        tool (str): Name of the tool implementation
        tool_version (str): Version of the tool implementation
        config (Path): Path to the config file located in the input directory
        run_timestamp (int): Timestamp of the run. The runner fills this with
            ``datetime.now().timestamp()``, i.e. a point in time rather than a
            duration.
        corpus (Path): Path to corpus used in pheval run
        tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to
            contain any extra tool specific configurations used in the run
    """

    tool: str
    tool_version: str
    config: Path
    run_timestamp: int
    corpus: Path
    tool_specific_configuration_options: Any = None
pheval/runners/runner.py CHANGED
@@ -1,19 +1,95 @@
1
1
  """Runners Module"""
2
2
  from abc import ABC, abstractmethod
3
3
  from dataclasses import dataclass
4
+ from datetime import datetime
5
+ from pathlib import Path
4
6
 
5
- import click
7
+ from pheval.config_parser import parse_input_dir_config
8
+ from pheval.run_metadata import BasicOutputRunMetaData
6
9
 
7
10
 
8
11
  @dataclass
9
12
  class PhEvalRunner(ABC):
10
13
  """PhEvalRunner Class"""
11
14
 
12
- inputdir: click.Path
13
- testdatadir: click.Path
14
- tmpdir: click.Path
15
- outputdir: click.Path
16
- config: click.Path
15
+ input_dir: Path
16
+ testdata_dir: Path
17
+ tmp_dir: Path
18
+ output_dir: Path
19
+ config_file: Path
20
+ version: str
21
+ directory_path = None
22
+ input_dir_config = None
23
+ _meta_data = None
24
+ __raw_results_dir = "raw_results/"
25
+ __pheval_gene_results_dir = "pheval_gene_results/"
26
+ __pheval_variant_results_dir = "pheval_variant_results/"
27
+ __tool_input_commands_dir = "tool_input_commands/"
28
+ __run_meta_data_file = "results.yml"
29
+
30
def __post_init__(self):
    """Parse the configuration found in `input_dir` and keep it on the instance."""
    parsed_config = parse_input_dir_config(self.input_dir)
    self.input_dir_config = parsed_config
32
+
33
+ def _get_tool(self):
34
+ return self.input_dir_config.tool
35
+
36
+ def _get_phenotype_only(self):
37
+ return self.input_dir_config.phenotype_only
38
+
39
@property
def tool_input_commands_dir(self):
    """Path of the tool input commands directory, located under `output_dir`."""
    return Path(self.output_dir) / self.__tool_input_commands_dir

@tool_input_commands_dir.setter
def tool_input_commands_dir(self, directory_path):
    # NOTE(review): the value is stored on `directory_path`, which the getter
    # never reads, so assigning here does not change what the getter returns.
    new_location = Path(directory_path)
    self.directory_path = new_location
46
+
47
@property
def raw_results_dir(self):
    """Path of the raw results directory, located under `output_dir`."""
    return Path(self.output_dir) / self.__raw_results_dir

@raw_results_dir.setter
def raw_results_dir(self, directory_path):
    # NOTE(review): the value is stored on `directory_path`, which the getter
    # never reads, so assigning here does not change what the getter returns.
    new_location = Path(directory_path)
    self.directory_path = new_location
54
+
55
@property
def pheval_gene_results_dir(self):
    """Path of the PhEval gene results directory, located under `output_dir`."""
    return Path(self.output_dir) / self.__pheval_gene_results_dir

@pheval_gene_results_dir.setter
def pheval_gene_results_dir(self, directory_path):
    # NOTE(review): the value is stored on `directory_path`, which the getter
    # never reads, so assigning here does not change what the getter returns.
    new_location = Path(directory_path)
    self.directory_path = new_location
62
+
63
@property
def pheval_variant_results_dir(self):
    """Path of the PhEval variant results directory, located under `output_dir`."""
    return Path(self.output_dir) / self.__pheval_variant_results_dir

@pheval_variant_results_dir.setter
def pheval_variant_results_dir(self, directory_path):
    # NOTE(review): the value is stored on `directory_path`, which the getter
    # never reads, so assigning here does not change what the getter returns.
    new_location = Path(directory_path)
    self.directory_path = new_location
70
+
71
def build_output_directory_structure(self):
    """Create the output sub-directories used by a run.

    The tool input commands, raw results and gene results directories are
    always created; the variant results directory is created only when the
    run is not phenotype-only. Directories that already exist are left as
    they are.
    """
    always_needed = (
        self.tool_input_commands_dir,
        self.raw_results_dir,
        self.pheval_gene_results_dir,
    )
    for directory in always_needed:
        # parents=True so a not-yet-existing output directory is created too
        # instead of raising FileNotFoundError.
        directory.mkdir(parents=True, exist_ok=True)
    if not self._get_phenotype_only():
        self.pheval_variant_results_dir.mkdir(parents=True, exist_ok=True)
78
+
79
@property
def meta_data(self):
    """Run metadata, built on first access and cached afterwards.

    Caching keeps a value assigned through the setter, and any changes made
    to the returned object (for example filling in
    `tool_specific_configuration_options`), from being discarded by a rebuild
    on the next access. The timestamp is therefore taken when the metadata is
    first built.
    """
    if self._meta_data is None:
        self._meta_data = BasicOutputRunMetaData(
            tool=self.input_dir_config.tool,
            tool_version=self.version,
            # Only the last two path components are recorded.
            config=f"{Path(self.input_dir).parent.name}/{Path(self.input_dir).name}",
            run_timestamp=datetime.now().timestamp(),
            corpus=f"{Path(self.testdata_dir).parent.name}/{Path(self.testdata_dir).name}",
        )
    return self._meta_data

@meta_data.setter
def meta_data(self, meta_data):
    """Replace the run metadata returned by the getter."""
    self._meta_data = meta_data
17
93
 
18
94
  @abstractmethod
19
95
  def prepare(self) -> str:
@@ -27,6 +103,10 @@ class PhEvalRunner(ABC):
27
103
  def post_process(self):
28
104
  """post_process"""
29
105
 
106
def construct_meta_data(self):
    """Return the run output meta data exposed by the `meta_data` property."""
    run_meta_data = self.meta_data
    return run_meta_data
109
+
30
110
 
31
111
  class DefaultPhEvalRunner(PhEvalRunner):
32
112
  """DefaultPhEvalRunner
@@ -35,11 +115,12 @@ class DefaultPhEvalRunner(PhEvalRunner):
35
115
  PhEvalRunner (PhEvalRunner): Abstract PhEvalRunnerClass
36
116
  """
37
117
 
38
- inputdir: click.Path
39
- testdatadir: click.Path
40
- tmpdir: click.Path
41
- outputdir: click.Path
42
- config: click.Path
118
+ input_dir: Path
119
+ testdata_dir: Path
120
+ tmp_dir: Path
121
+ output_dir: Path
122
+ config_file: Path
123
+ version: str
43
124
 
44
125
  def prepare(self):
45
126
  print("preparing")
pheval/utils/__init__.py ADDED
File without changes
pheval/utils/docs_gen.py ADDED
@@ -0,0 +1,105 @@
1
+ """docs generate utils"""
2
+ import ast
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+
8
def find_methods_in_python_file(file_path):
    """Return the names of the functions defined in a python file.

    Functions at any nesting level (including methods) are reported, in the
    order `ast.walk` visits them. Only `ast.FunctionDef` nodes are collected.

    Args:
        file_path: Path of the python file to parse.

    Returns:
        list: The collected function names.
    """
    with open(file_path, "r", encoding="utf-8") as source_file:
        tree = ast.parse(source_file.read())
    return [node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]
23
+
24
+
25
def list_valid_files():
    """List the python files under ./src whose documentation will be generated.

    Empty files, non-files and the `docs_gen` / `__init__` modules are left out.

    Returns:
        list: One dict per file with the keys `path`, `docs_path`, `basename`
        and `folder`.
    """
    skipped_basenames = ["docs_gen", "__init__"]
    src_root = "./src"
    selected = []
    for py_file in Path(src_root).rglob("*.py"):
        has_content = os.stat(py_file).st_size != 0
        if not (has_content and os.path.isfile(py_file)):
            continue

        stem = py_file.name.split(".")[0]
        if stem in skipped_basenames:
            continue

        parent_folder = "/".join(py_file.parts[:-1])
        selected.append(
            {
                "path": py_file,
                "docs_path": f"./docs/api/{parent_folder.replace('src/', '')}/{stem}.md",
                "basename": stem,
                "folder": parent_folder,
            }
        )

    return selected
53
+
54
+
55
def print_api_doc(file_item):
    """Write the mkdocs `:::` directive for the module described by `file_item`."""
    dotted_folder = str(file_item["folder"]).replace("./", "").replace("/", ".")
    directive = f"::: {dotted_folder}.{file_item['basename']}"
    write_doc(file_item, directive)
59
+
60
+
61
def write_doc(file_item, content):
    """Append `content` to the docs file of `file_item`, creating its folder first.

    Args:
        file_item: Dict whose `docs_path` entry is the file to write to.
        content: Text appended to that file.
    """
    docs_path = file_item["docs_path"]
    target_dir = os.path.dirname(docs_path)
    os.makedirs(target_dir, exist_ok=True)
    with open(docs_path, "a", encoding="utf-8") as handle:
        handle.write(content)
66
+
67
+
68
+ def print_cli_doc(file_item):
69
+ """Documentation specific to the CLI is generated by this method.
70
+ When it comes to a CLI, mkdocs click must be used."""
71
+ methods = find_methods_in_python_file(file_item["path"])
72
+ for method in methods:
73
+ content = f"""
74
+ ::: mkdocs-click
75
+ :package: {file_item['folder'].replace("./", '').replace('/', '.')}.{file_item['basename']}
76
+ :module: {file_item['folder'].replace("./", '').replace('/', '.').replace('src.', '')}.{file_item['basename']}
77
+ :command: {method}
78
+ :depth: 4
79
+ :style: table
80
+ :list_subcommands: true
81
+ """
82
+ write_doc(file_item, content)
83
+
84
+
85
def gen_docs():
    """Regenerate the API documentation stubs.

    Removes the existing `docs/api` folder below the current directory, then
    writes one stub per file returned by `list_valid_files`: the `cli` module
    gets the CLI specific documentation, modules whose name starts with
    `cli_` are skipped and every other module gets the API directive.
    """
    api_folder = f"{os.path.abspath(os.curdir)}/docs/api"
    print(api_folder)
    shutil.rmtree(api_folder, ignore_errors=True)
    documented_files = list_valid_files()
    print(documented_files)
    for entry in documented_files:
        module_name = entry["basename"]
        if module_name.startswith("cli_"):
            continue
        if module_name == "cli":
            print(module_name)
            print_cli_doc(entry)
        else:
            print_api_doc(entry)
    print("done")
102
+
103
+
104
+ if __name__ == "__main__":
105
+ gen_docs()
pheval/utils/docs_gen.sh ADDED
@@ -0,0 +1,18 @@
1
#!/bin/bash

# Writes one mkdocs API stub (./docs/api/<package path>/<module>.md) for every
# non-empty Python file under ./src, except __init__.py files.
# NOTE(review): the project root is reached with a relative `cd`, so the script
# presumably has to be started from its own directory - confirm.

# Stop if the project root cannot be reached: the `rm -rf` below must never run
# from an unexpected working directory.
cd ../../../ || exit 1
SOURCE_FOLDER='./src'
rm -rf ./docs/api

# Read find's output line by line so paths containing spaces stay intact.
find "$SOURCE_FOLDER" -type f -iname '*.py' -not -iname '__init__.py' -not -empty |
while IFS= read -r f
do
    clean_dir=${f#./src/}
    last_folder=$(dirname "$clean_dir")
    full_fname="${f##*/}"
    fname="${full_fname%%.*}"
    mkdir -p "./docs/api/$last_folder"
    # ./src/pkg/mod.py -> ..src.pkg.mod.py -> src.pkg.mod.py -> src.pkg.mod
    ref=$(echo "$f" | sed 's#/#.#g' | sed 's/..src/src/g' | sed 's/\.[^.]*$//')
    echo ::: "$ref" >> "./docs/api/$last_folder/$fname.md"
done
pheval/utils/file_utils.py ADDED
@@ -0,0 +1,88 @@
1
+ import difflib
2
+ import itertools
3
+ import re
4
+ import unicodedata
5
+ from os import path
6
+ from pathlib import Path
7
+ from typing import List
8
+
9
+ import pandas as pd
10
+ import yaml
11
+ from serde import to_dict
12
+
13
+ from pheval.run_metadata import BasicOutputRunMetaData
14
+
15
+
16
def files_with_suffix(directory: Path, suffix: str):
    """Return the sorted entries of a directory whose suffix equals `suffix`."""
    return sorted(entry for entry in directory.iterdir() if entry.suffix == suffix)
21
+
22
+
23
def all_files(directory: Path) -> list[Path]:
    """Return every entry of a directory, sorted."""
    return sorted(directory.iterdir())
28
+
29
+
30
def is_gzipped(path: Path) -> bool:
    """Tell whether the file name ends in `.gz`."""
    file_name = path.name
    return file_name[-3:] == ".gz"
33
+
34
+
35
def normalise_file_name(file_path: Path) -> str:
    """Return the path as a string with combining diacritical marks removed.

    The text is NFD-normalised first, then every character in the range
    U+0300 to U+036F is dropped.
    """
    decomposed = unicodedata.normalize("NFD", str(file_path))
    combining_marks = re.compile("[\u0300-\u036f]")
    return combining_marks.sub("", decomposed)
38
+
39
+
40
def obtain_closest_file_name(file_to_be_queried: Path, file_paths: list[Path]) -> Path:
    """Return the path from `file_paths` whose stem is closest to the stem of the queried file.

    When several paths share the best matching stem, the first one is returned.
    """
    query_stem = str(Path(file_to_be_queried).stem)
    candidate_stems = [Path(candidate).stem for candidate in file_paths]
    best_stem = difflib.get_close_matches(query_stem, candidate_stems, cutoff=0.1, n=1)[0]
    for candidate in file_paths:
        if str(candidate.stem) == best_stem:
            return candidate
    raise IndexError("list index out of range")
47
+
48
+
49
def ensure_file_exists(*files: str):
    """Check that every file passed as argument exists.

    Raises:
        FileNotFoundError: For the first argument that is not an existing file
    """
    missing = next((candidate for candidate in files if not path.isfile(candidate)), None)
    if missing is not None:
        raise FileNotFoundError(f"File {missing} not found")
57
+
58
+
59
def ensure_columns_exists(cols: list, dataframes: List[pd.DataFrame], err_message: str = ""):
    """Ensure every requested column is present in every dataframe.

    Example:
        ensure_columns_exists(
            cols=['column_a', 'column_b', 'column_c'],
            err_message="Custom error message if any column doesn't exist in any dataframe passed as argument",
            dataframes=[data_frame1, data_frame2],
        )

    Nothing is checked when `cols` or `dataframes` is empty.

    Args:
        cols: Column labels that must be present.
        dataframes: Dataframes to inspect.
        err_message: Text appended to the listed columns in the error; a
            default explanation is used when it is empty.

    Raises:
        ValueError: If any dataframe lacks at least one of the columns.
    """
    flat_cols = list(itertools.chain(cols))
    if not dataframes or not flat_cols:
        return
    for dataframe in dataframes:
        if all(col in dataframe.columns for col in flat_cols):
            continue
        # The message is built only on failure and from str() of each label, so
        # non-string column labels cannot break a check that would pass.
        labels = [str(col) for col in flat_cols]
        if len(labels) == 1:
            listing = labels[0]
        else:
            listing = f"{', '.join(labels[:-1])} and {labels[-1]}"
        reason = err_message or "- must be present in both left and right files"
        raise ValueError(f"columns: {listing} {reason}")
82
+
83
+
84
def write_metadata(output_dir: Path, meta_data: BasicOutputRunMetaData) -> None:
    """Write the metadata for a run to `results.yml` inside `output_dir`.

    Args:
        output_dir: Directory that receives the `results.yml` file.
        meta_data: Run metadata; it is converted with `to_dict` and dumped as
            YAML with the keys kept in their existing order.
    """
    # The context manager closes the file, so no explicit close() is needed.
    with open(Path(output_dir).joinpath("results.yml"), "w", encoding="utf-8") as metadata_file:
        yaml.dump(to_dict(meta_data), metadata_file, sort_keys=False, default_style="")