pheval 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/__init__.py +0 -5
- pheval/analyse/__init__.py +0 -0
- pheval/analyse/analysis.py +703 -0
- pheval/analyse/generate_plots.py +312 -0
- pheval/analyse/generate_summary_outputs.py +186 -0
- pheval/analyse/rank_stats.py +61 -0
- pheval/cli.py +22 -7
- pheval/cli_pheval.py +37 -12
- pheval/cli_pheval_utils.py +225 -8
- pheval/config_parser.py +36 -0
- pheval/constants.py +1 -0
- pheval/implementations/__init__.py +1 -3
- pheval/post_processing/__init__.py +0 -0
- pheval/post_processing/post_processing.py +210 -0
- pheval/prepare/__init__.py +0 -0
- pheval/prepare/create_noisy_phenopackets.py +173 -0
- pheval/prepare/create_spiked_vcf.py +366 -0
- pheval/prepare/custom_exceptions.py +47 -0
- pheval/prepare/update_phenopacket.py +53 -0
- pheval/resources/alternate_ouputs/CADA_results.txt +11 -0
- pheval/resources/alternate_ouputs/DeepPVP_results.txt +22 -0
- pheval/resources/alternate_ouputs/OVA_results.txt +11 -0
- pheval/resources/alternate_ouputs/Phen2Gene_results.json +814 -0
- pheval/resources/alternate_ouputs/Phenolyzer_results.txt +12 -0
- pheval/resources/alternate_ouputs/lirical_results.tsv +152 -0
- pheval/resources/alternate_ouputs/svanna_results.tsv +9 -0
- pheval/resources/hgnc_complete_set_2022-10-01.txt +43222 -0
- pheval/run_metadata.py +27 -0
- pheval/runners/runner.py +92 -11
- pheval/utils/__init__.py +0 -0
- pheval/utils/docs_gen.py +105 -0
- pheval/utils/docs_gen.sh +18 -0
- pheval/utils/file_utils.py +88 -0
- pheval/utils/phenopacket_utils.py +356 -0
- pheval/utils/semsim_utils.py +156 -0
- {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/METADATA +12 -4
- pheval-0.2.0.dist-info/RECORD +41 -0
- {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/WHEEL +1 -1
- pheval/utils.py +0 -7
- pheval-0.1.0.dist-info/RECORD +0 -13
- {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/LICENSE +0 -0
- {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/entry_points.txt +0 -0
pheval/run_metadata.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from serde import serde
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@serde
@dataclass
class BasicOutputRunMetaData:
    """Class for defining variables for the run metadata.

    Args:
        tool (str): Name of the tool implementation
        tool_version (str): Version of the tool implementation
        config (Path): Path to the config file located in the input directory
        run_timestamp (int): Timestamp recorded for the run.
            NOTE(review): PhEvalRunner.meta_data fills this with
            ``datetime.now().timestamp()`` (a float point in time, not a
            duration) -- confirm whether ``int`` is the intended type.
        corpus (Path): Path to corpus used in pheval run
        tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to
            contain any extra tool specific configurations used in the run
    """

    # Required fields, in the order they are serialised.
    tool: str
    tool_version: str
    # NOTE(review): PhEvalRunner.meta_data passes a "<parent>/<name>" string
    # for both ``config`` and ``corpus`` rather than a Path object.
    config: Path
    run_timestamp: int
    corpus: Path
    # Optional; stays None unless a tool implementation supplies a value.
    tool_specific_configuration_options: Any = None
|
pheval/runners/runner.py
CHANGED
|
@@ -1,19 +1,95 @@
|
|
|
1
1
|
"""Runners Module"""
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
4
6
|
|
|
5
|
-
import
|
|
7
|
+
from pheval.config_parser import parse_input_dir_config
|
|
8
|
+
from pheval.run_metadata import BasicOutputRunMetaData
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
@dataclass
|
|
9
12
|
class PhEvalRunner(ABC):
|
|
10
13
|
"""PhEvalRunner Class"""
|
|
11
14
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
input_dir: Path
|
|
16
|
+
testdata_dir: Path
|
|
17
|
+
tmp_dir: Path
|
|
18
|
+
output_dir: Path
|
|
19
|
+
config_file: Path
|
|
20
|
+
version: str
|
|
21
|
+
directory_path = None
|
|
22
|
+
input_dir_config = None
|
|
23
|
+
_meta_data = None
|
|
24
|
+
__raw_results_dir = "raw_results/"
|
|
25
|
+
__pheval_gene_results_dir = "pheval_gene_results/"
|
|
26
|
+
__pheval_variant_results_dir = "pheval_variant_results/"
|
|
27
|
+
__tool_input_commands_dir = "tool_input_commands/"
|
|
28
|
+
__run_meta_data_file = "results.yml"
|
|
29
|
+
|
|
30
|
+
def __post_init__(self):
|
|
31
|
+
self.input_dir_config = parse_input_dir_config(self.input_dir)
|
|
32
|
+
|
|
33
|
+
def _get_tool(self):
|
|
34
|
+
return self.input_dir_config.tool
|
|
35
|
+
|
|
36
|
+
def _get_phenotype_only(self):
|
|
37
|
+
return self.input_dir_config.phenotype_only
|
|
38
|
+
|
|
39
|
+
@property
|
|
40
|
+
def tool_input_commands_dir(self):
|
|
41
|
+
return Path(self.output_dir).joinpath(self.__tool_input_commands_dir)
|
|
42
|
+
|
|
43
|
+
@tool_input_commands_dir.setter
|
|
44
|
+
def tool_input_commands_dir(self, directory_path):
|
|
45
|
+
self.directory_path = Path(directory_path)
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def raw_results_dir(self):
|
|
49
|
+
return Path(self.output_dir).joinpath(self.__raw_results_dir)
|
|
50
|
+
|
|
51
|
+
@raw_results_dir.setter
|
|
52
|
+
def raw_results_dir(self, directory_path):
|
|
53
|
+
self.directory_path = Path(directory_path)
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
def pheval_gene_results_dir(self):
|
|
57
|
+
return Path(self.output_dir).joinpath(self.__pheval_gene_results_dir)
|
|
58
|
+
|
|
59
|
+
@pheval_gene_results_dir.setter
|
|
60
|
+
def pheval_gene_results_dir(self, directory_path):
|
|
61
|
+
self.directory_path = Path(directory_path)
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
def pheval_variant_results_dir(self):
|
|
65
|
+
return Path(self.output_dir).joinpath(self.__pheval_variant_results_dir)
|
|
66
|
+
|
|
67
|
+
@pheval_variant_results_dir.setter
|
|
68
|
+
def pheval_variant_results_dir(self, directory_path):
|
|
69
|
+
self.directory_path = Path(directory_path)
|
|
70
|
+
|
|
71
|
+
def build_output_directory_structure(self):
|
|
72
|
+
"""build output directory structure"""
|
|
73
|
+
self.tool_input_commands_dir.mkdir(exist_ok=True)
|
|
74
|
+
self.raw_results_dir.mkdir(exist_ok=True)
|
|
75
|
+
self.pheval_gene_results_dir.mkdir(exist_ok=True)
|
|
76
|
+
if not self._get_phenotype_only():
|
|
77
|
+
self.pheval_variant_results_dir.mkdir(exist_ok=True)
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def meta_data(self):
|
|
81
|
+
self._meta_data = BasicOutputRunMetaData(
|
|
82
|
+
tool=self.input_dir_config.tool,
|
|
83
|
+
tool_version=self.version,
|
|
84
|
+
config=f"{Path(self.input_dir).parent.name}/{Path(self.input_dir).name}",
|
|
85
|
+
run_timestamp=datetime.now().timestamp(),
|
|
86
|
+
corpus=f"{Path(self.testdata_dir).parent.name}/{Path(self.testdata_dir).name}",
|
|
87
|
+
)
|
|
88
|
+
return self._meta_data
|
|
89
|
+
|
|
90
|
+
@meta_data.setter
|
|
91
|
+
def meta_data(self, meta_data):
|
|
92
|
+
self._meta_data = meta_data
|
|
17
93
|
|
|
18
94
|
@abstractmethod
|
|
19
95
|
def prepare(self) -> str:
|
|
@@ -27,6 +103,10 @@ class PhEvalRunner(ABC):
|
|
|
27
103
|
def post_process(self):
|
|
28
104
|
"""post_process"""
|
|
29
105
|
|
|
106
|
+
def construct_meta_data(self):
|
|
107
|
+
"""Construct run output meta data"""
|
|
108
|
+
return self.meta_data
|
|
109
|
+
|
|
30
110
|
|
|
31
111
|
class DefaultPhEvalRunner(PhEvalRunner):
|
|
32
112
|
"""DefaultPhEvalRunner
|
|
@@ -35,11 +115,12 @@ class DefaultPhEvalRunner(PhEvalRunner):
|
|
|
35
115
|
PhEvalRunner (PhEvalRunner): Abstract PhEvalRunnerClass
|
|
36
116
|
"""
|
|
37
117
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
118
|
+
input_dir: Path
|
|
119
|
+
testdata_dir: Path
|
|
120
|
+
tmp_dir: Path
|
|
121
|
+
output_dir: Path
|
|
122
|
+
config_file: Path
|
|
123
|
+
version: str
|
|
43
124
|
|
|
44
125
|
def prepare(self):
|
|
45
126
|
print("preparing")
|
pheval/utils/__init__.py
ADDED
|
File without changes
|
pheval/utils/docs_gen.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""docs generate utils"""
|
|
2
|
+
import ast
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def find_methods_in_python_file(file_path):
    """Collect the names of the functions defined in a Python source file.

    The file is read as UTF-8 and parsed with ``ast``; it is never imported or
    executed. The whole syntax tree is walked, so module-level functions,
    methods and nested functions are all reported. Only ``ast.FunctionDef``
    nodes are matched, so ``async def`` definitions are not included.

    Args:
        file_path: Location of the Python file to inspect.

    Returns:
        list: Function names, in the order ``ast.walk`` visits them.
    """
    with open(file_path, "r", encoding="utf-8") as source_file:
        tree = ast.parse(source_file.read())
    return [item.name for item in ast.walk(tree) if isinstance(item, ast.FunctionDef)]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def list_valid_files():
    """List the Python files under ``./src`` that should get a documentation page.

    Empty files, entries that are not regular files, and modules named
    ``docs_gen`` or ``__init__`` are left out.

    Returns:
        list: One dict per remaining file with the keys ``path`` (the file),
        ``docs_path`` (markdown target below ``./docs/api``), ``basename``
        (file name up to its first dot) and ``folder`` (parent directory,
        joined with ``/``).
    """
    skipped_names = ("docs_gen", "__init__")
    collected = []
    for candidate in Path("./src").rglob("*.py"):
        if os.stat(candidate).st_size == 0 or not os.path.isfile(candidate):
            continue

        module_name = os.path.basename(candidate).split(".")[0]
        if module_name in skipped_names:
            continue

        parent = "/".join(Path(candidate).parts[:-1])
        collected.append(
            {
                "path": candidate,
                "docs_path": f"./docs/api/{parent.replace('src/', '')}/{module_name}.md",
                "basename": module_name,
                "folder": parent,
            }
        )

    return collected
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def print_api_doc(file_item):
    """Write the ``::: dotted.module.path`` line (mkdocs pattern) for one file.

    Args:
        file_item: Dict with at least the ``folder`` and ``basename`` keys; it
            is passed on unchanged to ``write_doc``.
    """
    package = str(file_item["folder"]).replace("./", "").replace("/", ".")
    directive = f"::: {package}.{file_item['basename']}"
    write_doc(file_item, directive)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def write_doc(file_item, content):
    """Append ``content`` to the documentation file of ``file_item``.

    The parent directories of ``file_item["docs_path"]`` are created when
    missing. The file is opened in append mode, so repeated calls for the same
    item accumulate text instead of overwriting it.

    Args:
        file_item: Dict holding the target location under ``docs_path``.
        content: Text to append (written as UTF-8).
    """
    target = file_item["docs_path"]
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "a", encoding="utf-8") as handle:
        handle.write(content)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def print_cli_doc(file_item):
    """Write one ``mkdocs-click`` block per function found in a CLI module.

    Every function name returned by ``find_methods_in_python_file`` for
    ``file_item["path"]`` is used as the ``:command:`` of its own block, and
    each block is appended to the item's documentation file via ``write_doc``.

    Args:
        file_item: Dict with the ``path``, ``folder`` and ``basename`` keys.
    """
    methods = find_methods_in_python_file(file_item["path"])
    for method in methods:
        # ``:package:`` keeps the dotted folder as-is, while ``:module:``
        # additionally strips every "src." occurrence from it.
        # NOTE(review): the option lines are reproduced exactly as shown in
        # this view, which carries no leading whitespace -- confirm the
        # template's indentation against the packaged file.
        content = f"""
::: mkdocs-click
:package: {file_item['folder'].replace("./", '').replace('/', '.')}.{file_item['basename']}
:module: {file_item['folder'].replace("./", '').replace('/', '.').replace('src.', '')}.{file_item['basename']}
:command: {method}
:depth: 4
:style: table
:list_subcommands: true
"""
        write_doc(file_item, content)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def gen_docs():
    """The main method for generating documentation.

    Deletes ``<current directory>/docs/api`` (errors during removal are
    ignored), then writes a documentation file for every entry returned by
    ``list_valid_files``. Progress is reported with ``print``.
    """
    api_folder = f"{os.path.abspath(os.curdir)}/docs/api"
    print(api_folder)
    # Start from a clean tree: write_doc appends, so stale files would grow.
    shutil.rmtree(api_folder, ignore_errors=True)
    valid_files = list_valid_files()
    print(valid_files)
    for file_item in valid_files:
        bname = file_item["basename"]
        # Only the module named exactly "cli" gets the click-specific page.
        if bname == "cli":  # or bname.startswith("cli_"):
            print(bname)
            print_cli_doc(file_item)
        # Other "cli_*" modules get no page at all.
        elif bname.startswith("cli_"):
            continue
        else:
            print_api_doc(file_item)
    print("done")


# Allow running the generator directly as a script.
if __name__ == "__main__":
    gen_docs()
|
pheval/utils/docs_gen.sh
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/bin/bash
#
# Rebuilds ./docs/api: for every non-empty Python file below ./src (except
# __init__.py) a markdown file is written that holds a single
# "::: <dotted.module.path>" line.

# set -e
# Stop if the directory change fails; otherwise the rm -rf below would run
# relative to whatever directory the script was started from.
cd ../../../ || exit 1
SOURCE_FOLDER='./src'
rm -rf ./docs/api

# NUL-delimited output keeps paths that contain whitespace in one piece.
find "$SOURCE_FOLDER" -type f -iname '*.py' -not -iname '__init__.py' -not -empty -print0 |
while IFS= read -r -d '' f
do
    clean_dir=${f#./src/}
    last_folder=$(dirname "$clean_dir")
    full_fname="${f##*/}"
    fname="${full_fname%%.*}"
    mkdir -p "./docs/api/$last_folder"
    # "./src/pkg/mod.py" -> "..src.pkg.mod.py" -> "src.pkg.mod.py" -> "src.pkg.mod".
    # The second expression is anchored so only the leading "./" is dropped.
    ref=$(echo "$f" | sed 's#/#.#g' | sed 's/^\.\.src/src/' | sed 's/\.[^.]*$//')
    echo "::: $ref" >> "./docs/api/$last_folder/$fname.md"
done
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import difflib
|
|
2
|
+
import itertools
|
|
3
|
+
import re
|
|
4
|
+
import unicodedata
|
|
5
|
+
from os import path
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
import yaml
|
|
11
|
+
from serde import to_dict
|
|
12
|
+
|
|
13
|
+
from pheval.run_metadata import BasicOutputRunMetaData
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def files_with_suffix(directory: Path, suffix: str):
    """Return the sorted entries of ``directory`` whose suffix equals ``suffix``.

    Only the direct children of ``directory`` are examined, and ``suffix`` is
    compared against ``Path.suffix`` (so it includes the leading dot).
    """
    return sorted(entry for entry in directory.iterdir() if entry.suffix == suffix)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def all_files(directory: Path) -> list[Path]:
    """Return every direct child of ``directory`` in sorted order.

    The result comes straight from ``Path.iterdir``, so sub-directories are
    listed alongside regular files.
    """
    return sorted(directory.iterdir())
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def is_gzipped(path: Path) -> bool:
    """Tell whether the file name of ``path`` ends in ``.gz``.

    Only the name is checked; the file content is never read.
    """
    file_name = path.name
    return file_name.endswith(".gz")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def normalise_file_name(file_path: Path) -> str:
    """Return ``file_path`` as a string without combining diacritical marks.

    The path is converted to NFD form, which splits accented characters into a
    base character plus combining marks, and every code point in the range
    U+0300-U+036F is then removed.
    """
    decomposed = unicodedata.normalize("NFD", str(file_path))
    without_marks = re.sub("[\u0300-\u036f]", "", decomposed)
    return without_marks
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def obtain_closest_file_name(file_to_be_queried: Path, file_paths: list[Path]) -> Path:
    """Return the path from ``file_paths`` whose stem is closest to the query's stem.

    Stems are compared with ``difflib.get_close_matches`` using a cutoff of
    0.1, and only the single best match is used. When several paths share the
    winning stem, the first one in ``file_paths`` is returned.

    Args:
        file_to_be_queried: Template file whose stem is searched for.
        file_paths: Candidate files; entries may be ``Path`` objects or strings.

    Returns:
        Path: The candidate with the closest stem.

    Raises:
        IndexError: If ``file_paths`` is empty or no stem reaches the cutoff.
    """
    # Wrap once so that string entries work for both the matching and the
    # final lookup.
    candidates = [Path(file_path) for file_path in file_paths]
    query_stem = Path(file_to_be_queried).stem
    matches = difflib.get_close_matches(
        query_stem, [candidate.stem for candidate in candidates], cutoff=0.1, n=1
    )
    if not matches:
        # Same exception type as indexing the empty result, with context added.
        raise IndexError(f"No file name close to '{query_stem}' found in the supplied paths")
    closest_stem = matches[0]
    return next(candidate for candidate in candidates if candidate.stem == closest_stem)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def ensure_file_exists(*files: str):
    """Check that every given path points to an existing regular file.

    Args:
        *files: Paths to check, in order.

    Raises:
        FileNotFoundError: For the first path that is not an existing file.
    """
    first_missing = next((entry for entry in files if not path.isfile(entry)), None)
    if first_missing is not None:
        raise FileNotFoundError(f"File {first_missing} not found")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def ensure_columns_exists(cols: list, dataframes: List[pd.DataFrame], err_message: str = ""):
    """Ensure that every column in ``cols`` exists in each of ``dataframes``.

    Example:
        ensure_columns_exists(
            cols=['column_a', 'column_b', 'column_c'],
            err_message="Custom error message if any column doesn't exist in any dataframe passed as argument",
            dataframes=[data_frame1, data_frame2],
        )

    Args:
        cols: Column labels that must be present.
        dataframes: Dataframes to check.
        err_message: Optional text appended after the column list in the error;
            when empty, a default "must be present" message is used.

    Raises:
        ValueError: If any dataframe lacks at least one of the columns.
    """
    required = list(cols)
    # Nothing to verify without columns or without dataframes.
    if not dataframes or not required:
        return

    # Labels are converted to text only for the message, so non-string column
    # labels can still be checked against the dataframes.
    names = [str(col) for col in required]
    if len(names) == 1:
        # Avoids the malformed "columns:  and x" text for a single column.
        listed = names[0]
    else:
        listed = f"{', '.join(names[:-1])} and {names[-1]}"
    reason = err_message or "- must be present in both left and right files"
    err_msg = f"columns: {listed} {reason}"

    for dataframe in dataframes:
        if not all(col in dataframe.columns for col in required):
            raise ValueError(err_msg)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def write_metadata(output_dir: Path, meta_data: BasicOutputRunMetaData) -> None:
    """Write the metadata for a run to ``results.yml`` inside ``output_dir``.

    ``meta_data`` is converted with ``serde.to_dict`` and dumped as YAML with
    ``sort_keys=False``, so the keys keep the order of the converted dict. An
    existing ``results.yml`` is overwritten.

    Args:
        output_dir: Directory that receives the ``results.yml`` file.
        meta_data: Run metadata to serialise.
    """
    # The context manager closes the file, so no explicit close() is needed.
    with open(Path(output_dir).joinpath("results.yml"), "w", encoding="utf-8") as metadata_file:
        yaml.dump(to_dict(meta_data), metadata_file, sort_keys=False, default_style="")
|