pheval-exomiser 0.2.7__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18):
  1. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/PKG-INFO +4 -6
  2. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/pyproject.toml +3 -4
  3. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/post_process/post_process.py +5 -3
  4. pheval_exomiser-0.3.0/src/pheval_exomiser/post_process/post_process_results_format.py +266 -0
  5. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/runner.py +1 -0
  6. pheval_exomiser-0.2.7/src/pheval_exomiser/post_process/post_process_results_format.py +0 -333
  7. pheval_exomiser-0.2.7/src/pheval_exomiser/prepare/yaml_to_family_phenopacket.py +0 -392
  8. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/README.md +0 -0
  9. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/__init__.py +0 -0
  10. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/cli.py +0 -0
  11. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/constants.py +0 -0
  12. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/post_process/__init__.py +0 -0
  13. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/__init__.py +0 -0
  14. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/create_batch_commands.py +0 -0
  15. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/tool_specific_configuration_options.py +0 -0
  16. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/write_application_properties.py +0 -0
  17. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/run/__init__.py +0 -0
  18. {pheval_exomiser-0.2.7 → pheval_exomiser-0.3.0}/src/pheval_exomiser/run/run.py +0 -0
@@ -1,12 +1,11 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: pheval_exomiser
3
- Version: 0.2.7
3
+ Version: 0.3.0
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
7
- Requires-Python: >=3.9,<4.0.0
7
+ Requires-Python: >=3.10,<4.0.0
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.9
10
9
  Classifier: Programming Language :: Python :: 3.10
11
10
  Classifier: Programming Language :: Python :: 3.11
12
11
  Classifier: Programming Language :: Python :: 3.12
@@ -16,9 +15,8 @@ Requires-Dist: docker (>=6.0.1,<7.0.0)
16
15
  Requires-Dist: google (>=3.0.0,<4.0.0)
17
16
  Requires-Dist: numpy (<2)
18
17
  Requires-Dist: oaklib (>=0.5.12,<0.6.0)
19
- Requires-Dist: pandas (>=1.5.2,<2.0.0)
20
18
  Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
21
- Requires-Dist: pheval (>=0.4.0,<0.5.0)
19
+ Requires-Dist: pheval (>=0.5.1,<0.6.0)
22
20
  Requires-Dist: pyaml (>=21.10.1,<22.0.0)
23
21
  Requires-Dist: pydantic (>=2.7.1,<3.0.0)
24
22
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pheval_exomiser"
3
- version = "0.2.7"
3
+ version = "0.3.0"
4
4
  description = ""
5
5
  authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
6
6
  "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",
@@ -10,9 +10,8 @@ readme = "README.md"
10
10
  packages = [{ include = "pheval_exomiser", from = "src" }]
11
11
 
12
12
  [tool.poetry.dependencies]
13
- python = ">=3.9,<4.0.0"
13
+ python = ">=3.10,<4.0.0"
14
14
  click = "^8.1.3"
15
- pandas = "^1.5.2"
16
15
  phenopackets = "^2.0.2"
17
16
  google = "^3.0.0"
18
17
  pyaml = "^21.10.1"
@@ -20,7 +19,7 @@ oaklib = "^0.5.12"
20
19
  docker = "^6.0.1"
21
20
  pydantic = "^2.7.1"
22
21
  numpy = "<2"
23
- pheval = "^0.4.0"
22
+ pheval = "^0.5.1"
24
23
 
25
24
  [tool.poetry.dev-dependencies]
26
25
  pytest = "^7.1.2"
@@ -8,6 +8,7 @@ def post_process_result_format(
8
8
  config: ExomiserConfigurations,
9
9
  raw_results_dir: Path,
10
10
  output_dir: Path,
11
+ phenopacket_dir: Path,
11
12
  variant_analysis: bool,
12
13
  gene_analysis: bool,
13
14
  disease_analysis: bool,
@@ -15,12 +16,13 @@ def post_process_result_format(
15
16
  """Standardise Exomiser json format to separated gene and variant results."""
16
17
  print("...standardising results format...")
17
18
  create_standardised_results(
18
- results_dir=raw_results_dir,
19
+ result_dir=raw_results_dir,
19
20
  output_dir=output_dir,
20
- score_name=config.post_process.score_name,
21
+ phenopacket_dir=phenopacket_dir,
21
22
  sort_order=config.post_process.sort_order,
22
- variant_analysis=variant_analysis,
23
+ score_name=config.post_process.score_name,
23
24
  gene_analysis=gene_analysis,
24
25
  disease_analysis=disease_analysis,
26
+ variant_analysis=variant_analysis,
25
27
  )
26
28
  print("done")
@@ -0,0 +1,266 @@
1
+ import uuid
2
+ from enum import Enum
3
+ from pathlib import Path
4
+
5
+ import click
6
+ import polars as pl
7
+ from pheval.post_processing.post_processing import (
8
+ SortOrder,
9
+ generate_disease_result,
10
+ generate_gene_result,
11
+ generate_variant_result,
12
+ )
13
+ from pheval.utils.file_utils import files_with_suffix
14
+
15
+
16
+ class ModeOfInheritance(Enum):
17
+ AUTOSOMAL_DOMINANT = 1
18
+ AUTOSOMAL_RECESSIVE = 2
19
+ X_DOMINANT = 1
20
+ X_RECESSIVE = 2
21
+ MITOCHONDRIAL = 3
22
+
23
+
24
+ def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
25
+ """Trim suffix appended to Exomiser JSON result path."""
26
+ return Path(str(exomiser_result_path.name).replace("-exomiser", ""))
27
+
28
+
29
+ def extract_gene_results_from_json(
30
+ exomiser_json_result: pl.DataFrame, score_name: str
31
+ ) -> pl.DataFrame:
32
+ return exomiser_json_result.select(
33
+ [
34
+ pl.col("geneSymbol").alias("gene_symbol"),
35
+ pl.col("geneIdentifier").struct.field("geneId").alias("gene_identifier"),
36
+ pl.col(score_name).fill_null(0).round(4).alias("score"),
37
+ ]
38
+ ).drop_nulls()
39
+
40
+
41
+ def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.DataFrame:
42
+ return (
43
+ exomiser_json_result.select(
44
+ [
45
+ pl.col("priorityResults")
46
+ .struct.field("HIPHIVE_PRIORITY")
47
+ .struct.field("diseaseMatches")
48
+ ]
49
+ )
50
+ .explode("diseaseMatches")
51
+ .unnest("diseaseMatches")
52
+ .unnest("model")
53
+ .select([pl.col("diseaseId").alias("disease_identifier"), pl.col("score").round(4)])
54
+ .drop_nulls()
55
+ )
56
+
57
+
58
+ def extract_variant_results_from_json(
59
+ exomiser_json_result: pl.DataFrame, score_name: str
60
+ ) -> pl.DataFrame:
61
+ return (
62
+ exomiser_json_result.filter(pl.col("geneScores").is_not_null())
63
+ .select([pl.col("geneScores"), pl.col(score_name).alias("score"), pl.col("geneSymbol")])
64
+ .explode("geneScores")
65
+ .unnest("geneScores")
66
+ .filter(pl.col("contributingVariants").is_not_null())
67
+ .explode("contributingVariants")
68
+ .with_columns(
69
+ [
70
+ pl.col("contributingVariants").struct.field("contigName").alias("chrom"),
71
+ pl.col("contributingVariants").struct.field("start"),
72
+ pl.col("contributingVariants").struct.field("end"),
73
+ pl.col("contributingVariants").struct.field("ref"),
74
+ pl.col("contributingVariants")
75
+ .struct.field("alt")
76
+ .fill_null("")
77
+ .str.strip_chars("<>")
78
+ .alias("alt"),
79
+ pl.col("modeOfInheritance")
80
+ .map_elements(lambda moi: ModeOfInheritance[moi].value, return_dtype=pl.Int8)
81
+ .alias("moi_enum"),
82
+ ]
83
+ )
84
+ .with_columns(
85
+ [
86
+ (pl.col("moi_enum") == 2).alias("is_recessive"),
87
+ pl.when(pl.col("moi_enum") == 2)
88
+ .then(
89
+ pl.format(
90
+ "recessive|{}|{}|{}",
91
+ pl.col("geneSymbol"),
92
+ pl.col("score"),
93
+ pl.col("moi_enum"),
94
+ )
95
+ )
96
+ .otherwise(
97
+ pl.format(
98
+ "dominant|{}|{}|{}|{}|{}|{}",
99
+ pl.col("chrom"),
100
+ pl.col("start"),
101
+ pl.col("end"),
102
+ pl.col("ref"),
103
+ pl.col("alt"),
104
+ pl.col("score"),
105
+ )
106
+ )
107
+ .alias("group_key"),
108
+ ]
109
+ )
110
+ .with_columns(
111
+ [
112
+ pl.col("group_key")
113
+ .rank("dense")
114
+ .cast(pl.UInt32)
115
+ .map_elements(
116
+ lambda i: str(uuid.uuid5(uuid.NAMESPACE_DNS, str(i))), return_dtype=pl.String
117
+ )
118
+ .alias("grouping_id")
119
+ ]
120
+ )
121
+ .select(
122
+ ["chrom", "start", "end", "ref", "alt", "score", "modeOfInheritance", "grouping_id"]
123
+ )
124
+ )
125
+
126
+
127
+ def create_standardised_results(
128
+ result_dir: Path,
129
+ output_dir: Path,
130
+ phenopacket_dir: Path,
131
+ score_name: str,
132
+ sort_order: str,
133
+ gene_analysis: bool,
134
+ disease_analysis: bool,
135
+ variant_analysis: bool,
136
+ ):
137
+ sort_order = SortOrder.ASCENDING if sort_order.lower() == "ascending" else SortOrder.DESCENDING
138
+ for exomiser_json_result_path in files_with_suffix(result_dir, ".json"):
139
+ exomiser_json_result = pl.read_json(exomiser_json_result_path)
140
+ if gene_analysis:
141
+ gene_results = extract_gene_results_from_json(exomiser_json_result, score_name)
142
+ generate_gene_result(
143
+ results=gene_results,
144
+ sort_order=sort_order,
145
+ output_dir=output_dir,
146
+ result_path=trim_exomiser_result_filename(exomiser_json_result_path),
147
+ phenopacket_dir=phenopacket_dir,
148
+ )
149
+ if disease_analysis:
150
+ disease_results = extract_disease_results_from_json(exomiser_json_result)
151
+ generate_disease_result(
152
+ results=disease_results,
153
+ sort_order=sort_order,
154
+ output_dir=output_dir,
155
+ result_path=trim_exomiser_result_filename(exomiser_json_result_path),
156
+ phenopacket_dir=phenopacket_dir,
157
+ )
158
+
159
+ if variant_analysis:
160
+ variant_results = extract_variant_results_from_json(exomiser_json_result, score_name)
161
+ generate_variant_result(
162
+ results=variant_results,
163
+ sort_order=sort_order,
164
+ output_dir=output_dir,
165
+ result_path=trim_exomiser_result_filename(exomiser_json_result_path),
166
+ phenopacket_dir=phenopacket_dir,
167
+ )
168
+
169
+
170
+ @click.command()
171
+ @click.option(
172
+ "--output-dir",
173
+ "-o",
174
+ required=True,
175
+ metavar="PATH",
176
+ help="Output directory for standardised results.",
177
+ type=Path,
178
+ )
179
+ @click.option(
180
+ "--results-dir",
181
+ "-R",
182
+ required=True,
183
+ metavar="DIRECTORY",
184
+ help="Full path to Exomiser results directory to be standardised.",
185
+ type=Path,
186
+ )
187
+ @click.option(
188
+ "--phenopacket-dir",
189
+ "-p",
190
+ required=True,
191
+ metavar="DIRECTORY",
192
+ help="Full path to phenopacket dir used to generate the raw results.",
193
+ type=Path,
194
+ )
195
+ @click.option(
196
+ "--score-name",
197
+ "-s",
198
+ required=True,
199
+ help="Score name to extract from results.",
200
+ type=click.Choice(["combinedScore", "priorityScore", "variantScore", "pValue"]),
201
+ default="combinedScore",
202
+ show_default=True,
203
+ )
204
+ @click.option(
205
+ "--sort-order",
206
+ "-so",
207
+ required=True,
208
+ help="Ordering of results for ranking.",
209
+ type=click.Choice(["ascending", "descending"]),
210
+ default="descending",
211
+ show_default=True,
212
+ )
213
+ @click.option(
214
+ "--gene-analysis/--no-gene-analysis",
215
+ type=bool,
216
+ default=False,
217
+ help="Specify whether to create PhEval gene results.",
218
+ )
219
+ @click.option(
220
+ "--variant-analysis/--no-variant-analysis",
221
+ type=bool,
222
+ default=False,
223
+ help="Specify whether to create PhEval variant results.",
224
+ )
225
+ @click.option(
226
+ "--disease-analysis/--no-disease-analysis",
227
+ type=bool,
228
+ default=False,
229
+ help="Specify whether to create PhEval disease results.",
230
+ )
231
+ def post_process_exomiser_results(
232
+ output_dir: Path,
233
+ results_dir: Path,
234
+ phenopacket_dir: Path,
235
+ score_name: str,
236
+ sort_order: str,
237
+ gene_analysis: bool,
238
+ variant_analysis: bool,
239
+ disease_analysis: bool,
240
+ ):
241
+ """Post-process Exomiser json results into PhEval gene and variant outputs."""
242
+ (
243
+ output_dir.joinpath("pheval_gene_results").mkdir(parents=True, exist_ok=True)
244
+ if gene_analysis
245
+ else None
246
+ )
247
+ (
248
+ output_dir.joinpath("pheval_variant_results").mkdir(parents=True, exist_ok=True)
249
+ if variant_analysis
250
+ else None
251
+ )
252
+ (
253
+ output_dir.joinpath("pheval_disease_results").mkdir(parents=True, exist_ok=True)
254
+ if disease_analysis
255
+ else None
256
+ )
257
+ create_standardised_results(
258
+ result_dir=results_dir,
259
+ output_dir=output_dir,
260
+ phenopacket_dir=phenopacket_dir,
261
+ score_name=score_name,
262
+ sort_order=sort_order,
263
+ variant_analysis=variant_analysis,
264
+ gene_analysis=gene_analysis,
265
+ disease_analysis=disease_analysis,
266
+ )
@@ -67,6 +67,7 @@ class ExomiserPhEvalRunner(PhEvalRunner):
67
67
  config=config,
68
68
  raw_results_dir=self.raw_results_dir,
69
69
  output_dir=self.output_dir,
70
+ phenopacket_dir=self.testdata_dir.joinpath("phenopackets"),
70
71
  variant_analysis=self.input_dir_config.variant_analysis,
71
72
  gene_analysis=self.input_dir_config.gene_analysis,
72
73
  disease_analysis=self.input_dir_config.disease_analysis,
@@ -1,333 +0,0 @@
1
- #!/usr/bin/python
2
- import json
3
- from pathlib import Path
4
-
5
- import click
6
- from pheval.post_processing.post_processing import (
7
- PhEvalDiseaseResult,
8
- PhEvalGeneResult,
9
- PhEvalVariantResult,
10
- generate_pheval_result,
11
- )
12
- from pheval.utils.file_utils import files_with_suffix
13
-
14
-
15
- def read_exomiser_json_result(exomiser_result_path: Path) -> dict:
16
- """Load Exomiser json result."""
17
- with open(exomiser_result_path) as exomiser_json_result:
18
- exomiser_result = json.load(exomiser_json_result)
19
- exomiser_json_result.close()
20
- return exomiser_result
21
-
22
-
23
- def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
24
- """Trim suffix appended to Exomiser JSON result path."""
25
- return Path(str(exomiser_result_path.name).replace("-exomiser", ""))
26
-
27
-
28
- class PhEvalGeneResultFromExomiserJsonCreator:
29
- def __init__(self, exomiser_json_result: [dict], score_name: str):
30
- self.exomiser_json_result = exomiser_json_result
31
- self.score_name = score_name
32
-
33
- @staticmethod
34
- def _find_gene_symbol(result_entry: dict) -> str:
35
- """Return gene symbol from Exomiser result entry."""
36
- return result_entry["geneSymbol"]
37
-
38
- @staticmethod
39
- def _find_gene_identifier(result_entry: dict) -> str:
40
- """Return ensembl gene identifier from Exomiser result entry."""
41
- return result_entry["geneIdentifier"]["geneId"]
42
-
43
- def _find_relevant_score(self, result_entry: dict):
44
- """Return score from Exomiser result entry."""
45
- return round(result_entry[self.score_name], 4)
46
-
47
- def extract_pheval_gene_requirements(self) -> [PhEvalGeneResult]:
48
- """Extract data required to produce PhEval gene output."""
49
- simplified_exomiser_result = []
50
- for result_entry in self.exomiser_json_result:
51
- if self.score_name in result_entry:
52
- simplified_exomiser_result.append(
53
- PhEvalGeneResult(
54
- gene_symbol=self._find_gene_symbol(result_entry),
55
- gene_identifier=self._find_gene_identifier(result_entry),
56
- score=self._find_relevant_score(result_entry),
57
- )
58
- )
59
-
60
- return simplified_exomiser_result
61
-
62
-
63
- class PhEvalVariantResultFromExomiserJsonCreator:
64
-
65
- def __init__(self, exomiser_json_result: [dict], score_name: str):
66
- self.exomiser_json_result = exomiser_json_result
67
- self.score_name = score_name
68
-
69
- @staticmethod
70
- def _find_chromosome(result_entry: dict) -> str:
71
- """Return chromosome from Exomiser result entry."""
72
- return result_entry["contigName"]
73
-
74
- @staticmethod
75
- def _find_start_pos(result_entry: dict) -> int:
76
- """Return start position from Exomiser result entry."""
77
- return result_entry["start"]
78
-
79
- @staticmethod
80
- def _find_end_pos(result_entry: dict) -> int:
81
- """Return end position from Exomiser result entry."""
82
- return result_entry["end"]
83
-
84
- @staticmethod
85
- def _find_ref(result_entry: dict) -> str:
86
- """Return reference allele from Exomiser result entry."""
87
- return result_entry["ref"]
88
-
89
- @staticmethod
90
- def _find_alt(result_entry: dict) -> str:
91
- """Return alternate allele from Exomiser result entry."""
92
- if "alt" in result_entry and result_entry["alt"] is not None:
93
- return result_entry["alt"].strip(">").strip("<")
94
- else:
95
- return ""
96
-
97
- def _find_relevant_score(self, result_entry) -> float:
98
- """Return score from Exomiser result entry."""
99
- return round(result_entry[self.score_name], 4)
100
-
101
- def _filter_for_acmg_assignments(
102
- self, variant: PhEvalVariantResult, score: float, variant_acmg_assignments: dict
103
- ) -> bool:
104
- """Filter variants if they meet the PATHOGENIC or LIKELY_PATHOGENIC ACMG classification."""
105
- for assignment in variant_acmg_assignments:
106
- if variant == PhEvalVariantResult(
107
- chromosome=self._find_chromosome(assignment["variantEvaluation"]),
108
- start=self._find_start_pos(assignment["variantEvaluation"]),
109
- end=self._find_end_pos(assignment["variantEvaluation"]),
110
- ref=self._find_ref(assignment["variantEvaluation"]),
111
- alt=self._find_alt(assignment["variantEvaluation"]),
112
- score=score,
113
- ) and (
114
- assignment["acmgClassification"] == "PATHOGENIC"
115
- or assignment["acmgClassification"] == "LIKELY_PATHOGENIC"
116
- ):
117
- return True
118
-
119
- def extract_pheval_variant_requirements(
120
- self, use_acmg_filter: bool = False
121
- ) -> [PhEvalVariantResult]:
122
- """Extract data required to produce PhEval variant output."""
123
- simplified_exomiser_result = []
124
- for result_entry in self.exomiser_json_result:
125
- for gene_hit in result_entry["geneScores"]:
126
- if self.score_name in result_entry:
127
- if "contributingVariants" in gene_hit:
128
- score = self._find_relevant_score(result_entry)
129
- contributing_variants = gene_hit["contributingVariants"]
130
- variant_acmg_assignments = gene_hit["acmgAssignments"]
131
- for cv in contributing_variants:
132
- variant = PhEvalVariantResult(
133
- chromosome=self._find_chromosome(cv),
134
- start=self._find_start_pos(cv),
135
- end=self._find_end_pos(cv),
136
- ref=self._find_ref(cv),
137
- alt=self._find_alt(cv),
138
- score=score,
139
- )
140
- if use_acmg_filter and self._filter_for_acmg_assignments(
141
- variant, score, variant_acmg_assignments
142
- ):
143
- simplified_exomiser_result.append(variant)
144
- if not use_acmg_filter:
145
- simplified_exomiser_result.append(variant)
146
- return simplified_exomiser_result
147
-
148
-
149
- class PhEvalDiseaseResultFromExomiserJsonCreator:
150
- def __init__(self, exomiser_json_result: [dict]):
151
- self.exomiser_json_result = exomiser_json_result
152
-
153
- @staticmethod
154
- def _find_disease_name(result_entry: dict) -> str:
155
- """Return disease term from Exomiser result entry."""
156
- return result_entry["diseaseTerm"]
157
-
158
- @staticmethod
159
- def _find_disease_identifier(result_entry: dict) -> int:
160
- """Return disease ID from Exomiser result entry."""
161
- return result_entry["diseaseId"]
162
-
163
- @staticmethod
164
- def _find_relevant_score(result_entry) -> float:
165
- """Return score from Exomiser result entry."""
166
- return round(result_entry["score"], 4)
167
-
168
- def extract_pheval_disease_requirements(self) -> [PhEvalDiseaseResult]:
169
- """Extract data required to produce PhEval disease output."""
170
- simplified_exomiser_result = []
171
- for result_entry in self.exomiser_json_result:
172
- try:
173
- for disease in result_entry["priorityResults"]["HIPHIVE_PRIORITY"][
174
- "diseaseMatches"
175
- ]:
176
- simplified_exomiser_result.append(
177
- PhEvalDiseaseResult(
178
- disease_name=self._find_disease_name(disease["model"]),
179
- disease_identifier=self._find_disease_identifier(disease["model"]),
180
- score=self._find_relevant_score(disease),
181
- )
182
- )
183
- except KeyError:
184
- pass
185
- return list(
186
- {
187
- (result.disease_identifier, result.score): result
188
- for result in simplified_exomiser_result
189
- }.values()
190
- )
191
-
192
-
193
- def create_standardised_results(
194
- results_dir: Path,
195
- output_dir: Path,
196
- score_name: str,
197
- sort_order: str,
198
- variant_analysis: bool,
199
- gene_analysis: bool,
200
- disease_analysis: bool,
201
- include_acmg: bool = False,
202
- ) -> None:
203
- """Write standardised gene/variant/disease results from default Exomiser json output."""
204
- for exomiser_json_result in files_with_suffix(results_dir, ".json"):
205
- exomiser_result = read_exomiser_json_result(exomiser_json_result)
206
- if gene_analysis:
207
- pheval_gene_requirements = PhEvalGeneResultFromExomiserJsonCreator(
208
- exomiser_result, score_name
209
- ).extract_pheval_gene_requirements()
210
- generate_pheval_result(
211
- pheval_result=pheval_gene_requirements,
212
- sort_order_str=sort_order,
213
- output_dir=output_dir,
214
- tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
215
- )
216
- if variant_analysis:
217
- pheval_variant_requirements = PhEvalVariantResultFromExomiserJsonCreator(
218
- exomiser_result, score_name
219
- ).extract_pheval_variant_requirements(include_acmg)
220
- generate_pheval_result(
221
- pheval_result=pheval_variant_requirements,
222
- sort_order_str=sort_order,
223
- output_dir=output_dir,
224
- tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
225
- )
226
- if disease_analysis:
227
- pheval_disease_requirements = PhEvalDiseaseResultFromExomiserJsonCreator(
228
- exomiser_result
229
- ).extract_pheval_disease_requirements()
230
- generate_pheval_result(
231
- pheval_result=pheval_disease_requirements,
232
- sort_order_str=sort_order,
233
- output_dir=output_dir,
234
- tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
235
- )
236
-
237
-
238
- @click.command()
239
- @click.option(
240
- "--output-dir",
241
- "-o",
242
- required=True,
243
- metavar="PATH",
244
- help="Output directory for standardised results.",
245
- type=Path,
246
- )
247
- @click.option(
248
- "--results-dir",
249
- "-R",
250
- required=True,
251
- metavar="DIRECTORY",
252
- help="Full path to Exomiser results directory to be standardised.",
253
- type=Path,
254
- )
255
- @click.option(
256
- "--score-name",
257
- "-s",
258
- required=True,
259
- help="Score name to extract from results.",
260
- type=click.Choice(["combinedScore", "priorityScore", "variantScore", "pValue"]),
261
- default="combinedScore",
262
- show_default=True,
263
- )
264
- @click.option(
265
- "--sort-order",
266
- "-so",
267
- required=True,
268
- help="Ordering of results for ranking.",
269
- type=click.Choice(["ascending", "descending"]),
270
- default="descending",
271
- show_default=True,
272
- )
273
- @click.option(
274
- "--gene-analysis/--no-gene-analysis",
275
- type=bool,
276
- default=False,
277
- help="Specify whether to create PhEval gene results.",
278
- )
279
- @click.option(
280
- "--variant-analysis/--no-variant-analysis",
281
- type=bool,
282
- default=False,
283
- help="Specify whether to create PhEval variant results.",
284
- )
285
- @click.option(
286
- "--disease-analysis/--no-disease-analysis",
287
- type=bool,
288
- default=False,
289
- help="Specify whether to create PhEval disease results.",
290
- )
291
- @click.option(
292
- "--include-acmg",
293
- is_flag=True,
294
- type=bool,
295
- default=False,
296
- help="Specify whether to include ACMG filter for PATHOGENIC or LIKELY_PATHOGENIC classifications.",
297
- )
298
- def post_process_exomiser_results(
299
- output_dir: Path,
300
- results_dir: Path,
301
- score_name: str,
302
- sort_order: str,
303
- gene_analysis: bool,
304
- variant_analysis: bool,
305
- disease_analysis: bool,
306
- include_acmg: bool,
307
- ):
308
- """Post-process Exomiser json results into PhEval gene and variant outputs."""
309
- (
310
- output_dir.joinpath("pheval_gene_results").mkdir(parents=True, exist_ok=True)
311
- if gene_analysis
312
- else None
313
- )
314
- (
315
- output_dir.joinpath("pheval_variant_results").mkdir(parents=True, exist_ok=True)
316
- if variant_analysis
317
- else None
318
- )
319
- (
320
- output_dir.joinpath("pheval_disease_results").mkdir(parents=True, exist_ok=True)
321
- if disease_analysis
322
- else None
323
- )
324
- create_standardised_results(
325
- results_dir,
326
- output_dir,
327
- score_name,
328
- sort_order,
329
- variant_analysis,
330
- gene_analysis,
331
- disease_analysis,
332
- include_acmg,
333
- )
@@ -1,392 +0,0 @@
1
- from copy import copy
2
- from pathlib import Path
3
-
4
- import click
5
- import pandas as pd
6
- import yaml
7
- from google.protobuf.timestamp_pb2 import Timestamp
8
- from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
9
- from oaklib.resource import OntologyResource
10
- from phenopackets import (
11
- Diagnosis,
12
- Family,
13
- File,
14
- GeneDescriptor,
15
- GenomicInterpretation,
16
- Individual,
17
- Interpretation,
18
- MetaData,
19
- OntologyClass,
20
- Pedigree,
21
- Phenopacket,
22
- PhenotypicFeature,
23
- Resource,
24
- VariantInterpretation,
25
- VariationDescriptor,
26
- VcfRecord,
27
- )
28
- from pheval.prepare.create_noisy_phenopackets import load_ontology
29
- from pheval.utils.file_utils import files_with_suffix
30
- from pheval.utils.phenopacket_utils import create_hgnc_dict, write_phenopacket
31
-
32
-
33
- def load_genotype_ontology():
34
- """Load genotype ontology"""
35
- genotype_resource = OntologyResource(slug="geno.owl", local=False)
36
- return ProntoImplementation(genotype_resource)
37
-
38
-
39
- def exomiser_analysis_yml_reader(yaml_job_file_path: Path) -> dict:
40
- """Read an exomiser analysis yaml file."""
41
- with open(yaml_job_file_path) as yaml_job_file:
42
- yaml_job = yaml.safe_load(yaml_job_file)
43
- yaml_job_file.close()
44
- return yaml_job
45
-
46
-
47
- def read_diagnoses_file(diagnoses_file_path: Path) -> pd.DataFrame:
48
- """Read a diagnoses file."""
49
- return pd.read_csv(diagnoses_file_path, delimiter="t")
50
-
51
-
52
- def read_pedigree_file(pedigree_path: Path) -> list[str]:
53
- """Return the contents of a pedigree file"""
54
- return open(pedigree_path).readlines()
55
-
56
-
57
- class ExomiserYamlToPhenopacketConverter:
58
- def __init__(self, genotype_ontology, human_phenotype_ontology, hgnc_data):
59
- self.genotype_ontology = genotype_ontology
60
- self.human_phenotype_ontology = human_phenotype_ontology
61
- self.hgnc_data = hgnc_data
62
-
63
- @staticmethod
64
- def construct_individual(yaml_job: dict, diagnoses: pd.DataFrame) -> Individual:
65
- """Construct individual for phenopacket."""
66
- return Individual(
67
- id=yaml_job["analysis"]["proband"],
68
- sex=diagnoses[diagnoses.ProbandId == yaml_job["analysis"]["proband"]]
69
- .iloc[0]["Sex"]
70
- .upper(),
71
- )
72
-
73
- @staticmethod
74
- def get_diagnoses_for_proband(yaml_job: dict, diagnoses: pd.DataFrame):
75
- """Get all diagnoses for proband."""
76
- return diagnoses.loc[diagnoses["ProbandId"] == yaml_job["analysis"]["proband"]]
77
-
78
- def construct_phenotypic_interpretations(self, yaml_job: dict) -> list[PhenotypicFeature]:
79
- """Construct the phenotypic features for the proband."""
80
- hpo_ids = yaml_job["analysis"]["hpoIds"]
81
- phenotypic_features = []
82
- for hpo_id in hpo_ids:
83
- try:
84
- rels = self.human_phenotype_ontology.entity_alias_map(hpo_id)
85
- hpo_term = "".join(rels[(list(rels.keys())[0])])
86
- hpo = PhenotypicFeature(type=OntologyClass(id=hpo_id, label=hpo_term))
87
- phenotypic_features.append(hpo)
88
- except AttributeError:
89
- hpo = PhenotypicFeature(type=OntologyClass(id=hpo_id))
90
- phenotypic_features.append(hpo)
91
- return phenotypic_features
92
-
93
- @staticmethod
94
- def construct_vcf_record(yaml_job: dict, diagnosis: pd.DataFrame) -> VcfRecord:
95
- """Construct the VCF record for a diagnosis."""
96
- return VcfRecord(
97
- genome_assembly=yaml_job["analysis"]["genomeAssembly"],
98
- chrom=diagnosis["Chr"],
99
- pos=int(diagnosis["Start"]),
100
- ref=str(diagnosis["Ref/Alt"]).split("/")[0],
101
- alt=str(diagnosis["Ref/Alt"]).split("/")[1],
102
- )
103
-
104
- def construct_allelic_state(self, diagnosis: pd.DataFrame) -> OntologyClass:
105
- """Construct the allelic state for a diagnosis."""
106
- return OntologyClass(
107
- id=list(self.genotype_ontology.basic_search(diagnosis["Genotype"].lower()))[0],
108
- label=diagnosis["Genotype"].lower(),
109
- )
110
-
111
- def construct_gene_descriptor(self, diagnosis: pd.DataFrame) -> GeneDescriptor:
112
- """Construct the Gene Descriptor for a diagnosis."""
113
- try:
114
- return GeneDescriptor(
115
- value_id=self.hgnc_data[diagnosis["Gene"]]["ensembl_id"],
116
- symbol=diagnosis["Gene"],
117
- )
118
- except KeyError:
119
- for _gene, gene_info in self.hgnc_data.items():
120
- for previous_name in gene_info["previous_names"]:
121
- if diagnosis["Gene"] == previous_name:
122
- return GeneDescriptor(
123
- value_id=self.hgnc_data[gene_info["ensembl_id"]],
124
- symbol=diagnosis["Gene"],
125
- )
126
-
127
- def construct_variation_descriptor(
128
- self, yaml_job: dict, diagnosis: pd.DataFrame
129
- ) -> VariationDescriptor:
130
- """Construct a variation descriptor for a diagnosis."""
131
- return VariationDescriptor(
132
- id=yaml_job["analysis"]["proband"]
133
- + ":"
134
- + diagnosis["Chr"]
135
- + ":"
136
- + diagnosis["Start"]
137
- + ":"
138
- + diagnosis["Ref/Alt"],
139
- gene_context=self.construct_gene_descriptor(diagnosis),
140
- vcf_record=self.construct_vcf_record(yaml_job, diagnosis),
141
- allelic_state=self.construct_allelic_state(diagnosis),
142
- )
143
-
144
- def construct_variant_interpretation(
145
- self, yaml_job: dict, diagnosis: pd.DataFrame
146
- ) -> VariantInterpretation:
147
- """Construct the variant interpretation for a diagnosis."""
148
- return VariantInterpretation(
149
- variation_descriptor=self.construct_variation_descriptor(yaml_job, diagnosis),
150
- )
151
-
152
- def construct_genomic_interpretations(
153
- self, yaml_job: dict, diagnoses: pd.DataFrame
154
- ) -> list[GenomicInterpretation]:
155
- """Construct a list of genomic interpretations for a proband."""
156
- genomic_interpretations = []
157
- for _index, row in self.get_diagnoses_for_proband(yaml_job, diagnoses).iterrows():
158
- genomic_interpretation = GenomicInterpretation(
159
- subject_or_biosample_id=yaml_job["analysis"]["proband"],
160
- variant_interpretation=self.construct_variant_interpretation(
161
- yaml_job=yaml_job, diagnosis=row
162
- ),
163
- )
164
- genomic_interpretations.append(genomic_interpretation)
165
- return genomic_interpretations
166
-
167
- def construct_diagnosis(self, yaml_job: dict, diagnoses: pd.DataFrame) -> Diagnosis:
168
- """Construct the diagnosis for a proband."""
169
- return Diagnosis(
170
- genomic_interpretations=self.construct_genomic_interpretations(yaml_job, diagnoses)
171
- )
172
-
173
- def construct_interpretations(
174
- self, yaml_job: dict, diagnoses: pd.DataFrame
175
- ) -> list[Interpretation]:
176
- """Construct interpretations for a proband."""
177
- return [
178
- Interpretation(
179
- id=yaml_job["analysis"]["proband"] + "-interpretation",
180
- diagnosis=self.construct_diagnosis(yaml_job, diagnoses),
181
- )
182
- ]
183
-
184
@staticmethod
def construct_meta_data() -> MetaData:
    """Build the phenopacket meta-data block.

    The block is stamped with the current time, attributed to
    ``pheval-converter``, declares schema version ``2.0`` and lists the Human
    Phenotype Ontology (pinned to the ``2019-11-08`` release string) as its
    only resource.
    """
    hpo_resource = Resource(
        id="hp",
        name="human phenotype ontology",
        url="http://purl.obolibrary.org/obo/hp.owl",
        version="hp/releases/2019-11-08",
        namespace_prefix="HP",
        iri_prefix="http://purl.obolibrary.org/obo/HP_",
    )
    created_at = Timestamp()
    created_at.GetCurrentTime()  # fills the message in place with "now"
    return MetaData(
        created=created_at,
        created_by="pheval-converter",
        resources=[hpo_resource],
        phenopacket_schema_version="2.0",
    )
204
-
205
@staticmethod
def construct_files(yaml_job_file: dict) -> list[File]:
    """Describe the analysis VCF as a one-element list of ``File`` messages.

    Args:
        yaml_job_file: Parsed Exomiser analysis YAML; ``["analysis"]["vcf"]``
            becomes the file URI and ``["analysis"]["genomeAssembly"]`` is
            recorded as a file attribute.

    Returns:
        A list containing the single VCF ``File`` entry.
    """
    analysis = yaml_job_file["analysis"]
    vcf_entry = File(
        uri=analysis["vcf"],
        file_attributes={
            "fileFormat": "VCF",
            "genomeAssembly": analysis["genomeAssembly"],
        },
    )
    return [vcf_entry]
217
-
218
-
219
def construct_pedigree(pedigree: list[str]) -> tuple[str, Pedigree]:
    """Construct the pedigree message from the lines of a PED file.

    Each line is split on tabs into: family id, individual id, paternal id,
    maternal id, sex and affected status. A parent id of ``"0"`` means the
    parent is not listed and the corresponding field is left unset.

    Exactly one ``Pedigree.Person`` is produced per non-blank line. The
    previous implementation tested ``str(entry[3] != "0")``, which is always
    truthy, so anyone with a father listed but no mother was added twice (the
    second time with ``maternal_id="0"``).

    Args:
        pedigree: Raw lines of a tab-separated PED file.

    Returns:
        A tuple of the family id taken from the last parsed line (``None`` if
        no line was parsed) and the ``Pedigree`` holding every person.
    """
    # PED encodes sex as 1 = male, 2 = female, while the phenopacket Sex enum
    # uses FEMALE = 1, MALE = 2, so the codes have to be swapped. Anything
    # else maps to 0 (UNKNOWN_SEX) instead of the invalid "." placeholder.
    ped_sex_to_phenopacket_sex = {1: 2, 2: 1}
    persons = []
    family_id = None
    for individual in pedigree:
        if not individual.strip():
            # Tolerate blank or trailing empty lines instead of raising IndexError.
            continue
        entry = individual.rstrip("\r\n").split("\t")
        family_id = entry[0]
        try:
            sex = ped_sex_to_phenopacket_sex.get(int(entry[4]), 0)
        except ValueError:
            # Non-numeric sex column (e.g. "other"): treat as unknown.
            sex = 0
        person_fields = {
            "family_id": family_id,
            "individual_id": entry[1],
            "sex": sex,
            "affected_status": int(entry[5]),
        }
        if entry[2] != "0":
            person_fields["paternal_id"] = entry[2]
        if entry[3] != "0":
            person_fields["maternal_id"] = entry[3]
        persons.append(Pedigree.Person(**person_fields))
    return family_id, Pedigree(persons=persons)
267
-
268
-
269
def construct_phenopacket(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
) -> Phenopacket:
    """Assemble a ``Phenopacket`` for the proband of an Exomiser analysis.

    Args:
        yaml_job_file: Parsed Exomiser analysis YAML; the proband id under
            ``["analysis"]["proband"]`` becomes the phenopacket id.
        diagnoses: Diagnoses table used for the subject and interpretations.
        exomiser_yaml_to_phenopacket_converter: Converter that builds each
            section of the phenopacket.

    Returns:
        The populated ``Phenopacket``.
    """
    converter = exomiser_yaml_to_phenopacket_converter
    # Sections are built in the same order as the message fields below.
    packet_id = yaml_job_file["analysis"]["proband"]
    subject = converter.construct_individual(yaml_job=yaml_job_file, diagnoses=diagnoses)
    phenotypic_features = converter.construct_phenotypic_interpretations(
        yaml_job=yaml_job_file
    )
    interpretations = converter.construct_interpretations(
        yaml_job=yaml_job_file, diagnoses=diagnoses
    )
    files = converter.construct_files(yaml_job_file)
    meta_data = converter.construct_meta_data()
    return Phenopacket(
        id=packet_id,
        subject=subject,
        phenotypic_features=phenotypic_features,
        interpretations=interpretations,
        files=files,
        meta_data=meta_data,
    )
289
-
290
-
291
def construct_family(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
    pedigree: list[str],
) -> Family:
    """Construct a ``Family`` message for the proband and its pedigree.

    A full phenopacket is built first. The proband embedded in the family is
    an independent copy of it with ``files`` and ``meta_data`` cleared,
    because those two sections are carried on the ``Family`` itself.

    Args:
        yaml_job_file: Parsed Exomiser analysis YAML.
        diagnoses: Diagnoses table passed through to ``construct_phenopacket``.
        exomiser_yaml_to_phenopacket_converter: Converter that builds the
            phenopacket sections.
        pedigree: Raw lines of the PED file describing the family.

    Returns:
        The populated ``Family``.
    """
    phenopacket = construct_phenopacket(
        yaml_job_file, diagnoses, exomiser_yaml_to_phenopacket_converter
    )
    # CopyFrom is the protobuf-documented way to get an independent copy.
    proband = Phenopacket()
    proband.CopyFrom(phenopacket)
    # ClearField works for repeated and singular fields alike. The previous
    # `del proband.meta_data[:]` used slice deletion, which only repeated
    # fields support; meta_data is a single message in the Phenopacket schema.
    proband.ClearField("files")
    proband.ClearField("meta_data")
    family_id, ped = construct_pedigree(pedigree)
    return Family(
        id=family_id,
        proband=proband,
        pedigree=ped,
        files=phenopacket.files,
        meta_data=phenopacket.meta_data,
    )
312
-
313
-
314
def create_phenopacket(
    yaml_job_file: Path,
    diagnoses: pd.DataFrame,
    exomiser_converter: ExomiserYamlToPhenopacketConverter,
) -> "Phenopacket | Family":
    """Construct either a family or a phenopacket from an analysis YAML.

    A ``Family`` is built when the analysis names a PED file; otherwise a
    plain ``Phenopacket`` is returned.

    Args:
        yaml_job_file: Path to the Exomiser analysis YAML file.
        diagnoses: Diagnoses table used to build the interpretations.
        exomiser_converter: Converter that builds the phenopacket sections.

    Returns:
        A ``Family`` if ``["analysis"]["ped"]`` is set, else a ``Phenopacket``.
    """
    # The return annotation is a string on purpose: the previous
    # `Phenopacket or Family` evaluated to just `Phenopacket` at definition time.
    yaml_job = exomiser_analysis_yml_reader(yaml_job_file)
    # An empty `ped:` key parses to None and the key may be absent altogether;
    # both mean "no pedigree", the same as the empty string.
    ped_path = yaml_job["analysis"].get("ped")
    if not ped_path:
        return construct_phenopacket(yaml_job, diagnoses, exomiser_converter)
    return construct_family(
        yaml_job,
        diagnoses,
        exomiser_converter,
        read_pedigree_file(ped_path),
    )
332
-
333
-
334
@click.command()
@click.option(
    # No short flag here: "-d" was also declared for --diagnoses-file below,
    # and the two declarations collided on the same short name.
    "--directory",
    required=True,
    help="Directory for Exomiser yaml job files to be converted.",
    type=Path,
)
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yamls_to_phenopacket(
    output_dir: Path, directory: Path, diagnoses_file: Path
):
    """Convert a directory of Exomiser YAML files to the phenopacket schema given a .tsv diagnoses file."""
    # TODO: list the required diagnoses-file columns in the help text; the
    # original docstring left them as "...".
    # Create the output directory (and any missing parents). An existing
    # directory is fine; an existing non-directory fails here, not at write time.
    output_dir.mkdir(parents=True, exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    # Build the converter once; it is reused for every YAML file.
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    for yaml_job_file in files_with_suffix(directory, ".yml"):
        phenopacket = create_phenopacket(yaml_job_file, diagnoses, exomiser_converter)
        # with_suffix swaps only the trailing extension, whereas
        # str.replace would also rewrite ".yml" in the middle of a name.
        write_phenopacket(
            phenopacket, output_dir.joinpath(yaml_job_file.with_suffix(".json").name)
        )
364
-
365
-
366
@click.command()
@click.option(
    "--yaml-file",
    "-y",
    required=True,
    help="Path to Exomiser analysis yaml file for phenopacket conversion.",
    type=Path,
)
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yaml_to_phenopacket(
    output_dir: Path, yaml_file: Path, diagnoses_file: Path
):
    """Convert a single Exomiser YAML file to the phenopacket schema given a .tsv diagnoses file."""
    # TODO: list the required diagnoses-file columns in the help text; the
    # original docstring left them as "...".
    # Create the output directory (and any missing parents). An existing
    # directory is fine; an existing non-directory fails here, not at write time.
    output_dir.mkdir(parents=True, exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    phenopacket = create_phenopacket(yaml_file, diagnoses, exomiser_converter)
    # Write into --output-dir: the option is required and the directory is
    # created above, but the file used to be written to the working directory
    # as "<name>.yml.json". The name now matches the batch command ("<name>.json").
    write_phenopacket(phenopacket, output_dir.joinpath(yaml_file.with_suffix(".json").name))