pheval-exomiser 0.1.3__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
# Absolute, slash-terminated directory path strings, one per kind of Exomiser input/output.
# NOTE(review): the *_DOCKER names suggest these are in-container mount targets —
# confirm against the code that assembles the docker invocation.
+ PHENOPACKET_TARGET_DIRECTORY_DOCKER = "/exomiser-testdata-phenopacket/"
2
+ RAW_RESULTS_TARGET_DIRECTORY_DOCKER = "/exomiser-results/"
3
+ OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER = "/exomiser-testdata-output-options/"
4
+ VCF_TARGET_DIRECTORY_DOCKER = "/exomiser-testdata-vcf/"
5
+ EXOMISER_YAML_TARGET_DIRECTORY_DOCKER = "/exomiser-yaml-template/"
6
+ EXOMISER_DATA_DIRECTORY_TARGET_DOCKER = "/exomiser-data/"
7
+ INPUT_COMMANDS_TARGET_DIRECTORY_DOCKER = "/exomiser-batch-file/"
8
+ EXOMISER_CONFIG_TARGET_DIRECTORY_DOCKER = "/exomiser-config/"
@@ -1,22 +1,26 @@
1
- import os
2
1
  from pathlib import Path
3
2
 
4
- from pheval_exomiser.config_parser import ExomiserConfig
5
3
  from pheval_exomiser.post_process.post_process_results_format import create_standardised_results
4
+ from pheval_exomiser.prepare.tool_specific_configuration_options import ExomiserConfigurations
6
5
 
7
6
 
def post_process_result_format(
    config: ExomiserConfigurations,
    raw_results_dir: Path,
    output_dir: Path,
    variant_analysis: bool,
    gene_analysis: bool,
    disease_analysis: bool,
):
    """Standardise raw Exomiser results by delegating to ``create_standardised_results``.

    Args:
        config: Tool configuration; ``config.post_process.score_name`` and
            ``config.post_process.sort_order`` are forwarded.
        raw_results_dir: Passed on as ``results_dir``.
        output_dir: Passed on unchanged.
        variant_analysis: Passed on unchanged.
        gene_analysis: Passed on unchanged.
        disease_analysis: Passed on unchanged.

    A progress message is printed before the call and ``done`` after it.
    """
    print("...standardising results format...")
    post_process_settings = config.post_process
    standardisation_arguments = {
        "results_dir": raw_results_dir,
        "output_dir": output_dir,
        "score_name": post_process_settings.score_name,
        "sort_order": post_process_settings.sort_order,
        "variant_analysis": variant_analysis,
        "gene_analysis": gene_analysis,
        "disease_analysis": disease_analysis,
    }
    create_standardised_results(**standardisation_arguments)
    print("done")
@@ -1,109 +1,15 @@
1
1
  #!/usr/bin/python
2
- import dataclasses
3
2
  import json
4
- from dataclasses import dataclass
5
3
  from pathlib import Path
6
4
 
7
5
  import click
8
- import pandas as pd
6
+ from pheval.post_processing.post_processing import (
7
+ PhEvalDiseaseResult,
8
+ PhEvalGeneResult,
9
+ PhEvalVariantResult,
10
+ generate_pheval_result,
11
+ )
9
12
  from pheval.utils.file_utils import files_with_suffix
10
- from pheval.utils.phenopacket_utils import VariantData
11
-
12
-
13
- @dataclass
14
- class SimplifiedExomiserGeneResult:
15
- """A simplified gene result format from Exomiser json."""
16
-
17
- exomiser_result: dict
18
- simplified_exomiser_gene_result: list
19
- ranking_method: str
20
-
21
- def add_gene_record(self) -> dict:
22
- """Add the gene and gene identifier record to simplified result format."""
23
- return {
24
- "gene_symbol": self.exomiser_result["geneSymbol"],
25
- "gene_identifier": self.exomiser_result["geneIdentifier"]["geneId"],
26
- }
27
-
28
- def add_ranking_score(self, simplified_result_entry: dict) -> dict:
29
- """Add the ranking score to simplified result format."""
30
- simplified_result_entry["score"] = round(self.exomiser_result[self.ranking_method], 4)
31
- return simplified_result_entry
32
-
33
- def create_simplified_gene_result(self) -> [dict]:
34
- """Create a simplified Exomiser Gene result."""
35
- self.simplified_exomiser_gene_result.append(self.add_ranking_score(self.add_gene_record()))
36
- return self.simplified_exomiser_gene_result
37
-
38
-
39
- @dataclass
40
- class SimplifiedExomiserVariantResult:
41
- """A simplified variant result format from Exomiser json."""
42
-
43
- exomiser_result: dict
44
- simplified_exomiser_variant_result: list
45
- ranking_method: str
46
- ranking_score: float
47
-
48
- def create_simplified_variant_result(self) -> [dict]:
49
- """Add data for contributing variants to simplified result format."""
50
- for cv in self.exomiser_result["contributingVariants"]:
51
- self.simplified_exomiser_variant_result.append(
52
- {
53
- "variant": dataclasses.asdict(
54
- VariantData(
55
- cv["contigName"],
56
- cv["start"],
57
- cv["ref"],
58
- cv["alt"],
59
- self.exomiser_result["geneIdentifier"]["geneSymbol"],
60
- )
61
- ),
62
- "score": self.ranking_score,
63
- }
64
- )
65
- return self.simplified_exomiser_variant_result
66
-
67
-
68
- class RankExomiserResult:
69
- """Add ranks to simplified Exomiser gene/variant results - taking care of ex-aequo scores."""
70
-
71
- def __init__(self, simplified_exomiser_result: [dict], ranking_method: str):
72
- self.simplified_exomiser_result = simplified_exomiser_result
73
- self.ranking_method = ranking_method
74
-
75
- def sort_exomiser_result(self) -> [dict]:
76
- """Sorts simplified Exomiser result by ranking method in decreasing order."""
77
- return sorted(
78
- self.simplified_exomiser_result,
79
- key=lambda d: d["score"],
80
- reverse=True,
81
- )
82
-
83
- def sort_exomiser_result_pvalue(self) -> [dict]:
84
- """Sort simplified Exomiser result by pvalue, most significant value first."""
85
- return sorted(
86
- self.simplified_exomiser_result,
87
- key=lambda d: d["score"],
88
- reverse=False,
89
- )
90
-
91
- def rank_results(self) -> [dict]:
92
- """Add ranks to the Exomiser results, equal scores are given the same rank e.g., 1,1,3."""
93
- sorted_exomiser_result = (
94
- self.sort_exomiser_result_pvalue()
95
- if self.ranking_method == "pValue"
96
- else self.sort_exomiser_result()
97
- )
98
- rank, count, previous = 0, 0, None
99
- for exomiser_result in sorted_exomiser_result:
100
- count += 1
101
- if exomiser_result["score"] != previous:
102
- rank += count
103
- previous = exomiser_result["score"]
104
- count = 0
105
- exomiser_result["rank"] = rank
106
- return sorted_exomiser_result
107
13
 
108
14
 
109
15
  def read_exomiser_json_result(exomiser_result_path: Path) -> dict:
@@ -114,78 +20,214 @@ def read_exomiser_json_result(exomiser_result_path: Path) -> dict:
114
20
  return exomiser_result
115
21
 
116
22
 
117
- class StandardiseExomiserResult:
118
- """Standardise Exomiser output into simplified gene and variant results for analysis."""
23
def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
    """Return the file name of an Exomiser result with ``-exomiser`` removed.

    Only the final path component is kept (parent directories are dropped),
    and every occurrence of ``-exomiser`` in that name is removed, not just a
    trailing one.
    """
    file_name = exomiser_result_path.name
    # Splitting on the marker and re-joining drops each occurrence of it.
    name_pieces = file_name.split("-exomiser")
    return Path("".join(name_pieces))
119
26
 
120
- def __init__(self, exomiser_json_result: [dict], ranking_method: str):
27
+
28
class PhEvalGeneResultFromExomiserJsonCreator:
    """Build ``PhEvalGeneResult`` objects from the entries of a parsed Exomiser JSON result."""

    def __init__(self, exomiser_json_result: [dict], score_name: str):
        """Keep the parsed result entries and the key of the score to report."""
        self.score_name = score_name
        self.exomiser_json_result = exomiser_json_result

    @staticmethod
    def _find_gene_symbol(result_entry: dict) -> str:
        """Return the value stored under ``geneSymbol``."""
        gene_symbol = result_entry["geneSymbol"]
        return gene_symbol

    @staticmethod
    def _find_gene_identifier(result_entry: dict) -> str:
        """Return the ``geneId`` nested under ``geneIdentifier``."""
        identifier_record = result_entry["geneIdentifier"]
        return identifier_record["geneId"]

    def _find_relevant_score(self, result_entry: dict):
        """Return the entry's ``score_name`` value rounded to 4 decimal places."""
        raw_score = result_entry[self.score_name]
        return round(raw_score, 4)

    def extract_pheval_gene_requirements(self) -> [PhEvalGeneResult]:
        """Return one ``PhEvalGeneResult`` per entry that contains ``score_name``.

        Entries without the score key are skipped; input order is preserved.
        """
        return [
            PhEvalGeneResult(
                gene_symbol=self._find_gene_symbol(entry),
                gene_identifier=self._find_gene_identifier(entry),
                score=self._find_relevant_score(entry),
            )
            for entry in self.exomiser_json_result
            if self.score_name in entry
        ]
133
61
 
134
- def simplify_variant_result(self) -> [dict]:
135
- """Simplify Exomiser json output into variant results."""
62
+
63
class PhEvalVariantResultFromExomiserJsonCreator:
    """Build ``PhEvalVariantResult`` objects from the entries of a parsed Exomiser JSON result."""

    # ACMG classifications that let a variant pass the optional ACMG filter.
    _ACMG_PASSING_CLASSIFICATIONS = ("PATHOGENIC", "LIKELY_PATHOGENIC")

    def __init__(self, exomiser_json_result: [dict], score_name: str):
        """Keep the parsed result entries and the key of the score to report."""
        self.exomiser_json_result = exomiser_json_result
        self.score_name = score_name

    @staticmethod
    def _find_chromosome(result_entry: dict) -> str:
        """Return chromosome (``contigName``) from Exomiser result entry."""
        return result_entry["contigName"]

    @staticmethod
    def _find_start_pos(result_entry: dict) -> int:
        """Return start position from Exomiser result entry."""
        return result_entry["start"]

    @staticmethod
    def _find_end_pos(result_entry: dict) -> int:
        """Return end position from Exomiser result entry."""
        return result_entry["end"]

    @staticmethod
    def _find_ref(result_entry: dict) -> str:
        """Return reference allele from Exomiser result entry."""
        return result_entry["ref"]

    @staticmethod
    def _find_alt(result_entry: dict) -> str:
        """Return alternate allele, or ``""`` when it is missing or ``None``.

        Leading/trailing ``>`` and then ``<`` characters are stripped, so a
        value such as ``<DEL>`` becomes ``DEL``.
        """
        alt = result_entry.get("alt")
        if alt is None:
            return ""
        return alt.strip(">").strip("<")

    def _find_relevant_score(self, result_entry) -> float:
        """Return the entry's ``score_name`` value rounded to 4 decimal places."""
        return round(result_entry[self.score_name], 4)

    def _filter_for_acmg_assignments(
        self, variant: PhEvalVariantResult, score: float, variant_acmg_assignments: [dict]
    ) -> bool:
        """Tell whether ``variant`` has a PATHOGENIC or LIKELY_PATHOGENIC ACMG assignment.

        Each assignment's ``variantEvaluation`` is turned into a
        ``PhEvalVariantResult`` (using ``score``) and compared to ``variant``
        with ``==``. Returns ``True`` on the first match whose
        ``acmgClassification`` is accepted, otherwise ``False``.
        """
        for assignment in variant_acmg_assignments:
            # Check the cheap classification first; other classes can never pass.
            if assignment["acmgClassification"] not in self._ACMG_PASSING_CLASSIFICATIONS:
                continue
            evaluation = assignment["variantEvaluation"]
            candidate = PhEvalVariantResult(
                chromosome=self._find_chromosome(evaluation),
                start=self._find_start_pos(evaluation),
                end=self._find_end_pos(evaluation),
                ref=self._find_ref(evaluation),
                alt=self._find_alt(evaluation),
                score=score,
            )
            if variant == candidate:
                return True
        # Explicit False instead of falling off the end with None.
        return False

    def extract_pheval_variant_requirements(
        self, use_acmg_filter: bool = False
    ) -> [PhEvalVariantResult]:
        """Extract data required to produce PhEval variant output.

        For every entry that contains ``score_name``, each contributing
        variant of each gene hit in ``geneScores`` becomes a
        ``PhEvalVariantResult`` carrying the entry's rounded score.

        Args:
            use_acmg_filter: When true, keep only variants accepted by
                ``_filter_for_acmg_assignments``.

        Returns:
            The collected variant results, in input order.
        """
        simplified_exomiser_result = []
        for result_entry in self.exomiser_json_result:
            # The score lives on the entry, so test for it once per entry.
            if self.score_name not in result_entry:
                continue
            for gene_hit in result_entry["geneScores"]:
                if "contributingVariants" not in gene_hit:
                    continue
                score = self._find_relevant_score(result_entry)
                # A gene hit without ACMG data must not raise, least of all
                # when the ACMG filter is switched off.
                variant_acmg_assignments = gene_hit.get("acmgAssignments") or []
                for cv in gene_hit["contributingVariants"]:
                    variant = PhEvalVariantResult(
                        chromosome=self._find_chromosome(cv),
                        start=self._find_start_pos(cv),
                        end=self._find_end_pos(cv),
                        ref=self._find_ref(cv),
                        alt=self._find_alt(cv),
                        score=score,
                    )
                    if not use_acmg_filter or self._filter_for_acmg_assignments(
                        variant, score, variant_acmg_assignments
                    ):
                        simplified_exomiser_result.append(variant)
        return simplified_exomiser_result
148
147
 
149
- def standardise_gene_result(self) -> [dict]:
150
- """Standardise Exomiser json to gene results for analysis."""
151
- simplified_exomiser_result = self.simplify_gene_result()
152
- return RankExomiserResult(simplified_exomiser_result, self.ranking_method).rank_results()
153
-
154
- def standardise_variant_result(self) -> [dict]:
155
- """Standardise Exomiser json to gene results for analysis."""
156
- simplified_exomiser_result = self.simplify_variant_result()
157
- return RankExomiserResult(simplified_exomiser_result, self.ranking_method).rank_results()
158
-
159
-
160
- def create_standardised_results(results_dir: Path, output_dir: Path, ranking_method) -> None:
161
- """Write standardised gene and variant results from default Exomiser json output."""
162
- output_dir.joinpath("pheval_gene_results/").mkdir(exist_ok=True, parents=True)
163
- output_dir.joinpath("pheval_variant_results/").mkdir(exist_ok=True, parents=True)
164
- for result in files_with_suffix(results_dir, ".json"):
165
- exomiser_result = read_exomiser_json_result(result)
166
- standardised_gene_result = StandardiseExomiserResult(
167
- exomiser_result, ranking_method
168
- ).standardise_gene_result()
169
- standardised_variant_result = StandardiseExomiserResult(
170
- exomiser_result, ranking_method
171
- ).standardise_variant_result()
172
- gene_df = pd.DataFrame(standardised_gene_result)
173
- gene_df = gene_df.loc[:, ["rank", "score", "gene_symbol", "gene_identifier"]]
174
- gene_df.to_csv(
175
- output_dir.joinpath("pheval_gene_results/" + result.stem + "-pheval_gene_result.tsv"),
176
- sep="\t",
177
- index=False,
178
- )
179
- variant_df = pd.DataFrame(standardised_variant_result)
180
- variant_df = variant_df.drop("variant", axis=1).join(variant_df.variant.apply(pd.Series))
181
- variant_df = variant_df.loc[:, ["rank", "score", "chrom", "pos", "ref", "alt", "gene"]]
182
- variant_df.to_csv(
183
- output_dir.joinpath(
184
- "pheval_variant_results/" + result.stem + "-pheval_variant_result.tsv"
185
- ),
186
- sep="\t",
187
- index=False,
188
- )
148
+
149
class PhEvalDiseaseResultFromExomiserJsonCreator:
    """Build ``PhEvalDiseaseResult`` objects from the entries of a parsed Exomiser JSON result."""

    def __init__(self, exomiser_json_result: [dict]):
        """Keep the parsed result entries."""
        self.exomiser_json_result = exomiser_json_result

    @staticmethod
    def _find_disease_name(result_entry: dict) -> str:
        """Return disease term from Exomiser result entry."""
        return result_entry["diseaseTerm"]

    @staticmethod
    def _find_disease_identifier(result_entry: dict) -> int:
        """Return disease ID from Exomiser result entry."""
        # NOTE(review): annotated as int, but the value is returned untouched;
        # confirm the real type of "diseaseId" and adjust the annotation.
        return result_entry["diseaseId"]

    @staticmethod
    def _find_relevant_score(result_entry) -> float:
        """Return the ``score`` value rounded to 4 decimal places."""
        return round(result_entry["score"], 4)

    def extract_pheval_disease_requirements(self) -> [PhEvalDiseaseResult]:
        """Extract data required to produce PhEval disease output.

        Reads ``priorityResults -> HIPHIVE_PRIORITY -> diseaseMatches`` from
        each entry. Entries lacking any of those keys are skipped, and so is
        an individual disease match lacking a required key; a malformed match
        no longer discards the matches that follow it.

        Returns:
            One ``PhEvalDiseaseResult`` per usable disease match, in input order.
        """
        simplified_exomiser_result = []
        for result_entry in self.exomiser_json_result:
            try:
                disease_matches = result_entry["priorityResults"]["HIPHIVE_PRIORITY"][
                    "diseaseMatches"
                ]
            except KeyError:
                # Entry carries no disease matches.
                continue
            for disease in disease_matches:
                try:
                    model = disease["model"]
                    disease_name = self._find_disease_name(model)
                    disease_identifier = self._find_disease_identifier(model)
                    score = self._find_relevant_score(disease)
                except KeyError:
                    # Best effort: drop only this incomplete match.
                    continue
                simplified_exomiser_result.append(
                    PhEvalDiseaseResult(
                        disease_name=disease_name,
                        disease_identifier=disease_identifier,
                        score=score,
                    )
                )
        return simplified_exomiser_result
186
+
187
+
188
def create_standardised_results(
    results_dir: Path,
    output_dir: Path,
    score_name: str,
    sort_order: str,
    variant_analysis: bool,
    gene_analysis: bool,
    disease_analysis: bool,
    include_acmg: bool = False,
) -> None:
    """Generate PhEval gene/variant/disease results for each Exomiser JSON file.

    Every file yielded by ``files_with_suffix(results_dir, ".json")`` is read
    once; for each enabled analysis the matching creator extracts the results
    and ``generate_pheval_result`` is called with them.

    Args:
        results_dir: Directory searched for ``.json`` files.
        output_dir: Forwarded to ``generate_pheval_result``.
        score_name: Score key handed to the gene and variant creators.
        sort_order: Forwarded as ``sort_order_str``.
        variant_analysis: Produce variant results when true.
        gene_analysis: Produce gene results when true.
        disease_analysis: Produce disease results when true.
        include_acmg: Forwarded to the variant extraction as its ACMG-filter switch.
    """

    def write_pheval_result(pheval_result, source_json: Path) -> None:
        # Single place where the shared generate_pheval_result arguments are bound.
        generate_pheval_result(
            pheval_result=pheval_result,
            sort_order_str=sort_order,
            output_dir=output_dir,
            tool_result_path=trim_exomiser_result_filename(source_json),
        )

    for source_json in files_with_suffix(results_dir, ".json"):
        parsed_result = read_exomiser_json_result(source_json)
        if gene_analysis:
            gene_creator = PhEvalGeneResultFromExomiserJsonCreator(parsed_result, score_name)
            write_pheval_result(gene_creator.extract_pheval_gene_requirements(), source_json)
        if variant_analysis:
            variant_creator = PhEvalVariantResultFromExomiserJsonCreator(
                parsed_result, score_name
            )
            write_pheval_result(
                variant_creator.extract_pheval_variant_requirements(include_acmg), source_json
            )
        if disease_analysis:
            disease_creator = PhEvalDiseaseResultFromExomiserJsonCreator(parsed_result)
            write_pheval_result(
                disease_creator.extract_pheval_disease_requirements(), source_json
            )
189
231
 
190
232
 
191
233
  @click.command()
@@ -206,18 +248,81 @@ def create_standardised_results(results_dir: Path, output_dir: Path, ranking_met
206
248
  type=Path,
207
249
  )
208
250
  @click.option(
209
- "--ranking-method",
210
- "-r",
251
+ "--score-name",
252
+ "-s",
211
253
  required=True,
212
- help="ranking method",
254
+ help="Score name to extract from results.",
213
255
  type=click.Choice(["combinedScore", "priorityScore", "variantScore", "pValue"]),
214
256
  default="combinedScore",
215
257
  show_default=True,
216
258
  )
217
- def post_process_exomiser_results(output_dir: Path, results_dir: Path, ranking_method):
218
- """Post-process Exomiser json results into standardised gene and variant outputs."""
219
- try:
220
- output_dir.mkdir()
221
- except FileExistsError:
222
- pass
223
- create_standardised_results(results_dir, output_dir, ranking_method)
259
+ @click.option(
260
+ "--sort-order",
261
+ "-so",
262
+ required=True,
263
+ help="Ordering of results for ranking.",
264
+ type=click.Choice(["ascending", "descending"]),
265
+ default="descending",
266
+ show_default=True,
267
+ )
268
+ @click.option(
269
+ "--gene-analysis/--no-gene-analysis",
270
+ type=bool,
271
+ default=False,
272
+ help="Specify whether to create PhEval gene results.",
273
+ )
274
+ @click.option(
275
+ "--variant-analysis/--no-variant-analysis",
276
+ type=bool,
277
+ default=False,
278
+ help="Specify whether to create PhEval variant results.",
279
+ )
280
+ @click.option(
281
+ "--disease-analysis/--no-disease-analysis",
282
+ type=bool,
283
+ default=False,
284
+ help="Specify whether to create PhEval disease results.",
285
+ )
286
+ @click.option(
287
+ "--include-acmg",
288
+ is_flag=True,
289
+ type=bool,
290
+ default=False,
291
+ help="Specify whether to include ACMG filter for PATHOGENIC or LIKELY_PATHOGENIC classifications.",
292
+ )
293
def post_process_exomiser_results(
    output_dir: Path,
    results_dir: Path,
    score_name: str,
    sort_order: str,
    gene_analysis: bool,
    variant_analysis: bool,
    disease_analysis: bool,
    include_acmg: bool,
):
    """Post-process Exomiser json results into PhEval gene, variant and disease outputs.

    Creates ``pheval_gene_results``, ``pheval_variant_results`` and
    ``pheval_disease_results`` under ``output_dir`` for the analyses that are
    enabled, then calls ``create_standardised_results``.
    """
    requested_outputs = (
        (gene_analysis, "pheval_gene_results"),
        (variant_analysis, "pheval_variant_results"),
        (disease_analysis, "pheval_disease_results"),
    )
    for enabled, sub_directory in requested_outputs:
        if enabled:
            output_dir.joinpath(sub_directory).mkdir(parents=True, exist_ok=True)
    # Keyword arguments: the callee orders its flags variant/gene/disease,
    # unlike this function, so positional passing is easy to get wrong.
    create_standardised_results(
        results_dir=results_dir,
        output_dir=output_dir,
        score_name=score_name,
        sort_order=sort_order,
        variant_analysis=variant_analysis,
        gene_analysis=gene_analysis,
        disease_analysis=disease_analysis,
        include_acmg=include_acmg,
    )