pheval-exomiser 0.2.6__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/PKG-INFO +6 -7
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/pyproject.toml +5 -5
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/post_process/post_process.py +5 -3
- pheval_exomiser-0.3.0/src/pheval_exomiser/post_process/post_process_results_format.py +266 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/runner.py +1 -0
- pheval_exomiser-0.2.6/src/pheval_exomiser/post_process/post_process_results_format.py +0 -333
- pheval_exomiser-0.2.6/src/pheval_exomiser/prepare/yaml_to_family_phenopacket.py +0 -392
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/README.md +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/__init__.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/cli.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/constants.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/post_process/__init__.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/__init__.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/create_batch_commands.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/tool_specific_configuration_options.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/write_application_properties.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/run/__init__.py +0 -0
- {pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/run/run.py +0 -0
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: pheval_exomiser
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary:
|
|
5
5
|
Author: Yasemin Bridges
|
|
6
6
|
Author-email: y.bridges@qmul.ac.uk
|
|
7
|
-
Requires-Python: >=3.9,<4.0.0
|
|
7
|
+
Requires-Python: >=3.10,<4.0.0
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
10
9
|
Classifier: Programming Language :: Python :: 3.10
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -14,12 +13,12 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
14
13
|
Requires-Dist: click (>=8.1.3,<9.0.0)
|
|
15
14
|
Requires-Dist: docker (>=6.0.1,<7.0.0)
|
|
16
15
|
Requires-Dist: google (>=3.0.0,<4.0.0)
|
|
16
|
+
Requires-Dist: numpy (<2)
|
|
17
17
|
Requires-Dist: oaklib (>=0.5.12,<0.6.0)
|
|
18
|
-
Requires-Dist: pandas (>=1.5.2,<2.0.0)
|
|
19
18
|
Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
|
|
20
|
-
Requires-Dist: pheval (>=0.
|
|
19
|
+
Requires-Dist: pheval (>=0.5.1,<0.6.0)
|
|
21
20
|
Requires-Dist: pyaml (>=21.10.1,<22.0.0)
|
|
22
|
-
Requires-Dist: pydantic (>=
|
|
21
|
+
Requires-Dist: pydantic (>=2.7.1,<3.0.0)
|
|
23
22
|
Description-Content-Type: text/markdown
|
|
24
23
|
|
|
25
24
|
# Exomiser Runner for PhEval
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "pheval_exomiser"
|
|
3
|
-
version = "0.2.6"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = ""
|
|
5
5
|
authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
|
|
6
6
|
"Julius Jacobsen <j.jacobsen@qmul.ac.uk>",
|
|
@@ -10,16 +10,16 @@ readme = "README.md"
|
|
|
10
10
|
packages = [{ include = "pheval_exomiser", from = "src" }]
|
|
11
11
|
|
|
12
12
|
[tool.poetry.dependencies]
|
|
13
|
-
python = ">=3.9,<4.0.0"
|
|
13
|
+
python = ">=3.10,<4.0.0"
|
|
14
14
|
click = "^8.1.3"
|
|
15
|
-
pandas = "^1.5.2"
|
|
16
15
|
phenopackets = "^2.0.2"
|
|
17
16
|
google = "^3.0.0"
|
|
18
17
|
pyaml = "^21.10.1"
|
|
19
18
|
oaklib = "^0.5.12"
|
|
20
19
|
docker = "^6.0.1"
|
|
21
|
-
pydantic = "^
|
|
22
|
-
|
|
20
|
+
pydantic = "^2.7.1"
|
|
21
|
+
numpy = "<2"
|
|
22
|
+
pheval = "^0.5.1"
|
|
23
23
|
|
|
24
24
|
[tool.poetry.dev-dependencies]
|
|
25
25
|
pytest = "^7.1.2"
|
{pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/post_process/post_process.py
RENAMED
|
@@ -8,6 +8,7 @@ def post_process_result_format(
|
|
|
8
8
|
config: ExomiserConfigurations,
|
|
9
9
|
raw_results_dir: Path,
|
|
10
10
|
output_dir: Path,
|
|
11
|
+
phenopacket_dir: Path,
|
|
11
12
|
variant_analysis: bool,
|
|
12
13
|
gene_analysis: bool,
|
|
13
14
|
disease_analysis: bool,
|
|
@@ -15,12 +16,13 @@ def post_process_result_format(
|
|
|
15
16
|
"""Standardise Exomiser json format to separated gene and variant results."""
|
|
16
17
|
print("...standardising results format...")
|
|
17
18
|
create_standardised_results(
|
|
18
|
-
|
|
19
|
+
result_dir=raw_results_dir,
|
|
19
20
|
output_dir=output_dir,
|
|
20
|
-
|
|
21
|
+
phenopacket_dir=phenopacket_dir,
|
|
21
22
|
sort_order=config.post_process.sort_order,
|
|
22
|
-
|
|
23
|
+
score_name=config.post_process.score_name,
|
|
23
24
|
gene_analysis=gene_analysis,
|
|
24
25
|
disease_analysis=disease_analysis,
|
|
26
|
+
variant_analysis=variant_analysis,
|
|
25
27
|
)
|
|
26
28
|
print("done")
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
import polars as pl
|
|
7
|
+
from pheval.post_processing.post_processing import (
|
|
8
|
+
SortOrder,
|
|
9
|
+
generate_disease_result,
|
|
10
|
+
generate_gene_result,
|
|
11
|
+
generate_variant_result,
|
|
12
|
+
)
|
|
13
|
+
from pheval.utils.file_utils import files_with_suffix
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ModeOfInheritance(Enum):
|
|
17
|
+
AUTOSOMAL_DOMINANT = 1
|
|
18
|
+
AUTOSOMAL_RECESSIVE = 2
|
|
19
|
+
X_DOMINANT = 1
|
|
20
|
+
X_RECESSIVE = 2
|
|
21
|
+
MITOCHONDRIAL = 3
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
|
|
25
|
+
"""Trim suffix appended to Exomiser JSON result path."""
|
|
26
|
+
return Path(str(exomiser_result_path.name).replace("-exomiser", ""))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def extract_gene_results_from_json(
|
|
30
|
+
exomiser_json_result: pl.DataFrame, score_name: str
|
|
31
|
+
) -> pl.DataFrame:
|
|
32
|
+
return exomiser_json_result.select(
|
|
33
|
+
[
|
|
34
|
+
pl.col("geneSymbol").alias("gene_symbol"),
|
|
35
|
+
pl.col("geneIdentifier").struct.field("geneId").alias("gene_identifier"),
|
|
36
|
+
pl.col(score_name).fill_null(0).round(4).alias("score"),
|
|
37
|
+
]
|
|
38
|
+
).drop_nulls()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.DataFrame:
|
|
42
|
+
return (
|
|
43
|
+
exomiser_json_result.select(
|
|
44
|
+
[
|
|
45
|
+
pl.col("priorityResults")
|
|
46
|
+
.struct.field("HIPHIVE_PRIORITY")
|
|
47
|
+
.struct.field("diseaseMatches")
|
|
48
|
+
]
|
|
49
|
+
)
|
|
50
|
+
.explode("diseaseMatches")
|
|
51
|
+
.unnest("diseaseMatches")
|
|
52
|
+
.unnest("model")
|
|
53
|
+
.select([pl.col("diseaseId").alias("disease_identifier"), pl.col("score").round(4)])
|
|
54
|
+
.drop_nulls()
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def extract_variant_results_from_json(
|
|
59
|
+
exomiser_json_result: pl.DataFrame, score_name: str
|
|
60
|
+
) -> pl.DataFrame:
|
|
61
|
+
return (
|
|
62
|
+
exomiser_json_result.filter(pl.col("geneScores").is_not_null())
|
|
63
|
+
.select([pl.col("geneScores"), pl.col(score_name).alias("score"), pl.col("geneSymbol")])
|
|
64
|
+
.explode("geneScores")
|
|
65
|
+
.unnest("geneScores")
|
|
66
|
+
.filter(pl.col("contributingVariants").is_not_null())
|
|
67
|
+
.explode("contributingVariants")
|
|
68
|
+
.with_columns(
|
|
69
|
+
[
|
|
70
|
+
pl.col("contributingVariants").struct.field("contigName").alias("chrom"),
|
|
71
|
+
pl.col("contributingVariants").struct.field("start"),
|
|
72
|
+
pl.col("contributingVariants").struct.field("end"),
|
|
73
|
+
pl.col("contributingVariants").struct.field("ref"),
|
|
74
|
+
pl.col("contributingVariants")
|
|
75
|
+
.struct.field("alt")
|
|
76
|
+
.fill_null("")
|
|
77
|
+
.str.strip_chars("<>")
|
|
78
|
+
.alias("alt"),
|
|
79
|
+
pl.col("modeOfInheritance")
|
|
80
|
+
.map_elements(lambda moi: ModeOfInheritance[moi].value, return_dtype=pl.Int8)
|
|
81
|
+
.alias("moi_enum"),
|
|
82
|
+
]
|
|
83
|
+
)
|
|
84
|
+
.with_columns(
|
|
85
|
+
[
|
|
86
|
+
(pl.col("moi_enum") == 2).alias("is_recessive"),
|
|
87
|
+
pl.when(pl.col("moi_enum") == 2)
|
|
88
|
+
.then(
|
|
89
|
+
pl.format(
|
|
90
|
+
"recessive|{}|{}|{}",
|
|
91
|
+
pl.col("geneSymbol"),
|
|
92
|
+
pl.col("score"),
|
|
93
|
+
pl.col("moi_enum"),
|
|
94
|
+
)
|
|
95
|
+
)
|
|
96
|
+
.otherwise(
|
|
97
|
+
pl.format(
|
|
98
|
+
"dominant|{}|{}|{}|{}|{}|{}",
|
|
99
|
+
pl.col("chrom"),
|
|
100
|
+
pl.col("start"),
|
|
101
|
+
pl.col("end"),
|
|
102
|
+
pl.col("ref"),
|
|
103
|
+
pl.col("alt"),
|
|
104
|
+
pl.col("score"),
|
|
105
|
+
)
|
|
106
|
+
)
|
|
107
|
+
.alias("group_key"),
|
|
108
|
+
]
|
|
109
|
+
)
|
|
110
|
+
.with_columns(
|
|
111
|
+
[
|
|
112
|
+
pl.col("group_key")
|
|
113
|
+
.rank("dense")
|
|
114
|
+
.cast(pl.UInt32)
|
|
115
|
+
.map_elements(
|
|
116
|
+
lambda i: str(uuid.uuid5(uuid.NAMESPACE_DNS, str(i))), return_dtype=pl.String
|
|
117
|
+
)
|
|
118
|
+
.alias("grouping_id")
|
|
119
|
+
]
|
|
120
|
+
)
|
|
121
|
+
.select(
|
|
122
|
+
["chrom", "start", "end", "ref", "alt", "score", "modeOfInheritance", "grouping_id"]
|
|
123
|
+
)
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def create_standardised_results(
|
|
128
|
+
result_dir: Path,
|
|
129
|
+
output_dir: Path,
|
|
130
|
+
phenopacket_dir: Path,
|
|
131
|
+
score_name: str,
|
|
132
|
+
sort_order: str,
|
|
133
|
+
gene_analysis: bool,
|
|
134
|
+
disease_analysis: bool,
|
|
135
|
+
variant_analysis: bool,
|
|
136
|
+
):
|
|
137
|
+
sort_order = SortOrder.ASCENDING if sort_order.lower() == "ascending" else SortOrder.DESCENDING
|
|
138
|
+
for exomiser_json_result_path in files_with_suffix(result_dir, ".json"):
|
|
139
|
+
exomiser_json_result = pl.read_json(exomiser_json_result_path)
|
|
140
|
+
if gene_analysis:
|
|
141
|
+
gene_results = extract_gene_results_from_json(exomiser_json_result, score_name)
|
|
142
|
+
generate_gene_result(
|
|
143
|
+
results=gene_results,
|
|
144
|
+
sort_order=sort_order,
|
|
145
|
+
output_dir=output_dir,
|
|
146
|
+
result_path=trim_exomiser_result_filename(exomiser_json_result_path),
|
|
147
|
+
phenopacket_dir=phenopacket_dir,
|
|
148
|
+
)
|
|
149
|
+
if disease_analysis:
|
|
150
|
+
disease_results = extract_disease_results_from_json(exomiser_json_result)
|
|
151
|
+
generate_disease_result(
|
|
152
|
+
results=disease_results,
|
|
153
|
+
sort_order=sort_order,
|
|
154
|
+
output_dir=output_dir,
|
|
155
|
+
result_path=trim_exomiser_result_filename(exomiser_json_result_path),
|
|
156
|
+
phenopacket_dir=phenopacket_dir,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
if variant_analysis:
|
|
160
|
+
variant_results = extract_variant_results_from_json(exomiser_json_result, score_name)
|
|
161
|
+
generate_variant_result(
|
|
162
|
+
results=variant_results,
|
|
163
|
+
sort_order=sort_order,
|
|
164
|
+
output_dir=output_dir,
|
|
165
|
+
result_path=trim_exomiser_result_filename(exomiser_json_result_path),
|
|
166
|
+
phenopacket_dir=phenopacket_dir,
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@click.command()
|
|
171
|
+
@click.option(
|
|
172
|
+
"--output-dir",
|
|
173
|
+
"-o",
|
|
174
|
+
required=True,
|
|
175
|
+
metavar="PATH",
|
|
176
|
+
help="Output directory for standardised results.",
|
|
177
|
+
type=Path,
|
|
178
|
+
)
|
|
179
|
+
@click.option(
|
|
180
|
+
"--results-dir",
|
|
181
|
+
"-R",
|
|
182
|
+
required=True,
|
|
183
|
+
metavar="DIRECTORY",
|
|
184
|
+
help="Full path to Exomiser results directory to be standardised.",
|
|
185
|
+
type=Path,
|
|
186
|
+
)
|
|
187
|
+
@click.option(
|
|
188
|
+
"--phenopacket-dir",
|
|
189
|
+
"-p",
|
|
190
|
+
required=True,
|
|
191
|
+
metavar="DIRECTORY",
|
|
192
|
+
help="Full path to phenopacket dir used to generate the raw results.",
|
|
193
|
+
type=Path,
|
|
194
|
+
)
|
|
195
|
+
@click.option(
|
|
196
|
+
"--score-name",
|
|
197
|
+
"-s",
|
|
198
|
+
required=True,
|
|
199
|
+
help="Score name to extract from results.",
|
|
200
|
+
type=click.Choice(["combinedScore", "priorityScore", "variantScore", "pValue"]),
|
|
201
|
+
default="combinedScore",
|
|
202
|
+
show_default=True,
|
|
203
|
+
)
|
|
204
|
+
@click.option(
|
|
205
|
+
"--sort-order",
|
|
206
|
+
"-so",
|
|
207
|
+
required=True,
|
|
208
|
+
help="Ordering of results for ranking.",
|
|
209
|
+
type=click.Choice(["ascending", "descending"]),
|
|
210
|
+
default="descending",
|
|
211
|
+
show_default=True,
|
|
212
|
+
)
|
|
213
|
+
@click.option(
|
|
214
|
+
"--gene-analysis/--no-gene-analysis",
|
|
215
|
+
type=bool,
|
|
216
|
+
default=False,
|
|
217
|
+
help="Specify whether to create PhEval gene results.",
|
|
218
|
+
)
|
|
219
|
+
@click.option(
|
|
220
|
+
"--variant-analysis/--no-variant-analysis",
|
|
221
|
+
type=bool,
|
|
222
|
+
default=False,
|
|
223
|
+
help="Specify whether to create PhEval variant results.",
|
|
224
|
+
)
|
|
225
|
+
@click.option(
|
|
226
|
+
"--disease-analysis/--no-disease-analysis",
|
|
227
|
+
type=bool,
|
|
228
|
+
default=False,
|
|
229
|
+
help="Specify whether to create PhEval disease results.",
|
|
230
|
+
)
|
|
231
|
+
def post_process_exomiser_results(
|
|
232
|
+
output_dir: Path,
|
|
233
|
+
results_dir: Path,
|
|
234
|
+
phenopacket_dir: Path,
|
|
235
|
+
score_name: str,
|
|
236
|
+
sort_order: str,
|
|
237
|
+
gene_analysis: bool,
|
|
238
|
+
variant_analysis: bool,
|
|
239
|
+
disease_analysis: bool,
|
|
240
|
+
):
|
|
241
|
+
"""Post-process Exomiser json results into PhEval gene and variant outputs."""
|
|
242
|
+
(
|
|
243
|
+
output_dir.joinpath("pheval_gene_results").mkdir(parents=True, exist_ok=True)
|
|
244
|
+
if gene_analysis
|
|
245
|
+
else None
|
|
246
|
+
)
|
|
247
|
+
(
|
|
248
|
+
output_dir.joinpath("pheval_variant_results").mkdir(parents=True, exist_ok=True)
|
|
249
|
+
if variant_analysis
|
|
250
|
+
else None
|
|
251
|
+
)
|
|
252
|
+
(
|
|
253
|
+
output_dir.joinpath("pheval_disease_results").mkdir(parents=True, exist_ok=True)
|
|
254
|
+
if disease_analysis
|
|
255
|
+
else None
|
|
256
|
+
)
|
|
257
|
+
create_standardised_results(
|
|
258
|
+
result_dir=results_dir,
|
|
259
|
+
output_dir=output_dir,
|
|
260
|
+
phenopacket_dir=phenopacket_dir,
|
|
261
|
+
score_name=score_name,
|
|
262
|
+
sort_order=sort_order,
|
|
263
|
+
variant_analysis=variant_analysis,
|
|
264
|
+
gene_analysis=gene_analysis,
|
|
265
|
+
disease_analysis=disease_analysis,
|
|
266
|
+
)
|
|
@@ -67,6 +67,7 @@ class ExomiserPhEvalRunner(PhEvalRunner):
|
|
|
67
67
|
config=config,
|
|
68
68
|
raw_results_dir=self.raw_results_dir,
|
|
69
69
|
output_dir=self.output_dir,
|
|
70
|
+
phenopacket_dir=self.testdata_dir.joinpath("phenopackets"),
|
|
70
71
|
variant_analysis=self.input_dir_config.variant_analysis,
|
|
71
72
|
gene_analysis=self.input_dir_config.gene_analysis,
|
|
72
73
|
disease_analysis=self.input_dir_config.disease_analysis,
|
|
@@ -1,333 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/python
|
|
2
|
-
import json
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
import click
|
|
6
|
-
from pheval.post_processing.post_processing import (
|
|
7
|
-
PhEvalDiseaseResult,
|
|
8
|
-
PhEvalGeneResult,
|
|
9
|
-
PhEvalVariantResult,
|
|
10
|
-
generate_pheval_result,
|
|
11
|
-
)
|
|
12
|
-
from pheval.utils.file_utils import files_with_suffix
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def read_exomiser_json_result(exomiser_result_path: Path) -> dict:
|
|
16
|
-
"""Load Exomiser json result."""
|
|
17
|
-
with open(exomiser_result_path) as exomiser_json_result:
|
|
18
|
-
exomiser_result = json.load(exomiser_json_result)
|
|
19
|
-
exomiser_json_result.close()
|
|
20
|
-
return exomiser_result
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
|
|
24
|
-
"""Trim suffix appended to Exomiser JSON result path."""
|
|
25
|
-
return Path(str(exomiser_result_path.name).replace("-exomiser", ""))
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class PhEvalGeneResultFromExomiserJsonCreator:
|
|
29
|
-
def __init__(self, exomiser_json_result: [dict], score_name: str):
|
|
30
|
-
self.exomiser_json_result = exomiser_json_result
|
|
31
|
-
self.score_name = score_name
|
|
32
|
-
|
|
33
|
-
@staticmethod
|
|
34
|
-
def _find_gene_symbol(result_entry: dict) -> str:
|
|
35
|
-
"""Return gene symbol from Exomiser result entry."""
|
|
36
|
-
return result_entry["geneSymbol"]
|
|
37
|
-
|
|
38
|
-
@staticmethod
|
|
39
|
-
def _find_gene_identifier(result_entry: dict) -> str:
|
|
40
|
-
"""Return ensembl gene identifier from Exomiser result entry."""
|
|
41
|
-
return result_entry["geneIdentifier"]["geneId"]
|
|
42
|
-
|
|
43
|
-
def _find_relevant_score(self, result_entry: dict):
|
|
44
|
-
"""Return score from Exomiser result entry."""
|
|
45
|
-
return round(result_entry[self.score_name], 4)
|
|
46
|
-
|
|
47
|
-
def extract_pheval_gene_requirements(self) -> [PhEvalGeneResult]:
|
|
48
|
-
"""Extract data required to produce PhEval gene output."""
|
|
49
|
-
simplified_exomiser_result = []
|
|
50
|
-
for result_entry in self.exomiser_json_result:
|
|
51
|
-
if self.score_name in result_entry:
|
|
52
|
-
simplified_exomiser_result.append(
|
|
53
|
-
PhEvalGeneResult(
|
|
54
|
-
gene_symbol=self._find_gene_symbol(result_entry),
|
|
55
|
-
gene_identifier=self._find_gene_identifier(result_entry),
|
|
56
|
-
score=self._find_relevant_score(result_entry),
|
|
57
|
-
)
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
return simplified_exomiser_result
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class PhEvalVariantResultFromExomiserJsonCreator:
|
|
64
|
-
|
|
65
|
-
def __init__(self, exomiser_json_result: [dict], score_name: str):
|
|
66
|
-
self.exomiser_json_result = exomiser_json_result
|
|
67
|
-
self.score_name = score_name
|
|
68
|
-
|
|
69
|
-
@staticmethod
|
|
70
|
-
def _find_chromosome(result_entry: dict) -> str:
|
|
71
|
-
"""Return chromosome from Exomiser result entry."""
|
|
72
|
-
return result_entry["contigName"]
|
|
73
|
-
|
|
74
|
-
@staticmethod
|
|
75
|
-
def _find_start_pos(result_entry: dict) -> int:
|
|
76
|
-
"""Return start position from Exomiser result entry."""
|
|
77
|
-
return result_entry["start"]
|
|
78
|
-
|
|
79
|
-
@staticmethod
|
|
80
|
-
def _find_end_pos(result_entry: dict) -> int:
|
|
81
|
-
"""Return end position from Exomiser result entry."""
|
|
82
|
-
return result_entry["end"]
|
|
83
|
-
|
|
84
|
-
@staticmethod
|
|
85
|
-
def _find_ref(result_entry: dict) -> str:
|
|
86
|
-
"""Return reference allele from Exomiser result entry."""
|
|
87
|
-
return result_entry["ref"]
|
|
88
|
-
|
|
89
|
-
@staticmethod
|
|
90
|
-
def _find_alt(result_entry: dict) -> str:
|
|
91
|
-
"""Return alternate allele from Exomiser result entry."""
|
|
92
|
-
if "alt" in result_entry and result_entry["alt"] is not None:
|
|
93
|
-
return result_entry["alt"].strip(">").strip("<")
|
|
94
|
-
else:
|
|
95
|
-
return ""
|
|
96
|
-
|
|
97
|
-
def _find_relevant_score(self, result_entry) -> float:
|
|
98
|
-
"""Return score from Exomiser result entry."""
|
|
99
|
-
return round(result_entry[self.score_name], 4)
|
|
100
|
-
|
|
101
|
-
def _filter_for_acmg_assignments(
|
|
102
|
-
self, variant: PhEvalVariantResult, score: float, variant_acmg_assignments: dict
|
|
103
|
-
) -> bool:
|
|
104
|
-
"""Filter variants if they meet the PATHOGENIC or LIKELY_PATHOGENIC ACMG classification."""
|
|
105
|
-
for assignment in variant_acmg_assignments:
|
|
106
|
-
if variant == PhEvalVariantResult(
|
|
107
|
-
chromosome=self._find_chromosome(assignment["variantEvaluation"]),
|
|
108
|
-
start=self._find_start_pos(assignment["variantEvaluation"]),
|
|
109
|
-
end=self._find_end_pos(assignment["variantEvaluation"]),
|
|
110
|
-
ref=self._find_ref(assignment["variantEvaluation"]),
|
|
111
|
-
alt=self._find_alt(assignment["variantEvaluation"]),
|
|
112
|
-
score=score,
|
|
113
|
-
) and (
|
|
114
|
-
assignment["acmgClassification"] == "PATHOGENIC"
|
|
115
|
-
or assignment["acmgClassification"] == "LIKELY_PATHOGENIC"
|
|
116
|
-
):
|
|
117
|
-
return True
|
|
118
|
-
|
|
119
|
-
def extract_pheval_variant_requirements(
|
|
120
|
-
self, use_acmg_filter: bool = False
|
|
121
|
-
) -> [PhEvalVariantResult]:
|
|
122
|
-
"""Extract data required to produce PhEval variant output."""
|
|
123
|
-
simplified_exomiser_result = []
|
|
124
|
-
for result_entry in self.exomiser_json_result:
|
|
125
|
-
for gene_hit in result_entry["geneScores"]:
|
|
126
|
-
if self.score_name in result_entry:
|
|
127
|
-
if "contributingVariants" in gene_hit:
|
|
128
|
-
score = self._find_relevant_score(result_entry)
|
|
129
|
-
contributing_variants = gene_hit["contributingVariants"]
|
|
130
|
-
variant_acmg_assignments = gene_hit["acmgAssignments"]
|
|
131
|
-
for cv in contributing_variants:
|
|
132
|
-
variant = PhEvalVariantResult(
|
|
133
|
-
chromosome=self._find_chromosome(cv),
|
|
134
|
-
start=self._find_start_pos(cv),
|
|
135
|
-
end=self._find_end_pos(cv),
|
|
136
|
-
ref=self._find_ref(cv),
|
|
137
|
-
alt=self._find_alt(cv),
|
|
138
|
-
score=score,
|
|
139
|
-
)
|
|
140
|
-
if use_acmg_filter and self._filter_for_acmg_assignments(
|
|
141
|
-
variant, score, variant_acmg_assignments
|
|
142
|
-
):
|
|
143
|
-
simplified_exomiser_result.append(variant)
|
|
144
|
-
if not use_acmg_filter:
|
|
145
|
-
simplified_exomiser_result.append(variant)
|
|
146
|
-
return simplified_exomiser_result
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
class PhEvalDiseaseResultFromExomiserJsonCreator:
|
|
150
|
-
def __init__(self, exomiser_json_result: [dict]):
|
|
151
|
-
self.exomiser_json_result = exomiser_json_result
|
|
152
|
-
|
|
153
|
-
@staticmethod
|
|
154
|
-
def _find_disease_name(result_entry: dict) -> str:
|
|
155
|
-
"""Return disease term from Exomiser result entry."""
|
|
156
|
-
return result_entry["diseaseTerm"]
|
|
157
|
-
|
|
158
|
-
@staticmethod
|
|
159
|
-
def _find_disease_identifier(result_entry: dict) -> int:
|
|
160
|
-
"""Return disease ID from Exomiser result entry."""
|
|
161
|
-
return result_entry["diseaseId"]
|
|
162
|
-
|
|
163
|
-
@staticmethod
|
|
164
|
-
def _find_relevant_score(result_entry) -> float:
|
|
165
|
-
"""Return score from Exomiser result entry."""
|
|
166
|
-
return round(result_entry["score"], 4)
|
|
167
|
-
|
|
168
|
-
def extract_pheval_disease_requirements(self) -> [PhEvalDiseaseResult]:
|
|
169
|
-
"""Extract data required to produce PhEval disease output."""
|
|
170
|
-
simplified_exomiser_result = []
|
|
171
|
-
for result_entry in self.exomiser_json_result:
|
|
172
|
-
try:
|
|
173
|
-
for disease in result_entry["priorityResults"]["HIPHIVE_PRIORITY"][
|
|
174
|
-
"diseaseMatches"
|
|
175
|
-
]:
|
|
176
|
-
simplified_exomiser_result.append(
|
|
177
|
-
PhEvalDiseaseResult(
|
|
178
|
-
disease_name=self._find_disease_name(disease["model"]),
|
|
179
|
-
disease_identifier=self._find_disease_identifier(disease["model"]),
|
|
180
|
-
score=self._find_relevant_score(disease),
|
|
181
|
-
)
|
|
182
|
-
)
|
|
183
|
-
except KeyError:
|
|
184
|
-
pass
|
|
185
|
-
return list(
|
|
186
|
-
{
|
|
187
|
-
(result.disease_identifier, result.score): result
|
|
188
|
-
for result in simplified_exomiser_result
|
|
189
|
-
}.values()
|
|
190
|
-
)
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
def create_standardised_results(
|
|
194
|
-
results_dir: Path,
|
|
195
|
-
output_dir: Path,
|
|
196
|
-
score_name: str,
|
|
197
|
-
sort_order: str,
|
|
198
|
-
variant_analysis: bool,
|
|
199
|
-
gene_analysis: bool,
|
|
200
|
-
disease_analysis: bool,
|
|
201
|
-
include_acmg: bool = False,
|
|
202
|
-
) -> None:
|
|
203
|
-
"""Write standardised gene/variant/disease results from default Exomiser json output."""
|
|
204
|
-
for exomiser_json_result in files_with_suffix(results_dir, ".json"):
|
|
205
|
-
exomiser_result = read_exomiser_json_result(exomiser_json_result)
|
|
206
|
-
if gene_analysis:
|
|
207
|
-
pheval_gene_requirements = PhEvalGeneResultFromExomiserJsonCreator(
|
|
208
|
-
exomiser_result, score_name
|
|
209
|
-
).extract_pheval_gene_requirements()
|
|
210
|
-
generate_pheval_result(
|
|
211
|
-
pheval_result=pheval_gene_requirements,
|
|
212
|
-
sort_order_str=sort_order,
|
|
213
|
-
output_dir=output_dir,
|
|
214
|
-
tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
|
|
215
|
-
)
|
|
216
|
-
if variant_analysis:
|
|
217
|
-
pheval_variant_requirements = PhEvalVariantResultFromExomiserJsonCreator(
|
|
218
|
-
exomiser_result, score_name
|
|
219
|
-
).extract_pheval_variant_requirements(include_acmg)
|
|
220
|
-
generate_pheval_result(
|
|
221
|
-
pheval_result=pheval_variant_requirements,
|
|
222
|
-
sort_order_str=sort_order,
|
|
223
|
-
output_dir=output_dir,
|
|
224
|
-
tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
|
|
225
|
-
)
|
|
226
|
-
if disease_analysis:
|
|
227
|
-
pheval_disease_requirements = PhEvalDiseaseResultFromExomiserJsonCreator(
|
|
228
|
-
exomiser_result
|
|
229
|
-
).extract_pheval_disease_requirements()
|
|
230
|
-
generate_pheval_result(
|
|
231
|
-
pheval_result=pheval_disease_requirements,
|
|
232
|
-
sort_order_str=sort_order,
|
|
233
|
-
output_dir=output_dir,
|
|
234
|
-
tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
|
|
235
|
-
)
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
@click.command()
|
|
239
|
-
@click.option(
|
|
240
|
-
"--output-dir",
|
|
241
|
-
"-o",
|
|
242
|
-
required=True,
|
|
243
|
-
metavar="PATH",
|
|
244
|
-
help="Output directory for standardised results.",
|
|
245
|
-
type=Path,
|
|
246
|
-
)
|
|
247
|
-
@click.option(
|
|
248
|
-
"--results-dir",
|
|
249
|
-
"-R",
|
|
250
|
-
required=True,
|
|
251
|
-
metavar="DIRECTORY",
|
|
252
|
-
help="Full path to Exomiser results directory to be standardised.",
|
|
253
|
-
type=Path,
|
|
254
|
-
)
|
|
255
|
-
@click.option(
|
|
256
|
-
"--score-name",
|
|
257
|
-
"-s",
|
|
258
|
-
required=True,
|
|
259
|
-
help="Score name to extract from results.",
|
|
260
|
-
type=click.Choice(["combinedScore", "priorityScore", "variantScore", "pValue"]),
|
|
261
|
-
default="combinedScore",
|
|
262
|
-
show_default=True,
|
|
263
|
-
)
|
|
264
|
-
@click.option(
|
|
265
|
-
"--sort-order",
|
|
266
|
-
"-so",
|
|
267
|
-
required=True,
|
|
268
|
-
help="Ordering of results for ranking.",
|
|
269
|
-
type=click.Choice(["ascending", "descending"]),
|
|
270
|
-
default="descending",
|
|
271
|
-
show_default=True,
|
|
272
|
-
)
|
|
273
|
-
@click.option(
|
|
274
|
-
"--gene-analysis/--no-gene-analysis",
|
|
275
|
-
type=bool,
|
|
276
|
-
default=False,
|
|
277
|
-
help="Specify whether to create PhEval gene results.",
|
|
278
|
-
)
|
|
279
|
-
@click.option(
|
|
280
|
-
"--variant-analysis/--no-variant-analysis",
|
|
281
|
-
type=bool,
|
|
282
|
-
default=False,
|
|
283
|
-
help="Specify whether to create PhEval variant results.",
|
|
284
|
-
)
|
|
285
|
-
@click.option(
|
|
286
|
-
"--disease-analysis/--no-disease-analysis",
|
|
287
|
-
type=bool,
|
|
288
|
-
default=False,
|
|
289
|
-
help="Specify whether to create PhEval disease results.",
|
|
290
|
-
)
|
|
291
|
-
@click.option(
|
|
292
|
-
"--include-acmg",
|
|
293
|
-
is_flag=True,
|
|
294
|
-
type=bool,
|
|
295
|
-
default=False,
|
|
296
|
-
help="Specify whether to include ACMG filter for PATHOGENIC or LIKELY_PATHOGENIC classifications.",
|
|
297
|
-
)
|
|
298
|
-
def post_process_exomiser_results(
|
|
299
|
-
output_dir: Path,
|
|
300
|
-
results_dir: Path,
|
|
301
|
-
score_name: str,
|
|
302
|
-
sort_order: str,
|
|
303
|
-
gene_analysis: bool,
|
|
304
|
-
variant_analysis: bool,
|
|
305
|
-
disease_analysis: bool,
|
|
306
|
-
include_acmg: bool,
|
|
307
|
-
):
|
|
308
|
-
"""Post-process Exomiser json results into PhEval gene and variant outputs."""
|
|
309
|
-
(
|
|
310
|
-
output_dir.joinpath("pheval_gene_results").mkdir(parents=True, exist_ok=True)
|
|
311
|
-
if gene_analysis
|
|
312
|
-
else None
|
|
313
|
-
)
|
|
314
|
-
(
|
|
315
|
-
output_dir.joinpath("pheval_variant_results").mkdir(parents=True, exist_ok=True)
|
|
316
|
-
if variant_analysis
|
|
317
|
-
else None
|
|
318
|
-
)
|
|
319
|
-
(
|
|
320
|
-
output_dir.joinpath("pheval_disease_results").mkdir(parents=True, exist_ok=True)
|
|
321
|
-
if disease_analysis
|
|
322
|
-
else None
|
|
323
|
-
)
|
|
324
|
-
create_standardised_results(
|
|
325
|
-
results_dir,
|
|
326
|
-
output_dir,
|
|
327
|
-
score_name,
|
|
328
|
-
sort_order,
|
|
329
|
-
variant_analysis,
|
|
330
|
-
gene_analysis,
|
|
331
|
-
disease_analysis,
|
|
332
|
-
include_acmg,
|
|
333
|
-
)
|
|
@@ -1,392 +0,0 @@
|
|
|
1
|
-
from copy import copy
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
import pandas as pd
|
|
6
|
-
import yaml
|
|
7
|
-
from google.protobuf.timestamp_pb2 import Timestamp
|
|
8
|
-
from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
|
|
9
|
-
from oaklib.resource import OntologyResource
|
|
10
|
-
from phenopackets import (
|
|
11
|
-
Diagnosis,
|
|
12
|
-
Family,
|
|
13
|
-
File,
|
|
14
|
-
GeneDescriptor,
|
|
15
|
-
GenomicInterpretation,
|
|
16
|
-
Individual,
|
|
17
|
-
Interpretation,
|
|
18
|
-
MetaData,
|
|
19
|
-
OntologyClass,
|
|
20
|
-
Pedigree,
|
|
21
|
-
Phenopacket,
|
|
22
|
-
PhenotypicFeature,
|
|
23
|
-
Resource,
|
|
24
|
-
VariantInterpretation,
|
|
25
|
-
VariationDescriptor,
|
|
26
|
-
VcfRecord,
|
|
27
|
-
)
|
|
28
|
-
from pheval.prepare.create_noisy_phenopackets import load_ontology
|
|
29
|
-
from pheval.utils.file_utils import files_with_suffix
|
|
30
|
-
from pheval.utils.phenopacket_utils import create_hgnc_dict, write_phenopacket
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def load_genotype_ontology():
|
|
34
|
-
"""Load genotype ontology"""
|
|
35
|
-
genotype_resource = OntologyResource(slug="geno.owl", local=False)
|
|
36
|
-
return ProntoImplementation(genotype_resource)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def exomiser_analysis_yml_reader(yaml_job_file_path: Path) -> dict:
|
|
40
|
-
"""Read an exomiser analysis yaml file."""
|
|
41
|
-
with open(yaml_job_file_path) as yaml_job_file:
|
|
42
|
-
yaml_job = yaml.safe_load(yaml_job_file)
|
|
43
|
-
yaml_job_file.close()
|
|
44
|
-
return yaml_job
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def read_diagnoses_file(diagnoses_file_path: Path) -> pd.DataFrame:
|
|
48
|
-
"""Read a diagnoses file."""
|
|
49
|
-
return pd.read_csv(diagnoses_file_path, delimiter="t")
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def read_pedigree_file(pedigree_path: Path) -> list[str]:
|
|
53
|
-
"""Return the contents of a pedigree file"""
|
|
54
|
-
return open(pedigree_path).readlines()
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class ExomiserYamlToPhenopacketConverter:
    """Build phenopacket messages from an Exomiser analysis YAML job and a diagnoses table.

    The YAML job is read through its ``"analysis"`` mapping (``proband``,
    ``hpoIds``, ``genomeAssembly``, ``vcf``). Diagnoses rows are read through
    the columns ``ProbandId``, ``Sex``, ``Chr``, ``Start``, ``Ref/Alt``,
    ``Genotype`` and ``Gene``.
    """

    def __init__(self, genotype_ontology, human_phenotype_ontology, hgnc_data):
        """Store the lookup resources used while constructing messages.

        Args:
            genotype_ontology: Object whose ``basic_search`` resolves a
                genotype label to ontology ids.
            human_phenotype_ontology: Object whose ``entity_alias_map``
                returns the aliases of an HPO id.
            hgnc_data: Mapping of gene symbol to a record holding at least
                ``"ensembl_id"`` and ``"previous_names"``.
        """
        self.genotype_ontology = genotype_ontology
        self.human_phenotype_ontology = human_phenotype_ontology
        self.hgnc_data = hgnc_data

    @staticmethod
    def construct_individual(yaml_job: dict, diagnoses: pd.DataFrame) -> Individual:
        """Construct individual for phenopacket.

        The sex is the upper-cased ``Sex`` value of the first diagnoses row
        whose ``ProbandId`` equals the job's proband; an ``IndexError`` is
        raised when no such row exists.
        """
        proband_id = yaml_job["analysis"]["proband"]
        proband_rows = diagnoses[diagnoses.ProbandId == proband_id]
        return Individual(id=proband_id, sex=proband_rows.iloc[0]["Sex"].upper())

    @staticmethod
    def get_diagnoses_for_proband(yaml_job: dict, diagnoses: pd.DataFrame):
        """Get all diagnoses rows whose ``ProbandId`` is the job's proband."""
        proband_id = yaml_job["analysis"]["proband"]
        return diagnoses.loc[diagnoses["ProbandId"] == proband_id]

    def construct_phenotypic_interpretations(self, yaml_job: dict) -> list[PhenotypicFeature]:
        """Construct the phenotypic features for the proband.

        One feature is produced per entry of ``analysis.hpoIds``. The label
        is the joined alias list stored under the first key of the alias
        map; when the lookup raises ``AttributeError`` the feature is
        emitted with the id only.
        """
        phenotypic_features = []
        for hpo_id in yaml_job["analysis"]["hpoIds"]:
            try:
                aliases = self.human_phenotype_ontology.entity_alias_map(hpo_id)
                label = "".join(aliases[list(aliases.keys())[0]])
                term = OntologyClass(id=hpo_id, label=label)
            except AttributeError:
                # Label lookup failed: fall back to an unlabelled term.
                term = OntologyClass(id=hpo_id)
            phenotypic_features.append(PhenotypicFeature(type=term))
        return phenotypic_features

    @staticmethod
    def construct_vcf_record(yaml_job: dict, diagnosis: pd.Series) -> VcfRecord:
        """Construct the VCF record for a diagnosis row.

        ``Ref/Alt`` is split on ``"/"``; the first part is the reference
        allele and the second the alternate allele.
        """
        alleles = str(diagnosis["Ref/Alt"]).split("/")
        return VcfRecord(
            genome_assembly=yaml_job["analysis"]["genomeAssembly"],
            # pandas parses an all-numeric chromosome column as integers, so
            # normalise to text before handing it to the message.
            chrom=str(diagnosis["Chr"]),
            pos=int(diagnosis["Start"]),
            ref=alleles[0],
            alt=alleles[1],
        )

    def construct_allelic_state(self, diagnosis: pd.Series) -> OntologyClass:
        """Construct the allelic state for a diagnosis row.

        The lower-cased ``Genotype`` value is used as the label and its
        first ``basic_search`` hit as the id; an ``IndexError`` is raised
        when the search yields nothing.
        """
        genotype = diagnosis["Genotype"].lower()
        matches = list(self.genotype_ontology.basic_search(genotype))
        return OntologyClass(id=matches[0], label=genotype)

    def _ensembl_id_for_previous_name(self, gene_symbol):
        """Return the Ensembl id of the HGNC record listing gene_symbol as a previous name, else None."""
        for gene_info in self.hgnc_data.values():
            if any(gene_symbol == previous for previous in gene_info["previous_names"]):
                return gene_info["ensembl_id"]
        return None

    def construct_gene_descriptor(self, diagnosis: pd.Series) -> GeneDescriptor:
        """Construct the Gene Descriptor for a diagnosis row.

        The ``Gene`` value is looked up as a key of ``hgnc_data`` first and,
        failing that, among the ``previous_names`` of every record.

        Returns:
            The descriptor, or ``None`` when the symbol cannot be resolved.
        """
        gene_symbol = diagnosis["Gene"]
        try:
            ensembl_id = self.hgnc_data[gene_symbol]["ensembl_id"]
        except KeyError:
            # The matching record already carries the Ensembl id; it must
            # not be used as a key into hgnc_data (which is keyed by symbol).
            ensembl_id = self._ensembl_id_for_previous_name(gene_symbol)
            if ensembl_id is None:
                return None
        return GeneDescriptor(value_id=ensembl_id, symbol=gene_symbol)

    @staticmethod
    def _variant_id(yaml_job: dict, diagnosis: pd.Series) -> str:
        """Return ``proband:Chr:Start:Ref/Alt`` for a diagnosis row."""
        parts = (
            yaml_job["analysis"]["proband"],
            diagnosis["Chr"],
            diagnosis["Start"],
            diagnosis["Ref/Alt"],
        )
        # Convert each part explicitly: Chr and Start may be numeric after
        # pandas parsing, and "str + int" raises TypeError.
        return ":".join(str(part) for part in parts)

    def construct_variation_descriptor(
        self, yaml_job: dict, diagnosis: pd.Series
    ) -> VariationDescriptor:
        """Construct a variation descriptor for a diagnosis row."""
        return VariationDescriptor(
            id=self._variant_id(yaml_job, diagnosis),
            gene_context=self.construct_gene_descriptor(diagnosis),
            vcf_record=self.construct_vcf_record(yaml_job, diagnosis),
            allelic_state=self.construct_allelic_state(diagnosis),
        )

    def construct_variant_interpretation(
        self, yaml_job: dict, diagnosis: pd.Series
    ) -> VariantInterpretation:
        """Construct the variant interpretation for a diagnosis row."""
        return VariantInterpretation(
            variation_descriptor=self.construct_variation_descriptor(yaml_job, diagnosis),
        )

    def construct_genomic_interpretations(
        self, yaml_job: dict, diagnoses: pd.DataFrame
    ) -> list[GenomicInterpretation]:
        """Construct one genomic interpretation per diagnoses row of the proband."""
        proband_id = yaml_job["analysis"]["proband"]
        proband_rows = self.get_diagnoses_for_proband(yaml_job, diagnoses)
        return [
            GenomicInterpretation(
                subject_or_biosample_id=proband_id,
                variant_interpretation=self.construct_variant_interpretation(
                    yaml_job=yaml_job, diagnosis=row
                ),
            )
            for _index, row in proband_rows.iterrows()
        ]

    def construct_diagnosis(self, yaml_job: dict, diagnoses: pd.DataFrame) -> Diagnosis:
        """Construct the diagnosis for a proband."""
        return Diagnosis(
            genomic_interpretations=self.construct_genomic_interpretations(yaml_job, diagnoses)
        )

    def construct_interpretations(
        self, yaml_job: dict, diagnoses: pd.DataFrame
    ) -> list[Interpretation]:
        """Construct the single-element interpretation list for a proband.

        The interpretation id is the proband id suffixed with
        ``-interpretation``.
        """
        return [
            Interpretation(
                id=yaml_job["analysis"]["proband"] + "-interpretation",
                diagnosis=self.construct_diagnosis(yaml_job, diagnoses),
            )
        ]

    @staticmethod
    def construct_meta_data() -> MetaData:
        """Construct the meta-data, stamped with the current time.

        The only resource listed is the human phenotype ontology, with a
        hard-coded release version.
        """
        timestamp = Timestamp()
        timestamp.GetCurrentTime()
        return MetaData(
            created=timestamp,
            created_by="pheval-converter",
            resources=[
                Resource(
                    id="hp",
                    name="human phenotype ontology",
                    url="http://purl.obolibrary.org/obo/hp.owl",
                    version="hp/releases/2019-11-08",
                    namespace_prefix="HP",
                    iri_prefix="http://purl.obolibrary.org/obo/HP_",
                )
            ],
            phenopacket_schema_version="2.0",
        )

    @staticmethod
    def construct_files(yaml_job_file: dict) -> list[File]:
        """Construct the file list: one VCF entry taken from ``analysis.vcf``."""
        analysis = yaml_job_file["analysis"]
        return [
            File(
                uri=analysis["vcf"],
                file_attributes={
                    "fileFormat": "VCF",
                    "genomeAssembly": analysis["genomeAssembly"],
                },
            )
        ]
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
def construct_pedigree(pedigree: list[str]) -> tuple[str, Pedigree]:
    """Construct the pedigree message from the lines of a ped file.

    Each non-blank line is split on tabs and read positionally as
    family id, individual id, paternal id, maternal id, sex and affected
    status. A parent id of ``"0"`` means the parent is not recorded and the
    corresponding field is left unset.

    Args:
        pedigree: Lines of the ped file.

    Returns:
        A tuple of the family id taken from the last parsed line (``None``
        when no line was parsed) and the ``Pedigree`` holding one person per
        parsed line.
    """
    persons = []
    family_id = None
    for line in pedigree:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no individual.
            continue
        entry = line.split("\t")
        family_id = entry[0]
        ped_sex = int(entry[4])
        # Until this is fixed with the phenopackets package, sex has to be
        # reassigned: ped codes 1 and 2 are swapped for the message.
        if ped_sex == 1:
            sex = 2
        elif ped_sex == 2:
            sex = 1
        else:
            # Any other code maps to 0, the enum's zero/default value; the
            # former "." placeholder is not a valid enum value.
            sex = 0
        person_fields = {
            "family_id": family_id,
            "individual_id": entry[1],
            "sex": sex,
            "affected_status": int(entry[5]),
        }
        # Each parent is attached independently, so every individual is
        # appended exactly once whichever parents are recorded.
        if entry[2] != "0":
            person_fields["paternal_id"] = entry[2]
        if entry[3] != "0":
            person_fields["maternal_id"] = entry[3]
        persons.append(Pedigree.Person(**person_fields))
    return family_id, Pedigree(persons=persons)
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def construct_phenopacket(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
) -> Phenopacket:
    """Assemble the phenopacket for the proband of an analysis job.

    Args:
        yaml_job_file: Parsed Exomiser analysis YAML.
        diagnoses: Diagnoses table.
        exomiser_yaml_to_phenopacket_converter: Converter that builds each
            component message.

    Returns:
        A ``Phenopacket`` whose id is ``analysis.proband``.
    """
    converter = exomiser_yaml_to_phenopacket_converter
    proband_id = yaml_job_file["analysis"]["proband"]
    subject = converter.construct_individual(yaml_job=yaml_job_file, diagnoses=diagnoses)
    features = converter.construct_phenotypic_interpretations(yaml_job=yaml_job_file)
    interpretations = converter.construct_interpretations(
        yaml_job=yaml_job_file, diagnoses=diagnoses
    )
    files = converter.construct_files(yaml_job_file)
    meta_data = converter.construct_meta_data()
    return Phenopacket(
        id=proband_id,
        subject=subject,
        phenotypic_features=features,
        interpretations=interpretations,
        files=files,
        meta_data=meta_data,
    )
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
def construct_family(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
    pedigree: list[str],
) -> Family:
    """Construct a Family.

    The proband is a copy of the phenopacket built for the job with its
    files and meta-data removed; those two are carried on the family itself.

    Args:
        yaml_job_file: Parsed Exomiser analysis YAML.
        diagnoses: Diagnoses table.
        exomiser_yaml_to_phenopacket_converter: Converter that builds each
            component message.
        pedigree: Lines of the ped file.

    Returns:
        The ``Family`` whose id is the family id read from the pedigree.
    """
    phenopacket = construct_phenopacket(
        yaml_job_file, diagnoses, exomiser_yaml_to_phenopacket_converter
    )
    # NOTE(review): relies on copy() producing a message independent of
    # ``phenopacket`` so the deletions below do not empty the originals.
    proband = copy(phenopacket)
    del proband.files[:]
    # meta_data holds a single MetaData message, not a list, so it cannot be
    # slice-deleted; clear the field instead.
    proband.ClearField("meta_data")
    family_id, ped = construct_pedigree(pedigree)
    return Family(
        id=family_id,
        proband=proband,
        pedigree=ped,
        files=phenopacket.files,
        meta_data=phenopacket.meta_data,
    )
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
def create_phenopacket(
    yaml_job_file: Path,
    diagnoses: pd.DataFrame,
    exomiser_converter: ExomiserYamlToPhenopacketConverter,
) -> "Phenopacket | Family":
    """Construct either a family or phenopacket from an analysis yaml.

    Args:
        yaml_job_file: Path to the Exomiser analysis YAML file.
        diagnoses: Diagnoses table.
        exomiser_converter: Converter that builds each component message.

    Returns:
        A ``Family`` when the job names a ped file under ``analysis.ped``,
        otherwise a ``Phenopacket``.
    """
    yaml_job = exomiser_analysis_yml_reader(yaml_job_file)
    # Treat a missing or null ``ped`` entry like the empty string: there is
    # no pedigree to read, so a plain phenopacket is built.
    ped_path = yaml_job["analysis"].get("ped")
    if not ped_path:
        return construct_phenopacket(yaml_job, diagnoses, exomiser_converter)
    return construct_family(
        yaml_job,
        diagnoses,
        exomiser_converter,
        read_pedigree_file(ped_path),
    )
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
@click.command()
@click.option(
    "--directory",
    # No short alias here: "-d" belongs to --diagnoses-file below, and
    # declaring it on both options left the flag ambiguous.
    required=True,
    help="Directory for Exomiser yaml job files to be converted.",
    type=Path,
)
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yamls_to_phenopacket(
    output_dir: Path, directory: Path, diagnoses_file: Path
):
    """Convert Exomiser YAML files to the phenopacket schema given a .tsv diagnoses file containing the following
    required fields: ..."""
    # Create the output directory (and any missing parents); an existing
    # directory is reused.
    output_dir.mkdir(parents=True, exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    # The ontologies and HGNC table are loaded once and shared by all jobs.
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    for yaml_job_file in files_with_suffix(directory, ".yml"):
        phenopacket = create_phenopacket(yaml_job_file, diagnoses, exomiser_converter)
        # Swap only the final suffix; a plain str.replace would also rewrite
        # ".yml" occurring elsewhere in the file name.
        write_phenopacket(
            phenopacket, output_dir.joinpath(yaml_job_file.with_suffix(".json").name)
        )
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
@click.command()
@click.option(
    "--yaml-file",
    "-y",
    required=True,
    help="Path to Exomiser analysis yaml file for phenopacket conversion.",
    type=Path,
)
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yaml_to_phenopacket(
    output_dir: Path, yaml_file: Path, diagnoses_file: Path
):
    """Convert an Exomiser YAML file to a phenopacket schema given a .tsv diagnoses file containing the following
    required fields: ..."""
    # Create the output directory (and any missing parents); an existing
    # directory is reused.
    output_dir.mkdir(parents=True, exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    phenopacket = create_phenopacket(yaml_file, diagnoses, exomiser_converter)
    # Write into the requested output directory (previously the file landed
    # in the working directory) using the same "<stem>.json" naming as the
    # directory-based command.
    write_phenopacket(phenopacket, output_dir.joinpath(yaml_file.with_suffix(".json").name))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/post_process/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{pheval_exomiser-0.2.6 → pheval_exomiser-0.3.0}/src/pheval_exomiser/prepare/create_batch_commands.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|