pheval-exomiser 0.2.6__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pheval_exomiser/post_process/post_process.py +5 -3
- pheval_exomiser/post_process/post_process_results_format.py +148 -215
- pheval_exomiser/runner.py +1 -0
- {pheval_exomiser-0.2.6.dist-info → pheval_exomiser-0.3.0.dist-info}/METADATA +6 -7
- {pheval_exomiser-0.2.6.dist-info → pheval_exomiser-0.3.0.dist-info}/RECORD +7 -8
- {pheval_exomiser-0.2.6.dist-info → pheval_exomiser-0.3.0.dist-info}/WHEEL +1 -1
- pheval_exomiser/prepare/yaml_to_family_phenopacket.py +0 -392
- {pheval_exomiser-0.2.6.dist-info → pheval_exomiser-0.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -8,6 +8,7 @@ def post_process_result_format(
|
|
|
8
8
|
config: ExomiserConfigurations,
|
|
9
9
|
raw_results_dir: Path,
|
|
10
10
|
output_dir: Path,
|
|
11
|
+
phenopacket_dir: Path,
|
|
11
12
|
variant_analysis: bool,
|
|
12
13
|
gene_analysis: bool,
|
|
13
14
|
disease_analysis: bool,
|
|
@@ -15,12 +16,13 @@ def post_process_result_format(
|
|
|
15
16
|
"""Standardise Exomiser json format to separated gene and variant results."""
|
|
16
17
|
print("...standardising results format...")
|
|
17
18
|
create_standardised_results(
|
|
18
|
-
|
|
19
|
+
result_dir=raw_results_dir,
|
|
19
20
|
output_dir=output_dir,
|
|
20
|
-
|
|
21
|
+
phenopacket_dir=phenopacket_dir,
|
|
21
22
|
sort_order=config.post_process.sort_order,
|
|
22
|
-
|
|
23
|
+
score_name=config.post_process.score_name,
|
|
23
24
|
gene_analysis=gene_analysis,
|
|
24
25
|
disease_analysis=disease_analysis,
|
|
26
|
+
variant_analysis=variant_analysis,
|
|
25
27
|
)
|
|
26
28
|
print("done")
|
|
@@ -1,23 +1,24 @@
|
|
|
1
|
-
|
|
2
|
-
import
|
|
1
|
+
import uuid
|
|
2
|
+
from enum import Enum
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
5
|
import click
|
|
6
|
+
import polars as pl
|
|
6
7
|
from pheval.post_processing.post_processing import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
SortOrder,
|
|
9
|
+
generate_disease_result,
|
|
10
|
+
generate_gene_result,
|
|
11
|
+
generate_variant_result,
|
|
11
12
|
)
|
|
12
13
|
from pheval.utils.file_utils import files_with_suffix
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
16
|
+
class ModeOfInheritance(Enum):
|
|
17
|
+
AUTOSOMAL_DOMINANT = 1
|
|
18
|
+
AUTOSOMAL_RECESSIVE = 2
|
|
19
|
+
X_DOMINANT = 1
|
|
20
|
+
X_RECESSIVE = 2
|
|
21
|
+
MITOCHONDRIAL = 3
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
|
|
@@ -25,213 +26,144 @@ def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
|
|
|
25
26
|
return Path(str(exomiser_result_path.name).replace("-exomiser", ""))
|
|
26
27
|
|
|
27
28
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
29
|
+
def extract_gene_results_from_json(
|
|
30
|
+
exomiser_json_result: pl.DataFrame, score_name: str
|
|
31
|
+
) -> pl.DataFrame:
|
|
32
|
+
return exomiser_json_result.select(
|
|
33
|
+
[
|
|
34
|
+
pl.col("geneSymbol").alias("gene_symbol"),
|
|
35
|
+
pl.col("geneIdentifier").struct.field("geneId").alias("gene_identifier"),
|
|
36
|
+
pl.col(score_name).fill_null(0).round(4).alias("score"),
|
|
37
|
+
]
|
|
38
|
+
).drop_nulls()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.DataFrame:
|
|
42
|
+
return (
|
|
43
|
+
exomiser_json_result.select(
|
|
44
|
+
[
|
|
45
|
+
pl.col("priorityResults")
|
|
46
|
+
.struct.field("HIPHIVE_PRIORITY")
|
|
47
|
+
.struct.field("diseaseMatches")
|
|
48
|
+
]
|
|
49
|
+
)
|
|
50
|
+
.explode("diseaseMatches")
|
|
51
|
+
.unnest("diseaseMatches")
|
|
52
|
+
.unnest("model")
|
|
53
|
+
.select([pl.col("diseaseId").alias("disease_identifier"), pl.col("score").round(4)])
|
|
54
|
+
.drop_nulls()
|
|
55
|
+
)
|
|
42
56
|
|
|
43
|
-
def _find_relevant_score(self, result_entry: dict):
|
|
44
|
-
"""Return score from Exomiser result entry."""
|
|
45
|
-
return round(result_entry[self.score_name], 4)
|
|
46
57
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
58
|
+
def extract_variant_results_from_json(
|
|
59
|
+
exomiser_json_result: pl.DataFrame, score_name: str
|
|
60
|
+
) -> pl.DataFrame:
|
|
61
|
+
return (
|
|
62
|
+
exomiser_json_result.filter(pl.col("geneScores").is_not_null())
|
|
63
|
+
.select([pl.col("geneScores"), pl.col(score_name).alias("score"), pl.col("geneSymbol")])
|
|
64
|
+
.explode("geneScores")
|
|
65
|
+
.unnest("geneScores")
|
|
66
|
+
.filter(pl.col("contributingVariants").is_not_null())
|
|
67
|
+
.explode("contributingVariants")
|
|
68
|
+
.with_columns(
|
|
69
|
+
[
|
|
70
|
+
pl.col("contributingVariants").struct.field("contigName").alias("chrom"),
|
|
71
|
+
pl.col("contributingVariants").struct.field("start"),
|
|
72
|
+
pl.col("contributingVariants").struct.field("end"),
|
|
73
|
+
pl.col("contributingVariants").struct.field("ref"),
|
|
74
|
+
pl.col("contributingVariants")
|
|
75
|
+
.struct.field("alt")
|
|
76
|
+
.fill_null("")
|
|
77
|
+
.str.strip_chars("<>")
|
|
78
|
+
.alias("alt"),
|
|
79
|
+
pl.col("modeOfInheritance")
|
|
80
|
+
.map_elements(lambda moi: ModeOfInheritance[moi].value, return_dtype=pl.Int8)
|
|
81
|
+
.alias("moi_enum"),
|
|
82
|
+
]
|
|
83
|
+
)
|
|
84
|
+
.with_columns(
|
|
85
|
+
[
|
|
86
|
+
(pl.col("moi_enum") == 2).alias("is_recessive"),
|
|
87
|
+
pl.when(pl.col("moi_enum") == 2)
|
|
88
|
+
.then(
|
|
89
|
+
pl.format(
|
|
90
|
+
"recessive|{}|{}|{}",
|
|
91
|
+
pl.col("geneSymbol"),
|
|
92
|
+
pl.col("score"),
|
|
93
|
+
pl.col("moi_enum"),
|
|
57
94
|
)
|
|
58
95
|
)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
@staticmethod
|
|
70
|
-
def _find_chromosome(result_entry: dict) -> str:
|
|
71
|
-
"""Return chromosome from Exomiser result entry."""
|
|
72
|
-
return result_entry["contigName"]
|
|
73
|
-
|
|
74
|
-
@staticmethod
|
|
75
|
-
def _find_start_pos(result_entry: dict) -> int:
|
|
76
|
-
"""Return start position from Exomiser result entry."""
|
|
77
|
-
return result_entry["start"]
|
|
78
|
-
|
|
79
|
-
@staticmethod
|
|
80
|
-
def _find_end_pos(result_entry: dict) -> int:
|
|
81
|
-
"""Return end position from Exomiser result entry."""
|
|
82
|
-
return result_entry["end"]
|
|
83
|
-
|
|
84
|
-
@staticmethod
|
|
85
|
-
def _find_ref(result_entry: dict) -> str:
|
|
86
|
-
"""Return reference allele from Exomiser result entry."""
|
|
87
|
-
return result_entry["ref"]
|
|
88
|
-
|
|
89
|
-
@staticmethod
|
|
90
|
-
def _find_alt(result_entry: dict) -> str:
|
|
91
|
-
"""Return alternate allele from Exomiser result entry."""
|
|
92
|
-
if "alt" in result_entry and result_entry["alt"] is not None:
|
|
93
|
-
return result_entry["alt"].strip(">").strip("<")
|
|
94
|
-
else:
|
|
95
|
-
return ""
|
|
96
|
-
|
|
97
|
-
def _find_relevant_score(self, result_entry) -> float:
|
|
98
|
-
"""Return score from Exomiser result entry."""
|
|
99
|
-
return round(result_entry[self.score_name], 4)
|
|
100
|
-
|
|
101
|
-
def _filter_for_acmg_assignments(
|
|
102
|
-
self, variant: PhEvalVariantResult, score: float, variant_acmg_assignments: dict
|
|
103
|
-
) -> bool:
|
|
104
|
-
"""Filter variants if they meet the PATHOGENIC or LIKELY_PATHOGENIC ACMG classification."""
|
|
105
|
-
for assignment in variant_acmg_assignments:
|
|
106
|
-
if variant == PhEvalVariantResult(
|
|
107
|
-
chromosome=self._find_chromosome(assignment["variantEvaluation"]),
|
|
108
|
-
start=self._find_start_pos(assignment["variantEvaluation"]),
|
|
109
|
-
end=self._find_end_pos(assignment["variantEvaluation"]),
|
|
110
|
-
ref=self._find_ref(assignment["variantEvaluation"]),
|
|
111
|
-
alt=self._find_alt(assignment["variantEvaluation"]),
|
|
112
|
-
score=score,
|
|
113
|
-
) and (
|
|
114
|
-
assignment["acmgClassification"] == "PATHOGENIC"
|
|
115
|
-
or assignment["acmgClassification"] == "LIKELY_PATHOGENIC"
|
|
116
|
-
):
|
|
117
|
-
return True
|
|
118
|
-
|
|
119
|
-
def extract_pheval_variant_requirements(
|
|
120
|
-
self, use_acmg_filter: bool = False
|
|
121
|
-
) -> [PhEvalVariantResult]:
|
|
122
|
-
"""Extract data required to produce PhEval variant output."""
|
|
123
|
-
simplified_exomiser_result = []
|
|
124
|
-
for result_entry in self.exomiser_json_result:
|
|
125
|
-
for gene_hit in result_entry["geneScores"]:
|
|
126
|
-
if self.score_name in result_entry:
|
|
127
|
-
if "contributingVariants" in gene_hit:
|
|
128
|
-
score = self._find_relevant_score(result_entry)
|
|
129
|
-
contributing_variants = gene_hit["contributingVariants"]
|
|
130
|
-
variant_acmg_assignments = gene_hit["acmgAssignments"]
|
|
131
|
-
for cv in contributing_variants:
|
|
132
|
-
variant = PhEvalVariantResult(
|
|
133
|
-
chromosome=self._find_chromosome(cv),
|
|
134
|
-
start=self._find_start_pos(cv),
|
|
135
|
-
end=self._find_end_pos(cv),
|
|
136
|
-
ref=self._find_ref(cv),
|
|
137
|
-
alt=self._find_alt(cv),
|
|
138
|
-
score=score,
|
|
139
|
-
)
|
|
140
|
-
if use_acmg_filter and self._filter_for_acmg_assignments(
|
|
141
|
-
variant, score, variant_acmg_assignments
|
|
142
|
-
):
|
|
143
|
-
simplified_exomiser_result.append(variant)
|
|
144
|
-
if not use_acmg_filter:
|
|
145
|
-
simplified_exomiser_result.append(variant)
|
|
146
|
-
return simplified_exomiser_result
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
class PhEvalDiseaseResultFromExomiserJsonCreator:
|
|
150
|
-
def __init__(self, exomiser_json_result: [dict]):
|
|
151
|
-
self.exomiser_json_result = exomiser_json_result
|
|
152
|
-
|
|
153
|
-
@staticmethod
|
|
154
|
-
def _find_disease_name(result_entry: dict) -> str:
|
|
155
|
-
"""Return disease term from Exomiser result entry."""
|
|
156
|
-
return result_entry["diseaseTerm"]
|
|
157
|
-
|
|
158
|
-
@staticmethod
|
|
159
|
-
def _find_disease_identifier(result_entry: dict) -> int:
|
|
160
|
-
"""Return disease ID from Exomiser result entry."""
|
|
161
|
-
return result_entry["diseaseId"]
|
|
162
|
-
|
|
163
|
-
@staticmethod
|
|
164
|
-
def _find_relevant_score(result_entry) -> float:
|
|
165
|
-
"""Return score from Exomiser result entry."""
|
|
166
|
-
return round(result_entry["score"], 4)
|
|
167
|
-
|
|
168
|
-
def extract_pheval_disease_requirements(self) -> [PhEvalDiseaseResult]:
|
|
169
|
-
"""Extract data required to produce PhEval disease output."""
|
|
170
|
-
simplified_exomiser_result = []
|
|
171
|
-
for result_entry in self.exomiser_json_result:
|
|
172
|
-
try:
|
|
173
|
-
for disease in result_entry["priorityResults"]["HIPHIVE_PRIORITY"][
|
|
174
|
-
"diseaseMatches"
|
|
175
|
-
]:
|
|
176
|
-
simplified_exomiser_result.append(
|
|
177
|
-
PhEvalDiseaseResult(
|
|
178
|
-
disease_name=self._find_disease_name(disease["model"]),
|
|
179
|
-
disease_identifier=self._find_disease_identifier(disease["model"]),
|
|
180
|
-
score=self._find_relevant_score(disease),
|
|
181
|
-
)
|
|
96
|
+
.otherwise(
|
|
97
|
+
pl.format(
|
|
98
|
+
"dominant|{}|{}|{}|{}|{}|{}",
|
|
99
|
+
pl.col("chrom"),
|
|
100
|
+
pl.col("start"),
|
|
101
|
+
pl.col("end"),
|
|
102
|
+
pl.col("ref"),
|
|
103
|
+
pl.col("alt"),
|
|
104
|
+
pl.col("score"),
|
|
182
105
|
)
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
106
|
+
)
|
|
107
|
+
.alias("group_key"),
|
|
108
|
+
]
|
|
109
|
+
)
|
|
110
|
+
.with_columns(
|
|
111
|
+
[
|
|
112
|
+
pl.col("group_key")
|
|
113
|
+
.rank("dense")
|
|
114
|
+
.cast(pl.UInt32)
|
|
115
|
+
.map_elements(
|
|
116
|
+
lambda i: str(uuid.uuid5(uuid.NAMESPACE_DNS, str(i))), return_dtype=pl.String
|
|
117
|
+
)
|
|
118
|
+
.alias("grouping_id")
|
|
119
|
+
]
|
|
190
120
|
)
|
|
121
|
+
.select(
|
|
122
|
+
["chrom", "start", "end", "ref", "alt", "score", "modeOfInheritance", "grouping_id"]
|
|
123
|
+
)
|
|
124
|
+
)
|
|
191
125
|
|
|
192
126
|
|
|
193
127
|
def create_standardised_results(
|
|
194
|
-
|
|
128
|
+
result_dir: Path,
|
|
195
129
|
output_dir: Path,
|
|
130
|
+
phenopacket_dir: Path,
|
|
196
131
|
score_name: str,
|
|
197
132
|
sort_order: str,
|
|
198
|
-
variant_analysis: bool,
|
|
199
133
|
gene_analysis: bool,
|
|
200
134
|
disease_analysis: bool,
|
|
201
|
-
|
|
202
|
-
)
|
|
203
|
-
|
|
204
|
-
for
|
|
205
|
-
|
|
135
|
+
variant_analysis: bool,
|
|
136
|
+
):
|
|
137
|
+
sort_order = SortOrder.ASCENDING if sort_order.lower() == "ascending" else SortOrder.DESCENDING
|
|
138
|
+
for exomiser_json_result_path in files_with_suffix(result_dir, ".json"):
|
|
139
|
+
exomiser_json_result = pl.read_json(exomiser_json_result_path)
|
|
206
140
|
if gene_analysis:
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
pheval_result=pheval_gene_requirements,
|
|
212
|
-
sort_order_str=sort_order,
|
|
141
|
+
gene_results = extract_gene_results_from_json(exomiser_json_result, score_name)
|
|
142
|
+
generate_gene_result(
|
|
143
|
+
results=gene_results,
|
|
144
|
+
sort_order=sort_order,
|
|
213
145
|
output_dir=output_dir,
|
|
214
|
-
|
|
146
|
+
result_path=trim_exomiser_result_filename(exomiser_json_result_path),
|
|
147
|
+
phenopacket_dir=phenopacket_dir,
|
|
215
148
|
)
|
|
216
|
-
if
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
pheval_result=pheval_variant_requirements,
|
|
222
|
-
sort_order_str=sort_order,
|
|
149
|
+
if disease_analysis:
|
|
150
|
+
disease_results = extract_disease_results_from_json(exomiser_json_result)
|
|
151
|
+
generate_disease_result(
|
|
152
|
+
results=disease_results,
|
|
153
|
+
sort_order=sort_order,
|
|
223
154
|
output_dir=output_dir,
|
|
224
|
-
|
|
155
|
+
result_path=trim_exomiser_result_filename(exomiser_json_result_path),
|
|
156
|
+
phenopacket_dir=phenopacket_dir,
|
|
225
157
|
)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
sort_order_str=sort_order,
|
|
158
|
+
|
|
159
|
+
if variant_analysis:
|
|
160
|
+
variant_results = extract_variant_results_from_json(exomiser_json_result, score_name)
|
|
161
|
+
generate_variant_result(
|
|
162
|
+
results=variant_results,
|
|
163
|
+
sort_order=sort_order,
|
|
233
164
|
output_dir=output_dir,
|
|
234
|
-
|
|
165
|
+
result_path=trim_exomiser_result_filename(exomiser_json_result_path),
|
|
166
|
+
phenopacket_dir=phenopacket_dir,
|
|
235
167
|
)
|
|
236
168
|
|
|
237
169
|
|
|
@@ -252,6 +184,14 @@ def create_standardised_results(
|
|
|
252
184
|
help="Full path to Exomiser results directory to be standardised.",
|
|
253
185
|
type=Path,
|
|
254
186
|
)
|
|
187
|
+
@click.option(
|
|
188
|
+
"--phenopacket-dir",
|
|
189
|
+
"-p",
|
|
190
|
+
required=True,
|
|
191
|
+
metavar="DIRECTORY",
|
|
192
|
+
help="Full path to phenopacket dir used to generate the raw results.",
|
|
193
|
+
type=Path,
|
|
194
|
+
)
|
|
255
195
|
@click.option(
|
|
256
196
|
"--score-name",
|
|
257
197
|
"-s",
|
|
@@ -288,22 +228,15 @@ def create_standardised_results(
|
|
|
288
228
|
default=False,
|
|
289
229
|
help="Specify whether to create PhEval disease results.",
|
|
290
230
|
)
|
|
291
|
-
@click.option(
|
|
292
|
-
"--include-acmg",
|
|
293
|
-
is_flag=True,
|
|
294
|
-
type=bool,
|
|
295
|
-
default=False,
|
|
296
|
-
help="Specify whether to include ACMG filter for PATHOGENIC or LIKELY_PATHOGENIC classifications.",
|
|
297
|
-
)
|
|
298
231
|
def post_process_exomiser_results(
|
|
299
232
|
output_dir: Path,
|
|
300
233
|
results_dir: Path,
|
|
234
|
+
phenopacket_dir: Path,
|
|
301
235
|
score_name: str,
|
|
302
236
|
sort_order: str,
|
|
303
237
|
gene_analysis: bool,
|
|
304
238
|
variant_analysis: bool,
|
|
305
239
|
disease_analysis: bool,
|
|
306
|
-
include_acmg: bool,
|
|
307
240
|
):
|
|
308
241
|
"""Post-process Exomiser json results into PhEval gene and variant outputs."""
|
|
309
242
|
(
|
|
@@ -322,12 +255,12 @@ def post_process_exomiser_results(
|
|
|
322
255
|
else None
|
|
323
256
|
)
|
|
324
257
|
create_standardised_results(
|
|
325
|
-
results_dir,
|
|
326
|
-
output_dir,
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
258
|
+
result_dir=results_dir,
|
|
259
|
+
output_dir=output_dir,
|
|
260
|
+
phenopacket_dir=phenopacket_dir,
|
|
261
|
+
score_name=score_name,
|
|
262
|
+
sort_order=sort_order,
|
|
263
|
+
variant_analysis=variant_analysis,
|
|
264
|
+
gene_analysis=gene_analysis,
|
|
265
|
+
disease_analysis=disease_analysis,
|
|
333
266
|
)
|
pheval_exomiser/runner.py
CHANGED
|
@@ -67,6 +67,7 @@ class ExomiserPhEvalRunner(PhEvalRunner):
|
|
|
67
67
|
config=config,
|
|
68
68
|
raw_results_dir=self.raw_results_dir,
|
|
69
69
|
output_dir=self.output_dir,
|
|
70
|
+
phenopacket_dir=self.testdata_dir.joinpath("phenopackets"),
|
|
70
71
|
variant_analysis=self.input_dir_config.variant_analysis,
|
|
71
72
|
gene_analysis=self.input_dir_config.gene_analysis,
|
|
72
73
|
disease_analysis=self.input_dir_config.disease_analysis,
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: pheval_exomiser
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary:
|
|
5
5
|
Author: Yasemin Bridges
|
|
6
6
|
Author-email: y.bridges@qmul.ac.uk
|
|
7
|
-
Requires-Python: >=3.
|
|
7
|
+
Requires-Python: >=3.10,<4.0.0
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
10
9
|
Classifier: Programming Language :: Python :: 3.10
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -14,12 +13,12 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
14
13
|
Requires-Dist: click (>=8.1.3,<9.0.0)
|
|
15
14
|
Requires-Dist: docker (>=6.0.1,<7.0.0)
|
|
16
15
|
Requires-Dist: google (>=3.0.0,<4.0.0)
|
|
16
|
+
Requires-Dist: numpy (<2)
|
|
17
17
|
Requires-Dist: oaklib (>=0.5.12,<0.6.0)
|
|
18
|
-
Requires-Dist: pandas (>=1.5.2,<2.0.0)
|
|
19
18
|
Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
|
|
20
|
-
Requires-Dist: pheval (>=0.
|
|
19
|
+
Requires-Dist: pheval (>=0.5.1,<0.6.0)
|
|
21
20
|
Requires-Dist: pyaml (>=21.10.1,<22.0.0)
|
|
22
|
-
Requires-Dist: pydantic (>=
|
|
21
|
+
Requires-Dist: pydantic (>=2.7.1,<3.0.0)
|
|
23
22
|
Description-Content-Type: text/markdown
|
|
24
23
|
|
|
25
24
|
# Exomiser Runner for PhEval
|
|
@@ -2,17 +2,16 @@ pheval_exomiser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
2
2
|
pheval_exomiser/cli.py,sha256=0SR1-L2sREEkFRfUPwYwkbSaBsz_L_Sxq1S4c9LQLJg,350
|
|
3
3
|
pheval_exomiser/constants.py,sha256=o_pLWF8kX74BqyTsAZa7twwSKzedLnpupCI90k_bMqY,517
|
|
4
4
|
pheval_exomiser/post_process/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
pheval_exomiser/post_process/post_process.py,sha256=
|
|
6
|
-
pheval_exomiser/post_process/post_process_results_format.py,sha256=
|
|
5
|
+
pheval_exomiser/post_process/post_process.py,sha256=2vkwe60Ptf7UuPCR2ShcI80-kn-1WaPDa74cCBTUKF0,968
|
|
6
|
+
pheval_exomiser/post_process/post_process_results_format.py,sha256=zMz2HwAJENuKyRjkbJZrydrZDoULWuPjaDWC5Dc5hxg,8432
|
|
7
7
|
pheval_exomiser/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
pheval_exomiser/prepare/create_batch_commands.py,sha256=tDUBtpfSmNGqHte-vrGnusYZzM59pOX1IAumqRDlnBE,17205
|
|
9
9
|
pheval_exomiser/prepare/tool_specific_configuration_options.py,sha256=4gedZ9iadRXK6tF9P-ju-dhj8-F2-fhrXVhfYIsAxFQ,2922
|
|
10
10
|
pheval_exomiser/prepare/write_application_properties.py,sha256=KmG7GvkQo8AhnhRyqohTFvqjfhEhbcs78UYYoigxJ3w,8933
|
|
11
|
-
pheval_exomiser/prepare/yaml_to_family_phenopacket.py,sha256=Hz77dHpVaRMV1fQWKmOCqCKJfmk_hdpZh_6o7hq9Sec,14452
|
|
12
11
|
pheval_exomiser/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
12
|
pheval_exomiser/run/run.py,sha256=bK_gL52zRl71Lxe-i-P6L4-dMstxFAG6SVNPO6G823o,7109
|
|
14
|
-
pheval_exomiser/runner.py,sha256=
|
|
15
|
-
pheval_exomiser-0.
|
|
16
|
-
pheval_exomiser-0.
|
|
17
|
-
pheval_exomiser-0.
|
|
18
|
-
pheval_exomiser-0.
|
|
13
|
+
pheval_exomiser/runner.py,sha256=3-0kec2yzQoZNpqZXSBIWBD1QR24s_BmHGCLXmP4fos,2620
|
|
14
|
+
pheval_exomiser-0.3.0.dist-info/METADATA,sha256=95Kqa6LqtavG86Okr1fWqWK12wKl3WMWXYJol_4Swno,7463
|
|
15
|
+
pheval_exomiser-0.3.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
16
|
+
pheval_exomiser-0.3.0.dist-info/entry_points.txt,sha256=lbZMu-x7ns8UrFveWSqEQ1UB5l33TbRMomqBUyGYIwI,131
|
|
17
|
+
pheval_exomiser-0.3.0.dist-info/RECORD,,
|
|
@@ -1,392 +0,0 @@
|
|
|
1
|
-
from copy import copy
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
import click
|
|
5
|
-
import pandas as pd
|
|
6
|
-
import yaml
|
|
7
|
-
from google.protobuf.timestamp_pb2 import Timestamp
|
|
8
|
-
from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
|
|
9
|
-
from oaklib.resource import OntologyResource
|
|
10
|
-
from phenopackets import (
|
|
11
|
-
Diagnosis,
|
|
12
|
-
Family,
|
|
13
|
-
File,
|
|
14
|
-
GeneDescriptor,
|
|
15
|
-
GenomicInterpretation,
|
|
16
|
-
Individual,
|
|
17
|
-
Interpretation,
|
|
18
|
-
MetaData,
|
|
19
|
-
OntologyClass,
|
|
20
|
-
Pedigree,
|
|
21
|
-
Phenopacket,
|
|
22
|
-
PhenotypicFeature,
|
|
23
|
-
Resource,
|
|
24
|
-
VariantInterpretation,
|
|
25
|
-
VariationDescriptor,
|
|
26
|
-
VcfRecord,
|
|
27
|
-
)
|
|
28
|
-
from pheval.prepare.create_noisy_phenopackets import load_ontology
|
|
29
|
-
from pheval.utils.file_utils import files_with_suffix
|
|
30
|
-
from pheval.utils.phenopacket_utils import create_hgnc_dict, write_phenopacket
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def load_genotype_ontology():
|
|
34
|
-
"""Load genotype ontology"""
|
|
35
|
-
genotype_resource = OntologyResource(slug="geno.owl", local=False)
|
|
36
|
-
return ProntoImplementation(genotype_resource)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def exomiser_analysis_yml_reader(yaml_job_file_path: Path) -> dict:
|
|
40
|
-
"""Read an exomiser analysis yaml file."""
|
|
41
|
-
with open(yaml_job_file_path) as yaml_job_file:
|
|
42
|
-
yaml_job = yaml.safe_load(yaml_job_file)
|
|
43
|
-
yaml_job_file.close()
|
|
44
|
-
return yaml_job
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def read_diagnoses_file(diagnoses_file_path: Path) -> pd.DataFrame:
|
|
48
|
-
"""Read a diagnoses file."""
|
|
49
|
-
return pd.read_csv(diagnoses_file_path, delimiter="t")
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def read_pedigree_file(pedigree_path: Path) -> list[str]:
|
|
53
|
-
"""Return the contents of a pedigree file"""
|
|
54
|
-
return open(pedigree_path).readlines()
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class ExomiserYamlToPhenopacketConverter:
|
|
58
|
-
def __init__(self, genotype_ontology, human_phenotype_ontology, hgnc_data):
|
|
59
|
-
self.genotype_ontology = genotype_ontology
|
|
60
|
-
self.human_phenotype_ontology = human_phenotype_ontology
|
|
61
|
-
self.hgnc_data = hgnc_data
|
|
62
|
-
|
|
63
|
-
@staticmethod
|
|
64
|
-
def construct_individual(yaml_job: dict, diagnoses: pd.DataFrame) -> Individual:
|
|
65
|
-
"""Construct individual for phenopacket."""
|
|
66
|
-
return Individual(
|
|
67
|
-
id=yaml_job["analysis"]["proband"],
|
|
68
|
-
sex=diagnoses[diagnoses.ProbandId == yaml_job["analysis"]["proband"]]
|
|
69
|
-
.iloc[0]["Sex"]
|
|
70
|
-
.upper(),
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
@staticmethod
|
|
74
|
-
def get_diagnoses_for_proband(yaml_job: dict, diagnoses: pd.DataFrame):
|
|
75
|
-
"""Get all diagnoses for proband."""
|
|
76
|
-
return diagnoses.loc[diagnoses["ProbandId"] == yaml_job["analysis"]["proband"]]
|
|
77
|
-
|
|
78
|
-
def construct_phenotypic_interpretations(self, yaml_job: dict) -> list[PhenotypicFeature]:
|
|
79
|
-
"""Construct the phenotypic features for the proband."""
|
|
80
|
-
hpo_ids = yaml_job["analysis"]["hpoIds"]
|
|
81
|
-
phenotypic_features = []
|
|
82
|
-
for hpo_id in hpo_ids:
|
|
83
|
-
try:
|
|
84
|
-
rels = self.human_phenotype_ontology.entity_alias_map(hpo_id)
|
|
85
|
-
hpo_term = "".join(rels[(list(rels.keys())[0])])
|
|
86
|
-
hpo = PhenotypicFeature(type=OntologyClass(id=hpo_id, label=hpo_term))
|
|
87
|
-
phenotypic_features.append(hpo)
|
|
88
|
-
except AttributeError:
|
|
89
|
-
hpo = PhenotypicFeature(type=OntologyClass(id=hpo_id))
|
|
90
|
-
phenotypic_features.append(hpo)
|
|
91
|
-
return phenotypic_features
|
|
92
|
-
|
|
93
|
-
@staticmethod
|
|
94
|
-
def construct_vcf_record(yaml_job: dict, diagnosis: pd.DataFrame) -> VcfRecord:
|
|
95
|
-
"""Construct the VCF record for a diagnosis."""
|
|
96
|
-
return VcfRecord(
|
|
97
|
-
genome_assembly=yaml_job["analysis"]["genomeAssembly"],
|
|
98
|
-
chrom=diagnosis["Chr"],
|
|
99
|
-
pos=int(diagnosis["Start"]),
|
|
100
|
-
ref=str(diagnosis["Ref/Alt"]).split("/")[0],
|
|
101
|
-
alt=str(diagnosis["Ref/Alt"]).split("/")[1],
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
def construct_allelic_state(self, diagnosis: pd.DataFrame) -> OntologyClass:
|
|
105
|
-
"""Construct the allelic state for a diagnosis."""
|
|
106
|
-
return OntologyClass(
|
|
107
|
-
id=list(self.genotype_ontology.basic_search(diagnosis["Genotype"].lower()))[0],
|
|
108
|
-
label=diagnosis["Genotype"].lower(),
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
def construct_gene_descriptor(self, diagnosis: pd.DataFrame) -> GeneDescriptor:
|
|
112
|
-
"""Construct the Gene Descriptor for a diagnosis."""
|
|
113
|
-
try:
|
|
114
|
-
return GeneDescriptor(
|
|
115
|
-
value_id=self.hgnc_data[diagnosis["Gene"]]["ensembl_id"],
|
|
116
|
-
symbol=diagnosis["Gene"],
|
|
117
|
-
)
|
|
118
|
-
except KeyError:
|
|
119
|
-
for _gene, gene_info in self.hgnc_data.items():
|
|
120
|
-
for previous_name in gene_info["previous_names"]:
|
|
121
|
-
if diagnosis["Gene"] == previous_name:
|
|
122
|
-
return GeneDescriptor(
|
|
123
|
-
value_id=self.hgnc_data[gene_info["ensembl_id"]],
|
|
124
|
-
symbol=diagnosis["Gene"],
|
|
125
|
-
)
|
|
126
|
-
|
|
127
|
-
def construct_variation_descriptor(
|
|
128
|
-
self, yaml_job: dict, diagnosis: pd.DataFrame
|
|
129
|
-
) -> VariationDescriptor:
|
|
130
|
-
"""Construct a variation descriptor for a diagnosis."""
|
|
131
|
-
return VariationDescriptor(
|
|
132
|
-
id=yaml_job["analysis"]["proband"]
|
|
133
|
-
+ ":"
|
|
134
|
-
+ diagnosis["Chr"]
|
|
135
|
-
+ ":"
|
|
136
|
-
+ diagnosis["Start"]
|
|
137
|
-
+ ":"
|
|
138
|
-
+ diagnosis["Ref/Alt"],
|
|
139
|
-
gene_context=self.construct_gene_descriptor(diagnosis),
|
|
140
|
-
vcf_record=self.construct_vcf_record(yaml_job, diagnosis),
|
|
141
|
-
allelic_state=self.construct_allelic_state(diagnosis),
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
def construct_variant_interpretation(
|
|
145
|
-
self, yaml_job: dict, diagnosis: pd.DataFrame
|
|
146
|
-
) -> VariantInterpretation:
|
|
147
|
-
"""Construct the variant interpretation for a diagnosis."""
|
|
148
|
-
return VariantInterpretation(
|
|
149
|
-
variation_descriptor=self.construct_variation_descriptor(yaml_job, diagnosis),
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
def construct_genomic_interpretations(
|
|
153
|
-
self, yaml_job: dict, diagnoses: pd.DataFrame
|
|
154
|
-
) -> list[GenomicInterpretation]:
|
|
155
|
-
"""Construct a list of genomic interpretations for a proband."""
|
|
156
|
-
genomic_interpretations = []
|
|
157
|
-
for _index, row in self.get_diagnoses_for_proband(yaml_job, diagnoses).iterrows():
|
|
158
|
-
genomic_interpretation = GenomicInterpretation(
|
|
159
|
-
subject_or_biosample_id=yaml_job["analysis"]["proband"],
|
|
160
|
-
variant_interpretation=self.construct_variant_interpretation(
|
|
161
|
-
yaml_job=yaml_job, diagnosis=row
|
|
162
|
-
),
|
|
163
|
-
)
|
|
164
|
-
genomic_interpretations.append(genomic_interpretation)
|
|
165
|
-
return genomic_interpretations
|
|
166
|
-
|
|
167
|
-
def construct_diagnosis(self, yaml_job: dict, diagnoses: pd.DataFrame) -> Diagnosis:
|
|
168
|
-
"""Construct the diagnosis for a proband."""
|
|
169
|
-
return Diagnosis(
|
|
170
|
-
genomic_interpretations=self.construct_genomic_interpretations(yaml_job, diagnoses)
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
def construct_interpretations(
|
|
174
|
-
self, yaml_job: dict, diagnoses: pd.DataFrame
|
|
175
|
-
) -> list[Interpretation]:
|
|
176
|
-
"""Construct interpretations for a proband."""
|
|
177
|
-
return [
|
|
178
|
-
Interpretation(
|
|
179
|
-
id=yaml_job["analysis"]["proband"] + "-interpretation",
|
|
180
|
-
diagnosis=self.construct_diagnosis(yaml_job, diagnoses),
|
|
181
|
-
)
|
|
182
|
-
]
|
|
183
|
-
|
|
184
|
-
@staticmethod
|
|
185
|
-
def construct_meta_data() -> MetaData:
|
|
186
|
-
"""Construct the meta-data."""
|
|
187
|
-
timestamp = Timestamp()
|
|
188
|
-
timestamp.GetCurrentTime()
|
|
189
|
-
return MetaData(
|
|
190
|
-
created=timestamp,
|
|
191
|
-
created_by="pheval-converter",
|
|
192
|
-
resources=[
|
|
193
|
-
Resource(
|
|
194
|
-
id="hp",
|
|
195
|
-
name="human phenotype ontology",
|
|
196
|
-
url="http://purl.obolibrary.org/obo/hp.owl",
|
|
197
|
-
version="hp/releases/2019-11-08",
|
|
198
|
-
namespace_prefix="HP",
|
|
199
|
-
iri_prefix="http://purl.obolibrary.org/obo/HP_",
|
|
200
|
-
)
|
|
201
|
-
],
|
|
202
|
-
phenopacket_schema_version="2.0",
|
|
203
|
-
)
|
|
204
|
-
|
|
205
|
-
@staticmethod
|
|
206
|
-
def construct_files(yaml_job_file: dict) -> list[File]:
|
|
207
|
-
"""Construct the files."""
|
|
208
|
-
return [
|
|
209
|
-
File(
|
|
210
|
-
uri=yaml_job_file["analysis"]["vcf"],
|
|
211
|
-
file_attributes={
|
|
212
|
-
"fileFormat": "VCF",
|
|
213
|
-
"genomeAssembly": yaml_job_file["analysis"]["genomeAssembly"],
|
|
214
|
-
},
|
|
215
|
-
)
|
|
216
|
-
]
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
def construct_pedigree(pedigree: list[str]) -> tuple[str, Pedigree]:
|
|
220
|
-
"""Construct the pedigree message from a ped file."""
|
|
221
|
-
persons = []
|
|
222
|
-
family_id = None
|
|
223
|
-
for individual in pedigree:
|
|
224
|
-
entry = individual.split("\t")
|
|
225
|
-
family_id = entry[0]
|
|
226
|
-
sex = "."
|
|
227
|
-
if (
|
|
228
|
-
int(entry[4]) == 1
|
|
229
|
-
): # until this is fixed with the phenopackets package, sex has to be reassigned
|
|
230
|
-
sex = 2
|
|
231
|
-
if int(entry[4]) == 2:
|
|
232
|
-
sex = 1
|
|
233
|
-
if str(entry[3]) == "0" and str(entry[2]) == "0":
|
|
234
|
-
person = Pedigree.Person(
|
|
235
|
-
family_id=family_id, individual_id=entry[1], sex=sex, affected_status=int(entry[5])
|
|
236
|
-
)
|
|
237
|
-
persons.append(person)
|
|
238
|
-
if str(entry[3]) == "0" and str(entry[2]) != "0":
|
|
239
|
-
person = Pedigree.Person(
|
|
240
|
-
family_id=family_id,
|
|
241
|
-
individual_id=entry[1],
|
|
242
|
-
paternal_id=entry[2],
|
|
243
|
-
sex=sex,
|
|
244
|
-
affected_status=int(entry[5]),
|
|
245
|
-
)
|
|
246
|
-
persons.append(person)
|
|
247
|
-
if str(entry[2]) == "0" and str(entry[3]) != "0":
|
|
248
|
-
person = Pedigree.Person(
|
|
249
|
-
family_id=family_id,
|
|
250
|
-
individual_id=entry[1],
|
|
251
|
-
maternal_id=entry[3],
|
|
252
|
-
sex=sex,
|
|
253
|
-
affected_status=int(entry[5]),
|
|
254
|
-
)
|
|
255
|
-
persons.append(person)
|
|
256
|
-
if str(entry[2]) != "0" and str(entry[3] != "0"):
|
|
257
|
-
person = Pedigree.Person(
|
|
258
|
-
family_id=family_id,
|
|
259
|
-
individual_id=entry[1],
|
|
260
|
-
paternal_id=entry[2],
|
|
261
|
-
maternal_id=entry[3],
|
|
262
|
-
sex=sex,
|
|
263
|
-
affected_status=int(entry[5]),
|
|
264
|
-
)
|
|
265
|
-
persons.append(person)
|
|
266
|
-
return family_id, Pedigree(persons=persons)
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def construct_phenopacket(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
) -> Phenopacket:
    """Assemble a Phenopacket from a parsed Exomiser analysis job.

    Args:
        yaml_job_file: Parsed analysis YAML; ``["analysis"]["proband"]`` is used
            as the phenopacket ID.
        diagnoses: Diagnoses table forwarded to the converter.
        exomiser_yaml_to_phenopacket_converter: Converter that builds the
            individual phenopacket sections.

    Returns:
        The Phenopacket built from the converter's outputs.
    """
    converter = exomiser_yaml_to_phenopacket_converter
    # Sections are built in the same order as they are passed to the message.
    proband_id = yaml_job_file["analysis"]["proband"]
    subject = converter.construct_individual(yaml_job=yaml_job_file, diagnoses=diagnoses)
    phenotypic_features = converter.construct_phenotypic_interpretations(
        yaml_job=yaml_job_file
    )
    interpretations = converter.construct_interpretations(
        yaml_job=yaml_job_file, diagnoses=diagnoses
    )
    files = converter.construct_files(yaml_job_file)
    meta_data = converter.construct_meta_data()
    return Phenopacket(
        id=proband_id,
        subject=subject,
        phenotypic_features=phenotypic_features,
        interpretations=interpretations,
        files=files,
        meta_data=meta_data,
    )
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
def construct_family(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
    pedigree: list[str],
) -> Family:
    """Construct a Family message for the proband described by an analysis job.

    A full phenopacket is built first. The proband is a copy of it with the
    ``files`` and ``meta_data`` fields cleared; those two fields are attached
    to the Family itself instead.

    Args:
        yaml_job_file: Parsed analysis YAML.
        diagnoses: Diagnoses table forwarded to the converter.
        exomiser_yaml_to_phenopacket_converter: Converter that builds the
            individual phenopacket sections.
        pedigree: Lines of the ped file describing the family.

    Returns:
        The Family, with its ID taken from the ped file.
    """
    phenopacket = construct_phenopacket(
        yaml_job_file, diagnoses, exomiser_yaml_to_phenopacket_converter
    )
    proband = copy(phenopacket)
    # ClearField works for both repeated and singular message fields, whereas
    # slice deletion (``del field[:]``) is only supported by repeated containers.
    proband.ClearField("files")
    proband.ClearField("meta_data")
    family_id, ped = construct_pedigree(pedigree)
    return Family(
        id=family_id,
        proband=proband,
        pedigree=ped,
        files=phenopacket.files,
        meta_data=phenopacket.meta_data,
    )
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
def create_phenopacket(
    yaml_job_file: Path,
    diagnoses: pd.DataFrame,
    exomiser_converter: ExomiserYamlToPhenopacketConverter,
) -> "Phenopacket | Family":
    """Construct either a family or phenopacket from an analysis yaml.

    Args:
        yaml_job_file: Path to the Exomiser analysis YAML file.
        diagnoses: Diagnoses table forwarded to the converter.
        exomiser_converter: Converter that builds the individual phenopacket
            sections.

    Returns:
        A Phenopacket when the analysis has no ped file configured, otherwise
        a Family built from that ped file.
    """
    yaml_job = exomiser_analysis_yml_reader(yaml_job_file)
    # An absent key or a bare ``ped:`` entry (presumably parsed as None) is
    # treated like the empty string: there is no pedigree to read.
    ped_path = yaml_job["analysis"].get("ped")
    if not ped_path:
        return construct_phenopacket(yaml_job, diagnoses, exomiser_converter)
    return construct_family(
        yaml_job,
        diagnoses,
        exomiser_converter,
        read_pedigree_file(ped_path),
    )
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
@click.command()
# No short flag here: "-d" is already declared by --diagnoses-file below, and
# two options cannot usefully share one short flag.
@click.option(
    "--directory",
    required=True,
    help="Directory for Exomiser yaml job files to be converted.",
    type=Path,
)
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yamls_to_phenopacket(
    output_dir: Path, directory: Path, diagnoses_file: Path
):
    """Convert Exomiser YAML files to the phenopacket schema given a .tsv diagnoses file containing the following
    required fields: ..."""
    # Create the output directory (and any missing parents) if it does not exist yet.
    output_dir.mkdir(parents=True, exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    # Build the converter once and reuse it for every job file in the directory.
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    for yaml_job_file in files_with_suffix(directory, ".yml"):
        phenopacket = create_phenopacket(yaml_job_file, diagnoses, exomiser_converter)
        # Swap only the final suffix; str.replace would also rewrite any other
        # ".yml" occurring inside the file name.
        write_phenopacket(
            phenopacket, output_dir.joinpath(yaml_job_file.with_suffix(".json").name)
        )
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
@click.command()
@click.option(
    "--yaml-file",
    "-y",
    required=True,
    help="Path to Exomiser analysis yaml file for phenopacket conversion.",
    type=Path,
)
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yaml_to_phenopacket(
    output_dir: Path, yaml_file: Path, diagnoses_file: Path
):
    """Convert an Exomiser YAML file to a phenopacket schema given a .tsv diagnoses file containing the following
    required fields: ..."""
    # Create the output directory (and any missing parents) if it does not exist yet.
    output_dir.mkdir(parents=True, exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    phenopacket = create_phenopacket(yaml_file, diagnoses, exomiser_converter)
    # Write into the requested output directory (previously the file landed in
    # the current working directory) and replace the ".yml" suffix with ".json"
    # instead of appending to it.
    write_phenopacket(phenopacket, output_dir.joinpath(yaml_file.with_suffix(".json").name))
|
|
File without changes
|