pheval-exomiser 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pheval_exomiser/post_process/post_process.py +2 -2
- pheval_exomiser/post_process/post_process_results_format.py +129 -9
- pheval_exomiser/prepare/create_batch_commands.py +143 -107
- pheval_exomiser/run/run.py +35 -18
- pheval_exomiser/runner.py +2 -0
- {pheval_exomiser-0.3.3.dist-info → pheval_exomiser-0.4.0.dist-info}/METADATA +4 -3
- {pheval_exomiser-0.3.3.dist-info → pheval_exomiser-0.4.0.dist-info}/RECORD +9 -9
- {pheval_exomiser-0.3.3.dist-info → pheval_exomiser-0.4.0.dist-info}/WHEEL +1 -1
- {pheval_exomiser-0.3.3.dist-info → pheval_exomiser-0.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -12,9 +12,9 @@ def post_process_result_format(
|
|
|
12
12
|
variant_analysis: bool,
|
|
13
13
|
gene_analysis: bool,
|
|
14
14
|
disease_analysis: bool,
|
|
15
|
+
exomiser_version: str,
|
|
15
16
|
):
|
|
16
17
|
"""Standardise Exomiser json format to separated gene and variant results."""
|
|
17
|
-
print("...standardising results format...")
|
|
18
18
|
create_standardised_results(
|
|
19
19
|
result_dir=raw_results_dir,
|
|
20
20
|
output_dir=output_dir,
|
|
@@ -24,5 +24,5 @@ def post_process_result_format(
|
|
|
24
24
|
gene_analysis=gene_analysis,
|
|
25
25
|
disease_analysis=disease_analysis,
|
|
26
26
|
variant_analysis=variant_analysis,
|
|
27
|
+
exomiser_version=exomiser_version,
|
|
27
28
|
)
|
|
28
|
-
print("done")
|
|
@@ -4,6 +4,7 @@ from pathlib import Path
|
|
|
4
4
|
|
|
5
5
|
import click
|
|
6
6
|
import polars as pl
|
|
7
|
+
from packaging import version
|
|
7
8
|
from pheval.post_processing.post_processing import (
|
|
8
9
|
SortOrder,
|
|
9
10
|
generate_disease_result,
|
|
@@ -15,10 +16,15 @@ from pheval.utils.file_utils import files_with_suffix
|
|
|
15
16
|
|
|
16
17
|
class ModeOfInheritance(Enum):
|
|
17
18
|
AUTOSOMAL_DOMINANT = 1
|
|
19
|
+
AD = 1
|
|
18
20
|
AUTOSOMAL_RECESSIVE = 2
|
|
21
|
+
AR = 2
|
|
19
22
|
X_DOMINANT = 1
|
|
23
|
+
XD = 1
|
|
20
24
|
X_RECESSIVE = 2
|
|
25
|
+
XR = 2
|
|
21
26
|
MITOCHONDRIAL = 3
|
|
27
|
+
MT = 3
|
|
22
28
|
|
|
23
29
|
|
|
24
30
|
def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
|
|
@@ -38,6 +44,18 @@ def extract_gene_results_from_json(
|
|
|
38
44
|
).drop_nulls()
|
|
39
45
|
|
|
40
46
|
|
|
47
|
+
def extract_gene_results_from_parquet(
|
|
48
|
+
exomiser_parquet_result: pl.DataFrame, score_name: str
|
|
49
|
+
) -> pl.DataFrame:
|
|
50
|
+
return exomiser_parquet_result.select(
|
|
51
|
+
[
|
|
52
|
+
pl.col("geneSymbol").alias("gene_symbol"),
|
|
53
|
+
pl.col("ensemblGeneId").alias("gene_identifier"),
|
|
54
|
+
pl.col(score_name).fill_null(0).round(4).alias("score"),
|
|
55
|
+
]
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
41
59
|
def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.DataFrame:
|
|
42
60
|
return (
|
|
43
61
|
exomiser_json_result.select(
|
|
@@ -55,6 +73,18 @@ def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.
|
|
|
55
73
|
)
|
|
56
74
|
|
|
57
75
|
|
|
76
|
+
def extract_disease_results_from_parquet(exomiser_parquet_result: pl.DataFrame) -> pl.DataFrame:
|
|
77
|
+
return (
|
|
78
|
+
exomiser_parquet_result.select(pl.col("diseaseMatches"))
|
|
79
|
+
.explode("diseaseMatches")
|
|
80
|
+
.select(
|
|
81
|
+
pl.col("diseaseMatches").struct.field("diseaseId").alias("disease_identifier"),
|
|
82
|
+
pl.col("diseaseMatches").struct.field("score").alias("score"),
|
|
83
|
+
)
|
|
84
|
+
.drop_nulls()
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
|
|
58
88
|
def extract_variant_results_from_json(
|
|
59
89
|
exomiser_json_result: pl.DataFrame, score_name: str
|
|
60
90
|
) -> pl.DataFrame:
|
|
@@ -124,6 +154,67 @@ def extract_variant_results_from_json(
|
|
|
124
154
|
)
|
|
125
155
|
|
|
126
156
|
|
|
157
|
+
def extract_variant_results_from_parquet(
|
|
158
|
+
exomiser_parquet_result: pl.DataFrame, score_name: str
|
|
159
|
+
) -> pl.DataFrame:
|
|
160
|
+
contributing_variant_only = exomiser_parquet_result.filter(
|
|
161
|
+
pl.col("isContributingVariant") == True # noqa
|
|
162
|
+
)
|
|
163
|
+
return (
|
|
164
|
+
contributing_variant_only.select(
|
|
165
|
+
[
|
|
166
|
+
pl.col("geneSymbol"),
|
|
167
|
+
pl.col("contigName").alias("chrom").cast(pl.String),
|
|
168
|
+
pl.col("start").cast(pl.Int64),
|
|
169
|
+
pl.col("end").cast(pl.Int64),
|
|
170
|
+
pl.col("ref"),
|
|
171
|
+
pl.col("alt"),
|
|
172
|
+
pl.col(score_name).alias("score"),
|
|
173
|
+
pl.col("moi")
|
|
174
|
+
.map_elements(lambda moi: ModeOfInheritance[moi].value, return_dtype=pl.Int8)
|
|
175
|
+
.alias("moi_enum"),
|
|
176
|
+
]
|
|
177
|
+
)
|
|
178
|
+
.with_columns(
|
|
179
|
+
[
|
|
180
|
+
(pl.col("moi_enum") == 2).alias("is_recessive"),
|
|
181
|
+
pl.when(pl.col("moi_enum") == 2)
|
|
182
|
+
.then(
|
|
183
|
+
pl.format(
|
|
184
|
+
"recessive|{}|{}|{}",
|
|
185
|
+
pl.col("geneSymbol"),
|
|
186
|
+
pl.col("score"),
|
|
187
|
+
pl.col("moi_enum"),
|
|
188
|
+
)
|
|
189
|
+
)
|
|
190
|
+
.otherwise(
|
|
191
|
+
pl.format(
|
|
192
|
+
"dominant|{}|{}|{}|{}|{}|{}",
|
|
193
|
+
pl.col("chrom"),
|
|
194
|
+
pl.col("start"),
|
|
195
|
+
pl.col("end"),
|
|
196
|
+
pl.col("ref"),
|
|
197
|
+
pl.col("alt"),
|
|
198
|
+
pl.col("score"),
|
|
199
|
+
)
|
|
200
|
+
)
|
|
201
|
+
.alias("group_key"),
|
|
202
|
+
]
|
|
203
|
+
)
|
|
204
|
+
.with_columns(
|
|
205
|
+
[
|
|
206
|
+
pl.col("group_key")
|
|
207
|
+
.rank("dense")
|
|
208
|
+
.cast(pl.UInt32)
|
|
209
|
+
.map_elements(
|
|
210
|
+
lambda i: str(uuid.uuid5(uuid.NAMESPACE_DNS, str(i))), return_dtype=pl.String
|
|
211
|
+
)
|
|
212
|
+
.alias("grouping_id")
|
|
213
|
+
]
|
|
214
|
+
)
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
|
|
127
218
|
def create_standardised_results(
|
|
128
219
|
result_dir: Path,
|
|
129
220
|
output_dir: Path,
|
|
@@ -133,36 +224,55 @@ def create_standardised_results(
|
|
|
133
224
|
gene_analysis: bool,
|
|
134
225
|
disease_analysis: bool,
|
|
135
226
|
variant_analysis: bool,
|
|
227
|
+
exomiser_version: str,
|
|
136
228
|
):
|
|
137
229
|
sort_order = SortOrder.ASCENDING if sort_order.lower() == "ascending" else SortOrder.DESCENDING
|
|
138
|
-
|
|
139
|
-
|
|
230
|
+
use_parquet = True if version.parse(exomiser_version) >= version.parse("15.0.0") else False
|
|
231
|
+
read_result = pl.read_parquet if use_parquet else pl.read_json
|
|
232
|
+
result_files = (
|
|
233
|
+
files_with_suffix(result_dir, ".parquet")
|
|
234
|
+
if use_parquet
|
|
235
|
+
else files_with_suffix(result_dir, ".json")
|
|
236
|
+
)
|
|
237
|
+
for exomiser_result_path in result_files:
|
|
238
|
+
exomiser_result = read_result(exomiser_result_path)
|
|
140
239
|
if gene_analysis:
|
|
141
|
-
gene_results =
|
|
240
|
+
gene_results = (
|
|
241
|
+
extract_gene_results_from_parquet(exomiser_result, score_name)
|
|
242
|
+
if use_parquet
|
|
243
|
+
else extract_gene_results_from_json(exomiser_result, score_name)
|
|
244
|
+
)
|
|
142
245
|
generate_gene_result(
|
|
143
246
|
results=gene_results,
|
|
144
247
|
sort_order=sort_order,
|
|
145
248
|
output_dir=output_dir,
|
|
146
|
-
result_path=trim_exomiser_result_filename(
|
|
249
|
+
result_path=trim_exomiser_result_filename(exomiser_result_path),
|
|
147
250
|
phenopacket_dir=phenopacket_dir,
|
|
148
251
|
)
|
|
149
252
|
if disease_analysis:
|
|
150
|
-
disease_results =
|
|
253
|
+
disease_results = (
|
|
254
|
+
extract_disease_results_from_parquet(exomiser_result)
|
|
255
|
+
if use_parquet
|
|
256
|
+
else extract_disease_results_from_json(exomiser_result)
|
|
257
|
+
)
|
|
151
258
|
generate_disease_result(
|
|
152
259
|
results=disease_results,
|
|
153
260
|
sort_order=sort_order,
|
|
154
261
|
output_dir=output_dir,
|
|
155
|
-
result_path=trim_exomiser_result_filename(
|
|
262
|
+
result_path=trim_exomiser_result_filename(exomiser_result_path),
|
|
156
263
|
phenopacket_dir=phenopacket_dir,
|
|
157
264
|
)
|
|
158
|
-
|
|
159
265
|
if variant_analysis:
|
|
160
|
-
variant_results =
|
|
266
|
+
variant_results = (
|
|
267
|
+
extract_variant_results_from_parquet(exomiser_result, score_name)
|
|
268
|
+
if use_parquet
|
|
269
|
+
else extract_variant_results_from_json(exomiser_result, score_name)
|
|
270
|
+
)
|
|
161
271
|
generate_variant_result(
|
|
162
272
|
results=variant_results,
|
|
163
273
|
sort_order=sort_order,
|
|
164
274
|
output_dir=output_dir,
|
|
165
|
-
result_path=trim_exomiser_result_filename(
|
|
275
|
+
result_path=trim_exomiser_result_filename(exomiser_result_path),
|
|
166
276
|
phenopacket_dir=phenopacket_dir,
|
|
167
277
|
)
|
|
168
278
|
|
|
@@ -228,6 +338,14 @@ def create_standardised_results(
|
|
|
228
338
|
default=False,
|
|
229
339
|
help="Specify whether to create PhEval disease results.",
|
|
230
340
|
)
|
|
341
|
+
@click.option(
|
|
342
|
+
"--version",
|
|
343
|
+
"-v",
|
|
344
|
+
required=True,
|
|
345
|
+
help="Exomiser version used to generate results.",
|
|
346
|
+
default="15.0.0",
|
|
347
|
+
show_default=True,
|
|
348
|
+
)
|
|
231
349
|
def post_process_exomiser_results(
|
|
232
350
|
output_dir: Path,
|
|
233
351
|
results_dir: Path,
|
|
@@ -237,6 +355,7 @@ def post_process_exomiser_results(
|
|
|
237
355
|
gene_analysis: bool,
|
|
238
356
|
variant_analysis: bool,
|
|
239
357
|
disease_analysis: bool,
|
|
358
|
+
version: str,
|
|
240
359
|
):
|
|
241
360
|
"""Post-process Exomiser json results into PhEval gene and variant outputs."""
|
|
242
361
|
(
|
|
@@ -263,4 +382,5 @@ def post_process_exomiser_results(
|
|
|
263
382
|
variant_analysis=variant_analysis,
|
|
264
383
|
gene_analysis=gene_analysis,
|
|
265
384
|
disease_analysis=disease_analysis,
|
|
385
|
+
exomiser_version=version,
|
|
266
386
|
)
|
|
@@ -4,6 +4,7 @@ from pathlib import Path
|
|
|
4
4
|
from typing import List, Optional
|
|
5
5
|
|
|
6
6
|
import click
|
|
7
|
+
from packaging import version
|
|
7
8
|
from phenopackets import Family, Phenopacket
|
|
8
9
|
from pheval.prepare.custom_exceptions import MutuallyExclusiveOptionError
|
|
9
10
|
from pheval.utils.file_utils import all_files, files_with_suffix
|
|
@@ -23,13 +24,13 @@ class ExomiserCommandLineArguments:
|
|
|
23
24
|
"""Store command line arguments for each phenopacket to be run with Exomiser."""
|
|
24
25
|
|
|
25
26
|
sample: Path
|
|
26
|
-
analysis_yaml: Path
|
|
27
|
-
vcf_file: Path
|
|
28
|
-
vcf_assembly: str
|
|
29
|
-
raw_results_dir: Path
|
|
30
|
-
variant_analysis: bool
|
|
27
|
+
analysis_yaml: Optional[Path] = None
|
|
28
|
+
vcf_file: Optional[Path] = None
|
|
29
|
+
vcf_assembly: Optional[str] = None
|
|
30
|
+
raw_results_dir: Optional[Path] = None
|
|
31
|
+
variant_analysis: Optional[bool] = None
|
|
31
32
|
output_options_file: Optional[Path] = None
|
|
32
|
-
output_formats: List[str]
|
|
33
|
+
output_formats: Optional[List[str]] = None
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
def get_all_files_from_output_opt_directory(output_options_dir: Path) -> List[Path] or None:
|
|
@@ -80,39 +81,41 @@ class CommandCreator:
|
|
|
80
81
|
output_options_file = self.assign_output_options_file()
|
|
81
82
|
if self.environment == "docker":
|
|
82
83
|
return ExomiserCommandLineArguments(
|
|
83
|
-
sample=f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{
|
|
84
|
+
sample=Path(f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{self.phenopacket_path.name}"),
|
|
84
85
|
variant_analysis=self.variant_analysis,
|
|
85
86
|
output_options_file=(
|
|
86
|
-
f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{
|
|
87
|
-
if output_options_file
|
|
87
|
+
Path(f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{output_options_file.name}")
|
|
88
|
+
if output_options_file
|
|
88
89
|
else None
|
|
89
90
|
),
|
|
90
|
-
raw_results_dir=RAW_RESULTS_TARGET_DIRECTORY_DOCKER,
|
|
91
|
+
raw_results_dir=Path(RAW_RESULTS_TARGET_DIRECTORY_DOCKER),
|
|
91
92
|
output_formats=self.output_formats,
|
|
92
93
|
)
|
|
93
94
|
elif self.environment == "local":
|
|
94
95
|
return ExomiserCommandLineArguments(
|
|
95
|
-
sample=
|
|
96
|
+
sample=self.phenopacket_path,
|
|
96
97
|
variant_analysis=self.variant_analysis,
|
|
97
98
|
output_options_file=output_options_file,
|
|
98
99
|
raw_results_dir=self.results_dir,
|
|
99
100
|
output_formats=self.output_formats,
|
|
100
101
|
)
|
|
102
|
+
raise ValueError(f"Unknown environment: {self.environment}")
|
|
101
103
|
|
|
102
104
|
def add_variant_analysis_arguments(self, vcf_dir: Path) -> ExomiserCommandLineArguments:
|
|
103
|
-
|
|
104
|
-
PhenopacketUtil(self.phenopacket).vcf_file_data(
|
|
105
|
-
|
|
106
|
-
|
|
105
|
+
if vcf_dir.exists():
|
|
106
|
+
vcf_file_data = PhenopacketUtil(self.phenopacket).vcf_file_data(
|
|
107
|
+
self.phenopacket_path, vcf_dir
|
|
108
|
+
)
|
|
109
|
+
else:
|
|
110
|
+
vcf_file_data = next(
|
|
107
111
|
file
|
|
108
112
|
for file in self.phenopacket.files
|
|
109
113
|
if file.file_attributes["fileFormat"] == "vcf"
|
|
110
|
-
|
|
111
|
-
)
|
|
114
|
+
)
|
|
112
115
|
output_options_file = self.assign_output_options_file()
|
|
113
116
|
if self.environment == "local":
|
|
114
117
|
return ExomiserCommandLineArguments(
|
|
115
|
-
sample=
|
|
118
|
+
sample=self.phenopacket_path,
|
|
116
119
|
vcf_file=Path(vcf_file_data.uri),
|
|
117
120
|
vcf_assembly=vcf_file_data.file_attributes["genomeAssembly"],
|
|
118
121
|
output_options_file=output_options_file,
|
|
@@ -123,18 +126,24 @@ class CommandCreator:
|
|
|
123
126
|
)
|
|
124
127
|
elif self.environment == "docker":
|
|
125
128
|
return ExomiserCommandLineArguments(
|
|
126
|
-
sample=f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{
|
|
127
|
-
vcf_file=f"{VCF_TARGET_DIRECTORY_DOCKER}{Path(vcf_file_data.uri).name}",
|
|
129
|
+
sample=Path(f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{self.phenopacket_path.name}"),
|
|
130
|
+
vcf_file=Path(f"{VCF_TARGET_DIRECTORY_DOCKER}{Path(vcf_file_data.uri).name}"),
|
|
128
131
|
vcf_assembly=vcf_file_data.file_attributes["genomeAssembly"],
|
|
129
132
|
output_options_file=(
|
|
130
|
-
f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{
|
|
131
|
-
if output_options_file
|
|
133
|
+
Path(f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{output_options_file.name}")
|
|
134
|
+
if output_options_file
|
|
132
135
|
else None
|
|
133
136
|
),
|
|
134
137
|
variant_analysis=self.variant_analysis,
|
|
135
|
-
raw_results_dir=RAW_RESULTS_TARGET_DIRECTORY_DOCKER,
|
|
136
|
-
analysis_yaml=
|
|
138
|
+
raw_results_dir=Path(RAW_RESULTS_TARGET_DIRECTORY_DOCKER),
|
|
139
|
+
analysis_yaml=(
|
|
140
|
+
Path(f"{EXOMISER_YAML_TARGET_DIRECTORY_DOCKER}{self.analysis_yaml.name}")
|
|
141
|
+
if self.analysis_yaml
|
|
142
|
+
else None
|
|
143
|
+
),
|
|
144
|
+
output_formats=self.output_formats,
|
|
137
145
|
)
|
|
146
|
+
raise ValueError(f"Unknown environment: {self.environment}")
|
|
138
147
|
|
|
139
148
|
def add_command_line_arguments(self, vcf_dir: Path or None) -> ExomiserCommandLineArguments:
|
|
140
149
|
"""Return a dataclass of all the command line arguments corresponding to phenopacket sample."""
|
|
@@ -181,92 +190,89 @@ def create_command_arguments(
|
|
|
181
190
|
class CommandsWriter:
|
|
182
191
|
"""Write a command to file."""
|
|
183
192
|
|
|
184
|
-
def __init__(self, file: Path, variant_analysis: bool):
|
|
193
|
+
def __init__(self, file: Path, variant_analysis: bool, exomiser_version: str):
|
|
185
194
|
self.file = open(file, "w")
|
|
186
195
|
self.variant_analysis = variant_analysis
|
|
196
|
+
self.exomiser_version = exomiser_version
|
|
187
197
|
|
|
188
198
|
def write_basic_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
|
|
189
199
|
"""Write basic analysis command for Exomiser"""
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
)
|
|
203
|
-
except IOError:
|
|
204
|
-
print("Error writing ", self.file)
|
|
200
|
+
self.file.write(
|
|
201
|
+
"--analysis "
|
|
202
|
+
+ str(command_arguments.analysis_yaml)
|
|
203
|
+
+ " --sample "
|
|
204
|
+
+ str(command_arguments.sample)
|
|
205
|
+
+ " --vcf "
|
|
206
|
+
+ str(command_arguments.vcf_file)
|
|
207
|
+
+ " --assembly "
|
|
208
|
+
+ command_arguments.vcf_assembly
|
|
209
|
+
+ " --output-filename "
|
|
210
|
+
+ f"{command_arguments.sample.stem}-exomiser"
|
|
211
|
+
)
|
|
205
212
|
|
|
206
213
|
def write_results_dir(self, command_arguments: ExomiserCommandLineArguments) -> None:
|
|
207
214
|
"""Write results directory for exomiser ≥13.2.0 to run."""
|
|
208
|
-
|
|
209
|
-
(
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
)
|
|
214
|
-
except IOError:
|
|
215
|
-
print("Error writing ", self.file)
|
|
215
|
+
(
|
|
216
|
+
self.file.write(" --output-directory " + str(command_arguments.raw_results_dir))
|
|
217
|
+
if command_arguments.raw_results_dir is not None
|
|
218
|
+
else None
|
|
219
|
+
)
|
|
216
220
|
|
|
217
221
|
def write_output_options(self, command_arguments: ExomiserCommandLineArguments) -> None:
|
|
218
222
|
"""Write a command out for exomiser ≤13.1.0 to run - including output option file specified."""
|
|
219
|
-
|
|
220
|
-
(
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
)
|
|
225
|
-
except IOError:
|
|
226
|
-
print("Error writing ", self.file)
|
|
223
|
+
(
|
|
224
|
+
self.file.write(" --output " + str(command_arguments.output_options_file))
|
|
225
|
+
if command_arguments.output_options_file is not None
|
|
226
|
+
else None
|
|
227
|
+
)
|
|
227
228
|
|
|
228
229
|
def write_output_format(self, command_arguments: ExomiserCommandLineArguments) -> None:
|
|
229
230
|
"""Write output formats for Exomiser raw result output."""
|
|
231
|
+
(
|
|
232
|
+
self.file.write(" --output-format " + ",".join(command_arguments.output_formats))
|
|
233
|
+
if command_arguments.output_formats is not None
|
|
234
|
+
else None
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
def write_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
|
|
230
238
|
try:
|
|
231
|
-
(
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
)
|
|
239
|
+
self.write_basic_analysis_command(command_arguments)
|
|
240
|
+
self.write_results_dir(command_arguments)
|
|
241
|
+
self.write_output_options(command_arguments)
|
|
242
|
+
self.write_output_format(command_arguments)
|
|
243
|
+
self.file.write("\n")
|
|
236
244
|
except IOError:
|
|
237
245
|
print("Error writing ", self.file)
|
|
238
246
|
|
|
239
|
-
def write_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
|
|
240
|
-
self.write_basic_analysis_command(command_arguments)
|
|
241
|
-
self.write_results_dir(command_arguments)
|
|
242
|
-
self.write_output_options(command_arguments)
|
|
243
|
-
self.write_output_format(command_arguments)
|
|
244
|
-
self.file.write("\n")
|
|
245
|
-
|
|
246
247
|
def write_basic_phenotype_only_command(
|
|
247
248
|
self, command_arguments: ExomiserCommandLineArguments
|
|
248
249
|
) -> None:
|
|
249
250
|
"""Write a phenotype-only command out for exomiser ≥13.2.0 to run."""
|
|
251
|
+
phenotype_only = (
|
|
252
|
+
"phenotype-only"
|
|
253
|
+
if version.parse(self.exomiser_version) < version.parse("15.0.0")
|
|
254
|
+
else "phenotype_only"
|
|
255
|
+
)
|
|
256
|
+
self.file.write(
|
|
257
|
+
"--sample "
|
|
258
|
+
+ str(command_arguments.sample)
|
|
259
|
+
+ " --output-directory "
|
|
260
|
+
+ str(command_arguments.raw_results_dir)
|
|
261
|
+
+ " --output-filename "
|
|
262
|
+
+ f"{Path(command_arguments.sample).stem}-exomiser"
|
|
263
|
+
+ " --preset "
|
|
264
|
+
+ phenotype_only
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
def write_phenotype_only_command(self, command_arguments: ExomiserCommandLineArguments):
|
|
250
268
|
try:
|
|
251
|
-
self.
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
+ str(command_arguments.raw_results_dir)
|
|
256
|
-
+ " --output-filename "
|
|
257
|
-
+ f"{Path(command_arguments.sample).stem}-exomiser"
|
|
258
|
-
+ " --preset "
|
|
259
|
-
+ "phenotype-only"
|
|
260
|
-
)
|
|
269
|
+
self.write_basic_phenotype_only_command(command_arguments)
|
|
270
|
+
self.write_output_options(command_arguments)
|
|
271
|
+
self.write_output_format(command_arguments)
|
|
272
|
+
self.file.write("\n")
|
|
261
273
|
except IOError:
|
|
262
274
|
print("Error writing ", self.file)
|
|
263
275
|
|
|
264
|
-
def write_phenotype_only_command(self, command_arguments: ExomiserCommandLineArguments):
|
|
265
|
-
self.write_basic_phenotype_only_command(command_arguments)
|
|
266
|
-
self.write_output_options(command_arguments)
|
|
267
|
-
self.write_output_format(command_arguments)
|
|
268
|
-
self.file.write("\n")
|
|
269
|
-
|
|
270
276
|
def write_local_commands(self, command_arguments: ExomiserCommandLineArguments):
|
|
271
277
|
(
|
|
272
278
|
self.write_analysis_command(command_arguments)
|
|
@@ -291,11 +297,13 @@ class BatchFileWriter:
|
|
|
291
297
|
variant_analysis: bool,
|
|
292
298
|
output_dir: Path,
|
|
293
299
|
batch_prefix: str,
|
|
300
|
+
exomiser_version: str,
|
|
294
301
|
):
|
|
295
302
|
self.command_arguments_list = command_arguments_list
|
|
296
303
|
self.variant_analysis = variant_analysis
|
|
297
304
|
self.output_dir = output_dir
|
|
298
305
|
self.batch_prefix = batch_prefix
|
|
306
|
+
self.exomiser_version = exomiser_version
|
|
299
307
|
|
|
300
308
|
def write_commands(self, commands_writer: CommandsWriter) -> None:
|
|
301
309
|
"""Write command arguments to a file."""
|
|
@@ -306,7 +314,9 @@ class BatchFileWriter:
|
|
|
306
314
|
def write_temp_file(self) -> str:
|
|
307
315
|
"""Write commands out to a temporary file."""
|
|
308
316
|
temp = tempfile.NamedTemporaryFile(delete=False)
|
|
309
|
-
commands_writer = CommandsWriter(
|
|
317
|
+
commands_writer = CommandsWriter(
|
|
318
|
+
Path(temp.name), self.variant_analysis, self.exomiser_version
|
|
319
|
+
)
|
|
310
320
|
self.write_commands(commands_writer)
|
|
311
321
|
return temp.name
|
|
312
322
|
|
|
@@ -315,6 +325,7 @@ class BatchFileWriter:
|
|
|
315
325
|
commands_writer = CommandsWriter(
|
|
316
326
|
Path(self.output_dir).joinpath(self.batch_prefix + "-exomiser-batch.txt"),
|
|
317
327
|
self.variant_analysis,
|
|
328
|
+
self.exomiser_version,
|
|
318
329
|
)
|
|
319
330
|
self.write_commands(commands_writer)
|
|
320
331
|
|
|
@@ -350,6 +361,7 @@ def create_batch_file(
|
|
|
350
361
|
max_jobs: int,
|
|
351
362
|
variant_analysis: bool,
|
|
352
363
|
results_dir: Path,
|
|
364
|
+
exomiser_version: str,
|
|
353
365
|
output_options_dir: Path = None,
|
|
354
366
|
output_options_file: Path = None,
|
|
355
367
|
output_formats: List[str] = None,
|
|
@@ -368,10 +380,7 @@ def create_batch_file(
|
|
|
368
380
|
)
|
|
369
381
|
(
|
|
370
382
|
BatchFileWriter(
|
|
371
|
-
command_arguments,
|
|
372
|
-
variant_analysis,
|
|
373
|
-
output_dir,
|
|
374
|
-
batch_prefix,
|
|
383
|
+
command_arguments, variant_analysis, output_dir, batch_prefix, exomiser_version
|
|
375
384
|
).write_all_commands()
|
|
376
385
|
if max_jobs == 0
|
|
377
386
|
else BatchFileWriter(
|
|
@@ -379,6 +388,7 @@ def create_batch_file(
|
|
|
379
388
|
variant_analysis,
|
|
380
389
|
output_dir,
|
|
381
390
|
batch_prefix,
|
|
391
|
+
exomiser_version,
|
|
382
392
|
).create_split_batch_files(max_jobs)
|
|
383
393
|
)
|
|
384
394
|
|
|
@@ -403,19 +413,18 @@ def create_batch_file(
|
|
|
403
413
|
)
|
|
404
414
|
@click.option(
|
|
405
415
|
"--phenopacket-dir",
|
|
406
|
-
"-
|
|
416
|
+
"-p",
|
|
407
417
|
required=True,
|
|
408
418
|
metavar="PATH",
|
|
409
419
|
type=Path,
|
|
410
|
-
help="Path to
|
|
420
|
+
help="Path to phenopacket directory.",
|
|
411
421
|
)
|
|
412
422
|
@click.option(
|
|
413
423
|
"--vcf-dir",
|
|
414
424
|
"-v",
|
|
415
|
-
required=True,
|
|
416
425
|
metavar="PATH",
|
|
417
426
|
type=Path,
|
|
418
|
-
help="Path to VCF
|
|
427
|
+
help="Path to VCF directory.",
|
|
419
428
|
)
|
|
420
429
|
@click.option(
|
|
421
430
|
"--batch-prefix",
|
|
@@ -437,19 +446,34 @@ def create_batch_file(
|
|
|
437
446
|
help="Number of jobs in each file.",
|
|
438
447
|
)
|
|
439
448
|
@click.option(
|
|
440
|
-
"--
|
|
449
|
+
"--variant-analysis",
|
|
441
450
|
type=bool,
|
|
442
451
|
default=False,
|
|
443
|
-
|
|
444
|
-
mutually_exclusive=["vcf_dir", "analysis_yaml"],
|
|
452
|
+
is_flag=True,
|
|
445
453
|
help="Run Exomiser with phenotype only preset - strongly recommended to run with versions 13.2.0 onwards.",
|
|
446
454
|
)
|
|
455
|
+
@click.option(
|
|
456
|
+
"--output-dir",
|
|
457
|
+
"-d",
|
|
458
|
+
type=Path,
|
|
459
|
+
required=False,
|
|
460
|
+
help="Results directory for Exomiser results - compatible with versions 13.2.0 onwards.",
|
|
461
|
+
)
|
|
447
462
|
@click.option(
|
|
448
463
|
"--results-dir",
|
|
464
|
+
"-r",
|
|
449
465
|
type=Path,
|
|
450
466
|
required=False,
|
|
451
467
|
help="Results directory for Exomiser results - compatible with versions 13.2.0 onwards.",
|
|
452
468
|
)
|
|
469
|
+
@click.option(
|
|
470
|
+
"--exomiser-version",
|
|
471
|
+
"-v",
|
|
472
|
+
required=True,
|
|
473
|
+
help="Exomiser version used to generate results.",
|
|
474
|
+
default="15.0.0",
|
|
475
|
+
show_default=True,
|
|
476
|
+
)
|
|
453
477
|
@click.option(
|
|
454
478
|
"--output-options-dir",
|
|
455
479
|
"-O",
|
|
@@ -470,29 +494,41 @@ def create_batch_file(
|
|
|
470
494
|
type=Path,
|
|
471
495
|
help="Path to the output options file. ",
|
|
472
496
|
)
|
|
497
|
+
@click.option(
|
|
498
|
+
"--output-formats",
|
|
499
|
+
"-f",
|
|
500
|
+
multiple=True,
|
|
501
|
+
help="One or more output formats (e.g., --output-format vcf --output-format json).",
|
|
502
|
+
)
|
|
473
503
|
def prepare_exomiser_batch(
|
|
474
504
|
environment: str,
|
|
475
505
|
analysis_yaml: Path,
|
|
476
506
|
phenopacket_dir: Path,
|
|
477
507
|
vcf_dir: Path,
|
|
478
508
|
output_dir: Path,
|
|
509
|
+
results_dir: Path,
|
|
479
510
|
batch_prefix: str,
|
|
480
511
|
max_jobs: int,
|
|
481
|
-
|
|
512
|
+
variant_analysis: bool,
|
|
513
|
+
exomiser_version: str,
|
|
482
514
|
output_options_dir: Path = None,
|
|
483
515
|
output_options_file: Path = None,
|
|
516
|
+
output_formats: List[str] = None,
|
|
484
517
|
):
|
|
485
518
|
"""Generate Exomiser batch files."""
|
|
486
519
|
Path(output_dir).joinpath("tool_input_commands").mkdir(exist_ok=True)
|
|
487
520
|
create_batch_file(
|
|
488
|
-
environment,
|
|
489
|
-
analysis_yaml,
|
|
490
|
-
phenopacket_dir,
|
|
491
|
-
vcf_dir,
|
|
492
|
-
output_dir,
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
521
|
+
environment=environment,
|
|
522
|
+
analysis=analysis_yaml,
|
|
523
|
+
phenopacket_dir=phenopacket_dir,
|
|
524
|
+
vcf_dir=vcf_dir,
|
|
525
|
+
output_dir=output_dir,
|
|
526
|
+
results_dir=results_dir,
|
|
527
|
+
batch_prefix=batch_prefix,
|
|
528
|
+
max_jobs=max_jobs,
|
|
529
|
+
variant_analysis=variant_analysis,
|
|
530
|
+
output_options_dir=output_options_dir,
|
|
531
|
+
output_options_file=output_options_file,
|
|
532
|
+
output_formats=list(output_formats),
|
|
533
|
+
exomiser_version=exomiser_version,
|
|
498
534
|
)
|
pheval_exomiser/run/run.py
CHANGED
|
@@ -27,15 +27,17 @@ def prepare_batch_files(
|
|
|
27
27
|
tool_input_commands_dir: Path,
|
|
28
28
|
raw_results_dir: Path,
|
|
29
29
|
variant_analysis: bool,
|
|
30
|
+
exomiser_version: str,
|
|
30
31
|
) -> None:
|
|
31
32
|
"""Prepare the exomiser batch files"""
|
|
32
33
|
print("...preparing batch files...")
|
|
33
34
|
vcf_dir_name = Path(testdata_dir).joinpath("vcf")
|
|
34
|
-
|
|
35
|
-
config.output_formats
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
if version.parse(exomiser_version) >= version.parse("15.0.0"):
|
|
36
|
+
if "PARQUET" not in config.output_formats:
|
|
37
|
+
config.output_formats.append("PARQUET")
|
|
38
|
+
elif version.parse(exomiser_version) < version.parse("15.0.0"):
|
|
39
|
+
if "JSON" not in config.output_formats:
|
|
40
|
+
config.output_formats.append("JSON")
|
|
39
41
|
create_batch_file(
|
|
40
42
|
environment=config.environment,
|
|
41
43
|
analysis=input_dir.joinpath(config.analysis_configuration_file),
|
|
@@ -48,7 +50,8 @@ def prepare_batch_files(
|
|
|
48
50
|
output_options_dir=None,
|
|
49
51
|
results_dir=raw_results_dir,
|
|
50
52
|
variant_analysis=variant_analysis,
|
|
51
|
-
output_formats=output_formats,
|
|
53
|
+
output_formats=config.output_formats,
|
|
54
|
+
exomiser_version=exomiser_version,
|
|
52
55
|
)
|
|
53
56
|
|
|
54
57
|
|
|
@@ -121,18 +124,32 @@ def run_exomiser_local(
|
|
|
121
124
|
][0]
|
|
122
125
|
exomiser_jar_file_path = config.exomiser_software_directory.joinpath(exomiser_jar_file)
|
|
123
126
|
for file in batch_files:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
127
|
+
if version.parse(exomiser_version) < version.parse("15.0.0"):
|
|
128
|
+
subprocess.run(
|
|
129
|
+
[
|
|
130
|
+
"java",
|
|
131
|
+
"-Xmx4g",
|
|
132
|
+
"-jar",
|
|
133
|
+
exomiser_jar_file_path,
|
|
134
|
+
"--batch",
|
|
135
|
+
file,
|
|
136
|
+
f"--spring.config.location={Path(input_dir).joinpath('application.properties')}",
|
|
137
|
+
],
|
|
138
|
+
shell=False,
|
|
139
|
+
)
|
|
140
|
+
elif version.parse(exomiser_version) >= version.parse("15.0.0"):
|
|
141
|
+
subprocess.run(
|
|
142
|
+
[
|
|
143
|
+
"java",
|
|
144
|
+
"-Xmx4g",
|
|
145
|
+
f"-Dspring.config.location={str(Path(input_dir).joinpath('application.properties'))}",
|
|
146
|
+
"-jar",
|
|
147
|
+
exomiser_jar_file_path,
|
|
148
|
+
"batch",
|
|
149
|
+
file,
|
|
150
|
+
],
|
|
151
|
+
shell=False,
|
|
152
|
+
)
|
|
136
153
|
if version.parse(exomiser_version) < version.parse("13.1.0"):
|
|
137
154
|
os.rename(
|
|
138
155
|
f"{output_dir}/results",
|
pheval_exomiser/runner.py
CHANGED
|
@@ -45,6 +45,7 @@ class ExomiserPhEvalRunner(PhEvalRunner):
|
|
|
45
45
|
tool_input_commands_dir=self.tool_input_commands_dir,
|
|
46
46
|
raw_results_dir=self.raw_results_dir,
|
|
47
47
|
variant_analysis=self.input_dir_config.variant_analysis,
|
|
48
|
+
exomiser_version=self.version,
|
|
48
49
|
)
|
|
49
50
|
run_exomiser(
|
|
50
51
|
input_dir=self.input_dir,
|
|
@@ -71,4 +72,5 @@ class ExomiserPhEvalRunner(PhEvalRunner):
|
|
|
71
72
|
variant_analysis=self.input_dir_config.variant_analysis,
|
|
72
73
|
gene_analysis=self.input_dir_config.gene_analysis,
|
|
73
74
|
disease_analysis=self.input_dir_config.disease_analysis,
|
|
75
|
+
exomiser_version=self.version,
|
|
74
76
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pheval_exomiser
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary:
|
|
5
5
|
Author: Yasemin Bridges
|
|
6
6
|
Author-email: y.bridges@qmul.ac.uk
|
|
@@ -10,13 +10,14 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.11
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
13
14
|
Requires-Dist: click (>=8.1.3,<9.0.0)
|
|
14
15
|
Requires-Dist: docker (>=6.0.1,<7.0.0)
|
|
15
16
|
Requires-Dist: google (>=3.0.0,<4.0.0)
|
|
16
17
|
Requires-Dist: numpy (<2)
|
|
17
18
|
Requires-Dist: oaklib (>=0.5.12,<0.6.0)
|
|
18
19
|
Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
|
|
19
|
-
Requires-Dist: pheval (>=0.6.
|
|
20
|
+
Requires-Dist: pheval (>=0.6.7,<0.7.0)
|
|
20
21
|
Requires-Dist: pyaml (>=21.10.1,<22.0.0)
|
|
21
22
|
Requires-Dist: pydantic (>=2.7.1,<3.0.0)
|
|
22
23
|
Description-Content-Type: text/markdown
|
|
@@ -2,16 +2,16 @@ pheval_exomiser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
2
2
|
pheval_exomiser/cli.py,sha256=0SR1-L2sREEkFRfUPwYwkbSaBsz_L_Sxq1S4c9LQLJg,350
|
|
3
3
|
pheval_exomiser/constants.py,sha256=o_pLWF8kX74BqyTsAZa7twwSKzedLnpupCI90k_bMqY,517
|
|
4
4
|
pheval_exomiser/post_process/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
pheval_exomiser/post_process/post_process.py,sha256=
|
|
6
|
-
pheval_exomiser/post_process/post_process_results_format.py,sha256=
|
|
5
|
+
pheval_exomiser/post_process/post_process.py,sha256=bGNLO0LlsG26oKtvL3mtlcBTDY5gynKh1BwNjmUaIgI,972
|
|
6
|
+
pheval_exomiser/post_process/post_process_results_format.py,sha256=Y5Wi6zkBUaDoHVqFD5tmToGLSEqc86hN5s08edL4Tic,12287
|
|
7
7
|
pheval_exomiser/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
pheval_exomiser/prepare/create_batch_commands.py,sha256=
|
|
8
|
+
pheval_exomiser/prepare/create_batch_commands.py,sha256=g3hTCWDldqD0oLmIoZeinLn9uS2ZX2Hm6jqTxvATf8I,18638
|
|
9
9
|
pheval_exomiser/prepare/tool_specific_configuration_options.py,sha256=4gedZ9iadRXK6tF9P-ju-dhj8-F2-fhrXVhfYIsAxFQ,2922
|
|
10
10
|
pheval_exomiser/prepare/write_application_properties.py,sha256=KmG7GvkQo8AhnhRyqohTFvqjfhEhbcs78UYYoigxJ3w,8933
|
|
11
11
|
pheval_exomiser/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
pheval_exomiser/run/run.py,sha256=
|
|
13
|
-
pheval_exomiser/runner.py,sha256=
|
|
14
|
-
pheval_exomiser-0.
|
|
15
|
-
pheval_exomiser-0.
|
|
16
|
-
pheval_exomiser-0.
|
|
17
|
-
pheval_exomiser-0.
|
|
12
|
+
pheval_exomiser/run/run.py,sha256=yyjjNgOOm1baDFWwQ2ENU4YeHtIOnvaQ7b3DmWtb7zY,7934
|
|
13
|
+
pheval_exomiser/runner.py,sha256=RqVobVJlOwcPzbO5gLjDtkGaygWdFT9VrlIvOmyBQPw,2706
|
|
14
|
+
pheval_exomiser-0.4.0.dist-info/METADATA,sha256=7fNZcLql69hJXVZr2XA3znjVp8O5s__FIHdCV6lu4A0,7692
|
|
15
|
+
pheval_exomiser-0.4.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
16
|
+
pheval_exomiser-0.4.0.dist-info/entry_points.txt,sha256=lbZMu-x7ns8UrFveWSqEQ1UB5l33TbRMomqBUyGYIwI,131
|
|
17
|
+
pheval_exomiser-0.4.0.dist-info/RECORD,,
|
|
File without changes
|