pheval-exomiser 0.3.3__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16) hide show
  1. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/PKG-INFO +4 -3
  2. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/pyproject.toml +2 -2
  3. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/post_process/post_process.py +2 -2
  4. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/post_process/post_process_results_format.py +129 -9
  5. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/prepare/create_batch_commands.py +143 -107
  6. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/run/run.py +35 -18
  7. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/runner.py +2 -0
  8. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/README.md +0 -0
  9. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/__init__.py +0 -0
  10. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/cli.py +0 -0
  11. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/constants.py +0 -0
  12. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/post_process/__init__.py +0 -0
  13. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/prepare/__init__.py +0 -0
  14. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/prepare/tool_specific_configuration_options.py +0 -0
  15. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/prepare/write_application_properties.py +0 -0
  16. {pheval_exomiser-0.3.3 → pheval_exomiser-0.4.0}/src/pheval_exomiser/run/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: pheval_exomiser
3
- Version: 0.3.3
3
+ Version: 0.4.0
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
@@ -10,13 +10,14 @@ Classifier: Programming Language :: Python :: 3.10
10
10
  Classifier: Programming Language :: Python :: 3.11
11
11
  Classifier: Programming Language :: Python :: 3.12
12
12
  Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Python :: 3.14
13
14
  Requires-Dist: click (>=8.1.3,<9.0.0)
14
15
  Requires-Dist: docker (>=6.0.1,<7.0.0)
15
16
  Requires-Dist: google (>=3.0.0,<4.0.0)
16
17
  Requires-Dist: numpy (<2)
17
18
  Requires-Dist: oaklib (>=0.5.12,<0.6.0)
18
19
  Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
19
- Requires-Dist: pheval (>=0.6.1,<0.7.0)
20
+ Requires-Dist: pheval (>=0.6.7,<0.7.0)
20
21
  Requires-Dist: pyaml (>=21.10.1,<22.0.0)
21
22
  Requires-Dist: pydantic (>=2.7.1,<3.0.0)
22
23
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pheval_exomiser"
3
- version = "0.3.3"
3
+ version = "0.4.0"
4
4
  description = ""
5
5
  authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
6
6
  "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",
@@ -19,7 +19,7 @@ oaklib = "^0.5.12"
19
19
  docker = "^6.0.1"
20
20
  pydantic = "^2.7.1"
21
21
  numpy = "<2"
22
- pheval = "^0.6.1"
22
+ pheval = "^0.6.7"
23
23
 
24
24
  [tool.poetry.dev-dependencies]
25
25
  pytest = "^7.1.2"
@@ -12,9 +12,9 @@ def post_process_result_format(
12
12
  variant_analysis: bool,
13
13
  gene_analysis: bool,
14
14
  disease_analysis: bool,
15
+ exomiser_version: str,
15
16
  ):
16
17
  """Standardise Exomiser json format to separated gene and variant results."""
17
- print("...standardising results format...")
18
18
  create_standardised_results(
19
19
  result_dir=raw_results_dir,
20
20
  output_dir=output_dir,
@@ -24,5 +24,5 @@ def post_process_result_format(
24
24
  gene_analysis=gene_analysis,
25
25
  disease_analysis=disease_analysis,
26
26
  variant_analysis=variant_analysis,
27
+ exomiser_version=exomiser_version,
27
28
  )
28
- print("done")
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
 
5
5
  import click
6
6
  import polars as pl
7
+ from packaging import version
7
8
  from pheval.post_processing.post_processing import (
8
9
  SortOrder,
9
10
  generate_disease_result,
@@ -15,10 +16,15 @@ from pheval.utils.file_utils import files_with_suffix
15
16
 
16
17
  class ModeOfInheritance(Enum):
17
18
  AUTOSOMAL_DOMINANT = 1
19
+ AD = 1
18
20
  AUTOSOMAL_RECESSIVE = 2
21
+ AR = 2
19
22
  X_DOMINANT = 1
23
+ XD = 1
20
24
  X_RECESSIVE = 2
25
+ XR = 2
21
26
  MITOCHONDRIAL = 3
27
+ MT = 3
22
28
 
23
29
 
24
30
  def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
@@ -38,6 +44,18 @@ def extract_gene_results_from_json(
38
44
  ).drop_nulls()
39
45
 
40
46
 
47
+ def extract_gene_results_from_parquet(
48
+ exomiser_parquet_result: pl.DataFrame, score_name: str
49
+ ) -> pl.DataFrame:
50
+ return exomiser_parquet_result.select(
51
+ [
52
+ pl.col("geneSymbol").alias("gene_symbol"),
53
+ pl.col("ensemblGeneId").alias("gene_identifier"),
54
+ pl.col(score_name).fill_null(0).round(4).alias("score"),
55
+ ]
56
+ )
57
+
58
+
41
59
  def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.DataFrame:
42
60
  return (
43
61
  exomiser_json_result.select(
@@ -55,6 +73,18 @@ def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.
55
73
  )
56
74
 
57
75
 
76
+ def extract_disease_results_from_parquet(exomiser_parquet_result: pl.DataFrame) -> pl.DataFrame:
77
+ return (
78
+ exomiser_parquet_result.select(pl.col("diseaseMatches"))
79
+ .explode("diseaseMatches")
80
+ .select(
81
+ pl.col("diseaseMatches").struct.field("diseaseId").alias("disease_identifier"),
82
+ pl.col("diseaseMatches").struct.field("score").alias("score"),
83
+ )
84
+ .drop_nulls()
85
+ )
86
+
87
+
58
88
  def extract_variant_results_from_json(
59
89
  exomiser_json_result: pl.DataFrame, score_name: str
60
90
  ) -> pl.DataFrame:
@@ -124,6 +154,67 @@ def extract_variant_results_from_json(
124
154
  )
125
155
 
126
156
 
157
+ def extract_variant_results_from_parquet(
158
+ exomiser_parquet_result: pl.DataFrame, score_name: str
159
+ ) -> pl.DataFrame:
160
+ contributing_variant_only = exomiser_parquet_result.filter(
161
+ pl.col("isContributingVariant") == True # noqa
162
+ )
163
+ return (
164
+ contributing_variant_only.select(
165
+ [
166
+ pl.col("geneSymbol"),
167
+ pl.col("contigName").alias("chrom").cast(pl.String),
168
+ pl.col("start").cast(pl.Int64),
169
+ pl.col("end").cast(pl.Int64),
170
+ pl.col("ref"),
171
+ pl.col("alt"),
172
+ pl.col(score_name).alias("score"),
173
+ pl.col("moi")
174
+ .map_elements(lambda moi: ModeOfInheritance[moi].value, return_dtype=pl.Int8)
175
+ .alias("moi_enum"),
176
+ ]
177
+ )
178
+ .with_columns(
179
+ [
180
+ (pl.col("moi_enum") == 2).alias("is_recessive"),
181
+ pl.when(pl.col("moi_enum") == 2)
182
+ .then(
183
+ pl.format(
184
+ "recessive|{}|{}|{}",
185
+ pl.col("geneSymbol"),
186
+ pl.col("score"),
187
+ pl.col("moi_enum"),
188
+ )
189
+ )
190
+ .otherwise(
191
+ pl.format(
192
+ "dominant|{}|{}|{}|{}|{}|{}",
193
+ pl.col("chrom"),
194
+ pl.col("start"),
195
+ pl.col("end"),
196
+ pl.col("ref"),
197
+ pl.col("alt"),
198
+ pl.col("score"),
199
+ )
200
+ )
201
+ .alias("group_key"),
202
+ ]
203
+ )
204
+ .with_columns(
205
+ [
206
+ pl.col("group_key")
207
+ .rank("dense")
208
+ .cast(pl.UInt32)
209
+ .map_elements(
210
+ lambda i: str(uuid.uuid5(uuid.NAMESPACE_DNS, str(i))), return_dtype=pl.String
211
+ )
212
+ .alias("grouping_id")
213
+ ]
214
+ )
215
+ )
216
+
217
+
127
218
  def create_standardised_results(
128
219
  result_dir: Path,
129
220
  output_dir: Path,
@@ -133,36 +224,55 @@ def create_standardised_results(
133
224
  gene_analysis: bool,
134
225
  disease_analysis: bool,
135
226
  variant_analysis: bool,
227
+ exomiser_version: str,
136
228
  ):
137
229
  sort_order = SortOrder.ASCENDING if sort_order.lower() == "ascending" else SortOrder.DESCENDING
138
- for exomiser_json_result_path in files_with_suffix(result_dir, ".json"):
139
- exomiser_json_result = pl.read_json(exomiser_json_result_path, infer_schema_length=None)
230
+ use_parquet = True if version.parse(exomiser_version) >= version.parse("15.0.0") else False
231
+ read_result = pl.read_parquet if use_parquet else pl.read_json
232
+ result_files = (
233
+ files_with_suffix(result_dir, ".parquet")
234
+ if use_parquet
235
+ else files_with_suffix(result_dir, ".json")
236
+ )
237
+ for exomiser_result_path in result_files:
238
+ exomiser_result = read_result(exomiser_result_path)
140
239
  if gene_analysis:
141
- gene_results = extract_gene_results_from_json(exomiser_json_result, score_name)
240
+ gene_results = (
241
+ extract_gene_results_from_parquet(exomiser_result, score_name)
242
+ if use_parquet
243
+ else extract_gene_results_from_json(exomiser_result, score_name)
244
+ )
142
245
  generate_gene_result(
143
246
  results=gene_results,
144
247
  sort_order=sort_order,
145
248
  output_dir=output_dir,
146
- result_path=trim_exomiser_result_filename(exomiser_json_result_path),
249
+ result_path=trim_exomiser_result_filename(exomiser_result_path),
147
250
  phenopacket_dir=phenopacket_dir,
148
251
  )
149
252
  if disease_analysis:
150
- disease_results = extract_disease_results_from_json(exomiser_json_result)
253
+ disease_results = (
254
+ extract_disease_results_from_parquet(exomiser_result)
255
+ if use_parquet
256
+ else extract_disease_results_from_json(exomiser_result)
257
+ )
151
258
  generate_disease_result(
152
259
  results=disease_results,
153
260
  sort_order=sort_order,
154
261
  output_dir=output_dir,
155
- result_path=trim_exomiser_result_filename(exomiser_json_result_path),
262
+ result_path=trim_exomiser_result_filename(exomiser_result_path),
156
263
  phenopacket_dir=phenopacket_dir,
157
264
  )
158
-
159
265
  if variant_analysis:
160
- variant_results = extract_variant_results_from_json(exomiser_json_result, score_name)
266
+ variant_results = (
267
+ extract_variant_results_from_parquet(exomiser_result, score_name)
268
+ if use_parquet
269
+ else extract_variant_results_from_json(exomiser_result, score_name)
270
+ )
161
271
  generate_variant_result(
162
272
  results=variant_results,
163
273
  sort_order=sort_order,
164
274
  output_dir=output_dir,
165
- result_path=trim_exomiser_result_filename(exomiser_json_result_path),
275
+ result_path=trim_exomiser_result_filename(exomiser_result_path),
166
276
  phenopacket_dir=phenopacket_dir,
167
277
  )
168
278
 
@@ -228,6 +338,14 @@ def create_standardised_results(
228
338
  default=False,
229
339
  help="Specify whether to create PhEval disease results.",
230
340
  )
341
+ @click.option(
342
+ "--version",
343
+ "-v",
344
+ required=True,
345
+ help="Exomiser version used to generate results.",
346
+ default="15.0.0",
347
+ show_default=True,
348
+ )
231
349
  def post_process_exomiser_results(
232
350
  output_dir: Path,
233
351
  results_dir: Path,
@@ -237,6 +355,7 @@ def post_process_exomiser_results(
237
355
  gene_analysis: bool,
238
356
  variant_analysis: bool,
239
357
  disease_analysis: bool,
358
+ version: str,
240
359
  ):
241
360
  """Post-process Exomiser json results into PhEval gene and variant outputs."""
242
361
  (
@@ -263,4 +382,5 @@ def post_process_exomiser_results(
263
382
  variant_analysis=variant_analysis,
264
383
  gene_analysis=gene_analysis,
265
384
  disease_analysis=disease_analysis,
385
+ exomiser_version=version,
266
386
  )
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
  from typing import List, Optional
5
5
 
6
6
  import click
7
+ from packaging import version
7
8
  from phenopackets import Family, Phenopacket
8
9
  from pheval.prepare.custom_exceptions import MutuallyExclusiveOptionError
9
10
  from pheval.utils.file_utils import all_files, files_with_suffix
@@ -23,13 +24,13 @@ class ExomiserCommandLineArguments:
23
24
  """Store command line arguments for each phenopacket to be run with Exomiser."""
24
25
 
25
26
  sample: Path
26
- analysis_yaml: Path or None = None
27
- vcf_file: Path or None = None
28
- vcf_assembly: str or None = None
29
- raw_results_dir: Path or None = None
30
- variant_analysis: bool or None = None
27
+ analysis_yaml: Optional[Path] = None
28
+ vcf_file: Optional[Path] = None
29
+ vcf_assembly: Optional[str] = None
30
+ raw_results_dir: Optional[Path] = None
31
+ variant_analysis: Optional[bool] = None
31
32
  output_options_file: Optional[Path] = None
32
- output_formats: List[str] or None = None
33
+ output_formats: Optional[List[str]] = None
33
34
 
34
35
 
35
36
  def get_all_files_from_output_opt_directory(output_options_dir: Path) -> List[Path] or None:
@@ -80,39 +81,41 @@ class CommandCreator:
80
81
  output_options_file = self.assign_output_options_file()
81
82
  if self.environment == "docker":
82
83
  return ExomiserCommandLineArguments(
83
- sample=f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{Path(self.phenopacket_path.name)}",
84
+ sample=Path(f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{self.phenopacket_path.name}"),
84
85
  variant_analysis=self.variant_analysis,
85
86
  output_options_file=(
86
- f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{Path(output_options_file).name}"
87
- if output_options_file is not None
87
+ Path(f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{output_options_file.name}")
88
+ if output_options_file
88
89
  else None
89
90
  ),
90
- raw_results_dir=RAW_RESULTS_TARGET_DIRECTORY_DOCKER,
91
+ raw_results_dir=Path(RAW_RESULTS_TARGET_DIRECTORY_DOCKER),
91
92
  output_formats=self.output_formats,
92
93
  )
93
94
  elif self.environment == "local":
94
95
  return ExomiserCommandLineArguments(
95
- sample=Path(self.phenopacket_path),
96
+ sample=self.phenopacket_path,
96
97
  variant_analysis=self.variant_analysis,
97
98
  output_options_file=output_options_file,
98
99
  raw_results_dir=self.results_dir,
99
100
  output_formats=self.output_formats,
100
101
  )
102
+ raise ValueError(f"Unknown environment: {self.environment}")
101
103
 
102
104
  def add_variant_analysis_arguments(self, vcf_dir: Path) -> ExomiserCommandLineArguments:
103
- vcf_file_data = (
104
- PhenopacketUtil(self.phenopacket).vcf_file_data(self.phenopacket_path, vcf_dir)
105
- if vcf_dir.exists()
106
- else [
105
+ if vcf_dir.exists():
106
+ vcf_file_data = PhenopacketUtil(self.phenopacket).vcf_file_data(
107
+ self.phenopacket_path, vcf_dir
108
+ )
109
+ else:
110
+ vcf_file_data = next(
107
111
  file
108
112
  for file in self.phenopacket.files
109
113
  if file.file_attributes["fileFormat"] == "vcf"
110
- ][0]
111
- )
114
+ )
112
115
  output_options_file = self.assign_output_options_file()
113
116
  if self.environment == "local":
114
117
  return ExomiserCommandLineArguments(
115
- sample=Path(self.phenopacket_path),
118
+ sample=self.phenopacket_path,
116
119
  vcf_file=Path(vcf_file_data.uri),
117
120
  vcf_assembly=vcf_file_data.file_attributes["genomeAssembly"],
118
121
  output_options_file=output_options_file,
@@ -123,18 +126,24 @@ class CommandCreator:
123
126
  )
124
127
  elif self.environment == "docker":
125
128
  return ExomiserCommandLineArguments(
126
- sample=f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{Path(self.phenopacket_path.name)}",
127
- vcf_file=f"{VCF_TARGET_DIRECTORY_DOCKER}{Path(vcf_file_data.uri).name}",
129
+ sample=Path(f"{PHENOPACKET_TARGET_DIRECTORY_DOCKER}{self.phenopacket_path.name}"),
130
+ vcf_file=Path(f"{VCF_TARGET_DIRECTORY_DOCKER}{Path(vcf_file_data.uri).name}"),
128
131
  vcf_assembly=vcf_file_data.file_attributes["genomeAssembly"],
129
132
  output_options_file=(
130
- f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{Path(output_options_file).name}"
131
- if output_options_file is not None
133
+ Path(f"{OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER}{output_options_file.name}")
134
+ if output_options_file
132
135
  else None
133
136
  ),
134
137
  variant_analysis=self.variant_analysis,
135
- raw_results_dir=RAW_RESULTS_TARGET_DIRECTORY_DOCKER,
136
- analysis_yaml=f"{EXOMISER_YAML_TARGET_DIRECTORY_DOCKER}{Path(self.analysis_yaml).name}",
138
+ raw_results_dir=Path(RAW_RESULTS_TARGET_DIRECTORY_DOCKER),
139
+ analysis_yaml=(
140
+ Path(f"{EXOMISER_YAML_TARGET_DIRECTORY_DOCKER}{self.analysis_yaml.name}")
141
+ if self.analysis_yaml
142
+ else None
143
+ ),
144
+ output_formats=self.output_formats,
137
145
  )
146
+ raise ValueError(f"Unknown environment: {self.environment}")
138
147
 
139
148
  def add_command_line_arguments(self, vcf_dir: Path or None) -> ExomiserCommandLineArguments:
140
149
  """Return a dataclass of all the command line arguments corresponding to phenopacket sample."""
@@ -181,92 +190,89 @@ def create_command_arguments(
181
190
  class CommandsWriter:
182
191
  """Write a command to file."""
183
192
 
184
- def __init__(self, file: Path, variant_analysis: bool):
193
+ def __init__(self, file: Path, variant_analysis: bool, exomiser_version: str):
185
194
  self.file = open(file, "w")
186
195
  self.variant_analysis = variant_analysis
196
+ self.exomiser_version = exomiser_version
187
197
 
188
198
  def write_basic_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
189
199
  """Write basic analysis command for Exomiser"""
190
- try:
191
- self.file.write(
192
- "--analysis "
193
- + str(command_arguments.analysis_yaml)
194
- + " --sample "
195
- + str(command_arguments.sample)
196
- + " --vcf "
197
- + str(command_arguments.vcf_file)
198
- + " --assembly "
199
- + command_arguments.vcf_assembly
200
- + " --output-filename "
201
- + f"{command_arguments.sample.stem}-exomiser"
202
- )
203
- except IOError:
204
- print("Error writing ", self.file)
200
+ self.file.write(
201
+ "--analysis "
202
+ + str(command_arguments.analysis_yaml)
203
+ + " --sample "
204
+ + str(command_arguments.sample)
205
+ + " --vcf "
206
+ + str(command_arguments.vcf_file)
207
+ + " --assembly "
208
+ + command_arguments.vcf_assembly
209
+ + " --output-filename "
210
+ + f"{command_arguments.sample.stem}-exomiser"
211
+ )
205
212
 
206
213
  def write_results_dir(self, command_arguments: ExomiserCommandLineArguments) -> None:
207
214
  """Write results directory for exomiser ≥13.2.0 to run."""
208
- try:
209
- (
210
- self.file.write(" --output-directory " + str(command_arguments.raw_results_dir))
211
- if command_arguments.raw_results_dir is not None
212
- else None
213
- )
214
- except IOError:
215
- print("Error writing ", self.file)
215
+ (
216
+ self.file.write(" --output-directory " + str(command_arguments.raw_results_dir))
217
+ if command_arguments.raw_results_dir is not None
218
+ else None
219
+ )
216
220
 
217
221
  def write_output_options(self, command_arguments: ExomiserCommandLineArguments) -> None:
218
222
  """Write a command out for exomiser ≤13.1.0 to run - including output option file specified."""
219
- try:
220
- (
221
- self.file.write(" --output " + str(command_arguments.output_options_file))
222
- if command_arguments.output_options_file is not None
223
- else None
224
- )
225
- except IOError:
226
- print("Error writing ", self.file)
223
+ (
224
+ self.file.write(" --output " + str(command_arguments.output_options_file))
225
+ if command_arguments.output_options_file is not None
226
+ else None
227
+ )
227
228
 
228
229
  def write_output_format(self, command_arguments: ExomiserCommandLineArguments) -> None:
229
230
  """Write output formats for Exomiser raw result output."""
231
+ (
232
+ self.file.write(" --output-format " + ",".join(command_arguments.output_formats))
233
+ if command_arguments.output_formats is not None
234
+ else None
235
+ )
236
+
237
+ def write_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
230
238
  try:
231
- (
232
- self.file.write(" --output-format " + ",".join(command_arguments.output_formats))
233
- if command_arguments.output_formats is not None
234
- else None
235
- )
239
+ self.write_basic_analysis_command(command_arguments)
240
+ self.write_results_dir(command_arguments)
241
+ self.write_output_options(command_arguments)
242
+ self.write_output_format(command_arguments)
243
+ self.file.write("\n")
236
244
  except IOError:
237
245
  print("Error writing ", self.file)
238
246
 
239
- def write_analysis_command(self, command_arguments: ExomiserCommandLineArguments):
240
- self.write_basic_analysis_command(command_arguments)
241
- self.write_results_dir(command_arguments)
242
- self.write_output_options(command_arguments)
243
- self.write_output_format(command_arguments)
244
- self.file.write("\n")
245
-
246
247
  def write_basic_phenotype_only_command(
247
248
  self, command_arguments: ExomiserCommandLineArguments
248
249
  ) -> None:
249
250
  """Write a phenotype-only command out for exomiser ≥13.2.0 to run."""
251
+ phenotype_only = (
252
+ "phenotype-only"
253
+ if version.parse(self.exomiser_version) < version.parse("15.0.0")
254
+ else "phenotype_only"
255
+ )
256
+ self.file.write(
257
+ "--sample "
258
+ + str(command_arguments.sample)
259
+ + " --output-directory "
260
+ + str(command_arguments.raw_results_dir)
261
+ + " --output-filename "
262
+ + f"{Path(command_arguments.sample).stem}-exomiser"
263
+ + " --preset "
264
+ + phenotype_only
265
+ )
266
+
267
+ def write_phenotype_only_command(self, command_arguments: ExomiserCommandLineArguments):
250
268
  try:
251
- self.file.write(
252
- "--sample "
253
- + str(command_arguments.sample)
254
- + " --output-directory "
255
- + str(command_arguments.raw_results_dir)
256
- + " --output-filename "
257
- + f"{Path(command_arguments.sample).stem}-exomiser"
258
- + " --preset "
259
- + "phenotype-only"
260
- )
269
+ self.write_basic_phenotype_only_command(command_arguments)
270
+ self.write_output_options(command_arguments)
271
+ self.write_output_format(command_arguments)
272
+ self.file.write("\n")
261
273
  except IOError:
262
274
  print("Error writing ", self.file)
263
275
 
264
- def write_phenotype_only_command(self, command_arguments: ExomiserCommandLineArguments):
265
- self.write_basic_phenotype_only_command(command_arguments)
266
- self.write_output_options(command_arguments)
267
- self.write_output_format(command_arguments)
268
- self.file.write("\n")
269
-
270
276
  def write_local_commands(self, command_arguments: ExomiserCommandLineArguments):
271
277
  (
272
278
  self.write_analysis_command(command_arguments)
@@ -291,11 +297,13 @@ class BatchFileWriter:
291
297
  variant_analysis: bool,
292
298
  output_dir: Path,
293
299
  batch_prefix: str,
300
+ exomiser_version: str,
294
301
  ):
295
302
  self.command_arguments_list = command_arguments_list
296
303
  self.variant_analysis = variant_analysis
297
304
  self.output_dir = output_dir
298
305
  self.batch_prefix = batch_prefix
306
+ self.exomiser_version = exomiser_version
299
307
 
300
308
  def write_commands(self, commands_writer: CommandsWriter) -> None:
301
309
  """Write command arguments to a file."""
@@ -306,7 +314,9 @@ class BatchFileWriter:
306
314
  def write_temp_file(self) -> str:
307
315
  """Write commands out to a temporary file."""
308
316
  temp = tempfile.NamedTemporaryFile(delete=False)
309
- commands_writer = CommandsWriter(Path(temp.name), self.variant_analysis)
317
+ commands_writer = CommandsWriter(
318
+ Path(temp.name), self.variant_analysis, self.exomiser_version
319
+ )
310
320
  self.write_commands(commands_writer)
311
321
  return temp.name
312
322
 
@@ -315,6 +325,7 @@ class BatchFileWriter:
315
325
  commands_writer = CommandsWriter(
316
326
  Path(self.output_dir).joinpath(self.batch_prefix + "-exomiser-batch.txt"),
317
327
  self.variant_analysis,
328
+ self.exomiser_version,
318
329
  )
319
330
  self.write_commands(commands_writer)
320
331
 
@@ -350,6 +361,7 @@ def create_batch_file(
350
361
  max_jobs: int,
351
362
  variant_analysis: bool,
352
363
  results_dir: Path,
364
+ exomiser_version: str,
353
365
  output_options_dir: Path = None,
354
366
  output_options_file: Path = None,
355
367
  output_formats: List[str] = None,
@@ -368,10 +380,7 @@ def create_batch_file(
368
380
  )
369
381
  (
370
382
  BatchFileWriter(
371
- command_arguments,
372
- variant_analysis,
373
- output_dir,
374
- batch_prefix,
383
+ command_arguments, variant_analysis, output_dir, batch_prefix, exomiser_version
375
384
  ).write_all_commands()
376
385
  if max_jobs == 0
377
386
  else BatchFileWriter(
@@ -379,6 +388,7 @@ def create_batch_file(
379
388
  variant_analysis,
380
389
  output_dir,
381
390
  batch_prefix,
391
+ exomiser_version,
382
392
  ).create_split_batch_files(max_jobs)
383
393
  )
384
394
 
@@ -403,19 +413,18 @@ def create_batch_file(
403
413
  )
404
414
  @click.option(
405
415
  "--phenopacket-dir",
406
- "-P",
416
+ "-p",
407
417
  required=True,
408
418
  metavar="PATH",
409
419
  type=Path,
410
- help="Path to phenopackets.",
420
+ help="Path to phenopacket directory.",
411
421
  )
412
422
  @click.option(
413
423
  "--vcf-dir",
414
424
  "-v",
415
- required=True,
416
425
  metavar="PATH",
417
426
  type=Path,
418
- help="Path to VCF files.",
427
+ help="Path to VCF directory.",
419
428
  )
420
429
  @click.option(
421
430
  "--batch-prefix",
@@ -437,19 +446,34 @@ def create_batch_file(
437
446
  help="Number of jobs in each file.",
438
447
  )
439
448
  @click.option(
440
- "--phenotype-only",
449
+ "--variant-analysis",
441
450
  type=bool,
442
451
  default=False,
443
- cls=MutuallyExclusiveOptionError,
444
- mutually_exclusive=["vcf_dir", "analysis_yaml"],
452
+ is_flag=True,
445
453
  help="Run Exomiser with phenotype only preset - strongly recommended to run with versions 13.2.0 onwards.",
446
454
  )
455
+ @click.option(
456
+ "--output-dir",
457
+ "-d",
458
+ type=Path,
459
+ required=False,
460
+ help="Results directory for Exomiser results - compatible with versions 13.2.0 onwards.",
461
+ )
447
462
  @click.option(
448
463
  "--results-dir",
464
+ "-r",
449
465
  type=Path,
450
466
  required=False,
451
467
  help="Results directory for Exomiser results - compatible with versions 13.2.0 onwards.",
452
468
  )
469
+ @click.option(
470
+ "--exomiser-version",
471
+ "-v",
472
+ required=True,
473
+ help="Exomiser version used to generate results.",
474
+ default="15.0.0",
475
+ show_default=True,
476
+ )
453
477
  @click.option(
454
478
  "--output-options-dir",
455
479
  "-O",
@@ -470,29 +494,41 @@ def create_batch_file(
470
494
  type=Path,
471
495
  help="Path to the output options file. ",
472
496
  )
497
+ @click.option(
498
+ "--output-formats",
499
+ "-f",
500
+ multiple=True,
501
+ help="One or more output formats (e.g., --output-format vcf --output-format json).",
502
+ )
473
503
  def prepare_exomiser_batch(
474
504
  environment: str,
475
505
  analysis_yaml: Path,
476
506
  phenopacket_dir: Path,
477
507
  vcf_dir: Path,
478
508
  output_dir: Path,
509
+ results_dir: Path,
479
510
  batch_prefix: str,
480
511
  max_jobs: int,
481
- phenotype_only: bool,
512
+ variant_analysis: bool,
513
+ exomiser_version: str,
482
514
  output_options_dir: Path = None,
483
515
  output_options_file: Path = None,
516
+ output_formats: List[str] = None,
484
517
  ):
485
518
  """Generate Exomiser batch files."""
486
519
  Path(output_dir).joinpath("tool_input_commands").mkdir(exist_ok=True)
487
520
  create_batch_file(
488
- environment,
489
- analysis_yaml,
490
- phenopacket_dir,
491
- vcf_dir,
492
- output_dir,
493
- batch_prefix,
494
- max_jobs,
495
- phenotype_only,
496
- output_options_dir,
497
- output_options_file,
521
+ environment=environment,
522
+ analysis=analysis_yaml,
523
+ phenopacket_dir=phenopacket_dir,
524
+ vcf_dir=vcf_dir,
525
+ output_dir=output_dir,
526
+ results_dir=results_dir,
527
+ batch_prefix=batch_prefix,
528
+ max_jobs=max_jobs,
529
+ variant_analysis=variant_analysis,
530
+ output_options_dir=output_options_dir,
531
+ output_options_file=output_options_file,
532
+ output_formats=list(output_formats),
533
+ exomiser_version=exomiser_version,
498
534
  )
@@ -27,15 +27,17 @@ def prepare_batch_files(
27
27
  tool_input_commands_dir: Path,
28
28
  raw_results_dir: Path,
29
29
  variant_analysis: bool,
30
+ exomiser_version: str,
30
31
  ) -> None:
31
32
  """Prepare the exomiser batch files"""
32
33
  print("...preparing batch files...")
33
34
  vcf_dir_name = Path(testdata_dir).joinpath("vcf")
34
- output_formats = (
35
- config.output_formats + ["JSON"]
36
- if config.output_formats and "JSON" not in config.output_formats
37
- else config.output_formats
38
- )
35
+ if version.parse(exomiser_version) >= version.parse("15.0.0"):
36
+ if "PARQUET" not in config.output_formats:
37
+ config.output_formats.append("PARQUET")
38
+ elif version.parse(exomiser_version) < version.parse("15.0.0"):
39
+ if "JSON" not in config.output_formats:
40
+ config.output_formats.append("JSON")
39
41
  create_batch_file(
40
42
  environment=config.environment,
41
43
  analysis=input_dir.joinpath(config.analysis_configuration_file),
@@ -48,7 +50,8 @@ def prepare_batch_files(
48
50
  output_options_dir=None,
49
51
  results_dir=raw_results_dir,
50
52
  variant_analysis=variant_analysis,
51
- output_formats=output_formats,
53
+ output_formats=config.output_formats,
54
+ exomiser_version=exomiser_version,
52
55
  )
53
56
 
54
57
 
@@ -121,18 +124,32 @@ def run_exomiser_local(
121
124
  ][0]
122
125
  exomiser_jar_file_path = config.exomiser_software_directory.joinpath(exomiser_jar_file)
123
126
  for file in batch_files:
124
- subprocess.run(
125
- [
126
- "java",
127
- "-Xmx4g",
128
- "-jar",
129
- exomiser_jar_file_path,
130
- "--batch",
131
- file,
132
- f"--spring.config.location={Path(input_dir).joinpath('application.properties')}",
133
- ],
134
- shell=False,
135
- )
127
+ if version.parse(exomiser_version) < version.parse("15.0.0"):
128
+ subprocess.run(
129
+ [
130
+ "java",
131
+ "-Xmx4g",
132
+ "-jar",
133
+ exomiser_jar_file_path,
134
+ "--batch",
135
+ file,
136
+ f"--spring.config.location={Path(input_dir).joinpath('application.properties')}",
137
+ ],
138
+ shell=False,
139
+ )
140
+ elif version.parse(exomiser_version) >= version.parse("15.0.0"):
141
+ subprocess.run(
142
+ [
143
+ "java",
144
+ "-Xmx4g",
145
+ f"-Dspring.config.location={str(Path(input_dir).joinpath('application.properties'))}",
146
+ "-jar",
147
+ exomiser_jar_file_path,
148
+ "batch",
149
+ file,
150
+ ],
151
+ shell=False,
152
+ )
136
153
  if version.parse(exomiser_version) < version.parse("13.1.0"):
137
154
  os.rename(
138
155
  f"{output_dir}/results",
@@ -45,6 +45,7 @@ class ExomiserPhEvalRunner(PhEvalRunner):
45
45
  tool_input_commands_dir=self.tool_input_commands_dir,
46
46
  raw_results_dir=self.raw_results_dir,
47
47
  variant_analysis=self.input_dir_config.variant_analysis,
48
+ exomiser_version=self.version,
48
49
  )
49
50
  run_exomiser(
50
51
  input_dir=self.input_dir,
@@ -71,4 +72,5 @@ class ExomiserPhEvalRunner(PhEvalRunner):
71
72
  variant_analysis=self.input_dir_config.variant_analysis,
72
73
  gene_analysis=self.input_dir_config.gene_analysis,
73
74
  disease_analysis=self.input_dir_config.disease_analysis,
75
+ exomiser_version=self.version,
74
76
  )