pheval 0.3.9__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

@@ -5,13 +5,9 @@ from typing import List
5
5
 
6
6
  import click
7
7
 
8
- from pheval.analyse.analysis import (
9
- TrackInputOutputDirectories,
10
- benchmark_directory,
11
- benchmark_run_comparisons,
12
- )
13
- from pheval.analyse.generate_plots import generate_plots_from_benchmark_summary_tsv
14
- from pheval.analyse.run_data_parser import parse_run_data_text_file
8
+ from pheval.analyse.analysis import benchmark_run_comparisons
9
+ from pheval.analyse.generate_plots import generate_plots_from_benchmark_summary_db
10
+ from pheval.analyse.run_data_parser import parse_run_config
15
11
  from pheval.prepare.create_noisy_phenopackets import scramble_phenopackets
16
12
  from pheval.prepare.create_spiked_vcf import spike_vcfs
17
13
  from pheval.prepare.custom_exceptions import InputError, MutuallyExclusiveOptionError
@@ -110,17 +106,29 @@ def semsim_scramble_command(
110
106
  default="noisy_phenopackets",
111
107
  type=Path,
112
108
  )
109
+ @click.option(
110
+ "--local-ontology-cache",
111
+ "-l",
112
+ metavar="PATH",
113
+ required=False,
114
+ help="Path to the local ontology cache, e.g., path to the hp.obo.",
115
+ default=None,
116
+ type=Path,
117
+ )
113
118
  def scramble_phenopackets_command(
114
119
  phenopacket_path: Path,
115
120
  phenopacket_dir: Path,
116
121
  scramble_factor: float,
117
122
  output_dir: Path,
123
+ local_ontology_cache: Path,
118
124
  ):
119
125
  """Generate noisy phenopackets from existing ones."""
120
126
  if phenopacket_path is None and phenopacket_dir is None:
121
127
  raise InputError("Either a phenopacket or phenopacket directory must be specified")
122
128
  else:
123
- scramble_phenopackets(output_dir, phenopacket_path, phenopacket_dir, scramble_factor)
129
+ scramble_phenopackets(
130
+ output_dir, phenopacket_path, phenopacket_dir, scramble_factor, local_ontology_cache
131
+ )
124
132
 
125
133
 
126
134
  @click.command("semsim-comparison")
@@ -338,196 +346,19 @@ def create_spiked_vcfs_command(
338
346
 
339
347
  @click.command()
340
348
  @click.option(
341
- "--directory",
342
- "-d",
343
- required=True,
344
- metavar="PATH",
345
- help="General results directory to be benchmarked, assumes contains subdirectories of pheval_gene_results/,"
346
- "pheval_variant_results/ or pheval_disease_results/. ",
347
- type=Path,
348
- )
349
- @click.option(
350
- "--phenopacket-dir",
351
- "-p",
352
- required=True,
353
- metavar="PATH",
354
- help="Full path to directory containing input phenopackets.",
355
- type=Path,
356
- )
357
- @click.option(
358
- "--output-prefix",
359
- "-o",
360
- metavar="<str>",
361
- required=True,
362
- help=" Output file prefix. ",
363
- )
364
- @click.option(
365
- "--score-order",
366
- "-so",
367
- required=True,
368
- help="Ordering of results for ranking.",
369
- type=click.Choice(["ascending", "descending"]),
370
- default="descending",
371
- show_default=True,
372
- )
373
- @click.option(
374
- "--threshold",
375
- "-t",
376
- metavar="<float>",
377
- default=float(0.0),
378
- required=False,
379
- help="Score threshold.",
380
- type=float,
381
- )
382
- @click.option(
383
- "--gene-analysis/--no-gene-analysis",
384
- default=False,
385
- required=False,
386
- type=bool,
387
- show_default=True,
388
- help="Specify analysis for gene prioritisation",
389
- )
390
- @click.option(
391
- "--variant-analysis/--no-variant-analysis",
392
- default=False,
393
- required=False,
394
- type=bool,
395
- show_default=True,
396
- help="Specify analysis for variant prioritisation",
397
- )
398
- @click.option(
399
- "--disease-analysis/--no-disease-analysis",
400
- default=False,
401
- required=False,
402
- type=bool,
403
- show_default=True,
404
- help="Specify analysis for disease prioritisation",
405
- )
406
- @click.option(
407
- "--plot-type",
408
- "-y",
409
- default="bar_stacked",
410
- show_default=True,
411
- type=click.Choice(["bar_stacked", "bar_cumulative", "bar_non_cumulative"]),
412
- help="Bar chart type to output.",
413
- )
414
- def benchmark(
415
- directory: Path,
416
- phenopacket_dir: Path,
417
- score_order: str,
418
- output_prefix: str,
419
- threshold: float,
420
- gene_analysis: bool,
421
- variant_analysis: bool,
422
- disease_analysis: bool,
423
- plot_type: str,
424
- ):
425
- """Benchmark the gene/variant/disease prioritisation performance for a single run."""
426
- if not gene_analysis and not variant_analysis and not disease_analysis:
427
- raise InputError("Need to specify at least one of gene/variant/disease analysis.")
428
- benchmark_directory(
429
- TrackInputOutputDirectories(results_dir=directory, phenopacket_dir=phenopacket_dir),
430
- score_order,
431
- output_prefix,
432
- threshold,
433
- gene_analysis,
434
- variant_analysis,
435
- disease_analysis,
436
- plot_type,
437
- )
438
-
439
-
440
- @click.command()
441
- @click.option(
442
- "--run-data",
349
+ "--run-yaml",
443
350
  "-r",
444
351
  required=True,
445
352
  metavar="PATH",
446
- help="Path to .txt file containing testdata phenopacket directory "
447
- "and corresponding results directory separated by tab."
448
- "Each run contained to a new line with the input testdata listed first and on the same line separated by a tab"
449
- "the results directory.",
353
+ help="Path to yaml configuration file for benchmarking.",
450
354
  type=Path,
451
355
  )
452
- @click.option(
453
- "--output-prefix",
454
- "-o",
455
- metavar="<str>",
456
- required=True,
457
- help=" Output file prefix. ",
458
- )
459
- @click.option(
460
- "--score-order",
461
- "-so",
462
- required=True,
463
- help="Ordering of results for ranking.",
464
- type=click.Choice(["ascending", "descending"]),
465
- default="descending",
466
- show_default=True,
467
- )
468
- @click.option(
469
- "--threshold",
470
- "-t",
471
- metavar="<float>",
472
- default=float(0.0),
473
- required=False,
474
- help="Score threshold.",
475
- type=float,
476
- )
477
- @click.option(
478
- "--gene-analysis/--no-gene-analysis",
479
- default=False,
480
- required=False,
481
- type=bool,
482
- show_default=True,
483
- help="Specify analysis for gene prioritisation",
484
- )
485
- @click.option(
486
- "--variant-analysis/--no-variant-analysis",
487
- default=False,
488
- required=False,
489
- type=bool,
490
- show_default=True,
491
- help="Specify analysis for variant prioritisation",
492
- )
493
- @click.option(
494
- "--disease-analysis/--no-disease-analysis",
495
- default=False,
496
- required=False,
497
- type=bool,
498
- show_default=True,
499
- help="Specify analysis for disease prioritisation",
500
- )
501
- @click.option(
502
- "--plot-type",
503
- "-y",
504
- default="bar_cumulative",
505
- show_default=True,
506
- type=click.Choice(["bar_stacked", "bar_cumulative", "bar_non_cumulative"]),
507
- help="Bar chart type to output.",
508
- )
509
- def benchmark_comparison(
510
- run_data: Path,
511
- score_order: str,
512
- output_prefix: str,
513
- threshold: float,
514
- gene_analysis: bool,
515
- variant_analysis: bool,
516
- disease_analysis: bool,
517
- plot_type: str,
356
+ def generate_benchmark_stats(
357
+ run_yaml: Path,
518
358
  ):
519
- """Benchmark the gene/variant/disease prioritisation performance for two runs."""
520
- if not gene_analysis and not variant_analysis and not disease_analysis:
521
- raise InputError("Need to specify at least one of gene/variant/disease analysis.")
359
+ """Benchmark the gene/variant/disease prioritisation performance for runs."""
522
360
  benchmark_run_comparisons(
523
- parse_run_data_text_file(run_data),
524
- score_order,
525
- output_prefix,
526
- threshold,
527
- gene_analysis,
528
- variant_analysis,
529
- disease_analysis,
530
- plot_type,
361
+ parse_run_config(run_yaml),
531
362
  )
532
363
 
533
364
 
@@ -580,69 +411,27 @@ def semsim_to_exomiserdb_command(
580
411
 
581
412
  @click.command()
582
413
  @click.option(
583
- "--benchmarking-tsv",
414
+ "--benchmark-db",
584
415
  "-b",
585
416
  required=True,
586
417
  metavar="PATH",
587
- help="Path to benchmark summary tsv output by PhEval benchmark commands.",
418
+ help="Path to benchmark db output by PhEval benchmark commands.",
588
419
  type=Path,
589
420
  )
590
421
  @click.option(
591
- "--gene-analysis/--no-gene-analysis",
592
- default=False,
593
- required=False,
594
- type=bool,
595
- show_default=True,
596
- help="Specify analysis for gene prioritisation",
597
- cls=MutuallyExclusiveOptionError,
598
- mutually_exclusive=["variant_analysis", "disease_analysis"],
599
- )
600
- @click.option(
601
- "--variant-analysis/--no-variant-analysis",
602
- default=False,
603
- required=False,
604
- type=bool,
605
- show_default=True,
606
- help="Specify analysis for variant prioritisation",
607
- cls=MutuallyExclusiveOptionError,
608
- mutually_exclusive=["gene_analysis", "disease_analysis"],
609
- )
610
- @click.option(
611
- "--disease-analysis/--no-disease-analysis",
612
- default=False,
613
- required=False,
614
- type=bool,
615
- show_default=True,
616
- help="Specify analysis for disease prioritisation",
617
- cls=MutuallyExclusiveOptionError,
618
- mutually_exclusive=["gene_analysis", "variant_analysis"],
619
- )
620
- @click.option(
621
- "--plot-type",
622
- "-y",
623
- default="bar_cumulative",
624
- show_default=True,
625
- type=click.Choice(["bar_stacked", "bar_cumulative", "bar_non_cumulative"]),
626
- help="Bar chart type to output.",
627
- )
628
- @click.option(
629
- "--title",
630
- "-t",
631
- type=str,
632
- help='Title for plot, specify the title on the CLI enclosed with ""',
422
+ "--run-data",
423
+ "-r",
424
+ required=True,
425
+ metavar="PATH",
426
+ help="Path to yaml configuration file for benchmarking.",
427
+ type=Path,
633
428
  )
634
429
  def generate_stats_plot(
635
- benchmarking_tsv: Path,
636
- gene_analysis: bool,
637
- variant_analysis: bool,
638
- disease_analysis: bool,
639
- plot_type: str,
640
- title: str = None,
430
+ benchmark_db: Path,
431
+ run_data: Path,
641
432
  ):
642
- """Generate bar plot from benchmark stats summary tsv."""
643
- generate_plots_from_benchmark_summary_tsv(
644
- benchmarking_tsv, gene_analysis, variant_analysis, disease_analysis, plot_type, title
645
- )
433
+ """Generate bar plot from benchmark db."""
434
+ generate_plots_from_benchmark_summary_db(benchmark_db, run_data)
646
435
 
647
436
 
648
437
  @click.command("prepare-corpus")
@@ -15,15 +15,20 @@ from pheval.utils.phenopacket_utils import (
15
15
  )
16
16
 
17
17
 
18
- def load_ontology():
18
+ def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
19
19
  """
20
20
  Load the Human Phenotype Ontology (HPO).
21
-
21
+ Args:
22
+ local_cached_ontology(Path): Path to the local cached ontology.
22
23
  Returns:
23
24
  ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO.
24
25
  """
25
- resource = OntologyResource(slug="hp.obo", local=False)
26
- return ProntoImplementation(resource)
26
+ if local_cached_ontology is None:
27
+ resource = OntologyResource(slug="hp.obo", local=False)
28
+ return ProntoImplementation(resource)
29
+ else:
30
+ resource = OntologyResource(slug=local_cached_ontology, local=True)
31
+ return ProntoImplementation(resource)
27
32
 
28
33
 
29
34
  class HpoRandomiser:
@@ -181,78 +186,77 @@ class HpoRandomiser:
181
186
  + self.create_random_hpo_terms(number_of_scrambled_terms)
182
187
  )
183
188
 
189
+ def add_noise_to_phenotypic_profile(
190
+ self,
191
+ phenopacket: Union[Phenopacket, Family],
192
+ ) -> Union[Phenopacket, Family]:
193
+ """
194
+ Randomise the phenotypic profile of a Phenopacket or Family.
184
195
 
185
- def add_noise_to_phenotypic_profile(
186
- hpo_randomiser: HpoRandomiser,
187
- phenopacket: Union[Phenopacket, Family],
188
- ) -> Union[Phenopacket, Family]:
189
- """
190
- Randomise the phenotypic profile of a Phenopacket or Family.
191
-
192
- Args:
193
- hpo_randomiser (HpoRandomiser): An instance of HpoRandomiser used for randomisation.
194
- phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to be randomised.
195
-
196
- Returns:
197
- Union[Phenopacket, Family]: The randomised Phenopacket or Family.
198
- """
199
- phenotypic_features = PhenopacketUtil(phenopacket).observed_phenotypic_features()
200
- random_phenotypes = hpo_randomiser.randomise_hpo_terms(phenotypic_features)
201
- randomised_phenopacket = PhenopacketRebuilder(phenopacket).add_randomised_hpo(random_phenotypes)
202
- return randomised_phenopacket
203
-
196
+ Args:
197
+ phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to be randomised.
204
198
 
205
- def create_scrambled_phenopacket(
206
- output_dir: Path, phenopacket_path: Path, scramble_factor: float
207
- ) -> None:
208
- """
209
- Create a scrambled version of a Phenopacket.
199
+ Returns:
200
+ Union[Phenopacket, Family]: The randomised Phenopacket or Family.
201
+ """
202
+ phenotypic_features = PhenopacketUtil(phenopacket).observed_phenotypic_features()
203
+ random_phenotypes = self.randomise_hpo_terms(phenotypic_features)
204
+ randomised_phenopacket = PhenopacketRebuilder(phenopacket).add_randomised_hpo(
205
+ random_phenotypes
206
+ )
207
+ return randomised_phenopacket
210
208
 
211
- Args:
212
- output_dir (Path): The directory to store the output scrambled Phenopacket.
213
- phenopacket_path (Path): The path to the original Phenopacket file.
214
- scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
215
- """
216
- ontology = load_ontology()
217
- hpo_randomiser = HpoRandomiser(ontology, scramble_factor)
218
- phenopacket = phenopacket_reader(phenopacket_path)
219
- created_noisy_phenopacket = add_noise_to_phenotypic_profile(
220
- hpo_randomiser,
221
- phenopacket,
222
- )
223
- write_phenopacket(
224
- created_noisy_phenopacket,
225
- output_dir.joinpath(phenopacket_path.name),
226
- )
227
-
228
-
229
- def create_scrambled_phenopackets(
230
- output_dir: Path, phenopacket_dir: Path, scramble_factor: float
231
- ) -> None:
232
- """
233
- Create scrambled versions of Phenopackets within a directory.
209
+ def create_scrambled_phenopacket(
210
+ self,
211
+ output_dir: Path,
212
+ phenopacket_path: Path,
213
+ ) -> None:
214
+ """
215
+ Create a scrambled version of a Phenopacket.
234
216
 
235
- Args:
236
- output_dir (Path): The directory to store the output scrambled Phenopackets.
237
- phenopacket_dir (Path): The directory containing the original Phenopacket files.
238
- scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
239
- """
240
- ontology = load_ontology()
241
- hpo_randomiser = HpoRandomiser(ontology, scramble_factor)
242
- phenopacket_files = files_with_suffix(phenopacket_dir, ".json")
243
- for phenopacket_path in phenopacket_files:
217
+ Args:
218
+ output_dir (Path): The directory to store the output scrambled Phenopacket.
219
+ phenopacket_path (Path): The path to the original Phenopacket file.
220
+ """
244
221
  phenopacket = phenopacket_reader(phenopacket_path)
245
- created_noisy_phenopacket = add_noise_to_phenotypic_profile(hpo_randomiser, phenopacket)
222
+ created_noisy_phenopacket = self.add_noise_to_phenotypic_profile(
223
+ phenopacket,
224
+ )
246
225
  write_phenopacket(
247
226
  created_noisy_phenopacket,
248
- output_dir.joinpath(
249
- phenopacket_path.name,
250
- ),
227
+ output_dir.joinpath(phenopacket_path.name),
251
228
  )
252
229
 
230
+ def create_scrambled_phenopackets(
231
+ self,
232
+ output_dir: Path,
233
+ phenopacket_dir: Path,
234
+ ) -> None:
235
+ """
236
+ Create scrambled versions of Phenopackets within a directory.
237
+
238
+ Args:
239
+ output_dir (Path): The directory to store the output scrambled Phenopackets.
240
+ phenopacket_dir (Path): The directory containing the original Phenopacket files.
241
+ """
242
+ phenopacket_files = files_with_suffix(phenopacket_dir, ".json")
243
+ for phenopacket_path in phenopacket_files:
244
+ phenopacket = phenopacket_reader(phenopacket_path)
245
+ created_noisy_phenopacket = self.add_noise_to_phenotypic_profile(phenopacket)
246
+ write_phenopacket(
247
+ created_noisy_phenopacket,
248
+ output_dir.joinpath(
249
+ phenopacket_path.name,
250
+ ),
251
+ )
252
+
253
253
 
254
254
  def scramble_phenopackets(
255
- output_dir: Path, phenopacket_path: Path, phenopacket_dir: Path, scramble_factor: float
255
+ output_dir: Path,
256
+ phenopacket_path: Path,
257
+ phenopacket_dir: Path,
258
+ scramble_factor: float,
259
+ local_cached_ontology: Path,
256
260
  ) -> None:
257
261
  """
258
262
  Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets.
@@ -262,9 +266,16 @@ def scramble_phenopackets(
262
266
  phenopacket_path (Path): The path to a single Phenopacket file (if applicable).
263
267
  phenopacket_dir (Path): The directory containing multiple Phenopacket files (if applicable).
264
268
  scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
269
+ local_cached_ontology (Path): The path to the local cached ontology.
265
270
  """
266
271
  output_dir.mkdir(exist_ok=True)
272
+ ontology = load_ontology(local_cached_ontology)
267
273
  if phenopacket_path is not None:
268
- create_scrambled_phenopacket(output_dir, phenopacket_path, scramble_factor)
274
+ HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopacket(
275
+ output_dir, phenopacket_path
276
+ )
269
277
  elif phenopacket_dir is not None:
270
- create_scrambled_phenopackets(output_dir, phenopacket_dir, scramble_factor)
278
+ HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopackets(
279
+ output_dir,
280
+ phenopacket_dir,
281
+ )
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.1
2
+ Name: pheval
3
+ Version: 0.4.1
4
+ Summary:
5
+ Author: Yasemin Bridges
6
+ Author-email: y.bridges@qmul.ac.uk
7
+ Requires-Python: >=3.9,<4.0.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Dist: class-resolver (>=0.4.2)
15
+ Requires-Dist: click (>=8.1.3)
16
+ Requires-Dist: deprecation (>=2.1.0)
17
+ Requires-Dist: duckdb (>=1.0.0,<2.0.0)
18
+ Requires-Dist: google (>=3.0.0,<4.0.0)
19
+ Requires-Dist: jaydebeapi (>=1.2.3)
20
+ Requires-Dist: matplotlib (>=3.7.0,<4.0.0)
21
+ Requires-Dist: oaklib (>=0.5.6)
22
+ Requires-Dist: pandas (>=1.5.1)
23
+ Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
24
+ Requires-Dist: plotly (>=5.13.0,<6.0.0)
25
+ Requires-Dist: polars (>=0.19.15,<0.20.0)
26
+ Requires-Dist: pyaml (>=21.10.1,<22.0.0)
27
+ Requires-Dist: pyserde (>=0.9.8,<0.10.0)
28
+ Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
29
+ Requires-Dist: seaborn (>=0.12.2,<0.13.0)
30
+ Requires-Dist: tqdm (>=4.64.1)
31
+ Description-Content-Type: text/markdown
32
+
33
+ # PhEval - Phenotypic Inference Evaluation Framework
34
+
35
+ ![PyPI](https://img.shields.io/pypi/v/pheval)
36
+ ![Build Status](https://img.shields.io/github/actions/workflow/status/monarch-initiative/pheval/pypi-publish.yml?branch=main)
37
+ ![License](https://img.shields.io/github/license/monarch-initiative/pheval)
38
+ ![Python Version](https://img.shields.io/badge/python-3.9%2B-blue)
39
+ ![Issues](https://img.shields.io/github/issues/monarch-initiative/pheval)
40
+
41
+ ## Overview
42
+
43
+ The absence of standardised benchmarks and data standardisation for Variant and Gene Prioritisation Algorithms (VGPAs) presents a significant challenge in the field of genomic research. To address this, we developed PhEval, a novel framework designed to streamline the evaluation of VGPAs that incorporate phenotypic data. PhEval offers several key benefits:
44
+
45
+ - Automated Processes: Reduces manual effort by automating various evaluation tasks, thus enhancing efficiency.
46
+ - Standardisation: Ensures consistency and comparability in evaluation methodologies, leading to more reliable and standardised assessments.
47
+ - Reproducibility: Facilitates reproducibility in research by providing a standardised platform, allowing for consistent validation of algorithms.
48
+ - Comprehensive Benchmarking: Enables thorough benchmarking of algorithms, providing well-founded comparisons and deeper insights into their performance.
49
+
50
+ PhEval is a valuable tool for researchers looking to improve the accuracy and reliability of VGPA evaluations through a structured and standardised approach.
51
+
52
+ For more information please see the full [documentation](https://monarch-initiative.github.io/pheval/).
53
+
54
+ ## Download and Installation
55
+
56
+ 1. Ensure you have Python 3.9 or greater installed.
57
+ 2. Install with `pip`:
58
+ ```bash
59
+ pip install pheval
60
+ ```
61
+ 3. See list of all PhEval utility commands:
62
+ ```bash
63
+ pheval-utils --help
64
+ ```
65
+
66
+ ## Usage
67
+
68
+ The PhEval CLI offers a variety of commands categorised into two main types: **Runner Implementations** and **Utility Commands**. Below is an overview of each category, detailing how they can be utilised to perform various tasks within PhEval.
69
+
70
+ ### Runner Implementations
71
+
72
+ The primary command used within PhEval is `pheval run`. This command is responsible for executing concrete VGPA runner implementations, which we sometimes refer to as plugins. By using `pheval run`, users can leverage these runner implementations to: execute the VGPA on a set of test corpora, produce tool-specific result outputs, and post-process tool-specific outputs to PhEval standardised TSV outputs.
73
+
74
+ Some concrete PhEval runner implementations include the [Exomiser runner](https://github.com/monarch-initiative/pheval.exomiser) and the [Phen2Gene runner](https://github.com/monarch-initiative/pheval.phen2gene). The full list of currently implemented runners can be found [here](https://monarch-initiative.github.io/pheval/plugins/).
75
+
76
+ Please read the [documentation](https://monarch-initiative.github.io/pheval/developing_a_pheval_plugin/) for a step-by-step guide to creating your own PhEval plugin.
77
+
78
+ ### Utility Commands
79
+
80
+ In addition to the main `run` command, PhEval provides a set of utility commands designed to enhance the overall functionality of the CLI. These commands can be used to set up and configure experiments, streamline data preparation, and benchmark the performance of various VGPA runner implementations. By utilising these utilities, users can optimise their experimental workflows, ensure reproducibility, and compare the efficiency and accuracy of different approaches. The utility commands offer a range of options that facilitate customisation and fine-tuning to suit diverse research objectives.
81
+
82
+ #### Example Usage
83
+
84
+ To add noise to an existing corpus of phenopackets (this can be used to assess the robustness of VGPAs when less relevant or unreliable phenotype data is introduced):
85
+ ```bash
86
+ pheval-utils scramble-phenopackets --phenopacket-dir /phenopackets --scramble-factor 0.5 --output-dir /scrambled_phenopackets_0.5
87
+ ```
88
+
89
+ To update the gene symbols and identifiers to a specific namespace:
90
+ ```bash
91
+ pheval-utils update-phenopackets --phenopacket-dir /phenopackets --output-dir /updated_phenopackets --gene-identifier ensembl_id
92
+ ```
93
+
94
+ To prepare VCF files for a corpus of phenopackets, spiking in the known causative variants:
95
+ ```bash
96
+ pheval-utils create-spiked-vcfs --phenopacket-dir /phenopackets --hg19-template-vcf /template_hg19.vcf --hg38-template-vcf /template_hg38.vcf --output-dir /vcf
97
+ ```
98
+
99
+ Alternatively, you can wrap all corpus preparatory commands into a single step. Specifying `--variant-analysis`/`--gene-analysis`/`--disease-analysis` will check the phenopackets for complete records documenting the known entities. If template VCF(s) are provided, this will spike VCFs with the known variant for the corpus. If a `--gene-identifier` is specified, then the corpus of phenopackets is updated.
100
+ ```bash
101
+ pheval-utils prepare-corpus \
102
+ --phenopacket-dir /phenopackets \
103
+ --variant-analysis \
104
+ --gene-analysis \
105
+ --gene-identifier ensembl_id \
106
+ --hg19-template-vcf /template_hg19.vcf \
107
+ --hg38-template-vcf /template_hg38.vcf \
108
+ --output-dir /vcf
109
+ ```
110
+
111
+ See the [documentation](https://monarch-initiative.github.io/pheval/executing_a_benchmark/) for instructions on benchmarking and evaluating the performance of various VGPAs.
112
+
113
+