pheval 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/analyse/generate_plots.py +6 -3
- pheval/cli.py +2 -0
- pheval/cli_pheval_utils.py +124 -14
- pheval/prepare/create_spiked_vcf.py +132 -95
- pheval/prepare/prepare_corpus.py +67 -0
- pheval/prepare/update_phenopacket.py +1 -2
- pheval/utils/phenopacket_utils.py +69 -7
- {pheval-0.3.2.dist-info → pheval-0.3.4.dist-info}/METADATA +1 -1
- {pheval-0.3.2.dist-info → pheval-0.3.4.dist-info}/RECORD +12 -11
- {pheval-0.3.2.dist-info → pheval-0.3.4.dist-info}/LICENSE +0 -0
- {pheval-0.3.2.dist-info → pheval-0.3.4.dist-info}/WHEEL +0 -0
- {pheval-0.3.2.dist-info → pheval-0.3.4.dist-info}/entry_points.txt +0 -0
pheval/analyse/generate_plots.py
CHANGED
|
@@ -482,6 +482,7 @@ def generate_plots(
|
|
|
482
482
|
benchmark_generator: BenchmarkRunOutputGenerator,
|
|
483
483
|
plot_type: str,
|
|
484
484
|
title: str = None,
|
|
485
|
+
generate_from_tsv: bool = False,
|
|
485
486
|
) -> None:
|
|
486
487
|
"""
|
|
487
488
|
Generate summary statistics bar plots for prioritisation.
|
|
@@ -493,10 +494,12 @@ def generate_plots(
|
|
|
493
494
|
benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
|
|
494
495
|
plot_type (str): Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").
|
|
495
496
|
title (str, optional): Title for the generated plot. Defaults to None.
|
|
497
|
+
generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False.
|
|
496
498
|
"""
|
|
497
499
|
plot_generator = PlotGenerator()
|
|
498
|
-
|
|
499
|
-
|
|
500
|
+
if not generate_from_tsv:
|
|
501
|
+
plot_generator.generate_roc_curve(benchmarking_results, benchmark_generator)
|
|
502
|
+
plot_generator.generate_precision_recall(benchmarking_results, benchmark_generator)
|
|
500
503
|
if plot_type == "bar_stacked":
|
|
501
504
|
plot_generator.generate_stacked_bar_plot(benchmarking_results, benchmark_generator, title)
|
|
502
505
|
elif plot_type == "bar_cumulative":
|
|
@@ -541,4 +544,4 @@ def generate_plots_from_benchmark_summary_tsv(
|
|
|
541
544
|
raise ValueError(
|
|
542
545
|
"Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)"
|
|
543
546
|
)
|
|
544
|
-
generate_plots(benchmarking_results, benchmark_generator, plot_type, title)
|
|
547
|
+
generate_plots(benchmarking_results, benchmark_generator, plot_type, title, True)
|
pheval/cli.py
CHANGED
|
@@ -10,6 +10,7 @@ from .cli_pheval_utils import (
|
|
|
10
10
|
benchmark_comparison,
|
|
11
11
|
create_spiked_vcfs_command,
|
|
12
12
|
generate_stats_plot,
|
|
13
|
+
prepare_corpus_command,
|
|
13
14
|
scramble_phenopackets_command,
|
|
14
15
|
semsim_scramble_command,
|
|
15
16
|
semsim_to_exomiserdb_command,
|
|
@@ -60,6 +61,7 @@ pheval_utils.add_command(benchmark)
|
|
|
60
61
|
pheval_utils.add_command(benchmark_comparison)
|
|
61
62
|
pheval_utils.add_command(semsim_to_exomiserdb_command)
|
|
62
63
|
pheval_utils.add_command(generate_stats_plot)
|
|
64
|
+
pheval_utils.add_command(prepare_corpus_command)
|
|
63
65
|
|
|
64
66
|
if __name__ == "__main__":
|
|
65
67
|
main()
|
pheval/cli_pheval_utils.py
CHANGED
|
@@ -15,6 +15,7 @@ from pheval.analyse.run_data_parser import parse_run_data_text_file
|
|
|
15
15
|
from pheval.prepare.create_noisy_phenopackets import scramble_phenopackets
|
|
16
16
|
from pheval.prepare.create_spiked_vcf import spike_vcfs
|
|
17
17
|
from pheval.prepare.custom_exceptions import InputError, MutuallyExclusiveOptionError
|
|
18
|
+
from pheval.prepare.prepare_corpus import prepare_corpus
|
|
18
19
|
from pheval.prepare.update_phenopacket import update_phenopackets
|
|
19
20
|
from pheval.utils.exomiser import semsim_to_exomiserdb
|
|
20
21
|
from pheval.utils.semsim_utils import percentage_diff, semsim_heatmap_plot
|
|
@@ -253,22 +254,19 @@ def update_phenopackets_command(
|
|
|
253
254
|
mutually_exclusive=["phenopacket_path"],
|
|
254
255
|
)
|
|
255
256
|
@click.option(
|
|
256
|
-
"--template-vcf
|
|
257
|
-
"-
|
|
258
|
-
cls=MutuallyExclusiveOptionError,
|
|
257
|
+
"--hg19-template-vcf",
|
|
258
|
+
"-hg19",
|
|
259
259
|
metavar="PATH",
|
|
260
260
|
required=False,
|
|
261
|
-
help="Template VCF file",
|
|
262
|
-
mutually_exclusive=["vcf_dir"],
|
|
261
|
+
help="Template hg19 VCF file",
|
|
263
262
|
type=Path,
|
|
264
263
|
)
|
|
265
264
|
@click.option(
|
|
266
|
-
"--vcf
|
|
267
|
-
"-
|
|
268
|
-
cls=MutuallyExclusiveOptionError,
|
|
265
|
+
"--hg38-template-vcf",
|
|
266
|
+
"-hg38",
|
|
269
267
|
metavar="PATH",
|
|
270
|
-
|
|
271
|
-
|
|
268
|
+
required=False,
|
|
269
|
+
help="Template hg38 VCF file",
|
|
272
270
|
type=Path,
|
|
273
271
|
)
|
|
274
272
|
@click.option(
|
|
@@ -284,13 +282,22 @@ def create_spiked_vcfs_command(
|
|
|
284
282
|
phenopacket_path: Path,
|
|
285
283
|
phenopacket_dir: Path,
|
|
286
284
|
output_dir: Path,
|
|
287
|
-
|
|
288
|
-
|
|
285
|
+
hg19_template_vcf: Path = None,
|
|
286
|
+
hg38_template_vcf: Path = None,
|
|
289
287
|
):
|
|
290
|
-
"""
|
|
288
|
+
"""
|
|
289
|
+
Create spiked VCF from either a Phenopacket or a Phenopacket directory.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
phenopacket_path (Path): Path to a single Phenopacket file (optional).
|
|
293
|
+
phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional).
|
|
294
|
+
output_dir (Path): The directory to store the generated spiked VCF file(s).
|
|
295
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
296
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
297
|
+
"""
|
|
291
298
|
if phenopacket_path is None and phenopacket_dir is None:
|
|
292
299
|
raise InputError("Either a phenopacket or phenopacket directory must be specified")
|
|
293
|
-
spike_vcfs(output_dir, phenopacket_path, phenopacket_dir,
|
|
300
|
+
spike_vcfs(output_dir, phenopacket_path, phenopacket_dir, hg19_template_vcf, hg38_template_vcf)
|
|
294
301
|
|
|
295
302
|
|
|
296
303
|
@click.command()
|
|
@@ -600,3 +607,106 @@ def generate_stats_plot(
|
|
|
600
607
|
generate_plots_from_benchmark_summary_tsv(
|
|
601
608
|
benchmarking_tsv, gene_analysis, variant_analysis, disease_analysis, plot_type, title
|
|
602
609
|
)
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
@click.command("prepare-corpus")
|
|
613
|
+
@click.option(
|
|
614
|
+
"--phenopacket-dir",
|
|
615
|
+
"-p",
|
|
616
|
+
required=True,
|
|
617
|
+
metavar="PATH",
|
|
618
|
+
help="Path to phenopacket corpus directory..",
|
|
619
|
+
type=Path,
|
|
620
|
+
)
|
|
621
|
+
@click.option(
|
|
622
|
+
"--variant-analysis/--no-variant-analysis",
|
|
623
|
+
default=False,
|
|
624
|
+
required=False,
|
|
625
|
+
type=bool,
|
|
626
|
+
show_default=True,
|
|
627
|
+
help="Specify whether to check for complete variant records in the phenopackets.",
|
|
628
|
+
)
|
|
629
|
+
@click.option(
|
|
630
|
+
"--gene-analysis/--no-gene-analysis",
|
|
631
|
+
default=False,
|
|
632
|
+
required=False,
|
|
633
|
+
type=bool,
|
|
634
|
+
show_default=True,
|
|
635
|
+
help="Specify whether to check for complete gene records in the phenopackets.",
|
|
636
|
+
)
|
|
637
|
+
@click.option(
|
|
638
|
+
"--disease-analysis/--no-disease-analysis",
|
|
639
|
+
default=False,
|
|
640
|
+
required=False,
|
|
641
|
+
type=bool,
|
|
642
|
+
show_default=True,
|
|
643
|
+
help="Specify whether to check for complete disease records in the phenopackets.",
|
|
644
|
+
)
|
|
645
|
+
@click.option(
|
|
646
|
+
"--gene-identifier",
|
|
647
|
+
"-g",
|
|
648
|
+
required=False,
|
|
649
|
+
help="Gene identifier to update in phenopacket",
|
|
650
|
+
type=click.Choice(["ensembl_id", "entrez_id", "hgnc_id"]),
|
|
651
|
+
)
|
|
652
|
+
@click.option(
|
|
653
|
+
"--hg19-template-vcf",
|
|
654
|
+
"-hg19",
|
|
655
|
+
metavar="PATH",
|
|
656
|
+
required=False,
|
|
657
|
+
help="Template hg19 VCF file",
|
|
658
|
+
type=Path,
|
|
659
|
+
)
|
|
660
|
+
@click.option(
|
|
661
|
+
"--hg38-template-vcf",
|
|
662
|
+
"-hg38",
|
|
663
|
+
metavar="PATH",
|
|
664
|
+
required=False,
|
|
665
|
+
help="Template hg38 VCF file",
|
|
666
|
+
type=Path,
|
|
667
|
+
)
|
|
668
|
+
@click.option(
|
|
669
|
+
"--output-dir",
|
|
670
|
+
"-o",
|
|
671
|
+
metavar="PATH",
|
|
672
|
+
required=True,
|
|
673
|
+
help="Path to output prepared corpus.",
|
|
674
|
+
default="prepared_corpus",
|
|
675
|
+
type=Path,
|
|
676
|
+
)
|
|
677
|
+
def prepare_corpus_command(
|
|
678
|
+
phenopacket_dir: Path,
|
|
679
|
+
variant_analysis: bool,
|
|
680
|
+
gene_analysis: bool,
|
|
681
|
+
disease_analysis: bool,
|
|
682
|
+
gene_identifier: str,
|
|
683
|
+
hg19_template_vcf: Path,
|
|
684
|
+
hg38_template_vcf: Path,
|
|
685
|
+
output_dir: Path,
|
|
686
|
+
):
|
|
687
|
+
"""
|
|
688
|
+
Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating
|
|
689
|
+
gene identifiers.
|
|
690
|
+
|
|
691
|
+
Args:
|
|
692
|
+
phenopacket_dir (Path): The path to the directory containing Phenopackets.
|
|
693
|
+
variant_analysis (bool): If True, check for complete variant records in the Phenopackets.
|
|
694
|
+
gene_analysis (bool): If True, check for complete gene records in the Phenopackets.
|
|
695
|
+
disease_analysis (bool): If True, check for complete disease records in the Phenopackets.
|
|
696
|
+
gene_identifier (str): Identifier for updating gene identifiers, if applicable.
|
|
697
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional), to spike variants into
|
|
698
|
+
VCFs for variant-based analysis; at least one of hg19_template_vcf or hg38_template_vcf is required.
|
|
699
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional), to spike variants into
|
|
700
|
+
VCFs for variant-based analysis; at least one of hg19_template_vcf or hg38_template_vcf is required.
|
|
701
|
+
output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files.
|
|
702
|
+
"""
|
|
703
|
+
prepare_corpus(
|
|
704
|
+
phenopacket_dir,
|
|
705
|
+
variant_analysis,
|
|
706
|
+
gene_analysis,
|
|
707
|
+
disease_analysis,
|
|
708
|
+
gene_identifier,
|
|
709
|
+
hg19_template_vcf,
|
|
710
|
+
hg38_template_vcf,
|
|
711
|
+
output_dir,
|
|
712
|
+
)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import gzip
|
|
2
2
|
import logging
|
|
3
3
|
import re
|
|
4
|
-
import secrets
|
|
5
4
|
import urllib.parse
|
|
6
5
|
from copy import copy
|
|
7
6
|
from dataclasses import dataclass
|
|
@@ -10,6 +9,8 @@ from typing import List, Union
|
|
|
10
9
|
|
|
11
10
|
from phenopackets import Family, File, Phenopacket
|
|
12
11
|
|
|
12
|
+
from pheval.prepare.custom_exceptions import InputError
|
|
13
|
+
from pheval.utils.file_utils import files_with_suffix, is_gzipped
|
|
13
14
|
from pheval.utils.phenopacket_utils import (
|
|
14
15
|
IncompatibleGenomeAssemblyError,
|
|
15
16
|
PhenopacketRebuilder,
|
|
@@ -19,9 +20,6 @@ from pheval.utils.phenopacket_utils import (
|
|
|
19
20
|
write_phenopacket,
|
|
20
21
|
)
|
|
21
22
|
|
|
22
|
-
from .custom_exceptions import InputError
|
|
23
|
-
from ..utils.file_utils import all_files, files_with_suffix, is_gzipped
|
|
24
|
-
|
|
25
23
|
info_log = logging.getLogger("info")
|
|
26
24
|
|
|
27
25
|
genome_assemblies = {
|
|
@@ -91,39 +89,6 @@ class VcfHeader:
|
|
|
91
89
|
chr_status: bool
|
|
92
90
|
|
|
93
91
|
|
|
94
|
-
class VcfPicker:
|
|
95
|
-
"""Choose a VCF file randomly from a directory if provided, otherwise selects the single template."""
|
|
96
|
-
|
|
97
|
-
def __init__(self, template_vcf: Path or None, vcf_dir: Path or None):
|
|
98
|
-
"""
|
|
99
|
-
Initialise the VcfPicker.
|
|
100
|
-
|
|
101
|
-
Args:
|
|
102
|
-
template_vcf (Path or None): The path to a template VCF file, or None if not provided.
|
|
103
|
-
vcf_dir (Path or None): The directory containing VCF files, or None if not provided.
|
|
104
|
-
"""
|
|
105
|
-
self.template_vcf = template_vcf
|
|
106
|
-
self.vcf_dir = vcf_dir
|
|
107
|
-
|
|
108
|
-
def pick_file_from_dir(self) -> Path:
|
|
109
|
-
"""
|
|
110
|
-
Selects a VCF file from a directory at random.
|
|
111
|
-
|
|
112
|
-
Returns:
|
|
113
|
-
Path: The randomly selected VCF file path from the directory.
|
|
114
|
-
"""
|
|
115
|
-
return secrets.choice(all_files(self.vcf_dir))
|
|
116
|
-
|
|
117
|
-
def pick_file(self) -> Path:
|
|
118
|
-
"""
|
|
119
|
-
Select a VCF file randomly when given a directory; if not, the template VCF is assigned.
|
|
120
|
-
|
|
121
|
-
Returns:
|
|
122
|
-
Path: The selected VCF file path.
|
|
123
|
-
"""
|
|
124
|
-
return self.pick_file_from_dir() if self.vcf_dir is not None else self.template_vcf
|
|
125
|
-
|
|
126
|
-
|
|
127
92
|
def read_vcf(vcf_file: Path) -> List[str]:
|
|
128
93
|
"""
|
|
129
94
|
Read the contents of a VCF file into memory, handling both uncompressed and gzipped files.
|
|
@@ -206,6 +171,72 @@ class VcfHeaderParser:
|
|
|
206
171
|
return VcfHeader(sample_id, assembly, chr_status)
|
|
207
172
|
|
|
208
173
|
|
|
174
|
+
@dataclass
|
|
175
|
+
class VcfFile:
|
|
176
|
+
"""
|
|
177
|
+
Represents a VCF file with its name, contents, and header information.
|
|
178
|
+
|
|
179
|
+
Attributes:
|
|
180
|
+
vcf_file_name (str): The name of the VCF file.
|
|
181
|
+
vcf_contents (List[str]): The contents of the VCF file.
|
|
182
|
+
vcf_header (VcfHeader): The parsed header information of the VCF file.
|
|
183
|
+
"""
|
|
184
|
+
|
|
185
|
+
vcf_file_name: str = None
|
|
186
|
+
vcf_contents: List[str] = None
|
|
187
|
+
vcf_header: VcfHeader = None
|
|
188
|
+
|
|
189
|
+
@staticmethod
|
|
190
|
+
def populate_fields(template_vcf: Path):
|
|
191
|
+
"""
|
|
192
|
+
Populate the fields of the VcfFile instance using the contents of a template VCF file.
|
|
193
|
+
|
|
194
|
+
Args:
|
|
195
|
+
template_vcf (Path): The path to the template VCF file.
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
VcfFile: An instance of VcfFile with populated fields.
|
|
199
|
+
|
|
200
|
+
"""
|
|
201
|
+
contents = read_vcf(template_vcf)
|
|
202
|
+
return VcfFile(template_vcf.name, contents, VcfHeaderParser(contents).parse_vcf_header())
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def select_vcf_template(
|
|
206
|
+
phenopacket_path: Path,
|
|
207
|
+
proband_causative_variants: List[ProbandCausativeVariant],
|
|
208
|
+
hg19_vcf_info: VcfFile,
|
|
209
|
+
hg38_vcf_info: VcfFile,
|
|
210
|
+
) -> VcfFile:
|
|
211
|
+
"""
|
|
212
|
+
Select the appropriate VCF template based on the assembly information of the proband causative variants.
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
phenopacket_path (Path): The path to the Phenopacket file.
|
|
216
|
+
proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband.
|
|
217
|
+
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
218
|
+
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
VcfFile: The selected VCF template file based on the assembly information of the proband causative variants.
|
|
222
|
+
|
|
223
|
+
"""
|
|
224
|
+
if proband_causative_variants[0].assembly in ["hg19", "GRCh37"]:
|
|
225
|
+
if hg19_vcf_info:
|
|
226
|
+
return hg19_vcf_info
|
|
227
|
+
else:
|
|
228
|
+
raise InputError("Must specify hg19 template VCF!")
|
|
229
|
+
elif proband_causative_variants[0].assembly in ["hg38", "GRCh38"]:
|
|
230
|
+
if hg38_vcf_info:
|
|
231
|
+
return hg38_vcf_info
|
|
232
|
+
else:
|
|
233
|
+
raise InputError("Must specify hg38 template VCF!")
|
|
234
|
+
else:
|
|
235
|
+
raise IncompatibleGenomeAssemblyError(
|
|
236
|
+
proband_causative_variants[0].assembly, phenopacket_path
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
|
|
209
240
|
def check_variant_assembly(
|
|
210
241
|
proband_causative_variants: list[ProbandCausativeVariant],
|
|
211
242
|
vcf_header: VcfHeader,
|
|
@@ -229,7 +260,13 @@ def check_variant_assembly(
|
|
|
229
260
|
raise ValueError("Too many genome assemblies!")
|
|
230
261
|
if phenopacket_assembly[0] not in compatible_genome_assembly:
|
|
231
262
|
raise IncompatibleGenomeAssemblyError(phenopacket_assembly, phenopacket_path)
|
|
232
|
-
if
|
|
263
|
+
if (
|
|
264
|
+
phenopacket_assembly[0] in {"hg19", "GRCh37"}
|
|
265
|
+
and vcf_header.assembly not in {"hg19", "GRCh37"}
|
|
266
|
+
) or (
|
|
267
|
+
phenopacket_assembly[0] in {"hg38", "GRCh38"}
|
|
268
|
+
and vcf_header.assembly not in {"hg38", "GRCh38"}
|
|
269
|
+
):
|
|
233
270
|
raise IncompatibleGenomeAssemblyError(
|
|
234
271
|
assembly=phenopacket_assembly, phenopacket=phenopacket_path
|
|
235
272
|
)
|
|
@@ -387,7 +424,8 @@ class VcfWriter:
|
|
|
387
424
|
def spike_vcf_contents(
|
|
388
425
|
phenopacket: Union[Phenopacket, Family],
|
|
389
426
|
phenopacket_path: Path,
|
|
390
|
-
|
|
427
|
+
hg19_vcf_info: VcfFile,
|
|
428
|
+
hg38_vcf_info: VcfFile,
|
|
391
429
|
) -> tuple[str, List[str]]:
|
|
392
430
|
"""
|
|
393
431
|
Spike VCF records with variants obtained from a Phenopacket or Family.
|
|
@@ -395,22 +433,28 @@ def spike_vcf_contents(
|
|
|
395
433
|
Args:
|
|
396
434
|
phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants.
|
|
397
435
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
398
|
-
|
|
436
|
+
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
437
|
+
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
399
438
|
|
|
400
439
|
Returns:
|
|
401
440
|
A tuple containing:
|
|
402
441
|
assembly (str): The genome assembly information extracted from VCF header.
|
|
403
442
|
modified_vcf_contents (List[str]): Modified VCF records with spiked variants.
|
|
404
443
|
"""
|
|
405
|
-
# this is a separate function to a click command as it will fail if annotated with click annotations
|
|
406
|
-
# and referenced from another click command
|
|
407
444
|
phenopacket_causative_variants = PhenopacketUtil(phenopacket).causative_variants()
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
445
|
+
chosen_template_vcf = select_vcf_template(
|
|
446
|
+
phenopacket_path, phenopacket_causative_variants, hg19_vcf_info, hg38_vcf_info
|
|
447
|
+
)
|
|
448
|
+
check_variant_assembly(
|
|
449
|
+
phenopacket_causative_variants, chosen_template_vcf.vcf_header, phenopacket_path
|
|
450
|
+
)
|
|
411
451
|
return (
|
|
412
|
-
vcf_header.assembly,
|
|
413
|
-
VcfSpiker(
|
|
452
|
+
chosen_template_vcf.vcf_header.assembly,
|
|
453
|
+
VcfSpiker(
|
|
454
|
+
chosen_template_vcf.vcf_contents,
|
|
455
|
+
phenopacket_causative_variants,
|
|
456
|
+
chosen_template_vcf.vcf_header,
|
|
457
|
+
).construct_vcf(),
|
|
414
458
|
)
|
|
415
459
|
|
|
416
460
|
|
|
@@ -418,7 +462,8 @@ def generate_spiked_vcf_file(
|
|
|
418
462
|
output_dir: Path,
|
|
419
463
|
phenopacket: Union[Phenopacket, Family],
|
|
420
464
|
phenopacket_path: Path,
|
|
421
|
-
|
|
465
|
+
hg19_vcf_info: VcfFile,
|
|
466
|
+
hg38_vcf_info: VcfFile,
|
|
422
467
|
) -> File:
|
|
423
468
|
"""
|
|
424
469
|
Write spiked VCF contents to a new file.
|
|
@@ -427,21 +472,17 @@ def generate_spiked_vcf_file(
|
|
|
427
472
|
output_dir (Path): Path to the directory to store the generated file.
|
|
428
473
|
phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants.
|
|
429
474
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
430
|
-
|
|
431
|
-
|
|
475
|
+
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
476
|
+
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
432
477
|
Returns:
|
|
433
478
|
File: The generated File object representing the newly created spiked VCF file.
|
|
434
479
|
"""
|
|
435
480
|
output_dir.mkdir(exist_ok=True)
|
|
436
481
|
info_log.info(f" Created a directory {output_dir}")
|
|
437
482
|
vcf_assembly, spiked_vcf = spike_vcf_contents(
|
|
438
|
-
phenopacket, phenopacket_path,
|
|
439
|
-
)
|
|
440
|
-
spiked_vcf_path = (
|
|
441
|
-
output_dir.joinpath(phenopacket_path.name.replace(".json", ".vcf.gz"))
|
|
442
|
-
if is_gzipped(chosen_template_vcf)
|
|
443
|
-
else output_dir.joinpath(phenopacket_path.name.replace(".json", ".vcf"))
|
|
483
|
+
phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info
|
|
444
484
|
)
|
|
485
|
+
spiked_vcf_path = output_dir.joinpath(phenopacket_path.name.replace(".json", ".vcf.gz"))
|
|
445
486
|
VcfWriter(spiked_vcf, spiked_vcf_path).write_vcf_file()
|
|
446
487
|
return File(
|
|
447
488
|
uri=urllib.parse.unquote(spiked_vcf_path.as_uri()),
|
|
@@ -449,8 +490,19 @@ def generate_spiked_vcf_file(
|
|
|
449
490
|
)
|
|
450
491
|
|
|
451
492
|
|
|
493
|
+
def spike_and_update_phenopacket(hg19_vcf_info, hg38_vcf_info, output_dir, phenopacket_path):
|
|
494
|
+
phenopacket = phenopacket_reader(phenopacket_path)
|
|
495
|
+
spiked_vcf_file_message = generate_spiked_vcf_file(
|
|
496
|
+
output_dir, phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info
|
|
497
|
+
)
|
|
498
|
+
updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
|
|
499
|
+
spiked_vcf_file_message
|
|
500
|
+
)
|
|
501
|
+
write_phenopacket(updated_phenopacket, phenopacket_path)
|
|
502
|
+
|
|
503
|
+
|
|
452
504
|
def create_spiked_vcf(
|
|
453
|
-
output_dir: Path, phenopacket_path: Path,
|
|
505
|
+
output_dir: Path, phenopacket_path: Path, hg19_template_vcf: Path, hg38_template_vcf: Path
|
|
454
506
|
) -> None:
|
|
455
507
|
"""
|
|
456
508
|
Create a spiked VCF for a Phenopacket.
|
|
@@ -458,27 +510,21 @@ def create_spiked_vcf(
|
|
|
458
510
|
Args:
|
|
459
511
|
output_dir (Path): The directory to store the generated spiked VCF file.
|
|
460
512
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
461
|
-
|
|
462
|
-
|
|
513
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
514
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
463
515
|
|
|
464
516
|
Raises:
|
|
465
|
-
InputError: If both
|
|
517
|
+
InputError: If both hg19_template_vcf and hg38_template_vcf are None.
|
|
466
518
|
"""
|
|
467
|
-
if
|
|
468
|
-
raise InputError("Either a
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
output_dir, phenopacket, phenopacket_path, vcf_file_path
|
|
473
|
-
)
|
|
474
|
-
updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
|
|
475
|
-
spiked_vcf_file_message
|
|
476
|
-
)
|
|
477
|
-
write_phenopacket(updated_phenopacket, phenopacket_path)
|
|
519
|
+
if hg19_template_vcf is None and hg38_template_vcf is None:
|
|
520
|
+
raise InputError("Either a hg19 template vcf or hg38 template vcf must be specified")
|
|
521
|
+
hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
|
|
522
|
+
hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
|
|
523
|
+
spike_and_update_phenopacket(hg19_vcf_info, hg38_vcf_info, output_dir, phenopacket_path)
|
|
478
524
|
|
|
479
525
|
|
|
480
526
|
def create_spiked_vcfs(
|
|
481
|
-
output_dir: Path, phenopacket_dir: Path,
|
|
527
|
+
output_dir: Path, phenopacket_dir: Path, hg19_template_vcf: Path, hg38_template_vcf: Path
|
|
482
528
|
) -> None:
|
|
483
529
|
"""
|
|
484
530
|
Create a spiked VCF for a directory of Phenopackets.
|
|
@@ -486,35 +532,26 @@ def create_spiked_vcfs(
|
|
|
486
532
|
Args:
|
|
487
533
|
output_dir (Path): The directory to store the generated spiked VCF file.
|
|
488
534
|
phenopacket_dir (Path): Path to the Phenopacket directory.
|
|
489
|
-
|
|
490
|
-
|
|
535
|
+
hg19_template_vcf (Path): Path to the template hg19 VCF file (optional).
|
|
536
|
+
hg38_template_vcf (Path): Path to the template hg38 VCF file (optional).
|
|
491
537
|
|
|
492
538
|
Raises:
|
|
493
|
-
InputError: If both
|
|
539
|
+
InputError: If both hg19_template_vcf and hg38_template_vcf are None.
|
|
494
540
|
"""
|
|
495
|
-
if
|
|
496
|
-
raise InputError("Either a
|
|
541
|
+
if hg19_template_vcf is None and hg38_template_vcf is None:
|
|
542
|
+
raise InputError("Either a hg19 template vcf or hg38 template vcf must be specified")
|
|
543
|
+
hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
|
|
544
|
+
hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
|
|
497
545
|
for phenopacket_path in files_with_suffix(phenopacket_dir, ".json"):
|
|
498
|
-
|
|
499
|
-
phenopacket = phenopacket_reader(phenopacket_path)
|
|
500
|
-
spiked_vcf_file_message = generate_spiked_vcf_file(
|
|
501
|
-
output_dir, phenopacket, phenopacket_path, vcf_file_path
|
|
502
|
-
)
|
|
503
|
-
updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
|
|
504
|
-
spiked_vcf_file_message
|
|
505
|
-
)
|
|
506
|
-
write_phenopacket(updated_phenopacket, phenopacket_path)
|
|
507
|
-
# or made a lambda one-liner for maximum wtf...
|
|
508
|
-
# [spike_vcf(path, output_dir, template_vcf, vcf_dir) for path in phenopacket_dir.iterdir() if path.suffix ==
|
|
509
|
-
# ".json"]
|
|
546
|
+
spike_and_update_phenopacket(hg19_vcf_info, hg38_vcf_info, output_dir, phenopacket_path)
|
|
510
547
|
|
|
511
548
|
|
|
512
549
|
def spike_vcfs(
|
|
513
550
|
output_dir: Path,
|
|
514
551
|
phenopacket_path: Path,
|
|
515
552
|
phenopacket_dir: Path,
|
|
516
|
-
|
|
517
|
-
|
|
553
|
+
hg19_template_vcf: Path,
|
|
554
|
+
hg38_template_vcf: Path,
|
|
518
555
|
) -> None:
|
|
519
556
|
"""
|
|
520
557
|
Create spiked VCF from either a Phenopacket or a Phenopacket directory.
|
|
@@ -523,10 +560,10 @@ def spike_vcfs(
|
|
|
523
560
|
output_dir (Path): The directory to store the generated spiked VCF file(s).
|
|
524
561
|
phenopacket_path (Path): Path to a single Phenopacket file (optional).
|
|
525
562
|
phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional).
|
|
526
|
-
|
|
527
|
-
|
|
563
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
564
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
528
565
|
"""
|
|
529
566
|
if phenopacket_path is not None:
|
|
530
|
-
create_spiked_vcf(output_dir, phenopacket_path,
|
|
567
|
+
create_spiked_vcf(output_dir, phenopacket_path, hg19_template_vcf, hg38_template_vcf)
|
|
531
568
|
elif phenopacket_dir is not None:
|
|
532
|
-
create_spiked_vcfs(output_dir, phenopacket_dir,
|
|
569
|
+
create_spiked_vcfs(output_dir, phenopacket_dir, hg19_template_vcf, hg38_template_vcf)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from pheval.prepare.create_spiked_vcf import create_spiked_vcf
|
|
5
|
+
from pheval.prepare.update_phenopacket import create_updated_phenopacket
|
|
6
|
+
from pheval.utils.file_utils import all_files
|
|
7
|
+
from pheval.utils.phenopacket_utils import PhenopacketUtil, phenopacket_reader
|
|
8
|
+
|
|
9
|
+
info_log = logging.getLogger("info")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def prepare_corpus(
|
|
13
|
+
phenopacket_dir: Path,
|
|
14
|
+
variant_analysis: bool,
|
|
15
|
+
gene_analysis: bool,
|
|
16
|
+
disease_analysis: bool,
|
|
17
|
+
gene_identifier: str,
|
|
18
|
+
hg19_template_vcf: Path,
|
|
19
|
+
hg38_template_vcf: Path,
|
|
20
|
+
output_dir: Path,
|
|
21
|
+
) -> None:
|
|
22
|
+
"""
|
|
23
|
+
Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating
|
|
24
|
+
gene identifiers.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
phenopacket_dir (Path): The path to the directory containing Phenopackets.
|
|
28
|
+
variant_analysis (bool): If True, check for complete variant records in the Phenopackets.
|
|
29
|
+
gene_analysis (bool): If True, check for complete gene records in the Phenopackets.
|
|
30
|
+
disease_analysis (bool): If True, check for complete disease records in the Phenopackets.
|
|
31
|
+
gene_identifier (str): Identifier for updating gene identifiers, if applicable.
|
|
32
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional), to spike variants into
|
|
33
|
+
VCFs for variant-based analysis; at least one of hg19_template_vcf or hg38_template_vcf is required.
|
|
34
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional), to spike variants into
|
|
35
|
+
VCFs for variant-based analysis; at least one of hg19_template_vcf or hg38_template_vcf is required.
|
|
36
|
+
output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files.
|
|
37
|
+
"""
|
|
38
|
+
output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
|
|
39
|
+
for phenopacket_path in all_files(phenopacket_dir):
|
|
40
|
+
phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
|
|
41
|
+
if variant_analysis:
|
|
42
|
+
if phenopacket_util.check_incomplete_variant_record():
|
|
43
|
+
info_log.warning(
|
|
44
|
+
f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
|
|
45
|
+
)
|
|
46
|
+
continue
|
|
47
|
+
if gene_analysis:
|
|
48
|
+
if phenopacket_util.check_incomplete_gene_record():
|
|
49
|
+
info_log.warning(
|
|
50
|
+
f"Removed {phenopacket_path.name} from the corpus due to missing gene fields."
|
|
51
|
+
)
|
|
52
|
+
continue
|
|
53
|
+
if disease_analysis:
|
|
54
|
+
if phenopacket_util.check_incomplete_disease_record():
|
|
55
|
+
info_log.warning(
|
|
56
|
+
f"Removed {phenopacket_path.name} from the corpus due to missing disease fields."
|
|
57
|
+
)
|
|
58
|
+
continue
|
|
59
|
+
if gene_identifier:
|
|
60
|
+
create_updated_phenopacket(
|
|
61
|
+
gene_identifier, phenopacket_path, output_dir.joinpath("phenopackets")
|
|
62
|
+
)
|
|
63
|
+
if hg19_template_vcf or hg38_template_vcf:
|
|
64
|
+
output_dir.joinpath("vcf").mkdir(exist_ok=True)
|
|
65
|
+
create_spiked_vcf(
|
|
66
|
+
output_dir.joinpath("vcf"), phenopacket_path, hg19_template_vcf, hg38_template_vcf
|
|
67
|
+
)
|
|
@@ -38,8 +38,7 @@ def update_outdated_gene_context(
|
|
|
38
38
|
interpretations = PhenopacketUtil(phenopacket).interpretations()
|
|
39
39
|
updated_interpretations = GeneIdentifierUpdater(
|
|
40
40
|
hgnc_data=hgnc_data, gene_identifier=gene_identifier
|
|
41
|
-
).update_genomic_interpretations_gene_identifier(interpretations)
|
|
42
|
-
|
|
41
|
+
).update_genomic_interpretations_gene_identifier(interpretations, phenopacket_path)
|
|
43
42
|
return PhenopacketRebuilder(phenopacket).update_interpretations(updated_interpretations)
|
|
44
43
|
|
|
45
44
|
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
-
|
|
3
|
-
# import logging
|
|
2
|
+
import logging
|
|
4
3
|
import os
|
|
5
4
|
from collections import defaultdict
|
|
6
5
|
from copy import copy
|
|
@@ -22,6 +21,8 @@ from phenopackets import (
|
|
|
22
21
|
|
|
23
22
|
from pheval.prepare.custom_exceptions import IncorrectFileFormatError
|
|
24
23
|
|
|
24
|
+
info_log = logging.getLogger("info")
|
|
25
|
+
|
|
25
26
|
|
|
26
27
|
class IncompatibleGenomeAssemblyError(Exception):
|
|
27
28
|
"""Exception raised for incompatible genome assembly."""
|
|
@@ -467,7 +468,9 @@ class PhenopacketUtil:
|
|
|
467
468
|
for i in pheno_interpretation:
|
|
468
469
|
for g in i.diagnosis.genomic_interpretations:
|
|
469
470
|
variant = GenomicVariant(
|
|
470
|
-
chrom=g.variant_interpretation.variation_descriptor.vcf_record.chrom
|
|
471
|
+
chrom=g.variant_interpretation.variation_descriptor.vcf_record.chrom.replace(
|
|
472
|
+
"chr", ""
|
|
473
|
+
),
|
|
471
474
|
pos=g.variant_interpretation.variation_descriptor.vcf_record.pos,
|
|
472
475
|
ref=g.variant_interpretation.variation_descriptor.vcf_record.ref,
|
|
473
476
|
alt=g.variant_interpretation.variation_descriptor.vcf_record.alt,
|
|
@@ -475,6 +478,59 @@ class PhenopacketUtil:
|
|
|
475
478
|
variants.append(variant)
|
|
476
479
|
return variants
|
|
477
480
|
|
|
481
|
+
def check_incomplete_variant_record(self) -> bool:
|
|
482
|
+
"""
|
|
483
|
+
Check if any variant record in the phenopacket has incomplete information.
|
|
484
|
+
|
|
485
|
+
This method iterates through the diagnosed variant records and checks if any of them
|
|
486
|
+
have missing or incomplete information such as empty chromosome, position, reference,
|
|
487
|
+
or alternate allele.
|
|
488
|
+
|
|
489
|
+
Returns:
|
|
490
|
+
bool: True if any variant record is incomplete, False otherwise.
|
|
491
|
+
"""
|
|
492
|
+
variants = self.diagnosed_variants()
|
|
493
|
+
for variant in variants:
|
|
494
|
+
if (
|
|
495
|
+
variant.chrom == ""
|
|
496
|
+
or variant.pos == 0
|
|
497
|
+
or variant.pos == ""
|
|
498
|
+
or variant.ref == ""
|
|
499
|
+
or variant.alt == ""
|
|
500
|
+
):
|
|
501
|
+
return True
|
|
502
|
+
return False
|
|
503
|
+
|
|
504
|
+
def check_incomplete_gene_record(self) -> bool:
|
|
505
|
+
"""
|
|
506
|
+
Check if any gene record in the phenopacket has incomplete information.
|
|
507
|
+
|
|
508
|
+
This method iterates through the diagnosed gene records and checks if any of them
|
|
509
|
+
have missing or incomplete information such as gene name, or gene identifier.
|
|
510
|
+
|
|
511
|
+
Returns:
|
|
512
|
+
bool: True if any gene record is incomplete, False otherwise.
|
|
513
|
+
"""
|
|
514
|
+
genes = self.diagnosed_genes()
|
|
515
|
+
for gene in genes:
|
|
516
|
+
if gene.gene_symbol == "" or gene.gene_identifier == "":
|
|
517
|
+
return True
|
|
518
|
+
return False
|
|
519
|
+
|
|
520
|
+
def check_incomplete_disease_record(self) -> bool:
|
|
521
|
+
"""
|
|
522
|
+
Check if any disease record in the phenopacket has incomplete information.
|
|
523
|
+
|
|
524
|
+
This method iterates through the diagnosed disease records and checks if any of them
|
|
525
|
+
have missing or incomplete information such as empty disease name, or disease identifier.
|
|
526
|
+
|
|
527
|
+
Returns:
|
|
528
|
+
bool: True if any disease record is incomplete, False otherwise.
|
|
529
|
+
"""
|
|
530
|
+
if len(self.diagnoses()) == 0:
|
|
531
|
+
return True
|
|
532
|
+
return False
|
|
533
|
+
|
|
478
534
|
|
|
479
535
|
class PhenopacketRebuilder:
|
|
480
536
|
"""Class for rebuilding a Phenopacket"""
|
|
@@ -653,7 +709,7 @@ class GeneIdentifierUpdater:
|
|
|
653
709
|
]
|
|
654
710
|
|
|
655
711
|
def update_genomic_interpretations_gene_identifier(
|
|
656
|
-
self, interpretations: List[Interpretation]
|
|
712
|
+
self, interpretations: List[Interpretation], phenopacket_path: Path
|
|
657
713
|
) -> List[Interpretation]:
|
|
658
714
|
"""
|
|
659
715
|
Update the genomic interpretations of a Phenopacket.
|
|
@@ -667,10 +723,16 @@ class GeneIdentifierUpdater:
|
|
|
667
723
|
updated_interpretations = copy(list(interpretations))
|
|
668
724
|
for updated_interpretation in updated_interpretations:
|
|
669
725
|
for g in updated_interpretation.diagnosis.genomic_interpretations:
|
|
726
|
+
updated_gene_identifier = self.find_identifier(
|
|
727
|
+
g.variant_interpretation.variation_descriptor.gene_context.symbol
|
|
728
|
+
)
|
|
729
|
+
info_log.info(
|
|
730
|
+
f"Updating gene identifier in {phenopacket_path} from "
|
|
731
|
+
f"{g.variant_interpretation.variation_descriptor.gene_context.value_id}"
|
|
732
|
+
f"to {updated_gene_identifier}"
|
|
733
|
+
)
|
|
670
734
|
g.variant_interpretation.variation_descriptor.gene_context.value_id = (
|
|
671
|
-
|
|
672
|
-
g.variant_interpretation.variation_descriptor.gene_context.symbol
|
|
673
|
-
)
|
|
735
|
+
updated_gene_identifier
|
|
674
736
|
)
|
|
675
737
|
del g.variant_interpretation.variation_descriptor.gene_context.alternate_ids[:]
|
|
676
738
|
g.variant_interpretation.variation_descriptor.gene_context.alternate_ids.extend(
|
|
@@ -6,7 +6,7 @@ pheval/analyse/benchmarking_data.py,sha256=aNZkWdmWemlnC1Tg35MtR60S9YC71QWS2rMuz
|
|
|
6
6
|
pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
|
|
7
7
|
pheval/analyse/disease_prioritisation_analysis.py,sha256=qadEVhBMtBgtjGCJLhNQA510F8Pd0Ll4NAQXoT23BYs,12649
|
|
8
8
|
pheval/analyse/gene_prioritisation_analysis.py,sha256=lAN171xfXqweK8ie6191s_6WPPGjZKJXL1Z0dIqp54k,12373
|
|
9
|
-
pheval/analyse/generate_plots.py,sha256=
|
|
9
|
+
pheval/analyse/generate_plots.py,sha256=MFORnFTgoelYAahFlu3Dc3Rul4cwCg8Bloxe62vONSc,21350
|
|
10
10
|
pheval/analyse/generate_summary_outputs.py,sha256=s9pXMSW6xm4ZBe1aCd0UJSaFiKBvpUfPwJ2BI4qfTas,6591
|
|
11
11
|
pheval/analyse/parse_benchmark_summary.py,sha256=Y8uPTlHTEiaeVBOqxMcdOqjY3ZBtOS3DoRycL78Dzxg,2384
|
|
12
12
|
pheval/analyse/parse_pheval_result.py,sha256=j8YFVA0YXfySOkm8gMwrfIuV45DI9AX3ETn7h-r8ayE,1211
|
|
@@ -15,9 +15,9 @@ pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6h
|
|
|
15
15
|
pheval/analyse/rank_stats.py,sha256=knj1tsKrly17QgtOUVpqA14UjbO99N3ydkWN4xU6c2k,15785
|
|
16
16
|
pheval/analyse/run_data_parser.py,sha256=HzBKsJL2skjmrRZdrF3VYzswtKNgbX6U5qhY_kqq9mA,1552
|
|
17
17
|
pheval/analyse/variant_prioritisation_analysis.py,sha256=ApmUeTW0cl_BPh7LusbApxtgjEXEkhuNFyh0DxKKpgU,12384
|
|
18
|
-
pheval/cli.py,sha256=
|
|
18
|
+
pheval/cli.py,sha256=X4tDi7e3VB3v2RawkqIbfv4SFPCBuQwMXMnYCPTGtIo,1570
|
|
19
19
|
pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
|
|
20
|
-
pheval/cli_pheval_utils.py,sha256=
|
|
20
|
+
pheval/cli_pheval_utils.py,sha256=kySsSa7NyewwVwYBMu93y8l5_qSJaVkdXklGchcXExU,20504
|
|
21
21
|
pheval/config_parser.py,sha256=lh-Dy_FflXJUnRC3HYaEdSvPAsNZWQZlEr1hHQigrTM,1227
|
|
22
22
|
pheval/constants.py,sha256=TWBgWOc05FGXFu63fs-hEHS2IJkLLAPHtMppiWBfBOg,349
|
|
23
23
|
pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r299Zo,1228
|
|
@@ -27,9 +27,10 @@ pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
|
|
|
27
27
|
pheval/post_processing/post_processing.py,sha256=2srdlw2D3qMh2B3PUSDvA6COYlbXINC08Wt4eccMZp8,16030
|
|
28
28
|
pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
pheval/prepare/create_noisy_phenopackets.py,sha256=UbBRWDD95BFHPv03VYx04v35AGwJ9ynLltYKqQJHbZ0,11236
|
|
30
|
-
pheval/prepare/create_spiked_vcf.py,sha256=
|
|
30
|
+
pheval/prepare/create_spiked_vcf.py,sha256=KZIyjtDDTqJj3hxL3u4YP6P0toA4RN1oPeDrzLMB2z4,20235
|
|
31
31
|
pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
|
|
32
|
-
pheval/prepare/
|
|
32
|
+
pheval/prepare/prepare_corpus.py,sha256=q9sCTfpzEHeJpzo0nNc8WhdZPeXq_bm_udQaLCAbAJI,3187
|
|
33
|
+
pheval/prepare/update_phenopacket.py,sha256=21fzUPbwKN6Ey5TSh9PFzjT2x86U19RAE6WmkjG8u28,4770
|
|
33
34
|
pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
|
|
34
35
|
pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
|
|
35
36
|
pheval/resources/alternate_ouputs/OVA_results.txt,sha256=_5XFCR4W04D-W7DObpALLsa0-693g2kiIUB_uo79aHk,9845
|
|
@@ -46,11 +47,11 @@ pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
|
|
|
46
47
|
pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
|
|
47
48
|
pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
|
|
48
49
|
pheval/utils/file_utils.py,sha256=9HoCmtF73D3wY6bBhFLefMBI5uhvCe_meZeHXQzF_ts,4640
|
|
49
|
-
pheval/utils/phenopacket_utils.py,sha256=
|
|
50
|
+
pheval/utils/phenopacket_utils.py,sha256=4inrnhZ4UjYgO0Y85ls_Nxq6voAIIXQV57_fMeIX-24,26792
|
|
50
51
|
pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
|
|
51
52
|
pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
|
|
52
|
-
pheval-0.3.
|
|
53
|
-
pheval-0.3.
|
|
54
|
-
pheval-0.3.
|
|
55
|
-
pheval-0.3.
|
|
56
|
-
pheval-0.3.
|
|
53
|
+
pheval-0.3.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
54
|
+
pheval-0.3.4.dist-info/METADATA,sha256=pG569TPPJkj7dQ6C-SmMXRBRN-B2aw7hWSoMoa5k-0E,1810
|
|
55
|
+
pheval-0.3.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
56
|
+
pheval-0.3.4.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
|
|
57
|
+
pheval-0.3.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|