pheval 0.3.7__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/analyse/gene_prioritisation_analysis.py +35 -3
- pheval/analyse/variant_prioritisation_analysis.py +2 -2
- pheval/cli_pheval_utils.py +79 -12
- pheval/post_processing/post_processing.py +5 -4
- pheval/prepare/create_spiked_vcf.py +104 -13
- pheval/prepare/prepare_corpus.py +13 -1
- pheval/utils/phenopacket_utils.py +5 -3
- {pheval-0.3.7.dist-info → pheval-0.3.8.dist-info}/METADATA +1 -1
- {pheval-0.3.7.dist-info → pheval-0.3.8.dist-info}/RECORD +12 -12
- {pheval-0.3.7.dist-info → pheval-0.3.8.dist-info}/LICENSE +0 -0
- {pheval-0.3.7.dist-info → pheval-0.3.8.dist-info}/WHEEL +0 -0
- {pheval-0.3.7.dist-info → pheval-0.3.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import re
|
|
1
3
|
from collections import defaultdict
|
|
2
4
|
from pathlib import Path
|
|
3
|
-
from typing import List
|
|
5
|
+
from typing import List, Union
|
|
4
6
|
|
|
5
7
|
from pheval.analyse.benchmarking_data import BenchmarkRunResults
|
|
6
8
|
from pheval.analyse.binary_classification_stats import BinaryClassificationStats
|
|
@@ -140,6 +142,24 @@ class AssessGenePrioritisation:
|
|
|
140
142
|
)
|
|
141
143
|
)
|
|
142
144
|
|
|
145
|
+
@staticmethod
def _check_string_representation(entity: str) -> Union[List[str], str]:
    """
    Check if the input string is a representation of a list and returns the list if true, otherwise the string.

    A value is treated as a list representation only when it is a flat, bracketed,
    comma-separated sequence (for example "['A', 'B']" or "[]") that also parses as a
    Python literal.

    Args:
        entity (str): The input entity to check.

    Returns:
        Union[List[str], str]: The parsed list if the input string is a list representation,
        otherwise the original input, unchanged.
    """
    # Only strings can be a *string representation*; anything else (e.g. a value that is
    # already a list) is handed back as-is instead of raising TypeError in the regex match.
    if not isinstance(entity, str):
        return entity
    list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*\]$")
    if not list_pattern.match(entity):
        return entity
    try:
        return ast.literal_eval(entity)
    except (ValueError, SyntaxError, TypeError):
        # The pattern only validates the bracket/comma shape, so inputs with unquoted
        # elements such as "[BRCA1, BRCA2]" match it but are not valid Python literals.
        # Fall back to the original string rather than aborting the whole analysis.
        return entity
|
|
162
|
+
|
|
143
163
|
def assess_gene_prioritisation(
|
|
144
164
|
self,
|
|
145
165
|
rank_stats: RankStats,
|
|
@@ -161,9 +181,21 @@ class AssessGenePrioritisation:
|
|
|
161
181
|
rank_stats.total += 1
|
|
162
182
|
gene_match = GenePrioritisationResult(self.phenopacket_path, gene.gene_symbol)
|
|
163
183
|
for standardised_gene_result in self.standardised_gene_results:
|
|
184
|
+
gene_identifier = self._check_string_representation(
|
|
185
|
+
standardised_gene_result.gene_identifier
|
|
186
|
+
)
|
|
187
|
+
gene_symbol = self._check_string_representation(
|
|
188
|
+
standardised_gene_result.gene_symbol
|
|
189
|
+
)
|
|
164
190
|
if (
|
|
165
|
-
|
|
166
|
-
|
|
191
|
+
isinstance(gene_identifier, list)
|
|
192
|
+
and gene.gene_identifier in gene_identifier
|
|
193
|
+
or isinstance(gene_identifier, str)
|
|
194
|
+
and gene.gene_identifier == str
|
|
195
|
+
or isinstance(gene_symbol, list)
|
|
196
|
+
and gene.gene_symbol in gene_symbol
|
|
197
|
+
or isinstance(gene_symbol, str)
|
|
198
|
+
and gene.gene_symbol == gene_symbol
|
|
167
199
|
):
|
|
168
200
|
gene_match = self._record_matched_gene(
|
|
169
201
|
gene, rank_stats, standardised_gene_result
|
|
@@ -162,8 +162,8 @@ class AssessVariantPrioritisation:
|
|
|
162
162
|
variant_match = VariantPrioritisationResult(self.phenopacket_path, variant)
|
|
163
163
|
for result in self.standardised_variant_results:
|
|
164
164
|
result_variant = GenomicVariant(
|
|
165
|
-
chrom=result.chromosome,
|
|
166
|
-
pos=result.start,
|
|
165
|
+
chrom=str(result.chromosome),
|
|
166
|
+
pos=int(result.start),
|
|
167
167
|
ref=result.ref,
|
|
168
168
|
alt=result.alt,
|
|
169
169
|
)
|
pheval/cli_pheval_utils.py
CHANGED
|
@@ -260,6 +260,8 @@ def update_phenopackets_command(
|
|
|
260
260
|
required=False,
|
|
261
261
|
help="Template hg19 VCF file",
|
|
262
262
|
type=Path,
|
|
263
|
+
cls=MutuallyExclusiveOptionError,
|
|
264
|
+
mutually_exclusive=["hg19_vcf_dir"],
|
|
263
265
|
)
|
|
264
266
|
@click.option(
|
|
265
267
|
"--hg38-template-vcf",
|
|
@@ -268,6 +270,28 @@ def update_phenopackets_command(
|
|
|
268
270
|
required=False,
|
|
269
271
|
help="Template hg38 VCF file",
|
|
270
272
|
type=Path,
|
|
273
|
+
cls=MutuallyExclusiveOptionError,
|
|
274
|
+
mutually_exclusive=["hg38_vcf_dir"],
|
|
275
|
+
)
|
|
276
|
+
@click.option(
|
|
277
|
+
"--hg19-vcf-dir",
|
|
278
|
+
"-hg19-dir",
|
|
279
|
+
metavar="PATH",
|
|
280
|
+
required=False,
|
|
281
|
+
help="Path to directory containing hg19 VCF templates.",
|
|
282
|
+
type=Path,
|
|
283
|
+
cls=MutuallyExclusiveOptionError,
|
|
284
|
+
mutually_exclusive=["hg19_template_vcf"],
|
|
285
|
+
)
|
|
286
|
+
@click.option(
|
|
287
|
+
"--hg38-vcf-dir",
|
|
288
|
+
"-hg38-dir",
|
|
289
|
+
metavar="PATH",
|
|
290
|
+
required=False,
|
|
291
|
+
help="Path to directory containing hg38 VCF templates.",
|
|
292
|
+
type=Path,
|
|
293
|
+
cls=MutuallyExclusiveOptionError,
|
|
294
|
+
mutually_exclusive=["hg38_template_vcf"],
|
|
271
295
|
)
|
|
272
296
|
@click.option(
|
|
273
297
|
"--output-dir",
|
|
@@ -284,6 +308,8 @@ def create_spiked_vcfs_command(
|
|
|
284
308
|
output_dir: Path,
|
|
285
309
|
hg19_template_vcf: Path = None,
|
|
286
310
|
hg38_template_vcf: Path = None,
|
|
311
|
+
hg19_vcf_dir: Path = None,
|
|
312
|
+
hg38_vcf_dir: Path = None,
|
|
287
313
|
):
|
|
288
314
|
"""
|
|
289
315
|
Create spiked VCF from either a Phenopacket or a Phenopacket directory.
|
|
@@ -294,10 +320,20 @@ def create_spiked_vcfs_command(
|
|
|
294
320
|
output_dir (Path): The directory to store the generated spiked VCF file(s).
|
|
295
321
|
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
296
322
|
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
323
|
+
hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional).
|
|
324
|
+
hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional).
|
|
297
325
|
"""
|
|
298
326
|
if phenopacket_path is None and phenopacket_dir is None:
|
|
299
327
|
raise InputError("Either a phenopacket or phenopacket directory must be specified")
|
|
300
|
-
spike_vcfs(
|
|
328
|
+
spike_vcfs(
|
|
329
|
+
output_dir,
|
|
330
|
+
phenopacket_path,
|
|
331
|
+
phenopacket_dir,
|
|
332
|
+
hg19_template_vcf,
|
|
333
|
+
hg38_template_vcf,
|
|
334
|
+
hg19_vcf_dir,
|
|
335
|
+
hg38_vcf_dir,
|
|
336
|
+
)
|
|
301
337
|
|
|
302
338
|
|
|
303
339
|
@click.command()
|
|
@@ -656,6 +692,8 @@ def generate_stats_plot(
|
|
|
656
692
|
required=False,
|
|
657
693
|
help="Template hg19 VCF file",
|
|
658
694
|
type=Path,
|
|
695
|
+
cls=MutuallyExclusiveOptionError,
|
|
696
|
+
mutually_exclusive=["hg19_vcf_dir"],
|
|
659
697
|
)
|
|
660
698
|
@click.option(
|
|
661
699
|
"--hg38-template-vcf",
|
|
@@ -664,6 +702,28 @@ def generate_stats_plot(
|
|
|
664
702
|
required=False,
|
|
665
703
|
help="Template hg38 VCF file",
|
|
666
704
|
type=Path,
|
|
705
|
+
cls=MutuallyExclusiveOptionError,
|
|
706
|
+
mutually_exclusive=["hg38_vcf_dir"],
|
|
707
|
+
)
|
|
708
|
+
@click.option(
|
|
709
|
+
"--hg19-vcf-dir",
|
|
710
|
+
"-hg19-dir",
|
|
711
|
+
metavar="PATH",
|
|
712
|
+
required=False,
|
|
713
|
+
help="Path to directory containing hg19 VCF templates.",
|
|
714
|
+
type=Path,
|
|
715
|
+
cls=MutuallyExclusiveOptionError,
|
|
716
|
+
mutually_exclusive=["hg19_template_vcf"],
|
|
717
|
+
)
|
|
718
|
+
@click.option(
|
|
719
|
+
"--hg38-vcf-dir",
|
|
720
|
+
"-hg38-dir",
|
|
721
|
+
metavar="PATH",
|
|
722
|
+
required=False,
|
|
723
|
+
help="Path to directory containing hg38 VCF templates.",
|
|
724
|
+
type=Path,
|
|
725
|
+
cls=MutuallyExclusiveOptionError,
|
|
726
|
+
mutually_exclusive=["hg38_template_vcf"],
|
|
667
727
|
)
|
|
668
728
|
@click.option(
|
|
669
729
|
"--output-dir",
|
|
@@ -682,23 +742,28 @@ def prepare_corpus_command(
|
|
|
682
742
|
gene_identifier: str,
|
|
683
743
|
hg19_template_vcf: Path,
|
|
684
744
|
hg38_template_vcf: Path,
|
|
745
|
+
hg19_vcf_dir: Path,
|
|
746
|
+
hg38_vcf_dir: Path,
|
|
685
747
|
output_dir: Path,
|
|
686
748
|
):
|
|
687
749
|
"""
|
|
688
750
|
Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating
|
|
689
751
|
gene identifiers.
|
|
690
752
|
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
753
|
+
Args:
|
|
754
|
+
phenopacket_dir (Path): The path to the directory containing Phenopackets.
|
|
755
|
+
variant_analysis (bool): If True, check for complete variant records in the Phenopackets.
|
|
756
|
+
gene_analysis (bool): If True, check for complete gene records in the Phenopackets.
|
|
757
|
+
disease_analysis (bool): If True, check for complete disease records in the Phenopackets.
|
|
758
|
+
gene_identifier (str): Identifier for updating gene identifiers, if applicable.
|
|
759
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
760
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
761
|
+
hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional).
|
|
762
|
+
hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional).
|
|
763
|
+
output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files.
|
|
764
|
+
Notes:
|
|
765
|
+
To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf,
|
|
766
|
+
hg19_vcf_dir or hg38_vcf_dir is required.
|
|
702
767
|
"""
|
|
703
768
|
prepare_corpus(
|
|
704
769
|
phenopacket_dir,
|
|
@@ -708,5 +773,7 @@ def prepare_corpus_command(
|
|
|
708
773
|
gene_identifier,
|
|
709
774
|
hg19_template_vcf,
|
|
710
775
|
hg38_template_vcf,
|
|
776
|
+
hg19_vcf_dir,
|
|
777
|
+
hg38_vcf_dir,
|
|
711
778
|
output_dir,
|
|
712
779
|
)
|
|
@@ -3,6 +3,7 @@ import operator
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from typing import List, Union
|
|
6
7
|
|
|
7
8
|
import pandas as pd
|
|
8
9
|
|
|
@@ -30,8 +31,8 @@ class PhEvalResult:
|
|
|
30
31
|
class PhEvalGeneResult(PhEvalResult):
|
|
31
32
|
"""Minimal data required from tool-specific output for gene prioritisation result
|
|
32
33
|
Args:
|
|
33
|
-
gene_symbol (str): The gene symbol for the result entry
|
|
34
|
-
gene_identifier (str): The ENSEMBL gene identifier for the result entry
|
|
34
|
+
gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry
|
|
35
|
+
gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry
|
|
35
36
|
score (float): The score for the gene result entry
|
|
36
37
|
Notes:
|
|
37
38
|
While we recommend providing the gene identifier in the ENSEMBL namespace,
|
|
@@ -39,8 +40,8 @@ class PhEvalGeneResult(PhEvalResult):
|
|
|
39
40
|
in the analysis.
|
|
40
41
|
"""
|
|
41
42
|
|
|
42
|
-
gene_symbol: str
|
|
43
|
-
gene_identifier: str
|
|
43
|
+
gene_symbol: Union[List[str], str]
|
|
44
|
+
gene_identifier: Union[List[str], str]
|
|
44
45
|
score: float
|
|
45
46
|
|
|
46
47
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import gzip
|
|
2
2
|
import logging
|
|
3
|
+
import random
|
|
3
4
|
import re
|
|
4
5
|
import urllib.parse
|
|
5
6
|
from copy import copy
|
|
@@ -10,7 +11,7 @@ from typing import List, Union
|
|
|
10
11
|
from phenopackets import Family, File, Phenopacket
|
|
11
12
|
|
|
12
13
|
from pheval.prepare.custom_exceptions import InputError
|
|
13
|
-
from pheval.utils.file_utils import files_with_suffix, is_gzipped
|
|
14
|
+
from pheval.utils.file_utils import all_files, files_with_suffix, is_gzipped
|
|
14
15
|
from pheval.utils.phenopacket_utils import (
|
|
15
16
|
IncompatibleGenomeAssemblyError,
|
|
16
17
|
PhenopacketRebuilder,
|
|
@@ -207,6 +208,8 @@ def select_vcf_template(
|
|
|
207
208
|
proband_causative_variants: List[ProbandCausativeVariant],
|
|
208
209
|
hg19_vcf_info: VcfFile,
|
|
209
210
|
hg38_vcf_info: VcfFile,
|
|
211
|
+
hg19_vcf_dir: Path,
|
|
212
|
+
hg38_vcf_dir: Path,
|
|
210
213
|
) -> VcfFile:
|
|
211
214
|
"""
|
|
212
215
|
Select the appropriate VCF template based on the assembly information of the proband causative variants.
|
|
@@ -216,6 +219,8 @@ def select_vcf_template(
|
|
|
216
219
|
proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband.
|
|
217
220
|
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
218
221
|
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
222
|
+
hg19_vcf_dir (Path): The directory containing the hg19 VCF files.
|
|
223
|
+
hg38_vcf_dir (Path): The directory containing the hg38 VCF files.
|
|
219
224
|
|
|
220
225
|
Returns:
|
|
221
226
|
VcfFile: The selected VCF template file based on the assembly information of the proband causative variants.
|
|
@@ -224,11 +229,15 @@ def select_vcf_template(
|
|
|
224
229
|
if proband_causative_variants[0].assembly in ["hg19", "GRCh37"]:
|
|
225
230
|
if hg19_vcf_info:
|
|
226
231
|
return hg19_vcf_info
|
|
232
|
+
elif hg19_vcf_dir:
|
|
233
|
+
return VcfFile.populate_fields(random.choice(all_files(hg19_vcf_dir)))
|
|
227
234
|
else:
|
|
228
235
|
raise InputError("Must specify hg19 template VCF!")
|
|
229
236
|
elif proband_causative_variants[0].assembly in ["hg38", "GRCh38"]:
|
|
230
237
|
if hg38_vcf_info:
|
|
231
238
|
return hg38_vcf_info
|
|
239
|
+
elif hg38_vcf_dir:
|
|
240
|
+
return VcfFile.populate_fields(random.choice(all_files(hg38_vcf_dir)))
|
|
232
241
|
else:
|
|
233
242
|
raise InputError("Must specify hg38 template VCF!")
|
|
234
243
|
else:
|
|
@@ -445,6 +454,8 @@ def spike_vcf_contents(
|
|
|
445
454
|
phenopacket_path: Path,
|
|
446
455
|
hg19_vcf_info: VcfFile,
|
|
447
456
|
hg38_vcf_info: VcfFile,
|
|
457
|
+
hg19_vcf_dir: Path,
|
|
458
|
+
hg38_vcf_dir: Path,
|
|
448
459
|
) -> tuple[str, List[str]]:
|
|
449
460
|
"""
|
|
450
461
|
Spike VCF records with variants obtained from a Phenopacket or Family.
|
|
@@ -454,6 +465,8 @@ def spike_vcf_contents(
|
|
|
454
465
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
455
466
|
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
456
467
|
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
468
|
+
hg19_vcf_dir (Path): The directory containing the hg19 VCF files.
|
|
469
|
+
hg38_vcf_dir (Path): The directory containing the hg38 VCF files.
|
|
457
470
|
|
|
458
471
|
Returns:
|
|
459
472
|
A tuple containing:
|
|
@@ -462,7 +475,12 @@ def spike_vcf_contents(
|
|
|
462
475
|
"""
|
|
463
476
|
phenopacket_causative_variants = PhenopacketUtil(phenopacket).causative_variants()
|
|
464
477
|
chosen_template_vcf = select_vcf_template(
|
|
465
|
-
phenopacket_path,
|
|
478
|
+
phenopacket_path,
|
|
479
|
+
phenopacket_causative_variants,
|
|
480
|
+
hg19_vcf_info,
|
|
481
|
+
hg38_vcf_info,
|
|
482
|
+
hg19_vcf_dir,
|
|
483
|
+
hg38_vcf_dir,
|
|
466
484
|
)
|
|
467
485
|
check_variant_assembly(
|
|
468
486
|
phenopacket_causative_variants, chosen_template_vcf.vcf_header, phenopacket_path
|
|
@@ -483,6 +501,8 @@ def generate_spiked_vcf_file(
|
|
|
483
501
|
phenopacket_path: Path,
|
|
484
502
|
hg19_vcf_info: VcfFile,
|
|
485
503
|
hg38_vcf_info: VcfFile,
|
|
504
|
+
hg19_vcf_dir: Path,
|
|
505
|
+
hg38_vcf_dir: Path,
|
|
486
506
|
) -> File:
|
|
487
507
|
"""
|
|
488
508
|
Write spiked VCF contents to a new file.
|
|
@@ -493,13 +513,15 @@ def generate_spiked_vcf_file(
|
|
|
493
513
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
494
514
|
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
495
515
|
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
516
|
+
hg19_vcf_dir (Path): The directory containing the hg19 VCF files.
|
|
517
|
+
hg38_vcf_dir (Path): The directory containing the hg38 VCF files.
|
|
496
518
|
Returns:
|
|
497
519
|
File: The generated File object representing the newly created spiked VCF file.
|
|
498
520
|
"""
|
|
499
521
|
output_dir.mkdir(exist_ok=True)
|
|
500
522
|
info_log.info(f" Created a directory {output_dir}")
|
|
501
523
|
vcf_assembly, spiked_vcf = spike_vcf_contents(
|
|
502
|
-
phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info
|
|
524
|
+
phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir
|
|
503
525
|
)
|
|
504
526
|
spiked_vcf_path = output_dir.joinpath(phenopacket_path.name.replace(".json", ".vcf.gz"))
|
|
505
527
|
VcfWriter(spiked_vcf, spiked_vcf_path).write_vcf_file()
|
|
@@ -509,10 +531,38 @@ def generate_spiked_vcf_file(
|
|
|
509
531
|
)
|
|
510
532
|
|
|
511
533
|
|
|
512
|
-
def spike_and_update_phenopacket(
|
|
534
|
+
def spike_and_update_phenopacket(
|
|
535
|
+
hg19_vcf_info: VcfFile,
|
|
536
|
+
hg38_vcf_info: VcfFile,
|
|
537
|
+
hg19_vcf_dir: Path,
|
|
538
|
+
hg38_vcf_dir: Path,
|
|
539
|
+
output_dir: Path,
|
|
540
|
+
phenopacket_path: Path,
|
|
541
|
+
) -> None:
|
|
542
|
+
"""
|
|
543
|
+
Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket
|
|
544
|
+
accordingly, and write the updated Phenopacket to the specified output directory.
|
|
545
|
+
|
|
546
|
+
Args:
|
|
547
|
+
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
548
|
+
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
549
|
+
hg19_vcf_dir (Path): The directory containing the hg19 VCF files.
|
|
550
|
+
hg38_vcf_dir (Path): The directory containing the hg38 VCF files.
|
|
551
|
+
output_dir (Path): Directory where the updated Phenopacket will be saved.
|
|
552
|
+
phenopacket_path (Path): Path to the original Phenopacket file.
|
|
553
|
+
|
|
554
|
+
Returns:
|
|
555
|
+
None
|
|
556
|
+
"""
|
|
513
557
|
phenopacket = phenopacket_reader(phenopacket_path)
|
|
514
558
|
spiked_vcf_file_message = generate_spiked_vcf_file(
|
|
515
|
-
output_dir,
|
|
559
|
+
output_dir,
|
|
560
|
+
phenopacket,
|
|
561
|
+
phenopacket_path,
|
|
562
|
+
hg19_vcf_info,
|
|
563
|
+
hg38_vcf_info,
|
|
564
|
+
hg19_vcf_dir,
|
|
565
|
+
hg38_vcf_dir,
|
|
516
566
|
)
|
|
517
567
|
updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
|
|
518
568
|
spiked_vcf_file_message
|
|
@@ -521,7 +571,12 @@ def spike_and_update_phenopacket(hg19_vcf_info, hg38_vcf_info, output_dir, pheno
|
|
|
521
571
|
|
|
522
572
|
|
|
523
573
|
def create_spiked_vcf(
|
|
524
|
-
output_dir: Path,
|
|
574
|
+
output_dir: Path,
|
|
575
|
+
phenopacket_path: Path,
|
|
576
|
+
hg19_template_vcf: Path,
|
|
577
|
+
hg38_template_vcf: Path,
|
|
578
|
+
hg19_vcf_dir: Path,
|
|
579
|
+
hg38_vcf_dir: Path,
|
|
525
580
|
) -> None:
|
|
526
581
|
"""
|
|
527
582
|
Create a spiked VCF for a Phenopacket.
|
|
@@ -531,6 +586,8 @@ def create_spiked_vcf(
|
|
|
531
586
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
532
587
|
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
533
588
|
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
589
|
+
hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional).
|
|
590
|
+
hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional).
|
|
534
591
|
|
|
535
592
|
Raises:
|
|
536
593
|
InputError: If both hg19_template_vcf and hg38_template_vcf are None.
|
|
@@ -539,11 +596,18 @@ def create_spiked_vcf(
|
|
|
539
596
|
raise InputError("Either a hg19 template vcf or hg38 template vcf must be specified")
|
|
540
597
|
hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
|
|
541
598
|
hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
|
|
542
|
-
spike_and_update_phenopacket(
|
|
599
|
+
spike_and_update_phenopacket(
|
|
600
|
+
hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir, output_dir, phenopacket_path
|
|
601
|
+
)
|
|
543
602
|
|
|
544
603
|
|
|
545
604
|
def create_spiked_vcfs(
|
|
546
|
-
output_dir: Path,
|
|
605
|
+
output_dir: Path,
|
|
606
|
+
phenopacket_dir: Path,
|
|
607
|
+
hg19_template_vcf: Path,
|
|
608
|
+
hg38_template_vcf: Path,
|
|
609
|
+
hg19_vcf_dir: Path,
|
|
610
|
+
hg38_vcf_dir: Path,
|
|
547
611
|
) -> None:
|
|
548
612
|
"""
|
|
549
613
|
Create a spiked VCF for a directory of Phenopackets.
|
|
@@ -553,16 +617,25 @@ def create_spiked_vcfs(
|
|
|
553
617
|
phenopacket_dir (Path): Path to the Phenopacket directory.
|
|
554
618
|
hg19_template_vcf (Path): Path to the template hg19 VCF file (optional).
|
|
555
619
|
hg38_template_vcf (Path): Path to the template hg38 VCF file (optional).
|
|
620
|
+
hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional).
|
|
621
|
+
hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional).
|
|
556
622
|
|
|
557
623
|
Raises:
|
|
558
624
|
InputError: If hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir and hg38_vcf_dir are all None.
|
|
559
625
|
"""
|
|
560
|
-
if
|
|
561
|
-
|
|
626
|
+
if (
|
|
627
|
+
hg19_template_vcf is None
|
|
628
|
+
and hg38_template_vcf is None
|
|
629
|
+
and hg19_vcf_dir is None
|
|
630
|
+
and hg38_vcf_dir is None
|
|
631
|
+
):
|
|
632
|
+
raise InputError("Need to specify a VCF!")
|
|
562
633
|
hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
|
|
563
634
|
hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
|
|
564
635
|
for phenopacket_path in files_with_suffix(phenopacket_dir, ".json"):
|
|
565
|
-
spike_and_update_phenopacket(
|
|
636
|
+
spike_and_update_phenopacket(
|
|
637
|
+
hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir, output_dir, phenopacket_path
|
|
638
|
+
)
|
|
566
639
|
|
|
567
640
|
|
|
568
641
|
def spike_vcfs(
|
|
@@ -571,6 +644,8 @@ def spike_vcfs(
|
|
|
571
644
|
phenopacket_dir: Path,
|
|
572
645
|
hg19_template_vcf: Path,
|
|
573
646
|
hg38_template_vcf: Path,
|
|
647
|
+
hg19_vcf_dir: Path,
|
|
648
|
+
hg38_vcf_dir: Path,
|
|
574
649
|
) -> None:
|
|
575
650
|
"""
|
|
576
651
|
Create spiked VCF from either a Phenopacket or a Phenopacket directory.
|
|
@@ -581,8 +656,24 @@ def spike_vcfs(
|
|
|
581
656
|
phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional).
|
|
582
657
|
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
583
658
|
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
659
|
+
hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional).
|
|
660
|
+
hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional).
|
|
584
661
|
"""
|
|
585
662
|
if phenopacket_path is not None:
|
|
586
|
-
create_spiked_vcf(
|
|
663
|
+
create_spiked_vcf(
|
|
664
|
+
output_dir,
|
|
665
|
+
phenopacket_path,
|
|
666
|
+
hg19_template_vcf,
|
|
667
|
+
hg38_template_vcf,
|
|
668
|
+
hg19_vcf_dir,
|
|
669
|
+
hg38_vcf_dir,
|
|
670
|
+
)
|
|
587
671
|
elif phenopacket_dir is not None:
|
|
588
|
-
create_spiked_vcfs(
|
|
672
|
+
create_spiked_vcfs(
|
|
673
|
+
output_dir,
|
|
674
|
+
phenopacket_dir,
|
|
675
|
+
hg19_template_vcf,
|
|
676
|
+
hg38_template_vcf,
|
|
677
|
+
hg19_vcf_dir,
|
|
678
|
+
hg38_vcf_dir,
|
|
679
|
+
)
|
pheval/prepare/prepare_corpus.py
CHANGED
|
@@ -18,6 +18,8 @@ def prepare_corpus(
|
|
|
18
18
|
gene_identifier: str,
|
|
19
19
|
hg19_template_vcf: Path,
|
|
20
20
|
hg38_template_vcf: Path,
|
|
21
|
+
hg19_vcf_dir: Path,
|
|
22
|
+
hg38_vcf_dir: Path,
|
|
21
23
|
output_dir: Path,
|
|
22
24
|
) -> None:
|
|
23
25
|
"""
|
|
@@ -34,7 +36,12 @@ def prepare_corpus(
|
|
|
34
36
|
VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required.
|
|
35
37
|
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional), to spike variants into
|
|
36
38
|
VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required.
|
|
39
|
+
hg19_vcf_dir (Path): Path to the directory containing hg19 template VCF files (optional).
|
|
40
|
+
hg38_vcf_dir (Path): Path to the directory containing hg38 template VCF files (optional).
|
|
37
41
|
output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files.
|
|
42
|
+
Notes:
|
|
43
|
+
To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf,
|
|
44
|
+
hg19_vcf_dir or hg38_vcf_dir is required.
|
|
38
45
|
"""
|
|
39
46
|
output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
|
|
40
47
|
for phenopacket_path in all_files(phenopacket_dir):
|
|
@@ -65,7 +72,12 @@ def prepare_corpus(
|
|
|
65
72
|
if hg19_template_vcf or hg38_template_vcf:
|
|
66
73
|
output_dir.joinpath("vcf").mkdir(exist_ok=True)
|
|
67
74
|
create_spiked_vcf(
|
|
68
|
-
output_dir.joinpath("vcf"),
|
|
75
|
+
output_dir.joinpath("vcf"),
|
|
76
|
+
phenopacket_path,
|
|
77
|
+
hg19_template_vcf,
|
|
78
|
+
hg38_template_vcf,
|
|
79
|
+
hg19_vcf_dir,
|
|
80
|
+
hg38_vcf_dir,
|
|
69
81
|
)
|
|
70
82
|
if gene_identifier:
|
|
71
83
|
create_updated_phenopacket(
|
|
@@ -468,10 +468,12 @@ class PhenopacketUtil:
|
|
|
468
468
|
for i in pheno_interpretation:
|
|
469
469
|
for g in i.diagnosis.genomic_interpretations:
|
|
470
470
|
variant = GenomicVariant(
|
|
471
|
-
chrom=
|
|
472
|
-
|
|
471
|
+
chrom=str(
|
|
472
|
+
g.variant_interpretation.variation_descriptor.vcf_record.chrom.replace(
|
|
473
|
+
"chr", ""
|
|
474
|
+
)
|
|
473
475
|
),
|
|
474
|
-
pos=g.variant_interpretation.variation_descriptor.vcf_record.pos,
|
|
476
|
+
pos=int(g.variant_interpretation.variation_descriptor.vcf_record.pos),
|
|
475
477
|
ref=g.variant_interpretation.variation_descriptor.vcf_record.ref,
|
|
476
478
|
alt=g.variant_interpretation.variation_descriptor.vcf_record.alt,
|
|
477
479
|
)
|
|
@@ -5,7 +5,7 @@ pheval/analyse/benchmark_generator.py,sha256=AeuwbaPb4j_dyBGPRgEBxQk2NahDb5u4xHy
|
|
|
5
5
|
pheval/analyse/benchmarking_data.py,sha256=aNZkWdmWemlnC1Tg35MtR60S9YC71QWS2rMuzkUc3w0,768
|
|
6
6
|
pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
|
|
7
7
|
pheval/analyse/disease_prioritisation_analysis.py,sha256=mGfGYF5Eu7LxyBkAy6xMG1nDURaPiJY4rRQyKDcQe-4,12451
|
|
8
|
-
pheval/analyse/gene_prioritisation_analysis.py,sha256=
|
|
8
|
+
pheval/analyse/gene_prioritisation_analysis.py,sha256=UK41VqcO605zRamJMwj2jIaMWJ0_uYeDlrU0liJzdn0,13449
|
|
9
9
|
pheval/analyse/generate_plots.py,sha256=MFORnFTgoelYAahFlu3Dc3Rul4cwCg8Bloxe62vONSc,21350
|
|
10
10
|
pheval/analyse/generate_summary_outputs.py,sha256=s9pXMSW6xm4ZBe1aCd0UJSaFiKBvpUfPwJ2BI4qfTas,6591
|
|
11
11
|
pheval/analyse/parse_benchmark_summary.py,sha256=Y8uPTlHTEiaeVBOqxMcdOqjY3ZBtOS3DoRycL78Dzxg,2384
|
|
@@ -14,22 +14,22 @@ pheval/analyse/prioritisation_rank_recorder.py,sha256=EVe8DoEvvp0_WMAcjfVxmDGGRF
|
|
|
14
14
|
pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
|
|
15
15
|
pheval/analyse/rank_stats.py,sha256=knj1tsKrly17QgtOUVpqA14UjbO99N3ydkWN4xU6c2k,15785
|
|
16
16
|
pheval/analyse/run_data_parser.py,sha256=HzBKsJL2skjmrRZdrF3VYzswtKNgbX6U5qhY_kqq9mA,1552
|
|
17
|
-
pheval/analyse/variant_prioritisation_analysis.py,sha256=
|
|
17
|
+
pheval/analyse/variant_prioritisation_analysis.py,sha256=XSlAV2G7psXewPIoiUD_4jgFivcG1aOcy1jSPlSil5M,12196
|
|
18
18
|
pheval/cli.py,sha256=X4tDi7e3VB3v2RawkqIbfv4SFPCBuQwMXMnYCPTGtIo,1570
|
|
19
19
|
pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
|
|
20
|
-
pheval/cli_pheval_utils.py,sha256=
|
|
20
|
+
pheval/cli_pheval_utils.py,sha256=4jLSJm4AEXu0SBtXbg4eNYLbCNQqQgjroDpRxQX34-M,22333
|
|
21
21
|
pheval/config_parser.py,sha256=lh-Dy_FflXJUnRC3HYaEdSvPAsNZWQZlEr1hHQigrTM,1227
|
|
22
22
|
pheval/constants.py,sha256=TWBgWOc05FGXFu63fs-hEHS2IJkLLAPHtMppiWBfBOg,349
|
|
23
23
|
pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r299Zo,1228
|
|
24
24
|
pheval/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
pheval/infra/exomiserdb.py,sha256=pM9-TfjrgurtH4OtM1Enk5oVhIxGQN3rKRlrxHuObTM,5080
|
|
26
26
|
pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
pheval/post_processing/post_processing.py,sha256=
|
|
27
|
+
pheval/post_processing/post_processing.py,sha256=tqeVRWF6PMHpOe681ONeGaqxdviLgVJgze3o6qSpXEg,13438
|
|
28
28
|
pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
pheval/prepare/create_noisy_phenopackets.py,sha256=UbBRWDD95BFHPv03VYx04v35AGwJ9ynLltYKqQJHbZ0,11236
|
|
30
|
-
pheval/prepare/create_spiked_vcf.py,sha256=
|
|
30
|
+
pheval/prepare/create_spiked_vcf.py,sha256=90A-Mi8QKhvN036vtFEVWAHgzHO37itiLYrqYlG4LiA,23953
|
|
31
31
|
pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
|
|
32
|
-
pheval/prepare/prepare_corpus.py,sha256=
|
|
32
|
+
pheval/prepare/prepare_corpus.py,sha256=eRvozzezIgAqHAumtqul0WfXfBO1iOBaSlN8fPSn0Nw,4223
|
|
33
33
|
pheval/prepare/update_phenopacket.py,sha256=21fzUPbwKN6Ey5TSh9PFzjT2x86U19RAE6WmkjG8u28,4770
|
|
34
34
|
pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
|
|
35
35
|
pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
|
|
@@ -47,11 +47,11 @@ pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
|
|
|
47
47
|
pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
|
|
48
48
|
pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
|
|
49
49
|
pheval/utils/file_utils.py,sha256=m21cz-qjDYqnI8ClUv3J9fKizex98a-9bSEerQ75i_c,3576
|
|
50
|
-
pheval/utils/phenopacket_utils.py,sha256=
|
|
50
|
+
pheval/utils/phenopacket_utils.py,sha256=W9T_X48EJ-xn5GghzbZlt-lI-DxWoSm7_SHr8DCJg2Q,26856
|
|
51
51
|
pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
|
|
52
52
|
pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
|
|
53
|
-
pheval-0.3.
|
|
54
|
-
pheval-0.3.
|
|
55
|
-
pheval-0.3.
|
|
56
|
-
pheval-0.3.
|
|
57
|
-
pheval-0.3.
|
|
53
|
+
pheval-0.3.8.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
54
|
+
pheval-0.3.8.dist-info/METADATA,sha256=e9Go9hmem0wD_ek3KPtZ4Zfjz4m-Vy7lLOThsbAioXA,1810
|
|
55
|
+
pheval-0.3.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
56
|
+
pheval-0.3.8.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
|
|
57
|
+
pheval-0.3.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|