pheval 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/analyse/binary_classification_stats.py +29 -1
- pheval/analyse/disease_prioritisation_analysis.py +6 -2
- pheval/analyse/gene_prioritisation_analysis.py +6 -2
- pheval/analyse/generate_plots.py +83 -1
- pheval/analyse/generate_summary_outputs.py +14 -1
- pheval/analyse/variant_prioritisation_analysis.py +6 -2
- pheval/cli_pheval_utils.py +20 -14
- pheval/prepare/create_spiked_vcf.py +132 -95
- pheval/utils/file_utils.py +21 -12
- pheval/utils/phenopacket_utils.py +3 -1
- {pheval-0.3.1.dist-info → pheval-0.3.3.dist-info}/METADATA +1 -1
- {pheval-0.3.1.dist-info → pheval-0.3.3.dist-info}/RECORD +15 -15
- {pheval-0.3.1.dist-info → pheval-0.3.3.dist-info}/WHEEL +1 -1
- {pheval-0.3.1.dist-info → pheval-0.3.3.dist-info}/LICENSE +0 -0
- {pheval-0.3.1.dist-info → pheval-0.3.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
2
|
from math import sqrt
|
|
3
3
|
from typing import List, Union
|
|
4
4
|
|
|
@@ -29,6 +29,8 @@ class BinaryClassificationStats:
|
|
|
29
29
|
true_negatives: int = 0
|
|
30
30
|
false_positives: int = 0
|
|
31
31
|
false_negatives: int = 0
|
|
32
|
+
labels: List = field(default_factory=list)
|
|
33
|
+
scores: List = field(default_factory=list)
|
|
32
34
|
|
|
33
35
|
@staticmethod
|
|
34
36
|
def remove_relevant_ranks(
|
|
@@ -84,6 +86,31 @@ class BinaryClassificationStats:
|
|
|
84
86
|
elif rank != 1:
|
|
85
87
|
self.true_negatives += 1
|
|
86
88
|
|
|
89
|
+
def add_labels_and_scores(
    self,
    pheval_results: Union[
        List[RankedPhEvalGeneResult],
        List[RankedPhEvalVariantResult],
        List[RankedPhEvalDiseaseResult],
    ],
    relevant_ranks: List[int],
):
    """
    Adds scores and labels from the PhEval results.

    For every result, its score is appended to ``self.scores`` and a binary label is
    appended to ``self.labels``: 1 if the result's rank is still present among the
    remaining relevant ranks, otherwise 0. A relevant rank is consumed once matched, so a
    rank listed k times in ``relevant_ranks`` labels at most k results as positive.

    Args:
        pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult],
        List[RankedPhEvalDiseaseResult]]):
            List of all PhEval results
        relevant_ranks (List[int]): A list of the ranks associated with the known entities.
    """
    # Work on a copy so the caller's list is left untouched.
    remaining_relevant_ranks = list(relevant_ranks)
    for result in pheval_results:
        self.scores.append(result.score)
        if result.rank in remaining_relevant_ranks:
            # Consume the matched rank so tied results are not all labelled positive.
            remaining_relevant_ranks.remove(result.rank)
            self.labels.append(1)
        else:
            self.labels.append(0)
|
|
113
|
+
|
|
87
114
|
def add_classification(
|
|
88
115
|
self,
|
|
89
116
|
pheval_results: Union[
|
|
@@ -105,6 +132,7 @@ class BinaryClassificationStats:
|
|
|
105
132
|
self.add_classification_for_other_entities(
|
|
106
133
|
self.remove_relevant_ranks(pheval_results, relevant_ranks)
|
|
107
134
|
)
|
|
135
|
+
self.add_labels_and_scores(pheval_results, relevant_ranks)
|
|
108
136
|
|
|
109
137
|
def sensitivity(self) -> float:
|
|
110
138
|
"""
|
|
@@ -10,7 +10,11 @@ from pheval.analyse.prioritisation_result_types import DiseasePrioritisationResu
|
|
|
10
10
|
from pheval.analyse.rank_stats import RankStats
|
|
11
11
|
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
|
|
12
12
|
from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
|
|
13
|
-
from pheval.utils.file_utils import
|
|
13
|
+
from pheval.utils.file_utils import (
|
|
14
|
+
all_files,
|
|
15
|
+
files_with_suffix,
|
|
16
|
+
obtain_phenopacket_path_from_pheval_result,
|
|
17
|
+
)
|
|
14
18
|
from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandDisease, phenopacket_reader
|
|
15
19
|
|
|
16
20
|
|
|
@@ -234,7 +238,7 @@ def assess_phenopacket_disease_prioritisation(
|
|
|
234
238
|
disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
|
|
235
239
|
disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
|
|
236
240
|
"""
|
|
237
|
-
phenopacket_path =
|
|
241
|
+
phenopacket_path = obtain_phenopacket_path_from_pheval_result(
|
|
238
242
|
standardised_disease_result, all_files(results_dir_and_input.phenopacket_dir)
|
|
239
243
|
)
|
|
240
244
|
pheval_disease_result = read_standardised_result(standardised_disease_result)
|
|
@@ -10,7 +10,11 @@ from pheval.analyse.prioritisation_result_types import GenePrioritisationResult
|
|
|
10
10
|
from pheval.analyse.rank_stats import RankStats
|
|
11
11
|
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
|
|
12
12
|
from pheval.post_processing.post_processing import RankedPhEvalGeneResult
|
|
13
|
-
from pheval.utils.file_utils import
|
|
13
|
+
from pheval.utils.file_utils import (
|
|
14
|
+
all_files,
|
|
15
|
+
files_with_suffix,
|
|
16
|
+
obtain_phenopacket_path_from_pheval_result,
|
|
17
|
+
)
|
|
14
18
|
from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandCausativeGene, phenopacket_reader
|
|
15
19
|
|
|
16
20
|
|
|
@@ -226,7 +230,7 @@ def assess_phenopacket_gene_prioritisation(
|
|
|
226
230
|
gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.
|
|
227
231
|
gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
|
|
228
232
|
"""
|
|
229
|
-
phenopacket_path =
|
|
233
|
+
phenopacket_path = obtain_phenopacket_path_from_pheval_result(
|
|
230
234
|
standardised_gene_result, all_files(results_dir_and_input.phenopacket_dir)
|
|
231
235
|
)
|
|
232
236
|
pheval_gene_result = read_standardised_result(standardised_gene_result)
|
pheval/analyse/generate_plots.py
CHANGED
|
@@ -5,6 +5,7 @@ import matplotlib
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import seaborn as sns
|
|
7
7
|
from matplotlib import pyplot as plt
|
|
8
|
+
from sklearn.metrics import auc, precision_recall_curve, roc_curve
|
|
8
9
|
|
|
9
10
|
from pheval.analyse.benchmark_generator import (
|
|
10
11
|
BenchmarkRunOutputGenerator,
|
|
@@ -357,6 +358,82 @@ class PlotGenerator:
|
|
|
357
358
|
]
|
|
358
359
|
)
|
|
359
360
|
|
|
361
|
+
def generate_roc_curve(
    self,
    benchmarking_results: List[BenchmarkRunResults],
    benchmark_generator: BenchmarkRunOutputGenerator,
):
    """
    Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results.

    One curve is drawn per benchmark run from the labels and scores held in its
    binary classification stats, and the figure is saved as
    ``<prioritisation_type_file_prefix>_roc_curve.svg``.

    Args:
        benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
        benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
    """
    # Start from a fresh figure (as generate_precision_recall does) so the curves are not
    # drawn onto whichever figure happens to be current.
    plt.figure()
    for i, benchmark_result in enumerate(benchmarking_results):
        stats = benchmark_result.binary_classification_stats
        # The thresholds returned by roc_curve are not needed for plotting.
        fpr, tpr, _ = roc_curve(stats.labels, stats.scores, pos_label=1)
        roc_auc = auc(fpr, tpr)

        plt.plot(
            fpr,
            tpr,
            label=f"{self.return_benchmark_name(benchmark_result)} ROC Curve (AUC = {roc_auc:.2f})",
            color=self.palette_hex_codes[i],
        )

    # Chance-level reference diagonal; without explicit data points nothing is drawn.
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Receiver Operating Characteristic (ROC) Curve")
    plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
    plt.savefig(
        f"{benchmark_generator.prioritisation_type_file_prefix}_roc_curve.svg",
        format="svg",
        bbox_inches="tight",
    )
|
|
398
|
+
|
|
399
|
+
def generate_precision_recall(
    self,
    benchmarking_results: List[BenchmarkRunResults],
    benchmark_generator: BenchmarkRunOutputGenerator,
):
    """
    Generate and plot Precision-Recall curves for binary classification benchmark results.

    One curve is drawn per benchmark run from the labels and scores held in its
    binary classification stats, and the figure is saved as
    ``<prioritisation_type_file_prefix>_precision_recall_curve.svg``.

    Args:
        benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
        benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
    """
    plt.figure()
    for i, benchmark_result in enumerate(benchmarking_results):
        stats = benchmark_result.binary_classification_stats
        # The thresholds returned by precision_recall_curve are not needed for plotting.
        precision, recall, _ = precision_recall_curve(stats.labels, stats.scores)
        precision_recall_auc = auc(recall, precision)
        plt.plot(
            recall,
            precision,
            label=f"{self.return_benchmark_name(benchmark_result)} Precision-Recall Curve "
            f"(AUC = {precision_recall_auc:.2f})",
            color=self.palette_hex_codes[i],
        )

    # NOTE(review): the previous data-less plt.plot(linestyle="--", color="gray") call drew
    # nothing and has been dropped; add an explicit baseline here if one is wanted.
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision-Recall Curve")
    plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
    plt.savefig(
        f"{benchmark_generator.prioritisation_type_file_prefix}_precision_recall_curve.svg",
        format="svg",
        bbox_inches="tight",
    )
|
|
436
|
+
|
|
360
437
|
def generate_non_cumulative_bar(
|
|
361
438
|
self,
|
|
362
439
|
benchmarking_results: List[BenchmarkRunResults],
|
|
@@ -405,6 +482,7 @@ def generate_plots(
|
|
|
405
482
|
benchmark_generator: BenchmarkRunOutputGenerator,
|
|
406
483
|
plot_type: str,
|
|
407
484
|
title: str = None,
|
|
485
|
+
generate_from_tsv: bool = False,
|
|
408
486
|
) -> None:
|
|
409
487
|
"""
|
|
410
488
|
Generate summary statistics bar plots for prioritisation.
|
|
@@ -416,8 +494,12 @@ def generate_plots(
|
|
|
416
494
|
benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
|
|
417
495
|
plot_type (str): Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").
|
|
418
496
|
title (str, optional): Title for the generated plot. Defaults to None.
|
|
497
|
+
generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False.
|
|
419
498
|
"""
|
|
420
499
|
plot_generator = PlotGenerator()
|
|
500
|
+
if not generate_from_tsv:
|
|
501
|
+
plot_generator.generate_roc_curve(benchmarking_results, benchmark_generator)
|
|
502
|
+
plot_generator.generate_precision_recall(benchmarking_results, benchmark_generator)
|
|
421
503
|
if plot_type == "bar_stacked":
|
|
422
504
|
plot_generator.generate_stacked_bar_plot(benchmarking_results, benchmark_generator, title)
|
|
423
505
|
elif plot_type == "bar_cumulative":
|
|
@@ -462,4 +544,4 @@ def generate_plots_from_benchmark_summary_tsv(
|
|
|
462
544
|
raise ValueError(
|
|
463
545
|
"Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)"
|
|
464
546
|
)
|
|
465
|
-
generate_plots(benchmarking_results, benchmark_generator, plot_type, title)
|
|
547
|
+
generate_plots(benchmarking_results, benchmark_generator, plot_type, title, True)
|
|
@@ -3,6 +3,7 @@ from collections import defaultdict
|
|
|
3
3
|
from copy import deepcopy
|
|
4
4
|
from typing import List
|
|
5
5
|
|
|
6
|
+
import numpy as np
|
|
6
7
|
import pandas as pd
|
|
7
8
|
|
|
8
9
|
from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
|
|
@@ -40,7 +41,19 @@ class RankComparisonGenerator:
|
|
|
40
41
|
pd.DataFrame: DataFrame containing the calculated rank differences.
|
|
41
42
|
"""
|
|
42
43
|
comparison_df = self._generate_dataframe()
|
|
43
|
-
comparison_df["
|
|
44
|
+
comparison_df["rank_change"] = comparison_df.iloc[:, 2] - comparison_df.iloc[:, 3]
|
|
45
|
+
comparison_df["rank_change"] = np.where(
|
|
46
|
+
(comparison_df.iloc[:, 2] == 0) & (comparison_df.iloc[:, 3] != 0),
|
|
47
|
+
"GAINED",
|
|
48
|
+
np.where(
|
|
49
|
+
(comparison_df.iloc[:, 3] == 0) & (comparison_df.iloc[:, 2] != 0),
|
|
50
|
+
"LOST",
|
|
51
|
+
comparison_df["rank_change"],
|
|
52
|
+
),
|
|
53
|
+
)
|
|
54
|
+
comparison_df["rank_change"] = comparison_df["rank_change"].apply(
|
|
55
|
+
lambda x: int(x) if str(x).lstrip("-").isdigit() else x
|
|
56
|
+
)
|
|
44
57
|
return comparison_df
|
|
45
58
|
|
|
46
59
|
def generate_output(self, prefix: str, suffix: str) -> None:
|
|
@@ -10,7 +10,11 @@ from pheval.analyse.prioritisation_result_types import VariantPrioritisationResu
|
|
|
10
10
|
from pheval.analyse.rank_stats import RankStats
|
|
11
11
|
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
|
|
12
12
|
from pheval.post_processing.post_processing import RankedPhEvalVariantResult
|
|
13
|
-
from pheval.utils.file_utils import
|
|
13
|
+
from pheval.utils.file_utils import (
|
|
14
|
+
all_files,
|
|
15
|
+
files_with_suffix,
|
|
16
|
+
obtain_phenopacket_path_from_pheval_result,
|
|
17
|
+
)
|
|
14
18
|
from pheval.utils.phenopacket_utils import GenomicVariant, PhenopacketUtil, phenopacket_reader
|
|
15
19
|
|
|
16
20
|
|
|
@@ -228,7 +232,7 @@ def assess_phenopacket_variant_prioritisation(
|
|
|
228
232
|
variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
|
|
229
233
|
variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
|
|
230
234
|
"""
|
|
231
|
-
phenopacket_path =
|
|
235
|
+
phenopacket_path = obtain_phenopacket_path_from_pheval_result(
|
|
232
236
|
standardised_variant_result, all_files(results_dir_and_input.phenopacket_dir)
|
|
233
237
|
)
|
|
234
238
|
proband_causative_variants = _obtain_causative_variants(phenopacket_path)
|
pheval/cli_pheval_utils.py
CHANGED
|
@@ -253,22 +253,19 @@ def update_phenopackets_command(
|
|
|
253
253
|
mutually_exclusive=["phenopacket_path"],
|
|
254
254
|
)
|
|
255
255
|
@click.option(
|
|
256
|
-
"--template-vcf
|
|
257
|
-
"-
|
|
258
|
-
cls=MutuallyExclusiveOptionError,
|
|
256
|
+
"--hg19-template-vcf",
|
|
257
|
+
"-hg19",
|
|
259
258
|
metavar="PATH",
|
|
260
259
|
required=False,
|
|
261
|
-
help="Template VCF file",
|
|
262
|
-
mutually_exclusive=["vcf_dir"],
|
|
260
|
+
help="Template hg19 VCF file",
|
|
263
261
|
type=Path,
|
|
264
262
|
)
|
|
265
263
|
@click.option(
|
|
266
|
-
"--vcf
|
|
267
|
-
"-
|
|
268
|
-
cls=MutuallyExclusiveOptionError,
|
|
264
|
+
"--hg38-template-vcf",
|
|
265
|
+
"-hg38",
|
|
269
266
|
metavar="PATH",
|
|
270
|
-
|
|
271
|
-
|
|
267
|
+
required=False,
|
|
268
|
+
help="Template hg38 VCF file",
|
|
272
269
|
type=Path,
|
|
273
270
|
)
|
|
274
271
|
@click.option(
|
|
@@ -284,13 +281,22 @@ def create_spiked_vcfs_command(
|
|
|
284
281
|
phenopacket_path: Path,
|
|
285
282
|
phenopacket_dir: Path,
|
|
286
283
|
output_dir: Path,
|
|
287
|
-
|
|
288
|
-
|
|
284
|
+
hg19_template_vcf: Path = None,
|
|
285
|
+
hg38_template_vcf: Path = None,
|
|
289
286
|
):
|
|
290
|
-
"""
|
|
287
|
+
"""
|
|
288
|
+
Create spiked VCF from either a Phenopacket or a Phenopacket directory.
|
|
289
|
+
|
|
290
|
+
Args:
|
|
291
|
+
phenopacket_path (Path): Path to a single Phenopacket file (optional).
|
|
292
|
+
phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional).
|
|
293
|
+
output_dir (Path): The directory to store the generated spiked VCF file(s).
|
|
294
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
295
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
296
|
+
"""
|
|
291
297
|
if phenopacket_path is None and phenopacket_dir is None:
|
|
292
298
|
raise InputError("Either a phenopacket or phenopacket directory must be specified")
|
|
293
|
-
spike_vcfs(output_dir, phenopacket_path, phenopacket_dir,
|
|
299
|
+
spike_vcfs(output_dir, phenopacket_path, phenopacket_dir, hg19_template_vcf, hg38_template_vcf)
|
|
294
300
|
|
|
295
301
|
|
|
296
302
|
@click.command()
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import gzip
|
|
2
2
|
import logging
|
|
3
3
|
import re
|
|
4
|
-
import secrets
|
|
5
4
|
import urllib.parse
|
|
6
5
|
from copy import copy
|
|
7
6
|
from dataclasses import dataclass
|
|
@@ -10,6 +9,8 @@ from typing import List, Union
|
|
|
10
9
|
|
|
11
10
|
from phenopackets import Family, File, Phenopacket
|
|
12
11
|
|
|
12
|
+
from pheval.prepare.custom_exceptions import InputError
|
|
13
|
+
from pheval.utils.file_utils import files_with_suffix, is_gzipped
|
|
13
14
|
from pheval.utils.phenopacket_utils import (
|
|
14
15
|
IncompatibleGenomeAssemblyError,
|
|
15
16
|
PhenopacketRebuilder,
|
|
@@ -19,9 +20,6 @@ from pheval.utils.phenopacket_utils import (
|
|
|
19
20
|
write_phenopacket,
|
|
20
21
|
)
|
|
21
22
|
|
|
22
|
-
from .custom_exceptions import InputError
|
|
23
|
-
from ..utils.file_utils import all_files, files_with_suffix, is_gzipped
|
|
24
|
-
|
|
25
23
|
info_log = logging.getLogger("info")
|
|
26
24
|
|
|
27
25
|
genome_assemblies = {
|
|
@@ -91,39 +89,6 @@ class VcfHeader:
|
|
|
91
89
|
chr_status: bool
|
|
92
90
|
|
|
93
91
|
|
|
94
|
-
class VcfPicker:
|
|
95
|
-
"""Choose a VCF file randomly from a directory if provided, otherwise selects the single template."""
|
|
96
|
-
|
|
97
|
-
def __init__(self, template_vcf: Path or None, vcf_dir: Path or None):
|
|
98
|
-
"""
|
|
99
|
-
Initialise the VcfPicker.
|
|
100
|
-
|
|
101
|
-
Args:
|
|
102
|
-
template_vcf (Path or None): The path to a template VCF file, or None if not provided.
|
|
103
|
-
vcf_dir (Path or None): The directory containing VCF files, or None if not provided.
|
|
104
|
-
"""
|
|
105
|
-
self.template_vcf = template_vcf
|
|
106
|
-
self.vcf_dir = vcf_dir
|
|
107
|
-
|
|
108
|
-
def pick_file_from_dir(self) -> Path:
|
|
109
|
-
"""
|
|
110
|
-
Selects a VCF file from a directory at random.
|
|
111
|
-
|
|
112
|
-
Returns:
|
|
113
|
-
Path: The randomly selected VCF file path from the directory.
|
|
114
|
-
"""
|
|
115
|
-
return secrets.choice(all_files(self.vcf_dir))
|
|
116
|
-
|
|
117
|
-
def pick_file(self) -> Path:
|
|
118
|
-
"""
|
|
119
|
-
Select a VCF file randomly when given a directory; if not, the template VCF is assigned.
|
|
120
|
-
|
|
121
|
-
Returns:
|
|
122
|
-
Path: The selected VCF file path.
|
|
123
|
-
"""
|
|
124
|
-
return self.pick_file_from_dir() if self.vcf_dir is not None else self.template_vcf
|
|
125
|
-
|
|
126
|
-
|
|
127
92
|
def read_vcf(vcf_file: Path) -> List[str]:
|
|
128
93
|
"""
|
|
129
94
|
Read the contents of a VCF file into memory, handling both uncompressed and gzipped files.
|
|
@@ -206,6 +171,72 @@ class VcfHeaderParser:
|
|
|
206
171
|
return VcfHeader(sample_id, assembly, chr_status)
|
|
207
172
|
|
|
208
173
|
|
|
174
|
+
@dataclass
class VcfFile:
    """
    Represents a VCF file with its name, contents, and header information.

    All fields default to None, so an instance may be created empty.

    Attributes:
        vcf_file_name (str): The name of the VCF file.
        vcf_contents (List[str]): The contents of the VCF file.
        vcf_header (VcfHeader): The parsed header information of the VCF file.
    """

    vcf_file_name: Union[str, None] = None
    vcf_contents: Union[List[str], None] = None
    vcf_header: Union[VcfHeader, None] = None

    @staticmethod
    def populate_fields(template_vcf: Path) -> "VcfFile":
        """
        Build a VcfFile from a template VCF file.

        The file is read once; its contents are stored and also used to parse the header.

        Args:
            template_vcf (Path): The path to the template VCF file.

        Returns:
            VcfFile: An instance of VcfFile with populated fields.
        """
        contents = read_vcf(template_vcf)
        return VcfFile(template_vcf.name, contents, VcfHeaderParser(contents).parse_vcf_header())
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def select_vcf_template(
    phenopacket_path: Path,
    proband_causative_variants: List[ProbandCausativeVariant],
    hg19_vcf_info: Union[VcfFile, None],
    hg38_vcf_info: Union[VcfFile, None],
) -> VcfFile:
    """
    Select the appropriate VCF template based on the assembly information of the proband causative variants.

    Only the assembly of the first causative variant is inspected.

    Args:
        phenopacket_path (Path): The path to the Phenopacket file.
        proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband.
        hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf, or None if not supplied.
        hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf, or None if not supplied.

    Returns:
        VcfFile: The selected VCF template file based on the assembly information of the proband causative variants.

    Raises:
        InputError: If there are no causative variants, or the template for the required assembly is missing.
        IncompatibleGenomeAssemblyError: If the assembly is neither hg19/GRCh37 nor hg38/GRCh38.
    """
    # Fail with a descriptive error instead of a bare IndexError on an empty list.
    if not proband_causative_variants:
        raise InputError(f"No causative variants found in {phenopacket_path}")
    assembly = proband_causative_variants[0].assembly
    if assembly in ("hg19", "GRCh37"):
        if hg19_vcf_info:
            return hg19_vcf_info
        raise InputError("Must specify hg19 template VCF!")
    if assembly in ("hg38", "GRCh38"):
        if hg38_vcf_info:
            return hg38_vcf_info
        raise InputError("Must specify hg38 template VCF!")
    raise IncompatibleGenomeAssemblyError(assembly, phenopacket_path)
|
|
238
|
+
|
|
239
|
+
|
|
209
240
|
def check_variant_assembly(
|
|
210
241
|
proband_causative_variants: list[ProbandCausativeVariant],
|
|
211
242
|
vcf_header: VcfHeader,
|
|
@@ -229,7 +260,13 @@ def check_variant_assembly(
|
|
|
229
260
|
raise ValueError("Too many genome assemblies!")
|
|
230
261
|
if phenopacket_assembly[0] not in compatible_genome_assembly:
|
|
231
262
|
raise IncompatibleGenomeAssemblyError(phenopacket_assembly, phenopacket_path)
|
|
232
|
-
if
|
|
263
|
+
if (
|
|
264
|
+
phenopacket_assembly[0] in {"hg19", "GRCh37"}
|
|
265
|
+
and vcf_header.assembly not in {"hg19", "GRCh37"}
|
|
266
|
+
) or (
|
|
267
|
+
phenopacket_assembly[0] in {"hg38", "GRCh38"}
|
|
268
|
+
and vcf_header.assembly not in {"hg38", "GRCh38"}
|
|
269
|
+
):
|
|
233
270
|
raise IncompatibleGenomeAssemblyError(
|
|
234
271
|
assembly=phenopacket_assembly, phenopacket=phenopacket_path
|
|
235
272
|
)
|
|
@@ -387,7 +424,8 @@ class VcfWriter:
|
|
|
387
424
|
def spike_vcf_contents(
|
|
388
425
|
phenopacket: Union[Phenopacket, Family],
|
|
389
426
|
phenopacket_path: Path,
|
|
390
|
-
|
|
427
|
+
hg19_vcf_info: VcfFile,
|
|
428
|
+
hg38_vcf_info: VcfFile,
|
|
391
429
|
) -> tuple[str, List[str]]:
|
|
392
430
|
"""
|
|
393
431
|
Spike VCF records with variants obtained from a Phenopacket or Family.
|
|
@@ -395,22 +433,28 @@ def spike_vcf_contents(
|
|
|
395
433
|
Args:
|
|
396
434
|
phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants.
|
|
397
435
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
398
|
-
|
|
436
|
+
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
437
|
+
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
399
438
|
|
|
400
439
|
Returns:
|
|
401
440
|
A tuple containing:
|
|
402
441
|
assembly (str): The genome assembly information extracted from VCF header.
|
|
403
442
|
modified_vcf_contents (List[str]): Modified VCF records with spiked variants.
|
|
404
443
|
"""
|
|
405
|
-
# this is a separate function to a click command as it will fail if annotated with click annotations
|
|
406
|
-
# and referenced from another click command
|
|
407
444
|
phenopacket_causative_variants = PhenopacketUtil(phenopacket).causative_variants()
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
445
|
+
chosen_template_vcf = select_vcf_template(
|
|
446
|
+
phenopacket_path, phenopacket_causative_variants, hg19_vcf_info, hg38_vcf_info
|
|
447
|
+
)
|
|
448
|
+
check_variant_assembly(
|
|
449
|
+
phenopacket_causative_variants, chosen_template_vcf.vcf_header, phenopacket_path
|
|
450
|
+
)
|
|
411
451
|
return (
|
|
412
|
-
vcf_header.assembly,
|
|
413
|
-
VcfSpiker(
|
|
452
|
+
chosen_template_vcf.vcf_header.assembly,
|
|
453
|
+
VcfSpiker(
|
|
454
|
+
chosen_template_vcf.vcf_contents,
|
|
455
|
+
phenopacket_causative_variants,
|
|
456
|
+
chosen_template_vcf.vcf_header,
|
|
457
|
+
).construct_vcf(),
|
|
414
458
|
)
|
|
415
459
|
|
|
416
460
|
|
|
@@ -418,7 +462,8 @@ def generate_spiked_vcf_file(
|
|
|
418
462
|
output_dir: Path,
|
|
419
463
|
phenopacket: Union[Phenopacket, Family],
|
|
420
464
|
phenopacket_path: Path,
|
|
421
|
-
|
|
465
|
+
hg19_vcf_info: VcfFile,
|
|
466
|
+
hg38_vcf_info: VcfFile,
|
|
422
467
|
) -> File:
|
|
423
468
|
"""
|
|
424
469
|
Write spiked VCF contents to a new file.
|
|
@@ -427,21 +472,17 @@ def generate_spiked_vcf_file(
|
|
|
427
472
|
output_dir (Path): Path to the directory to store the generated file.
|
|
428
473
|
phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants.
|
|
429
474
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
430
|
-
|
|
431
|
-
|
|
475
|
+
hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf.
|
|
476
|
+
hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf.
|
|
432
477
|
Returns:
|
|
433
478
|
File: The generated File object representing the newly created spiked VCF file.
|
|
434
479
|
"""
|
|
435
480
|
output_dir.mkdir(exist_ok=True)
|
|
436
481
|
info_log.info(f" Created a directory {output_dir}")
|
|
437
482
|
vcf_assembly, spiked_vcf = spike_vcf_contents(
|
|
438
|
-
phenopacket, phenopacket_path,
|
|
439
|
-
)
|
|
440
|
-
spiked_vcf_path = (
|
|
441
|
-
output_dir.joinpath(phenopacket_path.name.replace(".json", ".vcf.gz"))
|
|
442
|
-
if is_gzipped(chosen_template_vcf)
|
|
443
|
-
else output_dir.joinpath(phenopacket_path.name.replace(".json", ".vcf"))
|
|
483
|
+
phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info
|
|
444
484
|
)
|
|
485
|
+
spiked_vcf_path = output_dir.joinpath(phenopacket_path.name.replace(".json", ".vcf.gz"))
|
|
445
486
|
VcfWriter(spiked_vcf, spiked_vcf_path).write_vcf_file()
|
|
446
487
|
return File(
|
|
447
488
|
uri=urllib.parse.unquote(spiked_vcf_path.as_uri()),
|
|
@@ -449,8 +490,19 @@ def generate_spiked_vcf_file(
|
|
|
449
490
|
)
|
|
450
491
|
|
|
451
492
|
|
|
493
|
+
def spike_and_update_phenopacket(
    hg19_vcf_info: Union[VcfFile, None],
    hg38_vcf_info: Union[VcfFile, None],
    output_dir: Path,
    phenopacket_path: Path,
) -> None:
    """
    Generate a spiked VCF for a Phenopacket and record its path in the Phenopacket.

    The Phenopacket is read, a spiked VCF is generated in the output directory, and the
    Phenopacket at ``phenopacket_path`` is rewritten with the spiked VCF file added.

    Args:
        hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf, or None if not supplied.
        hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf, or None if not supplied.
        output_dir (Path): The directory to store the generated spiked VCF file.
        phenopacket_path (Path): Path to the Phenopacket file; it is overwritten in place.
    """
    phenopacket = phenopacket_reader(phenopacket_path)
    spiked_vcf_file_message = generate_spiked_vcf_file(
        output_dir, phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info
    )
    updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
        spiked_vcf_file_message
    )
    write_phenopacket(updated_phenopacket, phenopacket_path)
|
|
502
|
+
|
|
503
|
+
|
|
452
504
|
def create_spiked_vcf(
|
|
453
|
-
output_dir: Path, phenopacket_path: Path,
|
|
505
|
+
output_dir: Path, phenopacket_path: Path, hg19_template_vcf: Path, hg38_template_vcf: Path
|
|
454
506
|
) -> None:
|
|
455
507
|
"""
|
|
456
508
|
Create a spiked VCF for a Phenopacket.
|
|
@@ -458,27 +510,21 @@ def create_spiked_vcf(
|
|
|
458
510
|
Args:
|
|
459
511
|
output_dir (Path): The directory to store the generated spiked VCF file.
|
|
460
512
|
phenopacket_path (Path): Path to the Phenopacket file.
|
|
461
|
-
|
|
462
|
-
|
|
513
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
514
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
463
515
|
|
|
464
516
|
Raises:
|
|
465
|
-
InputError: If both
|
|
517
|
+
InputError: If both hg19_template_vcf and hg38_template_vcf are None.
|
|
466
518
|
"""
|
|
467
|
-
if
|
|
468
|
-
raise InputError("Either a
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
output_dir, phenopacket, phenopacket_path, vcf_file_path
|
|
473
|
-
)
|
|
474
|
-
updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
|
|
475
|
-
spiked_vcf_file_message
|
|
476
|
-
)
|
|
477
|
-
write_phenopacket(updated_phenopacket, phenopacket_path)
|
|
519
|
+
if hg19_template_vcf is None and hg38_template_vcf is None:
|
|
520
|
+
raise InputError("Either a hg19 template vcf or hg38 template vcf must be specified")
|
|
521
|
+
hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
|
|
522
|
+
hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
|
|
523
|
+
spike_and_update_phenopacket(hg19_vcf_info, hg38_vcf_info, output_dir, phenopacket_path)
|
|
478
524
|
|
|
479
525
|
|
|
480
526
|
def create_spiked_vcfs(
|
|
481
|
-
output_dir: Path, phenopacket_dir: Path,
|
|
527
|
+
output_dir: Path, phenopacket_dir: Path, hg19_template_vcf: Path, hg38_template_vcf: Path
|
|
482
528
|
) -> None:
|
|
483
529
|
"""
|
|
484
530
|
Create a spiked VCF for a directory of Phenopackets.
|
|
@@ -486,35 +532,26 @@ def create_spiked_vcfs(
|
|
|
486
532
|
Args:
|
|
487
533
|
output_dir (Path): The directory to store the generated spiked VCF file.
|
|
488
534
|
phenopacket_dir (Path): Path to the Phenopacket directory.
|
|
489
|
-
|
|
490
|
-
|
|
535
|
+
hg19_template_vcf (Path): Path to the template hg19 VCF file (optional).
|
|
536
|
+
hg38_template_vcf (Path): Path to the template hg38 VCF file (optional).
|
|
491
537
|
|
|
492
538
|
Raises:
|
|
493
|
-
InputError: If both
|
|
539
|
+
InputError: If both hg19_template_vcf and hg38_template_vcf are None.
|
|
494
540
|
"""
|
|
495
|
-
if
|
|
496
|
-
raise InputError("Either a
|
|
541
|
+
if hg19_template_vcf is None and hg38_template_vcf is None:
|
|
542
|
+
raise InputError("Either a hg19 template vcf or hg38 template vcf must be specified")
|
|
543
|
+
hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
|
|
544
|
+
hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
|
|
497
545
|
for phenopacket_path in files_with_suffix(phenopacket_dir, ".json"):
|
|
498
|
-
|
|
499
|
-
phenopacket = phenopacket_reader(phenopacket_path)
|
|
500
|
-
spiked_vcf_file_message = generate_spiked_vcf_file(
|
|
501
|
-
output_dir, phenopacket, phenopacket_path, vcf_file_path
|
|
502
|
-
)
|
|
503
|
-
updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
|
|
504
|
-
spiked_vcf_file_message
|
|
505
|
-
)
|
|
506
|
-
write_phenopacket(updated_phenopacket, phenopacket_path)
|
|
507
|
-
# or made a lambda one-liner for maximum wtf...
|
|
508
|
-
# [spike_vcf(path, output_dir, template_vcf, vcf_dir) for path in phenopacket_dir.iterdir() if path.suffix ==
|
|
509
|
-
# ".json"]
|
|
546
|
+
spike_and_update_phenopacket(hg19_vcf_info, hg38_vcf_info, output_dir, phenopacket_path)
|
|
510
547
|
|
|
511
548
|
|
|
512
549
|
def spike_vcfs(
|
|
513
550
|
output_dir: Path,
|
|
514
551
|
phenopacket_path: Path,
|
|
515
552
|
phenopacket_dir: Path,
|
|
516
|
-
|
|
517
|
-
|
|
553
|
+
hg19_template_vcf: Path,
|
|
554
|
+
hg38_template_vcf: Path,
|
|
518
555
|
) -> None:
|
|
519
556
|
"""
|
|
520
557
|
Create spiked VCF from either a Phenopacket or a Phenopacket directory.
|
|
@@ -523,10 +560,10 @@ def spike_vcfs(
|
|
|
523
560
|
output_dir (Path): The directory to store the generated spiked VCF file(s).
|
|
524
561
|
phenopacket_path (Path): Path to a single Phenopacket file (optional).
|
|
525
562
|
phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional).
|
|
526
|
-
|
|
527
|
-
|
|
563
|
+
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional).
|
|
564
|
+
hg38_template_vcf (Path): Path to the hg38 template VCF file (optional).
|
|
528
565
|
"""
|
|
529
566
|
if phenopacket_path is not None:
|
|
530
|
-
create_spiked_vcf(output_dir, phenopacket_path,
|
|
567
|
+
create_spiked_vcf(output_dir, phenopacket_path, hg19_template_vcf, hg38_template_vcf)
|
|
531
568
|
elif phenopacket_dir is not None:
|
|
532
|
-
create_spiked_vcfs(output_dir, phenopacket_dir,
|
|
569
|
+
create_spiked_vcfs(output_dir, phenopacket_dir, hg19_template_vcf, hg38_template_vcf)
|
pheval/utils/file_utils.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import difflib
|
|
2
1
|
import itertools
|
|
3
2
|
import re
|
|
4
3
|
import unicodedata
|
|
@@ -71,23 +70,33 @@ def normalise_file_name(file_path: Path) -> str:
|
|
|
71
70
|
return re.sub("[\u0300-\u036f]", "", normalised_file_name)
|
|
72
71
|
|
|
73
72
|
|
|
74
|
-
def
|
|
73
|
+
def obtain_phenopacket_path_from_pheval_result(
|
|
74
|
+
pheval_result_path: Path, phenopacket_paths: list[Path]
|
|
75
|
+
) -> Path:
|
|
75
76
|
"""
|
|
76
|
-
Obtains the
|
|
77
|
-
and a list of full paths of
|
|
77
|
+
Obtains the phenopacket file name when given a pheval result file name
|
|
78
|
+
and a list of full paths of phenopackets to be queried.
|
|
78
79
|
|
|
79
80
|
Args:
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
pheval_result_path (Path): The PhEval result.
|
|
82
|
+
phenopacket_paths (list[Path]): List of full paths of phenopackets to be queried.
|
|
82
83
|
|
|
83
84
|
Returns:
|
|
84
|
-
Path: The
|
|
85
|
+
Path: The matching phenopacket file path from the provided list.
|
|
85
86
|
"""
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
87
|
+
pheval_result_path_stem_stripped = pheval_result_path.stem.split("-pheval_")[0]
|
|
88
|
+
matching_phenopacket_paths = [
|
|
89
|
+
phenopacket_path
|
|
90
|
+
for phenopacket_path in phenopacket_paths
|
|
91
|
+
if phenopacket_path.stem == pheval_result_path_stem_stripped
|
|
92
|
+
]
|
|
93
|
+
if matching_phenopacket_paths:
|
|
94
|
+
return matching_phenopacket_paths[0]
|
|
95
|
+
else:
|
|
96
|
+
raise FileNotFoundError(
|
|
97
|
+
f"Unable to find matching phenopacket file named "
|
|
98
|
+
f"{pheval_result_path_stem_stripped}.json for {pheval_result_path.name}"
|
|
99
|
+
)
|
|
91
100
|
|
|
92
101
|
|
|
93
102
|
def ensure_file_exists(*files: str):
|
|
@@ -467,7 +467,9 @@ class PhenopacketUtil:
|
|
|
467
467
|
for i in pheno_interpretation:
|
|
468
468
|
for g in i.diagnosis.genomic_interpretations:
|
|
469
469
|
variant = GenomicVariant(
|
|
470
|
-
chrom=g.variant_interpretation.variation_descriptor.vcf_record.chrom
|
|
470
|
+
chrom=g.variant_interpretation.variation_descriptor.vcf_record.chrom.replace(
|
|
471
|
+
"chr", ""
|
|
472
|
+
),
|
|
471
473
|
pos=g.variant_interpretation.variation_descriptor.vcf_record.pos,
|
|
472
474
|
ref=g.variant_interpretation.variation_descriptor.vcf_record.ref,
|
|
473
475
|
alt=g.variant_interpretation.variation_descriptor.vcf_record.alt,
|
|
@@ -3,21 +3,21 @@ pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
3
3
|
pheval/analyse/analysis.py,sha256=ponm3P8nvzJNmcrNZ2_KudEhWSaWshd_Gd30D-aau8s,7743
|
|
4
4
|
pheval/analyse/benchmark_generator.py,sha256=AeuwbaPb4j_dyBGPRgEBxQk2NahDb5u4xHyFiqp5Fes,5943
|
|
5
5
|
pheval/analyse/benchmarking_data.py,sha256=aNZkWdmWemlnC1Tg35MtR60S9YC71QWS2rMuzkUc3w0,768
|
|
6
|
-
pheval/analyse/binary_classification_stats.py,sha256=
|
|
7
|
-
pheval/analyse/disease_prioritisation_analysis.py,sha256=
|
|
8
|
-
pheval/analyse/gene_prioritisation_analysis.py,sha256=
|
|
9
|
-
pheval/analyse/generate_plots.py,sha256=
|
|
10
|
-
pheval/analyse/generate_summary_outputs.py,sha256=
|
|
6
|
+
pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
|
|
7
|
+
pheval/analyse/disease_prioritisation_analysis.py,sha256=qadEVhBMtBgtjGCJLhNQA510F8Pd0Ll4NAQXoT23BYs,12649
|
|
8
|
+
pheval/analyse/gene_prioritisation_analysis.py,sha256=lAN171xfXqweK8ie6191s_6WPPGjZKJXL1Z0dIqp54k,12373
|
|
9
|
+
pheval/analyse/generate_plots.py,sha256=MFORnFTgoelYAahFlu3Dc3Rul4cwCg8Bloxe62vONSc,21350
|
|
10
|
+
pheval/analyse/generate_summary_outputs.py,sha256=s9pXMSW6xm4ZBe1aCd0UJSaFiKBvpUfPwJ2BI4qfTas,6591
|
|
11
11
|
pheval/analyse/parse_benchmark_summary.py,sha256=Y8uPTlHTEiaeVBOqxMcdOqjY3ZBtOS3DoRycL78Dzxg,2384
|
|
12
12
|
pheval/analyse/parse_pheval_result.py,sha256=j8YFVA0YXfySOkm8gMwrfIuV45DI9AX3ETn7h-r8ayE,1211
|
|
13
13
|
pheval/analyse/prioritisation_rank_recorder.py,sha256=EVe8DoEvvp0_WMAcjfVxmDGGRFPEELi7hEVjH3sIpLY,3223
|
|
14
14
|
pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
|
|
15
15
|
pheval/analyse/rank_stats.py,sha256=knj1tsKrly17QgtOUVpqA14UjbO99N3ydkWN4xU6c2k,15785
|
|
16
16
|
pheval/analyse/run_data_parser.py,sha256=HzBKsJL2skjmrRZdrF3VYzswtKNgbX6U5qhY_kqq9mA,1552
|
|
17
|
-
pheval/analyse/variant_prioritisation_analysis.py,sha256=
|
|
17
|
+
pheval/analyse/variant_prioritisation_analysis.py,sha256=ApmUeTW0cl_BPh7LusbApxtgjEXEkhuNFyh0DxKKpgU,12384
|
|
18
18
|
pheval/cli.py,sha256=4l9xZfxBfLCcm7PDdhMWgTvTKbQt5sJ2bYHf7kU1dO4,1493
|
|
19
19
|
pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
|
|
20
|
-
pheval/cli_pheval_utils.py,sha256=
|
|
20
|
+
pheval/cli_pheval_utils.py,sha256=i5rSLR_moti1VM0of4uOLdX7wbwUcM7spLO9zw6bHrc,17257
|
|
21
21
|
pheval/config_parser.py,sha256=lh-Dy_FflXJUnRC3HYaEdSvPAsNZWQZlEr1hHQigrTM,1227
|
|
22
22
|
pheval/constants.py,sha256=TWBgWOc05FGXFu63fs-hEHS2IJkLLAPHtMppiWBfBOg,349
|
|
23
23
|
pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r299Zo,1228
|
|
@@ -27,7 +27,7 @@ pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
|
|
|
27
27
|
pheval/post_processing/post_processing.py,sha256=2srdlw2D3qMh2B3PUSDvA6COYlbXINC08Wt4eccMZp8,16030
|
|
28
28
|
pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
pheval/prepare/create_noisy_phenopackets.py,sha256=UbBRWDD95BFHPv03VYx04v35AGwJ9ynLltYKqQJHbZ0,11236
|
|
30
|
-
pheval/prepare/create_spiked_vcf.py,sha256=
|
|
30
|
+
pheval/prepare/create_spiked_vcf.py,sha256=KZIyjtDDTqJj3hxL3u4YP6P0toA4RN1oPeDrzLMB2z4,20235
|
|
31
31
|
pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
|
|
32
32
|
pheval/prepare/update_phenopacket.py,sha256=36dLIUSO_4EakGkjVwlecu-he-lOPXMhoWoOkeRYMV4,4753
|
|
33
33
|
pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
|
|
@@ -45,12 +45,12 @@ pheval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
45
45
|
pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
|
|
46
46
|
pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
|
|
47
47
|
pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
|
|
48
|
-
pheval/utils/file_utils.py,sha256=
|
|
49
|
-
pheval/utils/phenopacket_utils.py,sha256=
|
|
48
|
+
pheval/utils/file_utils.py,sha256=9HoCmtF73D3wY6bBhFLefMBI5uhvCe_meZeHXQzF_ts,4640
|
|
49
|
+
pheval/utils/phenopacket_utils.py,sha256=iWYWfneaI47lx5w56-ILwvyLRaxHVoHnJ3EbVrja8-k,24444
|
|
50
50
|
pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
|
|
51
51
|
pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
|
|
52
|
-
pheval-0.3.
|
|
53
|
-
pheval-0.3.
|
|
54
|
-
pheval-0.3.
|
|
55
|
-
pheval-0.3.
|
|
56
|
-
pheval-0.3.
|
|
52
|
+
pheval-0.3.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
53
|
+
pheval-0.3.3.dist-info/METADATA,sha256=I0Njs6iBWs0Ag1bf81pNaRKYPcw8dkRKaG454m-6bJA,1810
|
|
54
|
+
pheval-0.3.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
55
|
+
pheval-0.3.3.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
|
|
56
|
+
pheval-0.3.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|