pheval 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/analyse/disease_prioritisation_analysis.py +7 -14
- pheval/analyse/gene_prioritisation_analysis.py +7 -13
- pheval/analyse/parse_pheval_result.py +8 -1
- pheval/analyse/variant_prioritisation_analysis.py +8 -15
- pheval/post_processing/post_processing.py +21 -101
- pheval/prepare/create_spiked_vcf.py +32 -13
- pheval/prepare/prepare_corpus.py +5 -0
- pheval/utils/file_utils.py +0 -29
- {pheval-0.3.5.dist-info → pheval-0.3.7.dist-info}/METADATA +1 -1
- {pheval-0.3.5.dist-info → pheval-0.3.7.dist-info}/RECORD +13 -13
- {pheval-0.3.5.dist-info → pheval-0.3.7.dist-info}/LICENSE +0 -0
- {pheval-0.3.5.dist-info → pheval-0.3.7.dist-info}/WHEEL +0 -0
- {pheval-0.3.5.dist-info → pheval-0.3.7.dist-info}/entry_points.txt +0 -0
|
@@ -10,11 +10,7 @@ from pheval.analyse.prioritisation_result_types import DiseasePrioritisationResu
|
|
|
10
10
|
from pheval.analyse.rank_stats import RankStats
|
|
11
11
|
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
|
|
12
12
|
from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
|
|
13
|
-
from pheval.utils.file_utils import
|
|
14
|
-
all_files,
|
|
15
|
-
files_with_suffix,
|
|
16
|
-
obtain_phenopacket_path_from_pheval_result,
|
|
17
|
-
)
|
|
13
|
+
from pheval.utils.file_utils import all_files
|
|
18
14
|
from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandDisease, phenopacket_reader
|
|
19
15
|
|
|
20
16
|
|
|
@@ -217,7 +213,7 @@ def _obtain_causative_diseases(phenopacket_path: Path) -> List[ProbandDisease]:
|
|
|
217
213
|
|
|
218
214
|
|
|
219
215
|
def assess_phenopacket_disease_prioritisation(
|
|
220
|
-
|
|
216
|
+
phenopacket_path: Path,
|
|
221
217
|
score_order: str,
|
|
222
218
|
results_dir_and_input: TrackInputOutputDirectories,
|
|
223
219
|
threshold: float,
|
|
@@ -230,7 +226,7 @@ def assess_phenopacket_disease_prioritisation(
|
|
|
230
226
|
against the recorded causative diseases for a proband in the Phenopacket.
|
|
231
227
|
|
|
232
228
|
Args:
|
|
233
|
-
|
|
229
|
+
phenopacket_path (Path): Path to the Phenopacket.
|
|
234
230
|
score_order (str): The order in which scores are arranged, either ascending or descending.
|
|
235
231
|
results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
|
|
236
232
|
threshold (float): Threshold for assessment.
|
|
@@ -238,8 +234,8 @@ def assess_phenopacket_disease_prioritisation(
|
|
|
238
234
|
disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
|
|
239
235
|
disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
|
|
240
236
|
"""
|
|
241
|
-
|
|
242
|
-
|
|
237
|
+
standardised_disease_result = results_dir_and_input.results_dir.joinpath(
|
|
238
|
+
f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
|
|
243
239
|
)
|
|
244
240
|
pheval_disease_result = read_standardised_result(standardised_disease_result)
|
|
245
241
|
proband_diseases = _obtain_causative_diseases(phenopacket_path)
|
|
@@ -276,12 +272,9 @@ def benchmark_disease_prioritisation(
|
|
|
276
272
|
"""
|
|
277
273
|
disease_rank_stats = RankStats()
|
|
278
274
|
disease_binary_classification_stats = BinaryClassificationStats()
|
|
279
|
-
for
|
|
280
|
-
results_directory_and_input.results_dir.joinpath("pheval_disease_results/"),
|
|
281
|
-
".tsv",
|
|
282
|
-
):
|
|
275
|
+
for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
|
|
283
276
|
assess_phenopacket_disease_prioritisation(
|
|
284
|
-
|
|
277
|
+
phenopacket_path,
|
|
285
278
|
score_order,
|
|
286
279
|
results_directory_and_input,
|
|
287
280
|
threshold,
|
|
@@ -10,11 +10,7 @@ from pheval.analyse.prioritisation_result_types import GenePrioritisationResult
|
|
|
10
10
|
from pheval.analyse.rank_stats import RankStats
|
|
11
11
|
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
|
|
12
12
|
from pheval.post_processing.post_processing import RankedPhEvalGeneResult
|
|
13
|
-
from pheval.utils.file_utils import
|
|
14
|
-
all_files,
|
|
15
|
-
files_with_suffix,
|
|
16
|
-
obtain_phenopacket_path_from_pheval_result,
|
|
17
|
-
)
|
|
13
|
+
from pheval.utils.file_utils import all_files
|
|
18
14
|
from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandCausativeGene, phenopacket_reader
|
|
19
15
|
|
|
20
16
|
|
|
@@ -209,7 +205,7 @@ def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene
|
|
|
209
205
|
|
|
210
206
|
|
|
211
207
|
def assess_phenopacket_gene_prioritisation(
|
|
212
|
-
|
|
208
|
+
phenopacket_path: Path,
|
|
213
209
|
score_order: str,
|
|
214
210
|
results_dir_and_input: TrackInputOutputDirectories,
|
|
215
211
|
threshold: float,
|
|
@@ -222,7 +218,7 @@ def assess_phenopacket_gene_prioritisation(
|
|
|
222
218
|
against the recorded causative genes for a proband in the Phenopacket.
|
|
223
219
|
|
|
224
220
|
Args:
|
|
225
|
-
|
|
221
|
+
phenopacket_path (Path): Path to the Phenopacket.
|
|
226
222
|
score_order (str): The order in which scores are arranged, either ascending or descending.
|
|
227
223
|
results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
|
|
228
224
|
threshold (float): Threshold for assessment.
|
|
@@ -230,8 +226,8 @@ def assess_phenopacket_gene_prioritisation(
|
|
|
230
226
|
gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.
|
|
231
227
|
gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
|
|
232
228
|
"""
|
|
233
|
-
|
|
234
|
-
|
|
229
|
+
standardised_gene_result = results_dir_and_input.results_dir.joinpath(
|
|
230
|
+
f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
|
|
235
231
|
)
|
|
236
232
|
pheval_gene_result = read_standardised_result(standardised_gene_result)
|
|
237
233
|
proband_causative_genes = _obtain_causative_genes(phenopacket_path)
|
|
@@ -266,11 +262,9 @@ def benchmark_gene_prioritisation(
|
|
|
266
262
|
"""
|
|
267
263
|
gene_rank_stats = RankStats()
|
|
268
264
|
gene_binary_classification_stats = BinaryClassificationStats()
|
|
269
|
-
for
|
|
270
|
-
results_directory_and_input.results_dir.joinpath("pheval_gene_results/"), ".tsv"
|
|
271
|
-
):
|
|
265
|
+
for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
|
|
272
266
|
assess_phenopacket_gene_prioritisation(
|
|
273
|
-
|
|
267
|
+
phenopacket_path,
|
|
274
268
|
score_order,
|
|
275
269
|
results_directory_and_input,
|
|
276
270
|
threshold,
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import List
|
|
3
4
|
|
|
@@ -5,6 +6,8 @@ import pandas as pd
|
|
|
5
6
|
|
|
6
7
|
from pheval.post_processing.post_processing import PhEvalResult
|
|
7
8
|
|
|
9
|
+
info_log = logging.getLogger("info")
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
def read_standardised_result(standardised_result_path: Path) -> List[dict]:
|
|
10
13
|
"""
|
|
@@ -16,7 +19,11 @@ def read_standardised_result(standardised_result_path: Path) -> List[dict]:
|
|
|
16
19
|
Returns:
|
|
17
20
|
List[dict]: A list of dictionaries representing the content of the standardised result file.
|
|
18
21
|
"""
|
|
19
|
-
|
|
22
|
+
if standardised_result_path.is_file():
|
|
23
|
+
return pd.read_csv(standardised_result_path, delimiter="\t").to_dict("records")
|
|
24
|
+
else:
|
|
25
|
+
info_log.info(f"Could not find {standardised_result_path}")
|
|
26
|
+
return pd.DataFrame().to_dict("records")
|
|
20
27
|
|
|
21
28
|
|
|
22
29
|
def parse_pheval_result(
|
|
@@ -10,11 +10,7 @@ from pheval.analyse.prioritisation_result_types import VariantPrioritisationResu
|
|
|
10
10
|
from pheval.analyse.rank_stats import RankStats
|
|
11
11
|
from pheval.analyse.run_data_parser import TrackInputOutputDirectories
|
|
12
12
|
from pheval.post_processing.post_processing import RankedPhEvalVariantResult
|
|
13
|
-
from pheval.utils.file_utils import
|
|
14
|
-
all_files,
|
|
15
|
-
files_with_suffix,
|
|
16
|
-
obtain_phenopacket_path_from_pheval_result,
|
|
17
|
-
)
|
|
13
|
+
from pheval.utils.file_utils import all_files
|
|
18
14
|
from pheval.utils.phenopacket_utils import GenomicVariant, PhenopacketUtil, phenopacket_reader
|
|
19
15
|
|
|
20
16
|
|
|
@@ -211,7 +207,7 @@ def _obtain_causative_variants(phenopacket_path: Path) -> List[GenomicVariant]:
|
|
|
211
207
|
|
|
212
208
|
|
|
213
209
|
def assess_phenopacket_variant_prioritisation(
|
|
214
|
-
|
|
210
|
+
phenopacket_path: Path,
|
|
215
211
|
score_order: str,
|
|
216
212
|
results_dir_and_input: TrackInputOutputDirectories,
|
|
217
213
|
threshold: float,
|
|
@@ -224,7 +220,7 @@ def assess_phenopacket_variant_prioritisation(
|
|
|
224
220
|
against the recorded causative variants for a proband in the Phenopacket.
|
|
225
221
|
|
|
226
222
|
Args:
|
|
227
|
-
|
|
223
|
+
phenopacket_path (Path): Path to the Phenopacket.
|
|
228
224
|
score_order (str): The order in which scores are arranged, either ascending or descending.
|
|
229
225
|
results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
|
|
230
226
|
threshold (float): Threshold for assessment.
|
|
@@ -232,10 +228,10 @@ def assess_phenopacket_variant_prioritisation(
|
|
|
232
228
|
variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
|
|
233
229
|
variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
|
|
234
230
|
"""
|
|
235
|
-
phenopacket_path = obtain_phenopacket_path_from_pheval_result(
|
|
236
|
-
standardised_variant_result, all_files(results_dir_and_input.phenopacket_dir)
|
|
237
|
-
)
|
|
238
231
|
proband_causative_variants = _obtain_causative_variants(phenopacket_path)
|
|
232
|
+
standardised_variant_result = results_dir_and_input.results_dir.joinpath(
|
|
233
|
+
f"pheval_variant_results/{phenopacket_path.stem}-pheval_variant_result.tsv"
|
|
234
|
+
)
|
|
239
235
|
pheval_variant_result = read_standardised_result(standardised_variant_result)
|
|
240
236
|
AssessVariantPrioritisation(
|
|
241
237
|
phenopacket_path,
|
|
@@ -270,12 +266,9 @@ def benchmark_variant_prioritisation(
|
|
|
270
266
|
"""
|
|
271
267
|
variant_rank_stats = RankStats()
|
|
272
268
|
variant_binary_classification_stats = BinaryClassificationStats()
|
|
273
|
-
for
|
|
274
|
-
results_directory_and_input.results_dir.joinpath("pheval_variant_results/"),
|
|
275
|
-
".tsv",
|
|
276
|
-
):
|
|
269
|
+
for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
|
|
277
270
|
assess_phenopacket_variant_prioritisation(
|
|
278
|
-
|
|
271
|
+
phenopacket_path,
|
|
279
272
|
score_order,
|
|
280
273
|
results_directory_and_input,
|
|
281
274
|
threshold,
|
|
@@ -227,68 +227,7 @@ class ResultSorter:
|
|
|
227
227
|
)
|
|
228
228
|
|
|
229
229
|
|
|
230
|
-
|
|
231
|
-
"""
|
|
232
|
-
Class for ranking scores based on a given sort order
|
|
233
|
-
|
|
234
|
-
Attributes:
|
|
235
|
-
rank (int): Represents the current rank, initialised with 0
|
|
236
|
-
current_score (float): Represents the current score, initialised with positive infinity (float("inf"))
|
|
237
|
-
count (int): Used for counting, initialised with 0
|
|
238
|
-
"""
|
|
239
|
-
|
|
240
|
-
rank: int = 0
|
|
241
|
-
current_score: float = float("inf")
|
|
242
|
-
count: int = 0
|
|
243
|
-
|
|
244
|
-
def __init__(self, sort_order: SortOrder):
|
|
245
|
-
"""
|
|
246
|
-
Initialise ScoreRanker
|
|
247
|
-
|
|
248
|
-
Args:
|
|
249
|
-
sort_order (SortOrder): Sorting order to be applied
|
|
250
|
-
"""
|
|
251
|
-
self.sort_order = sort_order
|
|
252
|
-
|
|
253
|
-
def _check_rank_order(self, round_score: float) -> None:
|
|
254
|
-
"""
|
|
255
|
-
Check if the results are correctly ordered
|
|
256
|
-
|
|
257
|
-
Args:
|
|
258
|
-
round_score (float): Score to be checked against the current score
|
|
259
|
-
|
|
260
|
-
Raises:
|
|
261
|
-
ValueError: If results are not correctly sorted.
|
|
262
|
-
"""
|
|
263
|
-
if self.sort_order == SortOrder.ASCENDING and round_score < self.current_score != float(
|
|
264
|
-
"inf"
|
|
265
|
-
):
|
|
266
|
-
raise ValueError("Results are not correctly sorted!")
|
|
267
|
-
elif self.sort_order == SortOrder.DESCENDING and round_score > self.current_score != float(
|
|
268
|
-
"inf"
|
|
269
|
-
):
|
|
270
|
-
raise ValueError("Results are not correctly sorted!")
|
|
271
|
-
|
|
272
|
-
def rank_scores(self, round_score: float) -> int:
|
|
273
|
-
"""
|
|
274
|
-
Add ranks to a result; equal scores are given the same rank, e.g., 1, 1, 3
|
|
275
|
-
|
|
276
|
-
Args:
|
|
277
|
-
round_score (float): Score to be ranked
|
|
278
|
-
|
|
279
|
-
Returns:
|
|
280
|
-
int: Rank assigned to the score
|
|
281
|
-
"""
|
|
282
|
-
self._check_rank_order(round_score)
|
|
283
|
-
self.count += 1
|
|
284
|
-
if self.current_score == round_score:
|
|
285
|
-
return self.rank
|
|
286
|
-
self.current_score = round_score
|
|
287
|
-
self.rank = self.count
|
|
288
|
-
return self.rank
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> [PhEvalResult]:
|
|
230
|
+
def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> pd.DataFrame:
|
|
292
231
|
"""
|
|
293
232
|
Rank PhEval results post-processed from tool-specific output, managing tied scores (ex aequo)
|
|
294
233
|
|
|
@@ -297,35 +236,17 @@ def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) ->
|
|
|
297
236
|
sort_order (SortOrder): Sorting order based on which ranking is performed
|
|
298
237
|
|
|
299
238
|
Returns:
|
|
300
|
-
|
|
239
|
+
pd.DataFrame : Ranked PhEval results with tied scores managed
|
|
301
240
|
|
|
302
241
|
Raises:
|
|
303
242
|
ValueError: If an incompatible PhEval result type is encountered
|
|
304
243
|
"""
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
result, score_ranker.rank_scores(result.score)
|
|
312
|
-
)
|
|
313
|
-
)
|
|
314
|
-
elif type(result) == PhEvalVariantResult:
|
|
315
|
-
ranked_result.append(
|
|
316
|
-
RankedPhEvalVariantResult.from_variant_result(
|
|
317
|
-
result, score_ranker.rank_scores(result.score)
|
|
318
|
-
)
|
|
319
|
-
)
|
|
320
|
-
elif type(result) == PhEvalDiseaseResult:
|
|
321
|
-
ranked_result.append(
|
|
322
|
-
RankedPhEvalDiseaseResult.from_disease_result(
|
|
323
|
-
result, score_ranker.rank_scores(result.score)
|
|
324
|
-
)
|
|
325
|
-
)
|
|
326
|
-
else:
|
|
327
|
-
raise ValueError("Incompatible PhEval result type.")
|
|
328
|
-
return ranked_result
|
|
244
|
+
pheval_result_df = pd.DataFrame([data.__dict__ for data in pheval_result])
|
|
245
|
+
if sort_order == SortOrder.ASCENDING:
|
|
246
|
+
pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=True)
|
|
247
|
+
elif sort_order == SortOrder.DESCENDING:
|
|
248
|
+
pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=False)
|
|
249
|
+
return pheval_result_df
|
|
329
250
|
|
|
330
251
|
|
|
331
252
|
def _return_sort_order(sort_order_str: str) -> SortOrder:
|
|
@@ -347,7 +268,7 @@ def _return_sort_order(sort_order_str: str) -> SortOrder:
|
|
|
347
268
|
raise ValueError("Incompatible ordering method specified.")
|
|
348
269
|
|
|
349
270
|
|
|
350
|
-
def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
|
|
271
|
+
def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) -> pd.DataFrame:
|
|
351
272
|
"""
|
|
352
273
|
Create PhEval results with corresponding ranks based on the specified sorting order.
|
|
353
274
|
|
|
@@ -356,7 +277,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
|
|
|
356
277
|
sort_order_str (str): String representation of the desired sorting order.
|
|
357
278
|
|
|
358
279
|
Returns:
|
|
359
|
-
|
|
280
|
+
pd.DataFrame: PhEval results with ranks assigned.
|
|
360
281
|
"""
|
|
361
282
|
sort_order = _return_sort_order(sort_order_str)
|
|
362
283
|
sorted_pheval_result = ResultSorter(pheval_result, sort_order).sort_pheval_results()
|
|
@@ -364,7 +285,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
|
|
|
364
285
|
|
|
365
286
|
|
|
366
287
|
def _write_pheval_gene_result(
|
|
367
|
-
ranked_pheval_result:
|
|
288
|
+
ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
|
|
368
289
|
) -> None:
|
|
369
290
|
"""
|
|
370
291
|
Write ranked PhEval gene results to a TSV file
|
|
@@ -374,8 +295,9 @@ def _write_pheval_gene_result(
|
|
|
374
295
|
output_dir (Path): Path to the output directory
|
|
375
296
|
tool_result_path (Path): Path to the tool-specific result file
|
|
376
297
|
"""
|
|
377
|
-
|
|
378
|
-
|
|
298
|
+
pheval_gene_output = ranked_pheval_result.loc[
|
|
299
|
+
:, ["rank", "score", "gene_symbol", "gene_identifier"]
|
|
300
|
+
]
|
|
379
301
|
pheval_gene_output.to_csv(
|
|
380
302
|
output_dir.joinpath(
|
|
381
303
|
"pheval_gene_results/" + tool_result_path.stem + "-pheval_gene_result.tsv"
|
|
@@ -386,7 +308,7 @@ def _write_pheval_gene_result(
|
|
|
386
308
|
|
|
387
309
|
|
|
388
310
|
def _write_pheval_variant_result(
|
|
389
|
-
ranked_pheval_result:
|
|
311
|
+
ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
|
|
390
312
|
) -> None:
|
|
391
313
|
"""
|
|
392
314
|
Write ranked PhEval variant results to a TSV file
|
|
@@ -396,8 +318,7 @@ def _write_pheval_variant_result(
|
|
|
396
318
|
output_dir (Path): Path to the output directory
|
|
397
319
|
tool_result_path (Path): Path to the tool-specific result file
|
|
398
320
|
"""
|
|
399
|
-
|
|
400
|
-
pheval_variant_output = ranked_result.loc[
|
|
321
|
+
pheval_variant_output = ranked_pheval_result.loc[
|
|
401
322
|
:, ["rank", "score", "chromosome", "start", "end", "ref", "alt"]
|
|
402
323
|
]
|
|
403
324
|
pheval_variant_output.to_csv(
|
|
@@ -410,7 +331,7 @@ def _write_pheval_variant_result(
|
|
|
410
331
|
|
|
411
332
|
|
|
412
333
|
def _write_pheval_disease_result(
|
|
413
|
-
ranked_pheval_result:
|
|
334
|
+
ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
|
|
414
335
|
) -> None:
|
|
415
336
|
"""
|
|
416
337
|
Write ranked PhEval disease results to a TSV file
|
|
@@ -420,8 +341,7 @@ def _write_pheval_disease_result(
|
|
|
420
341
|
output_dir (Path): Path to the output directory
|
|
421
342
|
tool_result_path (Path): Path to the tool-specific result file
|
|
422
343
|
"""
|
|
423
|
-
|
|
424
|
-
pheval_disease_output = ranked_result.loc[
|
|
344
|
+
pheval_disease_output = ranked_pheval_result.loc[
|
|
425
345
|
:, ["rank", "score", "disease_name", "disease_identifier"]
|
|
426
346
|
]
|
|
427
347
|
pheval_disease_output.to_csv(
|
|
@@ -455,11 +375,11 @@ def generate_pheval_result(
|
|
|
455
375
|
info_log.warning(f"No results found for {tool_result_path.name}")
|
|
456
376
|
return
|
|
457
377
|
ranked_pheval_result = _create_pheval_result(pheval_result, sort_order_str)
|
|
458
|
-
if all(isinstance(result,
|
|
378
|
+
if all(isinstance(result, PhEvalGeneResult) for result in pheval_result):
|
|
459
379
|
_write_pheval_gene_result(ranked_pheval_result, output_dir, tool_result_path)
|
|
460
|
-
elif all(isinstance(result,
|
|
380
|
+
elif all(isinstance(result, PhEvalVariantResult) for result in pheval_result):
|
|
461
381
|
_write_pheval_variant_result(ranked_pheval_result, output_dir, tool_result_path)
|
|
462
|
-
elif all(isinstance(result,
|
|
382
|
+
elif all(isinstance(result, PhEvalDiseaseResult) for result in pheval_result):
|
|
463
383
|
_write_pheval_disease_result(ranked_pheval_result, output_dir, tool_result_path)
|
|
464
384
|
else:
|
|
465
385
|
raise ValueError("Results are not all of the same type.")
|
|
@@ -328,22 +328,35 @@ class VcfSpiker:
|
|
|
328
328
|
genotype_codes[proband_variant_data.genotype.lower()] + "\n",
|
|
329
329
|
]
|
|
330
330
|
|
|
331
|
-
def construct_vcf_records(self) -> List[str]:
|
|
331
|
+
def construct_vcf_records(self, template_vcf_name: str) -> List[str]:
|
|
332
332
|
"""
|
|
333
333
|
Construct updated VCF records by inserting spiked variants into the correct positions within the VCF.
|
|
334
334
|
|
|
335
|
+
Args:
|
|
336
|
+
template_vcf_name (str): Name of the template VCF file.
|
|
337
|
+
|
|
335
338
|
Returns:
|
|
336
339
|
List[str]: Updated VCF records containing the spiked variants.
|
|
337
340
|
"""
|
|
338
341
|
updated_vcf_records = copy(self.vcf_contents)
|
|
339
342
|
for variant in self.proband_causative_variants:
|
|
340
|
-
|
|
341
|
-
|
|
343
|
+
variant_entry = self.construct_variant_entry(variant)
|
|
344
|
+
matching_indices = [
|
|
342
345
|
i
|
|
343
346
|
for i, val in enumerate(updated_vcf_records)
|
|
344
|
-
if val.split("\t")[0] ==
|
|
345
|
-
|
|
346
|
-
|
|
347
|
+
if val.split("\t")[0] == variant_entry[0]
|
|
348
|
+
and int(val.split("\t")[1]) < int(variant_entry[1])
|
|
349
|
+
]
|
|
350
|
+
if matching_indices:
|
|
351
|
+
variant_entry_position = matching_indices[-1] + 1
|
|
352
|
+
else:
|
|
353
|
+
info_log.warning(
|
|
354
|
+
f"Could not find entry position for {variant.variant.chrom}-{variant.variant.pos}-"
|
|
355
|
+
f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}, "
|
|
356
|
+
"inserting at end of VCF contents."
|
|
357
|
+
)
|
|
358
|
+
variant_entry_position = len(updated_vcf_records)
|
|
359
|
+
updated_vcf_records.insert(variant_entry_position, "\t".join(variant_entry))
|
|
347
360
|
return updated_vcf_records
|
|
348
361
|
|
|
349
362
|
def construct_header(self, updated_vcf_records: List[str]) -> List[str]:
|
|
@@ -358,21 +371,27 @@ class VcfSpiker:
|
|
|
358
371
|
"""
|
|
359
372
|
updated_vcf_file = []
|
|
360
373
|
for line in updated_vcf_records:
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
374
|
+
if line.startswith("#"):
|
|
375
|
+
text = line.replace(
|
|
376
|
+
self.vcf_header.sample_id,
|
|
377
|
+
self.proband_causative_variants[0].proband_id,
|
|
378
|
+
)
|
|
379
|
+
else:
|
|
380
|
+
text = line
|
|
365
381
|
updated_vcf_file.append(text)
|
|
366
382
|
return updated_vcf_file
|
|
367
383
|
|
|
368
|
-
def construct_vcf(self) -> List[str]:
|
|
384
|
+
def construct_vcf(self, template_vcf_name: str) -> List[str]:
|
|
369
385
|
"""
|
|
370
386
|
Construct the entire spiked VCF file by incorporating the spiked variants into the VCF.
|
|
371
387
|
|
|
388
|
+
Args:
|
|
389
|
+
template_vcf_name (str): Name of the template VCF file.
|
|
390
|
+
|
|
372
391
|
Returns:
|
|
373
392
|
List[str]: The complete spiked VCF file content as a list of strings.
|
|
374
393
|
"""
|
|
375
|
-
return self.construct_header(self.construct_vcf_records())
|
|
394
|
+
return self.construct_header(self.construct_vcf_records(template_vcf_name))
|
|
376
395
|
|
|
377
396
|
|
|
378
397
|
class VcfWriter:
|
|
@@ -454,7 +473,7 @@ def spike_vcf_contents(
|
|
|
454
473
|
chosen_template_vcf.vcf_contents,
|
|
455
474
|
phenopacket_causative_variants,
|
|
456
475
|
chosen_template_vcf.vcf_header,
|
|
457
|
-
).construct_vcf(),
|
|
476
|
+
).construct_vcf(chosen_template_vcf.vcf_file_name),
|
|
458
477
|
)
|
|
459
478
|
|
|
460
479
|
|
pheval/prepare/prepare_corpus.py
CHANGED
|
@@ -39,6 +39,11 @@ def prepare_corpus(
|
|
|
39
39
|
output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
|
|
40
40
|
for phenopacket_path in all_files(phenopacket_dir):
|
|
41
41
|
phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
|
|
42
|
+
if not phenopacket_util.observed_phenotypic_features():
|
|
43
|
+
info_log.warning(
|
|
44
|
+
f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
|
|
45
|
+
)
|
|
46
|
+
continue
|
|
42
47
|
if variant_analysis:
|
|
43
48
|
if phenopacket_util.check_incomplete_variant_record():
|
|
44
49
|
info_log.warning(
|
pheval/utils/file_utils.py
CHANGED
|
@@ -70,35 +70,6 @@ def normalise_file_name(file_path: Path) -> str:
|
|
|
70
70
|
return re.sub("[\u0300-\u036f]", "", normalised_file_name)
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def obtain_phenopacket_path_from_pheval_result(
|
|
74
|
-
pheval_result_path: Path, phenopacket_paths: list[Path]
|
|
75
|
-
) -> Path:
|
|
76
|
-
"""
|
|
77
|
-
Obtains the phenopacket file name when given a pheval result file name
|
|
78
|
-
and a list of full paths of phenopackets to be queried.
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
pheval_result_path (Path): The PhEval result.
|
|
82
|
-
phenopacket_paths (list[Path]): List of full paths of phenopackets to be queried.
|
|
83
|
-
|
|
84
|
-
Returns:
|
|
85
|
-
Path: The matching phenopacket file path from the provided list.
|
|
86
|
-
"""
|
|
87
|
-
pheval_result_path_stem_stripped = pheval_result_path.stem.split("-pheval_")[0]
|
|
88
|
-
matching_phenopacket_paths = [
|
|
89
|
-
phenopacket_path
|
|
90
|
-
for phenopacket_path in phenopacket_paths
|
|
91
|
-
if phenopacket_path.stem == pheval_result_path_stem_stripped
|
|
92
|
-
]
|
|
93
|
-
if matching_phenopacket_paths:
|
|
94
|
-
return matching_phenopacket_paths[0]
|
|
95
|
-
else:
|
|
96
|
-
raise FileNotFoundError(
|
|
97
|
-
f"Unable to find matching phenopacket file named "
|
|
98
|
-
f"{pheval_result_path_stem_stripped}.json for {pheval_result_path.name}"
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
|
|
102
73
|
def ensure_file_exists(*files: str):
|
|
103
74
|
"""Ensures the existence of files passed as parameter
|
|
104
75
|
Raises:
|
|
@@ -4,17 +4,17 @@ pheval/analyse/analysis.py,sha256=ponm3P8nvzJNmcrNZ2_KudEhWSaWshd_Gd30D-aau8s,77
|
|
|
4
4
|
pheval/analyse/benchmark_generator.py,sha256=AeuwbaPb4j_dyBGPRgEBxQk2NahDb5u4xHyFiqp5Fes,5943
|
|
5
5
|
pheval/analyse/benchmarking_data.py,sha256=aNZkWdmWemlnC1Tg35MtR60S9YC71QWS2rMuzkUc3w0,768
|
|
6
6
|
pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
|
|
7
|
-
pheval/analyse/disease_prioritisation_analysis.py,sha256=
|
|
8
|
-
pheval/analyse/gene_prioritisation_analysis.py,sha256=
|
|
7
|
+
pheval/analyse/disease_prioritisation_analysis.py,sha256=mGfGYF5Eu7LxyBkAy6xMG1nDURaPiJY4rRQyKDcQe-4,12451
|
|
8
|
+
pheval/analyse/gene_prioritisation_analysis.py,sha256=KSEQV6EvqtWESmO4Zc3Q9CwrjoMzxRiFUDKuAVvQtuM,12190
|
|
9
9
|
pheval/analyse/generate_plots.py,sha256=MFORnFTgoelYAahFlu3Dc3Rul4cwCg8Bloxe62vONSc,21350
|
|
10
10
|
pheval/analyse/generate_summary_outputs.py,sha256=s9pXMSW6xm4ZBe1aCd0UJSaFiKBvpUfPwJ2BI4qfTas,6591
|
|
11
11
|
pheval/analyse/parse_benchmark_summary.py,sha256=Y8uPTlHTEiaeVBOqxMcdOqjY3ZBtOS3DoRycL78Dzxg,2384
|
|
12
|
-
pheval/analyse/parse_pheval_result.py,sha256=
|
|
12
|
+
pheval/analyse/parse_pheval_result.py,sha256=2-J_c90KSs49EDjMukl8dgQyWJ0lZMlF-9ZYzD9hWzg,1438
|
|
13
13
|
pheval/analyse/prioritisation_rank_recorder.py,sha256=EVe8DoEvvp0_WMAcjfVxmDGGRFPEELi7hEVjH3sIpLY,3223
|
|
14
14
|
pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
|
|
15
15
|
pheval/analyse/rank_stats.py,sha256=knj1tsKrly17QgtOUVpqA14UjbO99N3ydkWN4xU6c2k,15785
|
|
16
16
|
pheval/analyse/run_data_parser.py,sha256=HzBKsJL2skjmrRZdrF3VYzswtKNgbX6U5qhY_kqq9mA,1552
|
|
17
|
-
pheval/analyse/variant_prioritisation_analysis.py,sha256=
|
|
17
|
+
pheval/analyse/variant_prioritisation_analysis.py,sha256=eF3SIvU6MNv1KR8ZmwXvTF4IoNu2qfwaBHA0uKZ8uMc,12186
|
|
18
18
|
pheval/cli.py,sha256=X4tDi7e3VB3v2RawkqIbfv4SFPCBuQwMXMnYCPTGtIo,1570
|
|
19
19
|
pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
|
|
20
20
|
pheval/cli_pheval_utils.py,sha256=kySsSa7NyewwVwYBMu93y8l5_qSJaVkdXklGchcXExU,20504
|
|
@@ -24,12 +24,12 @@ pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r
|
|
|
24
24
|
pheval/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
pheval/infra/exomiserdb.py,sha256=pM9-TfjrgurtH4OtM1Enk5oVhIxGQN3rKRlrxHuObTM,5080
|
|
26
26
|
pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
pheval/post_processing/post_processing.py,sha256=
|
|
27
|
+
pheval/post_processing/post_processing.py,sha256=Xzcrb7I0DiLBT3tp0oM8_L8Ld64fTgRHBstQuNSrFHk,13329
|
|
28
28
|
pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
pheval/prepare/create_noisy_phenopackets.py,sha256=UbBRWDD95BFHPv03VYx04v35AGwJ9ynLltYKqQJHbZ0,11236
|
|
30
|
-
pheval/prepare/create_spiked_vcf.py,sha256=
|
|
30
|
+
pheval/prepare/create_spiked_vcf.py,sha256=A_nIAhoU48nAeocpIu5UE41db4oBGj2cSoT-U-3qQ1Q,21111
|
|
31
31
|
pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
|
|
32
|
-
pheval/prepare/prepare_corpus.py,sha256=
|
|
32
|
+
pheval/prepare/prepare_corpus.py,sha256=FweWoYMkS-kzi5RqgrSzkdp_8iWLyoGWMC_GF0szcUg,3692
|
|
33
33
|
pheval/prepare/update_phenopacket.py,sha256=21fzUPbwKN6Ey5TSh9PFzjT2x86U19RAE6WmkjG8u28,4770
|
|
34
34
|
pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
|
|
35
35
|
pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
|
|
@@ -46,12 +46,12 @@ pheval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
46
46
|
pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
|
|
47
47
|
pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
|
|
48
48
|
pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
|
|
49
|
-
pheval/utils/file_utils.py,sha256=
|
|
49
|
+
pheval/utils/file_utils.py,sha256=m21cz-qjDYqnI8ClUv3J9fKizex98a-9bSEerQ75i_c,3576
|
|
50
50
|
pheval/utils/phenopacket_utils.py,sha256=4inrnhZ4UjYgO0Y85ls_Nxq6voAIIXQV57_fMeIX-24,26792
|
|
51
51
|
pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
|
|
52
52
|
pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
|
|
53
|
-
pheval-0.3.
|
|
54
|
-
pheval-0.3.
|
|
55
|
-
pheval-0.3.
|
|
56
|
-
pheval-0.3.
|
|
57
|
-
pheval-0.3.
|
|
53
|
+
pheval-0.3.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
54
|
+
pheval-0.3.7.dist-info/METADATA,sha256=BwicFNwmR9Hm8o2YxBJUJvrIeGwAevFFk-DT2pm07S4,1810
|
|
55
|
+
pheval-0.3.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
56
|
+
pheval-0.3.7.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
|
|
57
|
+
pheval-0.3.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|