pheval 0.3.5__tar.gz → 0.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- {pheval-0.3.5 → pheval-0.3.6}/PKG-INFO +1 -1
- {pheval-0.3.5 → pheval-0.3.6}/pyproject.toml +1 -1
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/post_processing/post_processing.py +18 -98
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/create_spiked_vcf.py +32 -13
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/prepare_corpus.py +5 -0
- {pheval-0.3.5 → pheval-0.3.6}/LICENSE +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/README.md +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/analysis.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/benchmark_generator.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/benchmarking_data.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/binary_classification_stats.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/disease_prioritisation_analysis.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/gene_prioritisation_analysis.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/generate_plots.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/generate_summary_outputs.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/parse_benchmark_summary.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/parse_pheval_result.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/prioritisation_rank_recorder.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/prioritisation_result_types.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/rank_stats.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/run_data_parser.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/analyse/variant_prioritisation_analysis.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/cli.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/cli_pheval.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/cli_pheval_utils.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/config_parser.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/constants.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/implementations/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/infra/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/infra/exomiserdb.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/post_processing/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/create_noisy_phenopackets.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/custom_exceptions.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/prepare/update_phenopacket.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/alternate_ouputs/CADA_results.txt +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/alternate_ouputs/DeepPVP_results.txt +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/alternate_ouputs/OVA_results.txt +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/alternate_ouputs/Phen2Gene_results.json +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/alternate_ouputs/Phenolyzer_results.txt +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/alternate_ouputs/lirical_results.tsv +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/alternate_ouputs/svanna_results.tsv +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/resources/hgnc_complete_set.txt +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/run_metadata.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/runners/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/runners/runner.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/__init__.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/docs_gen.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/docs_gen.sh +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/exomiser.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/file_utils.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/phenopacket_utils.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/semsim_utils.py +0 -0
- {pheval-0.3.5 → pheval-0.3.6}/src/pheval/utils/utils.py +0 -0
|
@@ -227,68 +227,7 @@ class ResultSorter:
|
|
|
227
227
|
)
|
|
228
228
|
|
|
229
229
|
|
|
230
|
-
|
|
231
|
-
"""
|
|
232
|
-
Class for ranking scores based on a given sort order
|
|
233
|
-
|
|
234
|
-
Attributes:
|
|
235
|
-
rank (int): Represents the current rank, initialised with 0
|
|
236
|
-
current_score (float): Represents the current score, initialised with positive infinity (float("inf"))
|
|
237
|
-
count (int): Used for counting, initialised with 0
|
|
238
|
-
"""
|
|
239
|
-
|
|
240
|
-
rank: int = 0
|
|
241
|
-
current_score: float = float("inf")
|
|
242
|
-
count: int = 0
|
|
243
|
-
|
|
244
|
-
def __init__(self, sort_order: SortOrder):
|
|
245
|
-
"""
|
|
246
|
-
Initialise ScoreRanker
|
|
247
|
-
|
|
248
|
-
Args:
|
|
249
|
-
sort_order (SortOrder): Sorting order to be applied
|
|
250
|
-
"""
|
|
251
|
-
self.sort_order = sort_order
|
|
252
|
-
|
|
253
|
-
def _check_rank_order(self, round_score: float) -> None:
|
|
254
|
-
"""
|
|
255
|
-
Check if the results are correctly ordered
|
|
256
|
-
|
|
257
|
-
Args:
|
|
258
|
-
round_score (float): Score to be checked against the current score
|
|
259
|
-
|
|
260
|
-
Raises:
|
|
261
|
-
ValueError: If results are not correctly sorted.
|
|
262
|
-
"""
|
|
263
|
-
if self.sort_order == SortOrder.ASCENDING and round_score < self.current_score != float(
|
|
264
|
-
"inf"
|
|
265
|
-
):
|
|
266
|
-
raise ValueError("Results are not correctly sorted!")
|
|
267
|
-
elif self.sort_order == SortOrder.DESCENDING and round_score > self.current_score != float(
|
|
268
|
-
"inf"
|
|
269
|
-
):
|
|
270
|
-
raise ValueError("Results are not correctly sorted!")
|
|
271
|
-
|
|
272
|
-
def rank_scores(self, round_score: float) -> int:
|
|
273
|
-
"""
|
|
274
|
-
Add ranks to a result; equal scores are given the same rank, e.g., 1, 1, 3
|
|
275
|
-
|
|
276
|
-
Args:
|
|
277
|
-
round_score (float): Score to be ranked
|
|
278
|
-
|
|
279
|
-
Returns:
|
|
280
|
-
int: Rank assigned to the score
|
|
281
|
-
"""
|
|
282
|
-
self._check_rank_order(round_score)
|
|
283
|
-
self.count += 1
|
|
284
|
-
if self.current_score == round_score:
|
|
285
|
-
return self.rank
|
|
286
|
-
self.current_score = round_score
|
|
287
|
-
self.rank = self.count
|
|
288
|
-
return self.rank
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> [PhEvalResult]:
|
|
230
|
+
def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> pd.DataFrame:
|
|
292
231
|
"""
|
|
293
232
|
Rank PhEval results post-processed from tool-specific output, managing tied scores (ex aequo)
|
|
294
233
|
|
|
@@ -297,35 +236,17 @@ def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) ->
|
|
|
297
236
|
sort_order (SortOrder): Sorting order based on which ranking is performed
|
|
298
237
|
|
|
299
238
|
Returns:
|
|
300
|
-
|
|
239
|
+
pd.DataFrame : Ranked PhEval results with tied scores managed
|
|
301
240
|
|
|
302
241
|
Raises:
|
|
303
242
|
ValueError: If an incompatible PhEval result type is encountered
|
|
304
243
|
"""
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
result, score_ranker.rank_scores(result.score)
|
|
312
|
-
)
|
|
313
|
-
)
|
|
314
|
-
elif type(result) == PhEvalVariantResult:
|
|
315
|
-
ranked_result.append(
|
|
316
|
-
RankedPhEvalVariantResult.from_variant_result(
|
|
317
|
-
result, score_ranker.rank_scores(result.score)
|
|
318
|
-
)
|
|
319
|
-
)
|
|
320
|
-
elif type(result) == PhEvalDiseaseResult:
|
|
321
|
-
ranked_result.append(
|
|
322
|
-
RankedPhEvalDiseaseResult.from_disease_result(
|
|
323
|
-
result, score_ranker.rank_scores(result.score)
|
|
324
|
-
)
|
|
325
|
-
)
|
|
326
|
-
else:
|
|
327
|
-
raise ValueError("Incompatible PhEval result type.")
|
|
328
|
-
return ranked_result
|
|
244
|
+
pheval_result_df = pd.DataFrame([data.__dict__ for data in pheval_result])
|
|
245
|
+
if sort_order == SortOrder.ASCENDING:
|
|
246
|
+
pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=True)
|
|
247
|
+
elif sort_order == SortOrder.DESCENDING:
|
|
248
|
+
pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=False)
|
|
249
|
+
return pheval_result_df
|
|
329
250
|
|
|
330
251
|
|
|
331
252
|
def _return_sort_order(sort_order_str: str) -> SortOrder:
|
|
@@ -347,7 +268,7 @@ def _return_sort_order(sort_order_str: str) -> SortOrder:
|
|
|
347
268
|
raise ValueError("Incompatible ordering method specified.")
|
|
348
269
|
|
|
349
270
|
|
|
350
|
-
def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
|
|
271
|
+
def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) -> pd.DataFrame:
|
|
351
272
|
"""
|
|
352
273
|
Create PhEval results with corresponding ranks based on the specified sorting order.
|
|
353
274
|
|
|
@@ -356,7 +277,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
|
|
|
356
277
|
sort_order_str (str): String representation of the desired sorting order.
|
|
357
278
|
|
|
358
279
|
Returns:
|
|
359
|
-
|
|
280
|
+
pd.DataFrame: PhEval results with ranks assigned.
|
|
360
281
|
"""
|
|
361
282
|
sort_order = _return_sort_order(sort_order_str)
|
|
362
283
|
sorted_pheval_result = ResultSorter(pheval_result, sort_order).sort_pheval_results()
|
|
@@ -364,7 +285,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
|
|
|
364
285
|
|
|
365
286
|
|
|
366
287
|
def _write_pheval_gene_result(
|
|
367
|
-
ranked_pheval_result:
|
|
288
|
+
ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
|
|
368
289
|
) -> None:
|
|
369
290
|
"""
|
|
370
291
|
Write ranked PhEval gene results to a TSV file
|
|
@@ -374,8 +295,9 @@ def _write_pheval_gene_result(
|
|
|
374
295
|
output_dir (Path): Path to the output directory
|
|
375
296
|
tool_result_path (Path): Path to the tool-specific result file
|
|
376
297
|
"""
|
|
377
|
-
|
|
378
|
-
|
|
298
|
+
pheval_gene_output = ranked_pheval_result.loc[
|
|
299
|
+
:, ["rank", "score", "gene_symbol", "gene_identifier"]
|
|
300
|
+
]
|
|
379
301
|
pheval_gene_output.to_csv(
|
|
380
302
|
output_dir.joinpath(
|
|
381
303
|
"pheval_gene_results/" + tool_result_path.stem + "-pheval_gene_result.tsv"
|
|
@@ -386,7 +308,7 @@ def _write_pheval_gene_result(
|
|
|
386
308
|
|
|
387
309
|
|
|
388
310
|
def _write_pheval_variant_result(
|
|
389
|
-
ranked_pheval_result:
|
|
311
|
+
ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
|
|
390
312
|
) -> None:
|
|
391
313
|
"""
|
|
392
314
|
Write ranked PhEval variant results to a TSV file
|
|
@@ -396,8 +318,7 @@ def _write_pheval_variant_result(
|
|
|
396
318
|
output_dir (Path): Path to the output directory
|
|
397
319
|
tool_result_path (Path): Path to the tool-specific result file
|
|
398
320
|
"""
|
|
399
|
-
|
|
400
|
-
pheval_variant_output = ranked_result.loc[
|
|
321
|
+
pheval_variant_output = ranked_pheval_result.loc[
|
|
401
322
|
:, ["rank", "score", "chromosome", "start", "end", "ref", "alt"]
|
|
402
323
|
]
|
|
403
324
|
pheval_variant_output.to_csv(
|
|
@@ -410,7 +331,7 @@ def _write_pheval_variant_result(
|
|
|
410
331
|
|
|
411
332
|
|
|
412
333
|
def _write_pheval_disease_result(
|
|
413
|
-
ranked_pheval_result:
|
|
334
|
+
ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
|
|
414
335
|
) -> None:
|
|
415
336
|
"""
|
|
416
337
|
Write ranked PhEval disease results to a TSV file
|
|
@@ -420,8 +341,7 @@ def _write_pheval_disease_result(
|
|
|
420
341
|
output_dir (Path): Path to the output directory
|
|
421
342
|
tool_result_path (Path): Path to the tool-specific result file
|
|
422
343
|
"""
|
|
423
|
-
|
|
424
|
-
pheval_disease_output = ranked_result.loc[
|
|
344
|
+
pheval_disease_output = ranked_pheval_result.loc[
|
|
425
345
|
:, ["rank", "score", "disease_name", "disease_identifier"]
|
|
426
346
|
]
|
|
427
347
|
pheval_disease_output.to_csv(
|
|
@@ -328,22 +328,35 @@ class VcfSpiker:
|
|
|
328
328
|
genotype_codes[proband_variant_data.genotype.lower()] + "\n",
|
|
329
329
|
]
|
|
330
330
|
|
|
331
|
-
def construct_vcf_records(self) -> List[str]:
|
|
331
|
+
def construct_vcf_records(self, template_vcf_name: str) -> List[str]:
|
|
332
332
|
"""
|
|
333
333
|
Construct updated VCF records by inserting spiked variants into the correct positions within the VCF.
|
|
334
334
|
|
|
335
|
+
Args:
|
|
336
|
+
template_vcf_name (str): Name of the template VCF file.
|
|
337
|
+
|
|
335
338
|
Returns:
|
|
336
339
|
List[str]: Updated VCF records containing the spiked variants.
|
|
337
340
|
"""
|
|
338
341
|
updated_vcf_records = copy(self.vcf_contents)
|
|
339
342
|
for variant in self.proband_causative_variants:
|
|
340
|
-
|
|
341
|
-
|
|
343
|
+
variant_entry = self.construct_variant_entry(variant)
|
|
344
|
+
matching_indices = [
|
|
342
345
|
i
|
|
343
346
|
for i, val in enumerate(updated_vcf_records)
|
|
344
|
-
if val.split("\t")[0] ==
|
|
345
|
-
|
|
346
|
-
|
|
347
|
+
if val.split("\t")[0] == variant_entry[0]
|
|
348
|
+
and int(val.split("\t")[1]) < int(variant_entry[1])
|
|
349
|
+
]
|
|
350
|
+
if matching_indices:
|
|
351
|
+
variant_entry_position = matching_indices[-1] + 1
|
|
352
|
+
else:
|
|
353
|
+
info_log.warning(
|
|
354
|
+
f"Could not find entry position for {variant.variant.chrom}-{variant.variant.pos}-"
|
|
355
|
+
f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}, "
|
|
356
|
+
"inserting at end of VCF contents."
|
|
357
|
+
)
|
|
358
|
+
variant_entry_position = len(updated_vcf_records)
|
|
359
|
+
updated_vcf_records.insert(variant_entry_position, "\t".join(variant_entry))
|
|
347
360
|
return updated_vcf_records
|
|
348
361
|
|
|
349
362
|
def construct_header(self, updated_vcf_records: List[str]) -> List[str]:
|
|
@@ -358,21 +371,27 @@ class VcfSpiker:
|
|
|
358
371
|
"""
|
|
359
372
|
updated_vcf_file = []
|
|
360
373
|
for line in updated_vcf_records:
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
374
|
+
if line.startswith("#"):
|
|
375
|
+
text = line.replace(
|
|
376
|
+
self.vcf_header.sample_id,
|
|
377
|
+
self.proband_causative_variants[0].proband_id,
|
|
378
|
+
)
|
|
379
|
+
else:
|
|
380
|
+
text = line
|
|
365
381
|
updated_vcf_file.append(text)
|
|
366
382
|
return updated_vcf_file
|
|
367
383
|
|
|
368
|
-
def construct_vcf(self) -> List[str]:
|
|
384
|
+
def construct_vcf(self, template_vcf_name: str) -> List[str]:
|
|
369
385
|
"""
|
|
370
386
|
Construct the entire spiked VCF file by incorporating the spiked variants into the VCF.
|
|
371
387
|
|
|
388
|
+
Args:
|
|
389
|
+
template_vcf_name (str): Name of the template VCF file.
|
|
390
|
+
|
|
372
391
|
Returns:
|
|
373
392
|
List[str]: The complete spiked VCF file content as a list of strings.
|
|
374
393
|
"""
|
|
375
|
-
return self.construct_header(self.construct_vcf_records())
|
|
394
|
+
return self.construct_header(self.construct_vcf_records(template_vcf_name))
|
|
376
395
|
|
|
377
396
|
|
|
378
397
|
class VcfWriter:
|
|
@@ -454,7 +473,7 @@ def spike_vcf_contents(
|
|
|
454
473
|
chosen_template_vcf.vcf_contents,
|
|
455
474
|
phenopacket_causative_variants,
|
|
456
475
|
chosen_template_vcf.vcf_header,
|
|
457
|
-
).construct_vcf(),
|
|
476
|
+
).construct_vcf(chosen_template_vcf.vcf_file_name),
|
|
458
477
|
)
|
|
459
478
|
|
|
460
479
|
|
|
@@ -39,6 +39,11 @@ def prepare_corpus(
|
|
|
39
39
|
output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
|
|
40
40
|
for phenopacket_path in all_files(phenopacket_dir):
|
|
41
41
|
phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
|
|
42
|
+
if not phenopacket_util.observed_phenotypic_features():
|
|
43
|
+
info_log.warning(
|
|
44
|
+
f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
|
|
45
|
+
)
|
|
46
|
+
continue
|
|
42
47
|
if variant_analysis:
|
|
43
48
|
if phenopacket_util.check_incomplete_variant_record():
|
|
44
49
|
info_log.warning(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|