pheval-0.4.1-py3-none-any.whl → pheval-0.4.3-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pheval might be problematic. Click here for more details.
- pheval/analyse/benchmark_db_manager.py +1 -0
- pheval/analyse/disease_prioritisation_analysis.py +18 -7
- pheval/analyse/gene_prioritisation_analysis.py +18 -7
- pheval/analyse/generate_plots.py +17 -2
- pheval/analyse/variant_prioritisation_analysis.py +18 -9
- pheval/prepare/prepare_corpus.py +5 -0
- pheval/utils/phenopacket_utils.py +13 -0
- {pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/METADATA +2 -2
- {pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/RECORD +12 -12
- {pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/WHEEL +1 -1
- {pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/LICENSE +0 -0
- {pheval-0.4.1.dist-info → pheval-0.4.3.dist-info}/entry_points.txt +0 -0
|
@@ -75,6 +75,7 @@ class BenchmarkDBManager:
|
|
|
75
75
|
`False` otherwise.
|
|
76
76
|
"""
|
|
77
77
|
list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
|
|
78
|
+
entity = entity.replace("nan", "None").replace("NaN", "None")
|
|
78
79
|
if list_pattern.match(str(entity)):
|
|
79
80
|
list_representation = ast.literal_eval(entity)
|
|
80
81
|
if isinstance(list_representation, list):
|
|
@@ -38,11 +38,16 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
|
|
|
38
38
|
for _i, row in df.iterrows():
|
|
39
39
|
result = (
|
|
40
40
|
self.conn.execute(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
41
|
+
(
|
|
42
|
+
f"SELECT * FROM '{standardised_disease_result_path}' "
|
|
43
|
+
f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
|
|
44
|
+
f" '{row['disease_identifier']}') "
|
|
45
|
+
f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
|
|
46
|
+
f"'{row['disease_name']}')"
|
|
47
|
+
)
|
|
48
|
+
if standardised_disease_result_path.exists()
|
|
49
|
+
and standardised_disease_result_path.stat().st_size > 0
|
|
50
|
+
else "SELECT NULL WHERE FALSE"
|
|
46
51
|
)
|
|
47
52
|
.fetchdf()
|
|
48
53
|
.to_dict(orient="records")
|
|
@@ -56,9 +61,15 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
|
|
|
56
61
|
f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
|
|
57
62
|
(disease_match, primary_key),
|
|
58
63
|
)
|
|
64
|
+
elif len(result) == 0:
|
|
65
|
+
relevant_ranks.append(0)
|
|
59
66
|
binary_classification_stats.add_classification(
|
|
60
|
-
|
|
61
|
-
|
|
67
|
+
(
|
|
68
|
+
self.db_connection.parse_table_into_dataclass(
|
|
69
|
+
str(standardised_disease_result_path), RankedPhEvalDiseaseResult
|
|
70
|
+
)
|
|
71
|
+
if standardised_disease_result_path.exists()
|
|
72
|
+
else []
|
|
62
73
|
),
|
|
63
74
|
relevant_ranks,
|
|
64
75
|
)
|
|
@@ -36,11 +36,16 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
|
|
|
36
36
|
for _i, row in df.iterrows():
|
|
37
37
|
result = (
|
|
38
38
|
self.conn.execute(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
(
|
|
40
|
+
f"SELECT * FROM '{standardised_gene_result_path}' "
|
|
41
|
+
f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR), "
|
|
42
|
+
f"'{row['gene_identifier']}') "
|
|
43
|
+
f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
|
|
44
|
+
f"'{row['gene_symbol']}')"
|
|
45
|
+
)
|
|
46
|
+
if standardised_gene_result_path.exists()
|
|
47
|
+
and standardised_gene_result_path.stat().st_size > 0
|
|
48
|
+
else "SELECT NULL WHERE FALSE"
|
|
44
49
|
)
|
|
45
50
|
.fetchdf()
|
|
46
51
|
.to_dict(orient="records")
|
|
@@ -53,9 +58,15 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
|
|
|
53
58
|
f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
|
|
54
59
|
(gene_match, primary_key),
|
|
55
60
|
)
|
|
61
|
+
if not result:
|
|
62
|
+
relevant_ranks.append(0)
|
|
56
63
|
binary_classification_stats.add_classification(
|
|
57
|
-
|
|
58
|
-
|
|
64
|
+
(
|
|
65
|
+
self.db_connection.parse_table_into_dataclass(
|
|
66
|
+
str(standardised_gene_result_path), RankedPhEvalGeneResult
|
|
67
|
+
)
|
|
68
|
+
if standardised_gene_result_path.exists()
|
|
69
|
+
else []
|
|
59
70
|
),
|
|
60
71
|
relevant_ranks,
|
|
61
72
|
)
|
pheval/analyse/generate_plots.py
CHANGED
|
@@ -2,6 +2,7 @@ from pathlib import Path
|
|
|
2
2
|
from typing import List
|
|
3
3
|
|
|
4
4
|
import matplotlib
|
|
5
|
+
import numpy as np
|
|
5
6
|
import pandas as pd
|
|
6
7
|
import seaborn as sns
|
|
7
8
|
from matplotlib import pyplot as plt
|
|
@@ -368,9 +369,16 @@ class PlotGenerator:
|
|
|
368
369
|
"""
|
|
369
370
|
plt.clf()
|
|
370
371
|
for i, benchmark_result in enumerate(benchmarking_results):
|
|
372
|
+
y_score = np.array(benchmark_result.binary_classification_stats.scores)
|
|
373
|
+
y_score = np.nan_to_num(
|
|
374
|
+
y_score,
|
|
375
|
+
nan=0.0,
|
|
376
|
+
posinf=max(y_score[np.isfinite(y_score)]),
|
|
377
|
+
neginf=min(y_score[np.isfinite(y_score)]),
|
|
378
|
+
)
|
|
371
379
|
fpr, tpr, thresh = roc_curve(
|
|
372
380
|
benchmark_result.binary_classification_stats.labels,
|
|
373
|
-
|
|
381
|
+
y_score,
|
|
374
382
|
pos_label=1,
|
|
375
383
|
)
|
|
376
384
|
roc_auc = auc(fpr, tpr)
|
|
@@ -411,9 +419,16 @@ class PlotGenerator:
|
|
|
411
419
|
plt.clf()
|
|
412
420
|
plt.figure()
|
|
413
421
|
for i, benchmark_result in enumerate(benchmarking_results):
|
|
422
|
+
y_score = np.array(benchmark_result.binary_classification_stats.scores)
|
|
423
|
+
y_score = np.nan_to_num(
|
|
424
|
+
y_score,
|
|
425
|
+
nan=0.0,
|
|
426
|
+
posinf=max(y_score[np.isfinite(y_score)]),
|
|
427
|
+
neginf=min(y_score[np.isfinite(y_score)]),
|
|
428
|
+
)
|
|
414
429
|
precision, recall, thresh = precision_recall_curve(
|
|
415
430
|
benchmark_result.binary_classification_stats.labels,
|
|
416
|
-
|
|
431
|
+
y_score,
|
|
417
432
|
)
|
|
418
433
|
precision_recall_auc = auc(recall, precision)
|
|
419
434
|
plt.plot(
|
|
@@ -44,12 +44,16 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
|
|
|
44
44
|
)
|
|
45
45
|
result = (
|
|
46
46
|
self.conn.execute(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
47
|
+
(
|
|
48
|
+
f"SELECT * FROM '{standardised_variant_result_path}' "
|
|
49
|
+
f"WHERE "
|
|
50
|
+
f"chromosome == '{causative_variant.chrom}' AND "
|
|
51
|
+
f"start == {causative_variant.pos} AND "
|
|
52
|
+
f"ref == '{causative_variant.ref}' AND "
|
|
53
|
+
f"alt == '{causative_variant.alt}'"
|
|
54
|
+
)
|
|
55
|
+
if standardised_variant_result_path.exists()
|
|
56
|
+
else "SELECT NULL WHERE FALSE"
|
|
53
57
|
)
|
|
54
58
|
.fetchdf()
|
|
55
59
|
.to_dict(orient="records")
|
|
@@ -66,10 +70,15 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
|
|
|
66
70
|
f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
|
|
67
71
|
(variant_match, primary_key),
|
|
68
72
|
)
|
|
69
|
-
|
|
73
|
+
elif len(result) == 0:
|
|
74
|
+
relevant_ranks.append(0)
|
|
70
75
|
binary_classification_stats.add_classification(
|
|
71
|
-
|
|
72
|
-
|
|
76
|
+
(
|
|
77
|
+
self.db_connection.parse_table_into_dataclass(
|
|
78
|
+
str(standardised_variant_result_path), RankedPhEvalVariantResult
|
|
79
|
+
)
|
|
80
|
+
if standardised_variant_result_path.exists()
|
|
81
|
+
else []
|
|
73
82
|
),
|
|
74
83
|
relevant_ranks,
|
|
75
84
|
)
|
pheval/prepare/prepare_corpus.py
CHANGED
|
@@ -57,6 +57,11 @@ def prepare_corpus(
|
|
|
57
57
|
f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
|
|
58
58
|
)
|
|
59
59
|
continue
|
|
60
|
+
elif phenopacket_util.check_variant_alleles():
|
|
61
|
+
info_log.warning(
|
|
62
|
+
f"Removed {phenopacket_path.name} from the corpus due to identical "
|
|
63
|
+
"reference and alternate allele fields."
|
|
64
|
+
)
|
|
60
65
|
if gene_analysis:
|
|
61
66
|
if phenopacket_util.check_incomplete_gene_record():
|
|
62
67
|
info_log.warning(
|
|
@@ -503,6 +503,19 @@ class PhenopacketUtil:
|
|
|
503
503
|
return True
|
|
504
504
|
return False
|
|
505
505
|
|
|
506
|
+
def check_variant_alleles(self) -> bool:
|
|
507
|
+
"""
|
|
508
|
+
Check if any variant record in the phenopacket has identical reference and alternate alleles.
|
|
509
|
+
|
|
510
|
+
Returns:
|
|
511
|
+
bool: True if the reference and alternate alleles are identical, False otherwise.
|
|
512
|
+
"""
|
|
513
|
+
variants = self.diagnosed_variants()
|
|
514
|
+
for variant in variants:
|
|
515
|
+
if variant.ref == variant.alt:
|
|
516
|
+
return True
|
|
517
|
+
return False
|
|
518
|
+
|
|
506
519
|
def check_incomplete_gene_record(self) -> bool:
|
|
507
520
|
"""
|
|
508
521
|
Check if any gene record in the phenopacket has incomplete information.
|
|
@@ -2,20 +2,20 @@ pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
2
2
|
pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
pheval/analyse/analysis.py,sha256=Yt2xH0WS_2NO13-wYvywzmCRCj8RinQ1MeozJQuGe3o,4009
|
|
4
4
|
pheval/analyse/assess_prioritisation_base.py,sha256=znBscRTqIKxxZMHR-H6KrjFJ6Uv5P5HzwTQUWS6Eoos,3434
|
|
5
|
-
pheval/analyse/benchmark_db_manager.py,sha256=
|
|
5
|
+
pheval/analyse/benchmark_db_manager.py,sha256=R1GstKKoh4PIPupyAarm7DhZyOdVDVtyY4A1nPNTkDs,5181
|
|
6
6
|
pheval/analyse/benchmark_generator.py,sha256=-LljszuKAT3oJfGQn7JHAILCGg5QXYny4nPPf273g_E,5896
|
|
7
7
|
pheval/analyse/benchmarking_data.py,sha256=aRvDmwqjFGKvWDRGjMwaQxfDZscptRBwI-rcSqY-X5s,913
|
|
8
8
|
pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
|
|
9
|
-
pheval/analyse/disease_prioritisation_analysis.py,sha256=
|
|
10
|
-
pheval/analyse/gene_prioritisation_analysis.py,sha256=
|
|
11
|
-
pheval/analyse/generate_plots.py,sha256=
|
|
9
|
+
pheval/analyse/disease_prioritisation_analysis.py,sha256=mOIAiz_WzWQBP-3g8Twv-ii8ZxbXktAmgi6zl4sJaC8,6182
|
|
10
|
+
pheval/analyse/gene_prioritisation_analysis.py,sha256=wPH-mCW_KY3XxrwAvw-ucxLbWzUKBucO4zSo3XqBsaY,6022
|
|
11
|
+
pheval/analyse/generate_plots.py,sha256=5oxsdnAbbVgQj8ZrWTLs12rSM24EXp-IdLCjy5QB1_g,21992
|
|
12
12
|
pheval/analyse/generate_summary_outputs.py,sha256=nKqwbpA-9bbL5mCySiuyV_AUDIokmCg3vD8_JAsg1ls,4157
|
|
13
13
|
pheval/analyse/parse_benchmark_summary.py,sha256=vyAOIdIWF4rZjGTPFE69ajhEC9AkkN3QBVqSe_uYZsg,2946
|
|
14
14
|
pheval/analyse/parse_corpus.py,sha256=N88enptR4qG6cmqXU_TKg8DMmCeFog37eeK5nFEMQOQ,8678
|
|
15
15
|
pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
|
|
16
16
|
pheval/analyse/rank_stats.py,sha256=53ZickUtQlctYsorAIUwlCX7M6UC-wCxoV1MbL6F9gc,17987
|
|
17
17
|
pheval/analyse/run_data_parser.py,sha256=VQBUoOIRYRWc5uqURUvaWdaW3E3C7Su0JvLavQLHQaY,4105
|
|
18
|
-
pheval/analyse/variant_prioritisation_analysis.py,sha256=
|
|
18
|
+
pheval/analyse/variant_prioritisation_analysis.py,sha256=8ntbALzAAfJ4ijviQKvLiwKBeRvTLWI0Tb9dpnCYxhs,6404
|
|
19
19
|
pheval/cli.py,sha256=EBGh6TIxAiWs0eDdQiefq6YuD0mb93siGsNmsVO1j7c,1527
|
|
20
20
|
pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
|
|
21
21
|
pheval/cli_pheval_utils.py,sha256=O6tWnE85QQHGNcP08OwJGANMfXJPsZtFEu-D6ATld00,16700
|
|
@@ -29,7 +29,7 @@ pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
29
29
|
pheval/prepare/create_noisy_phenopackets.py,sha256=ydhA4mpqKTDc4hBu8YfvNW2nMubHK3dbO-cv0lA4JFQ,11504
|
|
30
30
|
pheval/prepare/create_spiked_vcf.py,sha256=90A-Mi8QKhvN036vtFEVWAHgzHO37itiLYrqYlG4LiA,23953
|
|
31
31
|
pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
|
|
32
|
-
pheval/prepare/prepare_corpus.py,sha256=
|
|
32
|
+
pheval/prepare/prepare_corpus.py,sha256=mwI3FJJ6GGSbmbvz4enrVP86MBoHSssMIRebkapmu5Y,4484
|
|
33
33
|
pheval/prepare/update_phenopacket.py,sha256=21fzUPbwKN6Ey5TSh9PFzjT2x86U19RAE6WmkjG8u28,4770
|
|
34
34
|
pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
|
|
35
35
|
pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
|
|
@@ -47,11 +47,11 @@ pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
|
|
|
47
47
|
pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
|
|
48
48
|
pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
|
|
49
49
|
pheval/utils/file_utils.py,sha256=m21cz-qjDYqnI8ClUv3J9fKizex98a-9bSEerQ75i_c,3576
|
|
50
|
-
pheval/utils/phenopacket_utils.py,sha256=
|
|
50
|
+
pheval/utils/phenopacket_utils.py,sha256=6xQ8WCLdR1VhiU3nCDzaqEVKjGvDWrzvPA50_6ZAHXM,27310
|
|
51
51
|
pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
|
|
52
52
|
pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
|
|
53
|
-
pheval-0.4.
|
|
54
|
-
pheval-0.4.
|
|
55
|
-
pheval-0.4.
|
|
56
|
-
pheval-0.4.
|
|
57
|
-
pheval-0.4.
|
|
53
|
+
pheval-0.4.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
54
|
+
pheval-0.4.3.dist-info/METADATA,sha256=Y6dy74TNnyirSwcpgG9ktJopwoVEZWlixSR4CJOZfDo,6469
|
|
55
|
+
pheval-0.4.3.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
|
56
|
+
pheval-0.4.3.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
|
|
57
|
+
pheval-0.4.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|