pheval-0.4.1-py3-none-any.whl → pheval-0.4.3-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

@@ -75,6 +75,7 @@ class BenchmarkDBManager:
75
75
  `False` otherwise.
76
76
  """
77
77
  list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
78
+ entity = entity.replace("nan", "None").replace("NaN", "None")
78
79
  if list_pattern.match(str(entity)):
79
80
  list_representation = ast.literal_eval(entity)
80
81
  if isinstance(list_representation, list):
@@ -38,11 +38,16 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
38
38
  for _i, row in df.iterrows():
39
39
  result = (
40
40
  self.conn.execute(
41
- f"SELECT * FROM '{standardised_disease_result_path}' "
42
- f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
43
- f" '{row['disease_identifier']}') "
44
- f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
45
- f"'{row['disease_name']}')"
41
+ (
42
+ f"SELECT * FROM '{standardised_disease_result_path}' "
43
+ f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
44
+ f" '{row['disease_identifier']}') "
45
+ f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
46
+ f"'{row['disease_name']}')"
47
+ )
48
+ if standardised_disease_result_path.exists()
49
+ and standardised_disease_result_path.stat().st_size > 0
50
+ else "SELECT NULL WHERE FALSE"
46
51
  )
47
52
  .fetchdf()
48
53
  .to_dict(orient="records")
@@ -56,9 +61,15 @@ class AssessDiseasePrioritisation(AssessPrioritisationBase):
56
61
  f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
57
62
  (disease_match, primary_key),
58
63
  )
64
+ elif len(result) == 0:
65
+ relevant_ranks.append(0)
59
66
  binary_classification_stats.add_classification(
60
- self.db_connection.parse_table_into_dataclass(
61
- str(standardised_disease_result_path), RankedPhEvalDiseaseResult
67
+ (
68
+ self.db_connection.parse_table_into_dataclass(
69
+ str(standardised_disease_result_path), RankedPhEvalDiseaseResult
70
+ )
71
+ if standardised_disease_result_path.exists()
72
+ else []
62
73
  ),
63
74
  relevant_ranks,
64
75
  )
@@ -36,11 +36,16 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
36
36
  for _i, row in df.iterrows():
37
37
  result = (
38
38
  self.conn.execute(
39
- f"SELECT * FROM '{standardised_gene_result_path}' "
40
- f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR),"
41
- f" '{row['gene_identifier']}') "
42
- f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
43
- f"'{row['gene_symbol']}')"
39
+ (
40
+ f"SELECT * FROM '{standardised_gene_result_path}' "
41
+ f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR), "
42
+ f"'{row['gene_identifier']}') "
43
+ f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
44
+ f"'{row['gene_symbol']}')"
45
+ )
46
+ if standardised_gene_result_path.exists()
47
+ and standardised_gene_result_path.stat().st_size > 0
48
+ else "SELECT NULL WHERE FALSE"
44
49
  )
45
50
  .fetchdf()
46
51
  .to_dict(orient="records")
@@ -53,9 +58,15 @@ class AssessGenePrioritisation(AssessPrioritisationBase):
53
58
  f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
54
59
  (gene_match, primary_key),
55
60
  )
61
+ if not result:
62
+ relevant_ranks.append(0)
56
63
  binary_classification_stats.add_classification(
57
- self.db_connection.parse_table_into_dataclass(
58
- str(standardised_gene_result_path), RankedPhEvalGeneResult
64
+ (
65
+ self.db_connection.parse_table_into_dataclass(
66
+ str(standardised_gene_result_path), RankedPhEvalGeneResult
67
+ )
68
+ if standardised_gene_result_path.exists()
69
+ else []
59
70
  ),
60
71
  relevant_ranks,
61
72
  )
@@ -2,6 +2,7 @@ from pathlib import Path
2
2
  from typing import List
3
3
 
4
4
  import matplotlib
5
+ import numpy as np
5
6
  import pandas as pd
6
7
  import seaborn as sns
7
8
  from matplotlib import pyplot as plt
@@ -368,9 +369,16 @@ class PlotGenerator:
368
369
  """
369
370
  plt.clf()
370
371
  for i, benchmark_result in enumerate(benchmarking_results):
372
+ y_score = np.array(benchmark_result.binary_classification_stats.scores)
373
+ y_score = np.nan_to_num(
374
+ y_score,
375
+ nan=0.0,
376
+ posinf=max(y_score[np.isfinite(y_score)]),
377
+ neginf=min(y_score[np.isfinite(y_score)]),
378
+ )
371
379
  fpr, tpr, thresh = roc_curve(
372
380
  benchmark_result.binary_classification_stats.labels,
373
- benchmark_result.binary_classification_stats.scores,
381
+ y_score,
374
382
  pos_label=1,
375
383
  )
376
384
  roc_auc = auc(fpr, tpr)
@@ -411,9 +419,16 @@ class PlotGenerator:
411
419
  plt.clf()
412
420
  plt.figure()
413
421
  for i, benchmark_result in enumerate(benchmarking_results):
422
+ y_score = np.array(benchmark_result.binary_classification_stats.scores)
423
+ y_score = np.nan_to_num(
424
+ y_score,
425
+ nan=0.0,
426
+ posinf=max(y_score[np.isfinite(y_score)]),
427
+ neginf=min(y_score[np.isfinite(y_score)]),
428
+ )
414
429
  precision, recall, thresh = precision_recall_curve(
415
430
  benchmark_result.binary_classification_stats.labels,
416
- benchmark_result.binary_classification_stats.scores,
431
+ y_score,
417
432
  )
418
433
  precision_recall_auc = auc(recall, precision)
419
434
  plt.plot(
@@ -44,12 +44,16 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
44
44
  )
45
45
  result = (
46
46
  self.conn.execute(
47
- f"SELECT * FROM '{standardised_variant_result_path}' "
48
- f"WHERE "
49
- f"chromosome == '{causative_variant.chrom}' AND "
50
- f"start == {causative_variant.pos} AND "
51
- f"ref == '{causative_variant.ref}' AND "
52
- f"alt == '{causative_variant.alt}'"
47
+ (
48
+ f"SELECT * FROM '{standardised_variant_result_path}' "
49
+ f"WHERE "
50
+ f"chromosome == '{causative_variant.chrom}' AND "
51
+ f"start == {causative_variant.pos} AND "
52
+ f"ref == '{causative_variant.ref}' AND "
53
+ f"alt == '{causative_variant.alt}'"
54
+ )
55
+ if standardised_variant_result_path.exists()
56
+ else "SELECT NULL WHERE FALSE"
53
57
  )
54
58
  .fetchdf()
55
59
  .to_dict(orient="records")
@@ -66,10 +70,15 @@ class AssessVariantPrioritisation(AssessPrioritisationBase):
66
70
  f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
67
71
  (variant_match, primary_key),
68
72
  )
69
-
73
+ elif len(result) == 0:
74
+ relevant_ranks.append(0)
70
75
  binary_classification_stats.add_classification(
71
- self.db_connection.parse_table_into_dataclass(
72
- str(standardised_variant_result_path), RankedPhEvalVariantResult
76
+ (
77
+ self.db_connection.parse_table_into_dataclass(
78
+ str(standardised_variant_result_path), RankedPhEvalVariantResult
79
+ )
80
+ if standardised_variant_result_path.exists()
81
+ else []
73
82
  ),
74
83
  relevant_ranks,
75
84
  )
@@ -57,6 +57,11 @@ def prepare_corpus(
57
57
  f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
58
58
  )
59
59
  continue
60
+ elif phenopacket_util.check_variant_alleles():
61
+ info_log.warning(
62
+ f"Removed {phenopacket_path.name} from the corpus due to identical "
63
+ "reference and alternate allele fields."
64
+ )
60
65
  if gene_analysis:
61
66
  if phenopacket_util.check_incomplete_gene_record():
62
67
  info_log.warning(
@@ -503,6 +503,19 @@ class PhenopacketUtil:
503
503
  return True
504
504
  return False
505
505
 
506
+ def check_variant_alleles(self) -> bool:
507
+ """
508
+ Check if any variant record in the phenopacket has identical reference and alternate alleles.
509
+
510
+ Returns:
511
+ bool: True if the reference and alternate alleles are identical, False otherwise.
512
+ """
513
+ variants = self.diagnosed_variants()
514
+ for variant in variants:
515
+ if variant.ref == variant.alt:
516
+ return True
517
+ return False
518
+
506
519
  def check_incomplete_gene_record(self) -> bool:
507
520
  """
508
521
  Check if any gene record in the phenopacket has incomplete information.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: pheval
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
@@ -2,20 +2,20 @@ pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  pheval/analyse/analysis.py,sha256=Yt2xH0WS_2NO13-wYvywzmCRCj8RinQ1MeozJQuGe3o,4009
4
4
  pheval/analyse/assess_prioritisation_base.py,sha256=znBscRTqIKxxZMHR-H6KrjFJ6Uv5P5HzwTQUWS6Eoos,3434
5
- pheval/analyse/benchmark_db_manager.py,sha256=RaAnvq5Tfvsw8161iQUe_P146X98BckMRhMq6BibwNI,5111
5
+ pheval/analyse/benchmark_db_manager.py,sha256=R1GstKKoh4PIPupyAarm7DhZyOdVDVtyY4A1nPNTkDs,5181
6
6
  pheval/analyse/benchmark_generator.py,sha256=-LljszuKAT3oJfGQn7JHAILCGg5QXYny4nPPf273g_E,5896
7
7
  pheval/analyse/benchmarking_data.py,sha256=aRvDmwqjFGKvWDRGjMwaQxfDZscptRBwI-rcSqY-X5s,913
8
8
  pheval/analyse/binary_classification_stats.py,sha256=E35YjvGM-zFnuEt8M3pgN03vBab4MH6ih726QKvuogg,12519
9
- pheval/analyse/disease_prioritisation_analysis.py,sha256=1Ut4u6p9mDGbGBoXrVxTuUgv_nrqxzBhq5N9jbGWRWs,5725
10
- pheval/analyse/gene_prioritisation_analysis.py,sha256=_7r16BC0S2H9rOY016mLLUg6PcxaCQOh5FQcNKFTy-4,5582
11
- pheval/analyse/generate_plots.py,sha256=rMSdgawGYYc7BAAolqz73TAnanKtrxCC48bI6WQq6xc,21455
9
+ pheval/analyse/disease_prioritisation_analysis.py,sha256=mOIAiz_WzWQBP-3g8Twv-ii8ZxbXktAmgi6zl4sJaC8,6182
10
+ pheval/analyse/gene_prioritisation_analysis.py,sha256=wPH-mCW_KY3XxrwAvw-ucxLbWzUKBucO4zSo3XqBsaY,6022
11
+ pheval/analyse/generate_plots.py,sha256=5oxsdnAbbVgQj8ZrWTLs12rSM24EXp-IdLCjy5QB1_g,21992
12
12
  pheval/analyse/generate_summary_outputs.py,sha256=nKqwbpA-9bbL5mCySiuyV_AUDIokmCg3vD8_JAsg1ls,4157
13
13
  pheval/analyse/parse_benchmark_summary.py,sha256=vyAOIdIWF4rZjGTPFE69ajhEC9AkkN3QBVqSe_uYZsg,2946
14
14
  pheval/analyse/parse_corpus.py,sha256=N88enptR4qG6cmqXU_TKg8DMmCeFog37eeK5nFEMQOQ,8678
15
15
  pheval/analyse/prioritisation_result_types.py,sha256=qJoB6O-lFYmzAMcTQeDJZQNLJ6hleoKDYATTkhvFF98,1228
16
16
  pheval/analyse/rank_stats.py,sha256=53ZickUtQlctYsorAIUwlCX7M6UC-wCxoV1MbL6F9gc,17987
17
17
  pheval/analyse/run_data_parser.py,sha256=VQBUoOIRYRWc5uqURUvaWdaW3E3C7Su0JvLavQLHQaY,4105
18
- pheval/analyse/variant_prioritisation_analysis.py,sha256=LPEZDhFfzx-sQl9g8pM700l4Zzpy6qdOS68vOV2mZA0,6020
18
+ pheval/analyse/variant_prioritisation_analysis.py,sha256=8ntbALzAAfJ4ijviQKvLiwKBeRvTLWI0Tb9dpnCYxhs,6404
19
19
  pheval/cli.py,sha256=EBGh6TIxAiWs0eDdQiefq6YuD0mb93siGsNmsVO1j7c,1527
20
20
  pheval/cli_pheval.py,sha256=fWbKUcPTZZSa1EJEtH_lNn1XE6qRApRHihqUZS5owrA,2424
21
21
  pheval/cli_pheval_utils.py,sha256=O6tWnE85QQHGNcP08OwJGANMfXJPsZtFEu-D6ATld00,16700
@@ -29,7 +29,7 @@ pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  pheval/prepare/create_noisy_phenopackets.py,sha256=ydhA4mpqKTDc4hBu8YfvNW2nMubHK3dbO-cv0lA4JFQ,11504
30
30
  pheval/prepare/create_spiked_vcf.py,sha256=90A-Mi8QKhvN036vtFEVWAHgzHO37itiLYrqYlG4LiA,23953
31
31
  pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
32
- pheval/prepare/prepare_corpus.py,sha256=eRvozzezIgAqHAumtqul0WfXfBO1iOBaSlN8fPSn0Nw,4223
32
+ pheval/prepare/prepare_corpus.py,sha256=mwI3FJJ6GGSbmbvz4enrVP86MBoHSssMIRebkapmu5Y,4484
33
33
  pheval/prepare/update_phenopacket.py,sha256=21fzUPbwKN6Ey5TSh9PFzjT2x86U19RAE6WmkjG8u28,4770
34
34
  pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
35
35
  pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
@@ -47,11 +47,11 @@ pheval/utils/docs_gen.py,sha256=6FGtHicBC0rZKi0tdL3Epsg8d4osE44I9f1Ga0j4JLA,3193
47
47
  pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
48
48
  pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
49
49
  pheval/utils/file_utils.py,sha256=m21cz-qjDYqnI8ClUv3J9fKizex98a-9bSEerQ75i_c,3576
50
- pheval/utils/phenopacket_utils.py,sha256=W9T_X48EJ-xn5GghzbZlt-lI-DxWoSm7_SHr8DCJg2Q,26856
50
+ pheval/utils/phenopacket_utils.py,sha256=6xQ8WCLdR1VhiU3nCDzaqEVKjGvDWrzvPA50_6ZAHXM,27310
51
51
  pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
52
52
  pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
53
- pheval-0.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
54
- pheval-0.4.1.dist-info/METADATA,sha256=H14Zz0k7MLDs1eEryeaXjCfNUm1FqAyv0M1mXhXq740,6469
55
- pheval-0.4.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
56
- pheval-0.4.1.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
57
- pheval-0.4.1.dist-info/RECORD,,
53
+ pheval-0.4.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
54
+ pheval-0.4.3.dist-info/METADATA,sha256=Y6dy74TNnyirSwcpgG9ktJopwoVEZWlixSR4CJOZfDo,6469
55
+ pheval-0.4.3.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
56
+ pheval-0.4.3.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
57
+ pheval-0.4.3.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.1
2
+ Generator: poetry-core 2.0.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any