pheval 0.5.0__tar.gz → 0.5.1__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

Files changed (52)
  1. {pheval-0.5.0 → pheval-0.5.1}/PKG-INFO +1 -1
  2. {pheval-0.5.0 → pheval-0.5.1}/pyproject.toml +1 -1
  3. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/post_processing/phenopacket_truth_set.py +21 -6
  4. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/post_processing/post_processing.py +3 -17
  5. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/post_processing/validate_result_format.py +9 -10
  6. {pheval-0.5.0 → pheval-0.5.1}/LICENSE +0 -0
  7. {pheval-0.5.0 → pheval-0.5.1}/README.md +0 -0
  8. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/__init__.py +0 -0
  9. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/__init__.py +0 -0
  10. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/benchmark.py +0 -0
  11. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/benchmark_db_manager.py +0 -0
  12. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/benchmark_output_type.py +0 -0
  13. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/binary_classification_curves.py +0 -0
  14. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/binary_classification_stats.py +0 -0
  15. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/generate_plots.py +0 -0
  16. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/generate_rank_comparisons.py +0 -0
  17. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/rank_stats.py +0 -0
  18. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/analyse/run_data_parser.py +0 -0
  19. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/cli.py +0 -0
  20. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/cli_pheval.py +0 -0
  21. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/cli_pheval_utils.py +0 -0
  22. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/config_parser.py +0 -0
  23. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/implementations/__init__.py +0 -0
  24. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/infra/__init__.py +0 -0
  25. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/infra/exomiserdb.py +0 -0
  26. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/post_processing/__init__.py +0 -0
  27. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/prepare/__init__.py +0 -0
  28. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/prepare/create_noisy_phenopackets.py +0 -0
  29. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/prepare/create_spiked_vcf.py +0 -0
  30. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/prepare/custom_exceptions.py +0 -0
  31. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/prepare/prepare_corpus.py +0 -0
  32. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/prepare/update_phenopacket.py +0 -0
  33. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/alternate_ouputs/CADA_results.txt +0 -0
  34. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/alternate_ouputs/DeepPVP_results.txt +0 -0
  35. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/alternate_ouputs/OVA_results.txt +0 -0
  36. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/alternate_ouputs/Phen2Gene_results.json +0 -0
  37. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/alternate_ouputs/Phenolyzer_results.txt +0 -0
  38. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/alternate_ouputs/lirical_results.tsv +0 -0
  39. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/alternate_ouputs/svanna_results.tsv +0 -0
  40. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/resources/hgnc_complete_set.txt +0 -0
  41. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/run_metadata.py +0 -0
  42. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/runners/__init__.py +0 -0
  43. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/runners/runner.py +0 -0
  44. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/__init__.py +0 -0
  45. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/docs_gen.py +0 -0
  46. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/docs_gen.sh +0 -0
  47. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/exomiser.py +0 -0
  48. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/file_utils.py +0 -0
  49. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/logger.py +0 -0
  50. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/phenopacket_utils.py +0 -0
  51. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/semsim_utils.py +0 -0
  52. {pheval-0.5.0 → pheval-0.5.1}/src/pheval/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pheval
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pheval"
3
- version = "0.5.0"
3
+ version = "0.5.1"
4
4
  description = ""
5
5
  authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
6
6
  "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",
@@ -12,6 +12,18 @@ from pheval.utils.phenopacket_utils import (
12
12
  )
13
13
 
14
14
 
15
+ def calculate_end_pos(variant_start: int, variant_ref: str) -> int:
16
+ """Calculate the end position for a variant
17
+ Args:
18
+ variant_start (int): The start position of the variant
19
+ variant_ref (str): The reference allele of the variant
20
+
21
+ Returns:
22
+ int: The end position of the variant
23
+ """
24
+ return variant_start + len(variant_ref) - 1
25
+
26
+
15
27
  class PhenopacketTruthSet:
16
28
  """Class for finding the causative gene/disease/variant from a phenopacket"""
17
29
 
@@ -139,13 +151,14 @@ class PhenopacketTruthSet:
139
151
  return pl.DataFrame(
140
152
  {
141
153
  "chrom": [v.chrom for v in variants],
142
- "pos": [v.pos for v in variants],
154
+ "start": [v.pos for v in variants],
155
+ "end": [calculate_end_pos(v.pos, v.ref) for v in variants],
143
156
  "ref": [v.ref for v in variants],
144
157
  "alt": [v.alt for v in variants],
145
158
  }
146
159
  ).with_columns(
147
160
  [
148
- pl.concat_str(["chrom", "pos", "ref", "alt"], separator="-").alias("variant_id"),
161
+ pl.concat_str(["chrom", "start", "ref", "alt"], separator="-").alias("variant_id"),
149
162
  pl.lit(0.0).cast(pl.Float64).alias("score"),
150
163
  pl.lit(0).cast(pl.Int64).alias("rank"),
151
164
  pl.lit(True).alias("true_positive"),
@@ -166,10 +179,10 @@ class PhenopacketTruthSet:
166
179
  return (
167
180
  ranked_results.with_columns(
168
181
  [
169
- pl.struct(["chrom", "pos", "ref", "alt"])
182
+ pl.struct(["chrom", "start", "end", "ref", "alt"])
170
183
  .is_in(
171
184
  classified_results.select(
172
- pl.struct(["chrom", "pos", "ref", "alt"])
185
+ pl.struct(["chrom", "start", "end", "ref", "alt"])
173
186
  ).to_series()
174
187
  )
175
188
  .alias("true_positive")
@@ -179,8 +192,10 @@ class PhenopacketTruthSet:
179
192
  .select(classified_results.columns)
180
193
  .vstack(
181
194
  classified_results.filter(
182
- ~pl.struct(["chrom", "pos", "ref", "alt"]).is_in(
183
- ranked_results.select(pl.struct(["chrom", "pos", "ref", "alt"])).to_series()
195
+ ~pl.struct(["chrom", "start", "end", "ref", "alt"]).is_in(
196
+ ranked_results.select(
197
+ pl.struct(["chrom", "start", "end", "ref", "alt"])
198
+ ).to_series()
184
199
  )
185
200
  )
186
201
  )
@@ -22,18 +22,6 @@ class ResultType(Enum):
22
22
  VARIANT = "variant"
23
23
 
24
24
 
25
- def calculate_end_pos(variant_start: int, variant_ref: str) -> int:
26
- """Calculate the end position for a variant
27
- Args:
28
- variant_start (int): The start position of the variant
29
- variant_ref (str): The reference allele of the variant
30
-
31
- Returns:
32
- int: The end position of the variant
33
- """
34
- return variant_start + len(variant_ref) - 1
35
-
36
-
37
25
  class SortOrder(Enum):
38
26
  """Enumeration representing sorting orders."""
39
27
 
@@ -106,7 +94,7 @@ def _write_variant_result(ranked_results: pl.DataFrame, output_file: Path) -> No
106
94
  output_file (Path): Path to the output file.
107
95
  """
108
96
  variant_output = ranked_results.select(
109
- ["rank", "score", "chromosome", "start", "end", "ref", "alt", "variant_id", "true_positive"]
97
+ ["rank", "score", "chrom", "start", "end", "ref", "alt", "variant_id", "true_positive"]
110
98
  )
111
99
  _write_results_file(output_file, variant_output)
112
100
 
@@ -119,9 +107,7 @@ def _write_disease_result(ranked_results: pl.DataFrame, output_file: Path) -> No
119
107
  ranked_results ([PhEvalResult]): List of ranked PhEval disease results.
120
108
  output_file (Path): Path to the output file.
121
109
  """
122
- disease_output = ranked_results.select(
123
- ["rank", "score", "disease_name", "disease_identifier", "true_positive"]
124
- )
110
+ disease_output = ranked_results.select(["rank", "score", "disease_identifier", "true_positive"])
125
111
  _write_results_file(output_file, disease_output)
126
112
 
127
113
 
@@ -228,7 +214,7 @@ def generate_variant_result(
228
214
  phenopacket_dir, output_dir.joinpath("pheval_variant_results"), ResultType.VARIANT
229
215
  )
230
216
  ranked_results = _rank_results(results, sort_order).with_columns(
231
- pl.concat_str(["chrom", "pos", "ref", "alt"], separator="-").alias("variant_id")
217
+ pl.concat_str(["chrom", "start", "ref", "alt"], separator="-").alias("variant_id")
232
218
  )
233
219
  classified_results = PhenopacketTruthSet(phenopacket_dir).merge_variant_results(
234
220
  ranked_results, output_file
@@ -35,18 +35,17 @@ class ResultSchema(Enum):
35
35
  )
36
36
  DISEASE_RESULT_SCHEMA = pl.Schema(
37
37
  {
38
- "disease_name": pl.String,
39
38
  "disease_identifier": pl.String,
40
39
  "score": pl.Float64,
41
40
  "grouping_id": pl.Utf8,
42
41
  }
43
42
  )
44
43
 
45
- def validate(self, df: pl.DataFrame) -> bool:
44
+ def validate(self, results: pl.DataFrame) -> bool:
46
45
  """
47
46
  Validate that a DataFrame follows the expected schema.
48
47
  Args:
49
- df (pl.DataFrame): The DataFrame to validate.
48
+ results (pl.DataFrame): The DataFrame to validate.
50
49
  Raises:
51
50
  ValueError: If a required column is missing or the grouping_id column contains a null value.
52
51
  TypeError: If a column exists but has an incorrect data type.
@@ -55,18 +54,18 @@ class ResultSchema(Enum):
55
54
  """
56
55
  expected_schema = self.value
57
56
 
58
- if "grouping_id" in df.columns and df["grouping_id"].null_count() > 0:
57
+ if "grouping_id" in results.columns and results["grouping_id"].null_count() > 0:
59
58
  raise ValueError("'grouping_id' column should not contain null values if provided.")
60
59
 
61
60
  for col_name, expected_type in expected_schema.items():
62
- if col_name not in df.schema:
61
+ if col_name not in results.schema:
63
62
  if col_name == "grouping_id":
64
63
  continue
65
64
  raise ValueError(f"Missing required column: {col_name}")
66
65
 
67
- if df.schema[col_name] != expected_type:
66
+ if results.schema[col_name] != expected_type:
68
67
  raise TypeError(
69
- f"Column '{col_name}' has type {df.schema[col_name]}, expected {expected_type}"
68
+ f"Column '{col_name}' has type {results.schema[col_name]}, expected {expected_type}"
70
69
  )
71
70
 
72
71
  return True
@@ -83,9 +82,9 @@ def validate_dataframe(schema: ResultSchema) -> Callable:
83
82
 
84
83
  def decorator(func: Callable) -> Callable:
85
84
  @wraps(func)
86
- def wrapper(df: pl.DataFrame, *args, **kwargs):
87
- schema.validate(df)
88
- return func(df, *args, **kwargs)
85
+ def wrapper(results: pl.DataFrame, *args, **kwargs):
86
+ schema.validate(results)
87
+ return func(results, *args, **kwargs)
89
88
 
90
89
  return wrapper
91
90
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes