pheval-exomiser 0.2.7__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ def post_process_result_format(
8
8
  config: ExomiserConfigurations,
9
9
  raw_results_dir: Path,
10
10
  output_dir: Path,
11
+ phenopacket_dir: Path,
11
12
  variant_analysis: bool,
12
13
  gene_analysis: bool,
13
14
  disease_analysis: bool,
@@ -15,12 +16,13 @@ def post_process_result_format(
15
16
  """Standardise Exomiser json format to separated gene and variant results."""
16
17
  print("...standardising results format...")
17
18
  create_standardised_results(
18
- results_dir=raw_results_dir,
19
+ result_dir=raw_results_dir,
19
20
  output_dir=output_dir,
20
- score_name=config.post_process.score_name,
21
+ phenopacket_dir=phenopacket_dir,
21
22
  sort_order=config.post_process.sort_order,
22
- variant_analysis=variant_analysis,
23
+ score_name=config.post_process.score_name,
23
24
  gene_analysis=gene_analysis,
24
25
  disease_analysis=disease_analysis,
26
+ variant_analysis=variant_analysis,
25
27
  )
26
28
  print("done")
@@ -1,23 +1,24 @@
1
- #!/usr/bin/python
2
- import json
1
+ import uuid
2
+ from enum import Enum
3
3
  from pathlib import Path
4
4
 
5
5
  import click
6
+ import polars as pl
6
7
  from pheval.post_processing.post_processing import (
7
- PhEvalDiseaseResult,
8
- PhEvalGeneResult,
9
- PhEvalVariantResult,
10
- generate_pheval_result,
8
+ SortOrder,
9
+ generate_disease_result,
10
+ generate_gene_result,
11
+ generate_variant_result,
11
12
  )
12
13
  from pheval.utils.file_utils import files_with_suffix
13
14
 
14
15
 
15
def read_exomiser_json_result(exomiser_result_path: Path) -> dict:
    """Load an Exomiser JSON result file.

    Args:
        exomiser_result_path: Path to the Exomiser JSON result file.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.
    """
    # The context manager closes the file on exit, so no explicit close() is needed.
    with open(exomiser_result_path) as exomiser_json_result:
        return json.load(exomiser_json_result)
16
class ModeOfInheritance(Enum):
    """Numeric codes for mode-of-inheritance labels, looked up by member name.

    Members that share a value are enum aliases: both dominant labels resolve
    to 1 and both recessive labels resolve to 2, so a lookup by name followed
    by ``.value`` collapses the label to one of three codes.
    """

    AUTOSOMAL_DOMINANT = 1
    AUTOSOMAL_RECESSIVE = 2
    # Same value as AUTOSOMAL_DOMINANT, so Enum registers this name as an alias of it.
    X_DOMINANT = 1
    # Same value as AUTOSOMAL_RECESSIVE, so Enum registers this name as an alias of it.
    X_RECESSIVE = 2
    MITOCHONDRIAL = 3
21
22
 
22
23
 
23
24
  def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
@@ -25,213 +26,144 @@ def trim_exomiser_result_filename(exomiser_result_path: Path) -> Path:
25
26
  return Path(str(exomiser_result_path.name).replace("-exomiser", ""))
26
27
 
27
28
 
28
- class PhEvalGeneResultFromExomiserJsonCreator:
29
- def __init__(self, exomiser_json_result: [dict], score_name: str):
30
- self.exomiser_json_result = exomiser_json_result
31
- self.score_name = score_name
32
-
33
- @staticmethod
34
- def _find_gene_symbol(result_entry: dict) -> str:
35
- """Return gene symbol from Exomiser result entry."""
36
- return result_entry["geneSymbol"]
37
-
38
- @staticmethod
39
- def _find_gene_identifier(result_entry: dict) -> str:
40
- """Return ensembl gene identifier from Exomiser result entry."""
41
- return result_entry["geneIdentifier"]["geneId"]
29
def extract_gene_results_from_json(
    exomiser_json_result: pl.DataFrame, score_name: str
) -> pl.DataFrame:
    """Build the gene result table from a loaded Exomiser JSON result.

    Args:
        exomiser_json_result: Exomiser JSON result loaded as a polars DataFrame.
        score_name: Name of the column reported as ``score``.

    Returns:
        A DataFrame with ``gene_symbol``, ``gene_identifier`` and ``score``
        columns; rows holding a null in any of these columns are dropped.
    """
    gene_symbol = pl.col("geneSymbol").alias("gene_symbol")
    gene_identifier = pl.col("geneIdentifier").struct.field("geneId").alias("gene_identifier")
    # A null score is replaced by 0 before rounding to four decimal places.
    score = pl.col(score_name).fill_null(0).round(4).alias("score")
    selected = exomiser_json_result.select([gene_symbol, gene_identifier, score])
    return selected.drop_nulls()
39
+
40
+
41
def extract_disease_results_from_json(exomiser_json_result: pl.DataFrame) -> pl.DataFrame:
    """Build the disease result table from a loaded Exomiser JSON result.

    Args:
        exomiser_json_result: Exomiser JSON result loaded as a polars DataFrame.

    Returns:
        A DataFrame with ``disease_identifier`` and ``score`` columns, one row
        per disease match; rows holding a null in either column are dropped.
    """
    disease_matches = (
        pl.col("priorityResults").struct.field("HIPHIVE_PRIORITY").struct.field("diseaseMatches")
    )
    # One row per disease match.
    per_match = exomiser_json_result.select([disease_matches]).explode("diseaseMatches")
    # Flatten the match struct, then its nested "model" struct, into plain columns.
    flattened = per_match.unnest("diseaseMatches").unnest("model")
    disease_columns = [pl.col("diseaseId").alias("disease_identifier"), pl.col("score").round(4)]
    return flattened.select(disease_columns).drop_nulls()
42
56
 
43
- def _find_relevant_score(self, result_entry: dict):
44
- """Return score from Exomiser result entry."""
45
- return round(result_entry[self.score_name], 4)
46
57
 
47
- def extract_pheval_gene_requirements(self) -> [PhEvalGeneResult]:
48
- """Extract data required to produce PhEval gene output."""
49
- simplified_exomiser_result = []
50
- for result_entry in self.exomiser_json_result:
51
- if self.score_name in result_entry:
52
- simplified_exomiser_result.append(
53
- PhEvalGeneResult(
54
- gene_symbol=self._find_gene_symbol(result_entry),
55
- gene_identifier=self._find_gene_identifier(result_entry),
56
- score=self._find_relevant_score(result_entry),
58
+ def extract_variant_results_from_json(
59
+ exomiser_json_result: pl.DataFrame, score_name: str
60
+ ) -> pl.DataFrame:
61
+ return (
62
+ exomiser_json_result.filter(pl.col("geneScores").is_not_null())
63
+ .select([pl.col("geneScores"), pl.col(score_name).alias("score"), pl.col("geneSymbol")])
64
+ .explode("geneScores")
65
+ .unnest("geneScores")
66
+ .filter(pl.col("contributingVariants").is_not_null())
67
+ .explode("contributingVariants")
68
+ .with_columns(
69
+ [
70
+ pl.col("contributingVariants").struct.field("contigName").alias("chrom"),
71
+ pl.col("contributingVariants").struct.field("start"),
72
+ pl.col("contributingVariants").struct.field("end"),
73
+ pl.col("contributingVariants").struct.field("ref"),
74
+ pl.col("contributingVariants")
75
+ .struct.field("alt")
76
+ .fill_null("")
77
+ .str.strip_chars("<>")
78
+ .alias("alt"),
79
+ pl.col("modeOfInheritance")
80
+ .map_elements(lambda moi: ModeOfInheritance[moi].value, return_dtype=pl.Int8)
81
+ .alias("moi_enum"),
82
+ ]
83
+ )
84
+ .with_columns(
85
+ [
86
+ (pl.col("moi_enum") == 2).alias("is_recessive"),
87
+ pl.when(pl.col("moi_enum") == 2)
88
+ .then(
89
+ pl.format(
90
+ "recessive|{}|{}|{}",
91
+ pl.col("geneSymbol"),
92
+ pl.col("score"),
93
+ pl.col("moi_enum"),
57
94
  )
58
95
  )
59
-
60
- return simplified_exomiser_result
61
-
62
-
63
- class PhEvalVariantResultFromExomiserJsonCreator:
64
-
65
- def __init__(self, exomiser_json_result: [dict], score_name: str):
66
- self.exomiser_json_result = exomiser_json_result
67
- self.score_name = score_name
68
-
69
- @staticmethod
70
- def _find_chromosome(result_entry: dict) -> str:
71
- """Return chromosome from Exomiser result entry."""
72
- return result_entry["contigName"]
73
-
74
- @staticmethod
75
- def _find_start_pos(result_entry: dict) -> int:
76
- """Return start position from Exomiser result entry."""
77
- return result_entry["start"]
78
-
79
- @staticmethod
80
- def _find_end_pos(result_entry: dict) -> int:
81
- """Return end position from Exomiser result entry."""
82
- return result_entry["end"]
83
-
84
- @staticmethod
85
- def _find_ref(result_entry: dict) -> str:
86
- """Return reference allele from Exomiser result entry."""
87
- return result_entry["ref"]
88
-
89
- @staticmethod
90
- def _find_alt(result_entry: dict) -> str:
91
- """Return alternate allele from Exomiser result entry."""
92
- if "alt" in result_entry and result_entry["alt"] is not None:
93
- return result_entry["alt"].strip(">").strip("<")
94
- else:
95
- return ""
96
-
97
- def _find_relevant_score(self, result_entry) -> float:
98
- """Return score from Exomiser result entry."""
99
- return round(result_entry[self.score_name], 4)
100
-
101
- def _filter_for_acmg_assignments(
102
- self, variant: PhEvalVariantResult, score: float, variant_acmg_assignments: dict
103
- ) -> bool:
104
- """Filter variants if they meet the PATHOGENIC or LIKELY_PATHOGENIC ACMG classification."""
105
- for assignment in variant_acmg_assignments:
106
- if variant == PhEvalVariantResult(
107
- chromosome=self._find_chromosome(assignment["variantEvaluation"]),
108
- start=self._find_start_pos(assignment["variantEvaluation"]),
109
- end=self._find_end_pos(assignment["variantEvaluation"]),
110
- ref=self._find_ref(assignment["variantEvaluation"]),
111
- alt=self._find_alt(assignment["variantEvaluation"]),
112
- score=score,
113
- ) and (
114
- assignment["acmgClassification"] == "PATHOGENIC"
115
- or assignment["acmgClassification"] == "LIKELY_PATHOGENIC"
116
- ):
117
- return True
118
-
119
- def extract_pheval_variant_requirements(
120
- self, use_acmg_filter: bool = False
121
- ) -> [PhEvalVariantResult]:
122
- """Extract data required to produce PhEval variant output."""
123
- simplified_exomiser_result = []
124
- for result_entry in self.exomiser_json_result:
125
- for gene_hit in result_entry["geneScores"]:
126
- if self.score_name in result_entry:
127
- if "contributingVariants" in gene_hit:
128
- score = self._find_relevant_score(result_entry)
129
- contributing_variants = gene_hit["contributingVariants"]
130
- variant_acmg_assignments = gene_hit["acmgAssignments"]
131
- for cv in contributing_variants:
132
- variant = PhEvalVariantResult(
133
- chromosome=self._find_chromosome(cv),
134
- start=self._find_start_pos(cv),
135
- end=self._find_end_pos(cv),
136
- ref=self._find_ref(cv),
137
- alt=self._find_alt(cv),
138
- score=score,
139
- )
140
- if use_acmg_filter and self._filter_for_acmg_assignments(
141
- variant, score, variant_acmg_assignments
142
- ):
143
- simplified_exomiser_result.append(variant)
144
- if not use_acmg_filter:
145
- simplified_exomiser_result.append(variant)
146
- return simplified_exomiser_result
147
-
148
-
149
- class PhEvalDiseaseResultFromExomiserJsonCreator:
150
- def __init__(self, exomiser_json_result: [dict]):
151
- self.exomiser_json_result = exomiser_json_result
152
-
153
- @staticmethod
154
- def _find_disease_name(result_entry: dict) -> str:
155
- """Return disease term from Exomiser result entry."""
156
- return result_entry["diseaseTerm"]
157
-
158
- @staticmethod
159
- def _find_disease_identifier(result_entry: dict) -> int:
160
- """Return disease ID from Exomiser result entry."""
161
- return result_entry["diseaseId"]
162
-
163
- @staticmethod
164
- def _find_relevant_score(result_entry) -> float:
165
- """Return score from Exomiser result entry."""
166
- return round(result_entry["score"], 4)
167
-
168
- def extract_pheval_disease_requirements(self) -> [PhEvalDiseaseResult]:
169
- """Extract data required to produce PhEval disease output."""
170
- simplified_exomiser_result = []
171
- for result_entry in self.exomiser_json_result:
172
- try:
173
- for disease in result_entry["priorityResults"]["HIPHIVE_PRIORITY"][
174
- "diseaseMatches"
175
- ]:
176
- simplified_exomiser_result.append(
177
- PhEvalDiseaseResult(
178
- disease_name=self._find_disease_name(disease["model"]),
179
- disease_identifier=self._find_disease_identifier(disease["model"]),
180
- score=self._find_relevant_score(disease),
181
- )
96
+ .otherwise(
97
+ pl.format(
98
+ "dominant|{}|{}|{}|{}|{}|{}",
99
+ pl.col("chrom"),
100
+ pl.col("start"),
101
+ pl.col("end"),
102
+ pl.col("ref"),
103
+ pl.col("alt"),
104
+ pl.col("score"),
182
105
  )
183
- except KeyError:
184
- pass
185
- return list(
186
- {
187
- (result.disease_identifier, result.score): result
188
- for result in simplified_exomiser_result
189
- }.values()
106
+ )
107
+ .alias("group_key"),
108
+ ]
109
+ )
110
+ .with_columns(
111
+ [
112
+ pl.col("group_key")
113
+ .rank("dense")
114
+ .cast(pl.UInt32)
115
+ .map_elements(
116
+ lambda i: str(uuid.uuid5(uuid.NAMESPACE_DNS, str(i))), return_dtype=pl.String
117
+ )
118
+ .alias("grouping_id")
119
+ ]
190
120
  )
121
+ .select(
122
+ ["chrom", "start", "end", "ref", "alt", "score", "modeOfInheritance", "grouping_id"]
123
+ )
124
+ )
191
125
 
192
126
 
193
127
  def create_standardised_results(
194
- results_dir: Path,
128
+ result_dir: Path,
195
129
  output_dir: Path,
130
+ phenopacket_dir: Path,
196
131
  score_name: str,
197
132
  sort_order: str,
198
- variant_analysis: bool,
199
133
  gene_analysis: bool,
200
134
  disease_analysis: bool,
201
- include_acmg: bool = False,
202
- ) -> None:
203
- """Write standardised gene/variant/disease results from default Exomiser json output."""
204
- for exomiser_json_result in files_with_suffix(results_dir, ".json"):
205
- exomiser_result = read_exomiser_json_result(exomiser_json_result)
135
+ variant_analysis: bool,
136
+ ):
137
+ sort_order = SortOrder.ASCENDING if sort_order.lower() == "ascending" else SortOrder.DESCENDING
138
+ for exomiser_json_result_path in files_with_suffix(result_dir, ".json"):
139
+ exomiser_json_result = pl.read_json(exomiser_json_result_path)
206
140
  if gene_analysis:
207
- pheval_gene_requirements = PhEvalGeneResultFromExomiserJsonCreator(
208
- exomiser_result, score_name
209
- ).extract_pheval_gene_requirements()
210
- generate_pheval_result(
211
- pheval_result=pheval_gene_requirements,
212
- sort_order_str=sort_order,
141
+ gene_results = extract_gene_results_from_json(exomiser_json_result, score_name)
142
+ generate_gene_result(
143
+ results=gene_results,
144
+ sort_order=sort_order,
213
145
  output_dir=output_dir,
214
- tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
146
+ result_path=trim_exomiser_result_filename(exomiser_json_result_path),
147
+ phenopacket_dir=phenopacket_dir,
215
148
  )
216
- if variant_analysis:
217
- pheval_variant_requirements = PhEvalVariantResultFromExomiserJsonCreator(
218
- exomiser_result, score_name
219
- ).extract_pheval_variant_requirements(include_acmg)
220
- generate_pheval_result(
221
- pheval_result=pheval_variant_requirements,
222
- sort_order_str=sort_order,
149
+ if disease_analysis:
150
+ disease_results = extract_disease_results_from_json(exomiser_json_result)
151
+ generate_disease_result(
152
+ results=disease_results,
153
+ sort_order=sort_order,
223
154
  output_dir=output_dir,
224
- tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
155
+ result_path=trim_exomiser_result_filename(exomiser_json_result_path),
156
+ phenopacket_dir=phenopacket_dir,
225
157
  )
226
- if disease_analysis:
227
- pheval_disease_requirements = PhEvalDiseaseResultFromExomiserJsonCreator(
228
- exomiser_result
229
- ).extract_pheval_disease_requirements()
230
- generate_pheval_result(
231
- pheval_result=pheval_disease_requirements,
232
- sort_order_str=sort_order,
158
+
159
+ if variant_analysis:
160
+ variant_results = extract_variant_results_from_json(exomiser_json_result, score_name)
161
+ generate_variant_result(
162
+ results=variant_results,
163
+ sort_order=sort_order,
233
164
  output_dir=output_dir,
234
- tool_result_path=trim_exomiser_result_filename(exomiser_json_result),
165
+ result_path=trim_exomiser_result_filename(exomiser_json_result_path),
166
+ phenopacket_dir=phenopacket_dir,
235
167
  )
236
168
 
237
169
 
@@ -252,6 +184,14 @@ def create_standardised_results(
252
184
  help="Full path to Exomiser results directory to be standardised.",
253
185
  type=Path,
254
186
  )
187
+ @click.option(
188
+ "--phenopacket-dir",
189
+ "-p",
190
+ required=True,
191
+ metavar="DIRECTORY",
192
+ help="Full path to phenopacket dir used to generate the raw results.",
193
+ type=Path,
194
+ )
255
195
  @click.option(
256
196
  "--score-name",
257
197
  "-s",
@@ -288,22 +228,15 @@ def create_standardised_results(
288
228
  default=False,
289
229
  help="Specify whether to create PhEval disease results.",
290
230
  )
291
- @click.option(
292
- "--include-acmg",
293
- is_flag=True,
294
- type=bool,
295
- default=False,
296
- help="Specify whether to include ACMG filter for PATHOGENIC or LIKELY_PATHOGENIC classifications.",
297
- )
298
231
  def post_process_exomiser_results(
299
232
  output_dir: Path,
300
233
  results_dir: Path,
234
+ phenopacket_dir: Path,
301
235
  score_name: str,
302
236
  sort_order: str,
303
237
  gene_analysis: bool,
304
238
  variant_analysis: bool,
305
239
  disease_analysis: bool,
306
- include_acmg: bool,
307
240
  ):
308
241
  """Post-process Exomiser json results into PhEval gene and variant outputs."""
309
242
  (
@@ -322,12 +255,12 @@ def post_process_exomiser_results(
322
255
  else None
323
256
  )
324
257
  create_standardised_results(
325
- results_dir,
326
- output_dir,
327
- score_name,
328
- sort_order,
329
- variant_analysis,
330
- gene_analysis,
331
- disease_analysis,
332
- include_acmg,
258
+ result_dir=results_dir,
259
+ output_dir=output_dir,
260
+ phenopacket_dir=phenopacket_dir,
261
+ score_name=score_name,
262
+ sort_order=sort_order,
263
+ variant_analysis=variant_analysis,
264
+ gene_analysis=gene_analysis,
265
+ disease_analysis=disease_analysis,
333
266
  )
@@ -100,8 +100,14 @@ class CommandCreator:
100
100
  )
101
101
 
102
102
  def add_variant_analysis_arguments(self, vcf_dir: Path) -> ExomiserCommandLineArguments:
103
- vcf_file_data = PhenopacketUtil(self.phenopacket).vcf_file_data(
104
- self.phenopacket_path, vcf_dir
103
+ vcf_file_data = (
104
+ PhenopacketUtil(self.phenopacket).vcf_file_data(self.phenopacket_path, vcf_dir)
105
+ if vcf_dir.exists()
106
+ else [
107
+ file
108
+ for file in self.phenopacket.files
109
+ if file.file_attributes["fileFormat"] == "vcf"
110
+ ][0]
105
111
  )
106
112
  output_options_file = self.assign_output_options_file()
107
113
  if self.environment == "local":
pheval_exomiser/runner.py CHANGED
@@ -67,6 +67,7 @@ class ExomiserPhEvalRunner(PhEvalRunner):
67
67
  config=config,
68
68
  raw_results_dir=self.raw_results_dir,
69
69
  output_dir=self.output_dir,
70
+ phenopacket_dir=self.testdata_dir.joinpath("phenopackets"),
70
71
  variant_analysis=self.input_dir_config.variant_analysis,
71
72
  gene_analysis=self.input_dir_config.gene_analysis,
72
73
  disease_analysis=self.input_dir_config.disease_analysis,
@@ -1,12 +1,11 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: pheval_exomiser
3
- Version: 0.2.7
3
+ Version: 0.3.1
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
7
- Requires-Python: >=3.9,<4.0.0
7
+ Requires-Python: >=3.10,<4.0.0
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.9
10
9
  Classifier: Programming Language :: Python :: 3.10
11
10
  Classifier: Programming Language :: Python :: 3.11
12
11
  Classifier: Programming Language :: Python :: 3.12
@@ -16,9 +15,8 @@ Requires-Dist: docker (>=6.0.1,<7.0.0)
16
15
  Requires-Dist: google (>=3.0.0,<4.0.0)
17
16
  Requires-Dist: numpy (<2)
18
17
  Requires-Dist: oaklib (>=0.5.12,<0.6.0)
19
- Requires-Dist: pandas (>=1.5.2,<2.0.0)
20
18
  Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
21
- Requires-Dist: pheval (>=0.4.0,<0.5.0)
19
+ Requires-Dist: pheval (>=0.5.1,<0.6.0)
22
20
  Requires-Dist: pyaml (>=21.10.1,<22.0.0)
23
21
  Requires-Dist: pydantic (>=2.7.1,<3.0.0)
24
22
  Description-Content-Type: text/markdown
@@ -164,6 +162,9 @@ e.g.,
164
162
     └── vcf
165
163
  ```
166
164
 
165
+ > [!IMPORTANT]
166
+ > If `variant_analysis` is set to `True` and no `vcf` directory is found in the testdata directory, the path to the VCF is taken from the phenopacket instead.
167
+
167
168
  ## Run command
168
169
 
169
170
  Once the testdata and input directories are correctly configured for the run, the `pheval run` command can be executed.
@@ -2,17 +2,16 @@ pheval_exomiser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  pheval_exomiser/cli.py,sha256=0SR1-L2sREEkFRfUPwYwkbSaBsz_L_Sxq1S4c9LQLJg,350
3
3
  pheval_exomiser/constants.py,sha256=o_pLWF8kX74BqyTsAZa7twwSKzedLnpupCI90k_bMqY,517
4
4
  pheval_exomiser/post_process/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- pheval_exomiser/post_process/post_process.py,sha256=ZLIGPeADGZn08jFc152QraiJnYSADlL35GOwxkCQDwA,901
6
- pheval_exomiser/post_process/post_process_results_format.py,sha256=F1TpgLgeoNFFZTk7XaXLptCLOr6G3tdZC2cltXFtHx8,12261
5
+ pheval_exomiser/post_process/post_process.py,sha256=2vkwe60Ptf7UuPCR2ShcI80-kn-1WaPDa74cCBTUKF0,968
6
+ pheval_exomiser/post_process/post_process_results_format.py,sha256=zMz2HwAJENuKyRjkbJZrydrZDoULWuPjaDWC5Dc5hxg,8432
7
7
  pheval_exomiser/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- pheval_exomiser/prepare/create_batch_commands.py,sha256=tDUBtpfSmNGqHte-vrGnusYZzM59pOX1IAumqRDlnBE,17205
8
+ pheval_exomiser/prepare/create_batch_commands.py,sha256=R06cn1c5pf_agUQfrFUQ2KEo8il0Z4fJntS2HKYCQXw,17410
9
9
  pheval_exomiser/prepare/tool_specific_configuration_options.py,sha256=4gedZ9iadRXK6tF9P-ju-dhj8-F2-fhrXVhfYIsAxFQ,2922
10
10
  pheval_exomiser/prepare/write_application_properties.py,sha256=KmG7GvkQo8AhnhRyqohTFvqjfhEhbcs78UYYoigxJ3w,8933
11
- pheval_exomiser/prepare/yaml_to_family_phenopacket.py,sha256=Hz77dHpVaRMV1fQWKmOCqCKJfmk_hdpZh_6o7hq9Sec,14452
12
11
  pheval_exomiser/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
12
  pheval_exomiser/run/run.py,sha256=bK_gL52zRl71Lxe-i-P6L4-dMstxFAG6SVNPO6G823o,7109
14
- pheval_exomiser/runner.py,sha256=LaWhC0F9LoPvP0Ie1sG2GkC8EG-tWjBBY_tFYmx6dxA,2548
15
- pheval_exomiser-0.2.7.dist-info/METADATA,sha256=bN6tBR-8becl8WG4nD7cHpbH-xTnOmD9uVKCWFEnHMw,7551
16
- pheval_exomiser-0.2.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
17
- pheval_exomiser-0.2.7.dist-info/entry_points.txt,sha256=lbZMu-x7ns8UrFveWSqEQ1UB5l33TbRMomqBUyGYIwI,131
18
- pheval_exomiser-0.2.7.dist-info/RECORD,,
13
+ pheval_exomiser/runner.py,sha256=3-0kec2yzQoZNpqZXSBIWBD1QR24s_BmHGCLXmP4fos,2620
14
+ pheval_exomiser-0.3.1.dist-info/METADATA,sha256=qq-FsEKnIUuILxreIMSsaoQ3gRFogzE-XZ87bf01c7E,7641
15
+ pheval_exomiser-0.3.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
16
+ pheval_exomiser-0.3.1.dist-info/entry_points.txt,sha256=lbZMu-x7ns8UrFveWSqEQ1UB5l33TbRMomqBUyGYIwI,131
17
+ pheval_exomiser-0.3.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.1
2
+ Generator: poetry-core 2.1.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,392 +0,0 @@
1
- from copy import copy
2
- from pathlib import Path
3
-
4
- import click
5
- import pandas as pd
6
- import yaml
7
- from google.protobuf.timestamp_pb2 import Timestamp
8
- from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
9
- from oaklib.resource import OntologyResource
10
- from phenopackets import (
11
- Diagnosis,
12
- Family,
13
- File,
14
- GeneDescriptor,
15
- GenomicInterpretation,
16
- Individual,
17
- Interpretation,
18
- MetaData,
19
- OntologyClass,
20
- Pedigree,
21
- Phenopacket,
22
- PhenotypicFeature,
23
- Resource,
24
- VariantInterpretation,
25
- VariationDescriptor,
26
- VcfRecord,
27
- )
28
- from pheval.prepare.create_noisy_phenopackets import load_ontology
29
- from pheval.utils.file_utils import files_with_suffix
30
- from pheval.utils.phenopacket_utils import create_hgnc_dict, write_phenopacket
31
-
32
-
33
def load_genotype_ontology():
    """Load the genotype ontology (``geno.owl``) through the pronto implementation."""
    # The resource is requested with local=False.
    return ProntoImplementation(OntologyResource(slug="geno.owl", local=False))
37
-
38
-
39
def exomiser_analysis_yml_reader(yaml_job_file_path: Path) -> dict:
    """Read an Exomiser analysis YAML file.

    Args:
        yaml_job_file_path: Path to the Exomiser analysis YAML job file.

    Returns:
        The parsed YAML content, as produced by ``yaml.safe_load``.
    """
    # The context manager closes the file on exit, so no explicit close() is needed.
    with open(yaml_job_file_path) as yaml_job_file:
        return yaml.safe_load(yaml_job_file)
45
-
46
-
47
def read_diagnoses_file(diagnoses_file_path: Path) -> pd.DataFrame:
    """Read a tab-separated diagnoses file into a DataFrame.

    Args:
        diagnoses_file_path: Path to the .tsv diagnoses file.

    Returns:
        The file content with one column per tab-separated field.
    """
    # The delimiter has to be the tab escape; a bare "t" would split every line on the letter t.
    return pd.read_csv(diagnoses_file_path, delimiter="\t")
50
-
51
-
52
def read_pedigree_file(pedigree_path: Path) -> list[str]:
    """Return the lines of a pedigree file, line endings included.

    Args:
        pedigree_path: Path to the pedigree file.

    Returns:
        One string per line of the file; an empty list for an empty file.
    """
    # Open through a context manager so the handle is closed once the lines are read.
    with open(pedigree_path) as pedigree_file:
        return pedigree_file.readlines()
55
-
56
-
57
class ExomiserYamlToPhenopacketConverter:
    """Build phenopacket messages from an Exomiser analysis YAML job and a diagnoses table.

    The constructor stores its arguments unchanged: ``genotype_ontology`` is
    searched for genotype terms, ``human_phenotype_ontology`` supplies HPO term
    labels and ``hgnc_data`` is looked up by gene symbol.
    """

    def __init__(self, genotype_ontology, human_phenotype_ontology, hgnc_data):
        self.genotype_ontology = genotype_ontology
        self.human_phenotype_ontology = human_phenotype_ontology
        self.hgnc_data = hgnc_data

    @staticmethod
    def construct_individual(yaml_job: dict, diagnoses: pd.DataFrame) -> Individual:
        """Construct the individual for the phenopacket.

        The sex is the upper-cased ``Sex`` value of the proband's first
        diagnoses row; an ``IndexError`` is raised when the proband has no row.
        """
        # Reuse the shared proband filter instead of repeating the selection here.
        proband_rows = ExomiserYamlToPhenopacketConverter.get_diagnoses_for_proband(
            yaml_job, diagnoses
        )
        return Individual(
            id=yaml_job["analysis"]["proband"],
            sex=proband_rows.iloc[0]["Sex"].upper(),
        )

    @staticmethod
    def get_diagnoses_for_proband(yaml_job: dict, diagnoses: pd.DataFrame):
        """Return the rows of ``diagnoses`` whose ``ProbandId`` equals the job's proband."""
        return diagnoses.loc[diagnoses["ProbandId"] == yaml_job["analysis"]["proband"]]

    def construct_phenotypic_interpretations(self, yaml_job: dict) -> list[PhenotypicFeature]:
        """Construct the phenotypic features for the proband.

        Each HPO id of the job becomes one feature. The label is joined from
        the first entry of the ontology's alias map; when that lookup raises
        ``AttributeError`` the feature is created without a label.
        """
        phenotypic_features = []
        for hpo_id in yaml_job["analysis"]["hpoIds"]:
            try:
                aliases = self.human_phenotype_ontology.entity_alias_map(hpo_id)
                label = "".join(aliases[list(aliases)[0]])
                term = OntologyClass(id=hpo_id, label=label)
            except AttributeError:
                term = OntologyClass(id=hpo_id)
            phenotypic_features.append(PhenotypicFeature(type=term))
        return phenotypic_features

    @staticmethod
    def construct_vcf_record(yaml_job: dict, diagnosis: pd.Series) -> VcfRecord:
        """Construct the VCF record for a diagnosis row.

        ``Ref/Alt`` is split on "/" into the reference and alternate allele.
        """
        alleles = str(diagnosis["Ref/Alt"]).split("/")
        return VcfRecord(
            genome_assembly=yaml_job["analysis"]["genomeAssembly"],
            # str() keeps this working when the column was parsed as a number.
            chrom=str(diagnosis["Chr"]),
            pos=int(diagnosis["Start"]),
            ref=alleles[0],
            alt=alleles[1],
        )

    def construct_allelic_state(self, diagnosis: pd.Series) -> OntologyClass:
        """Construct the allelic state for a diagnosis row.

        The lower-cased ``Genotype`` value is used as the label and as the
        search term; the first search hit in the genotype ontology is the id.
        """
        genotype = diagnosis["Genotype"].lower()
        return OntologyClass(
            id=list(self.genotype_ontology.basic_search(genotype))[0],
            label=genotype,
        )

    def construct_gene_descriptor(self, diagnosis: pd.Series) -> GeneDescriptor:
        """Construct the gene descriptor for a diagnosis row.

        The gene symbol is looked up directly in ``hgnc_data``; when that
        raises ``KeyError`` every entry's ``previous_names`` is searched.

        Returns:
            The descriptor, or ``None`` when the symbol matches neither a key
            of ``hgnc_data`` nor any previous name.
        """
        gene_symbol = diagnosis["Gene"]
        try:
            return GeneDescriptor(
                value_id=self.hgnc_data[gene_symbol]["ensembl_id"],
                symbol=gene_symbol,
            )
        except KeyError:
            for gene_info in self.hgnc_data.values():
                if any(gene_symbol == name for name in gene_info["previous_names"]):
                    # Take the Ensembl id from the matched entry itself: hgnc_data is
                    # keyed by gene symbol, so indexing it with an Ensembl id is wrong.
                    return GeneDescriptor(
                        value_id=gene_info["ensembl_id"],
                        symbol=gene_symbol,
                    )
        return None

    def construct_variation_descriptor(
        self, yaml_job: dict, diagnosis: pd.Series
    ) -> VariationDescriptor:
        """Construct a variation descriptor for a diagnosis row.

        The id has the form ``proband:Chr:Start:Ref/Alt``.
        """
        # Every part goes through str() so numeric Chr/Start values do not break the join.
        id_parts = (
            yaml_job["analysis"]["proband"],
            diagnosis["Chr"],
            diagnosis["Start"],
            diagnosis["Ref/Alt"],
        )
        return VariationDescriptor(
            id=":".join(str(part) for part in id_parts),
            gene_context=self.construct_gene_descriptor(diagnosis),
            vcf_record=self.construct_vcf_record(yaml_job, diagnosis),
            allelic_state=self.construct_allelic_state(diagnosis),
        )

    def construct_variant_interpretation(
        self, yaml_job: dict, diagnosis: pd.Series
    ) -> VariantInterpretation:
        """Construct the variant interpretation for a diagnosis row."""
        return VariantInterpretation(
            variation_descriptor=self.construct_variation_descriptor(yaml_job, diagnosis),
        )

    def construct_genomic_interpretations(
        self, yaml_job: dict, diagnoses: pd.DataFrame
    ) -> list[GenomicInterpretation]:
        """Construct one genomic interpretation per diagnoses row of the proband."""
        proband_id = yaml_job["analysis"]["proband"]
        proband_rows = self.get_diagnoses_for_proband(yaml_job, diagnoses)
        return [
            GenomicInterpretation(
                subject_or_biosample_id=proband_id,
                variant_interpretation=self.construct_variant_interpretation(
                    yaml_job=yaml_job, diagnosis=row
                ),
            )
            for _index, row in proband_rows.iterrows()
        ]

    def construct_diagnosis(self, yaml_job: dict, diagnoses: pd.DataFrame) -> Diagnosis:
        """Construct the diagnosis for a proband."""
        return Diagnosis(
            genomic_interpretations=self.construct_genomic_interpretations(yaml_job, diagnoses)
        )

    def construct_interpretations(
        self, yaml_job: dict, diagnoses: pd.DataFrame
    ) -> list[Interpretation]:
        """Construct the single-element interpretation list for a proband."""
        return [
            Interpretation(
                id=yaml_job["analysis"]["proband"] + "-interpretation",
                diagnosis=self.construct_diagnosis(yaml_job, diagnoses),
            )
        ]

    @staticmethod
    def construct_meta_data() -> MetaData:
        """Construct the meta-data, stamped with the current time."""
        timestamp = Timestamp()
        timestamp.GetCurrentTime()
        return MetaData(
            created=timestamp,
            created_by="pheval-converter",
            resources=[
                Resource(
                    id="hp",
                    name="human phenotype ontology",
                    url="http://purl.obolibrary.org/obo/hp.owl",
                    version="hp/releases/2019-11-08",
                    namespace_prefix="HP",
                    iri_prefix="http://purl.obolibrary.org/obo/HP_",
                )
            ],
            phenopacket_schema_version="2.0",
        )

    @staticmethod
    def construct_files(yaml_job_file: dict) -> list[File]:
        """Construct the file list holding the job's VCF and its genome assembly."""
        return [
            File(
                uri=yaml_job_file["analysis"]["vcf"],
                file_attributes={
                    "fileFormat": "VCF",
                    "genomeAssembly": yaml_job_file["analysis"]["genomeAssembly"],
                },
            )
        ]
217
-
218
-
219
def construct_pedigree(pedigree: list[str]) -> tuple[str, Pedigree]:
    """Construct the pedigree message from the lines of a ped file.

    Each non-blank line is split on tabs and read as: family id, individual
    id, paternal id, maternal id, sex, affected status. A parent id of "0" is
    treated as absent and the corresponding field is left unset.

    Args:
        pedigree: Lines of a tab-separated pedigree file.

    Returns:
        A tuple of the family id of the last processed line (``None`` when no
        line was processed) and the ``Pedigree`` with one person per line.
    """
    # Until this is fixed with the phenopackets package, sex has to be reassigned:
    # ped code 1 is stored as 2 and ped code 2 is stored as 1.
    reassigned_sex = {1: 2, 2: 1}
    persons = []
    family_id = None
    for individual in pedigree:
        # A blank (e.g. trailing) line has no columns to read.
        if not individual.strip():
            continue
        entry = individual.split("\t")
        family_id = entry[0]
        person_fields = {
            "family_id": family_id,
            "individual_id": entry[1],
            # NOTE(review): "." is kept as the fallback for any other sex code — confirm
            # that the phenopackets Person message accepts it.
            "sex": reassigned_sex.get(int(entry[4]), "."),
            "affected_status": int(entry[5]),
        }
        # Each parent is added independently, so every line yields exactly one person.
        if entry[2] != "0":
            person_fields["paternal_id"] = entry[2]
        if entry[3] != "0":
            person_fields["maternal_id"] = entry[3]
        persons.append(Pedigree.Person(**person_fields))
    return family_id, Pedigree(persons=persons)
267
-
268
-
269
def construct_phenopacket(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
) -> Phenopacket:
    """Assemble a phenopacket for the proband of an Exomiser analysis job.

    Args:
        yaml_job_file: Parsed Exomiser analysis YAML job.
        diagnoses: Diagnoses table.
        exomiser_yaml_to_phenopacket_converter: Converter that builds each part.

    Returns:
        The phenopacket, with the proband as its id.
    """
    converter = exomiser_yaml_to_phenopacket_converter
    proband_id = yaml_job_file["analysis"]["proband"]
    subject = converter.construct_individual(yaml_job=yaml_job_file, diagnoses=diagnoses)
    phenotypic_features = converter.construct_phenotypic_interpretations(yaml_job=yaml_job_file)
    interpretations = converter.construct_interpretations(
        yaml_job=yaml_job_file, diagnoses=diagnoses
    )
    files = converter.construct_files(yaml_job_file)
    meta_data = converter.construct_meta_data()
    return Phenopacket(
        id=proband_id,
        subject=subject,
        phenotypic_features=phenotypic_features,
        interpretations=interpretations,
        files=files,
        meta_data=meta_data,
    )
289
-
290
-
291
def construct_family(
    yaml_job_file: dict,
    diagnoses: pd.DataFrame,
    exomiser_yaml_to_phenopacket_converter: ExomiserYamlToPhenopacketConverter,
    pedigree: list[str],
) -> Family:
    """Construct a Family from an Exomiser analysis job and its pedigree lines.

    The proband is the job's phenopacket without its files and meta-data;
    both are attached to the family message instead.

    Args:
        yaml_job_file: Parsed Exomiser analysis YAML job.
        diagnoses: Diagnoses table.
        exomiser_yaml_to_phenopacket_converter: Converter that builds each part.
        pedigree: Lines of the pedigree file.

    Returns:
        The family, with the id read from the pedigree.
    """
    phenopacket = construct_phenopacket(
        yaml_job_file, diagnoses, exomiser_yaml_to_phenopacket_converter
    )
    # Work on an independent copy so clearing fields leaves ``phenopacket`` intact.
    proband = Phenopacket()
    proband.CopyFrom(phenopacket)
    # meta_data is set to a single MetaData message, so it cannot be emptied by
    # slice deletion; ClearField resets both the list field and the message field.
    proband.ClearField("files")
    proband.ClearField("meta_data")
    family_id, ped = construct_pedigree(pedigree)
    return Family(
        id=family_id,
        proband=proband,
        pedigree=ped,
        files=phenopacket.files,
        meta_data=phenopacket.meta_data,
    )
312
-
313
-
314
def create_phenopacket(
    yaml_job_file: Path,
    diagnoses: pd.DataFrame,
    exomiser_converter: ExomiserYamlToPhenopacketConverter,
) -> "Phenopacket | Family":
    """Construct either a family or a phenopacket from an analysis yaml.

    Args:
        yaml_job_file: Path to the Exomiser analysis YAML job file.
        diagnoses: Diagnoses table.
        exomiser_converter: Converter that builds the phenopacket parts.

    Returns:
        A ``Phenopacket`` when the job names no pedigree file, otherwise a
        ``Family`` built from the pedigree file the job points to.
    """
    yaml_job = exomiser_analysis_yml_reader(yaml_job_file)
    # An empty, null or absent "ped" entry all mean that there is no pedigree file.
    pedigree_path = yaml_job["analysis"].get("ped")
    if not pedigree_path:
        return construct_phenopacket(yaml_job, diagnoses, exomiser_converter)
    return construct_family(
        yaml_job,
        diagnoses,
        exomiser_converter,
        read_pedigree_file(pedigree_path),
    )
332
-
333
-
334
@click.command()
@click.option(
    "--directory",
    "-d",
    required=True,
    help="Directory for Exomiser yaml job files to be converted.",
    type=Path,
)
# NOTE(review): "-d" is declared for both --directory and --diagnoses-file; the two
# options share one short flag — confirm which one is intended and rename the other.
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yamls_to_phenopacket(
    output_dir: Path, directory: Path, diagnoses_file: Path
):
    """Convert every Exomiser YAML file (.yml) in a directory to the phenopacket schema given a
    .tsv diagnoses file containing the following fields: ProbandId, Sex, Gene, Chr, Start,
    Ref/Alt, Genotype."""
    # Reuse the output directory when it already exists.
    output_dir.mkdir(exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    for yaml_job_file in files_with_suffix(directory, ".yml"):
        phenopacket = create_phenopacket(yaml_job_file, diagnoses, exomiser_converter)
        # Each phenopacket is named after its job file, with .yml replaced by .json.
        write_phenopacket(
            phenopacket, output_dir.joinpath(yaml_job_file.name.replace(".yml", ".json"))
        )
364
-
365
-
366
@click.command()
@click.option(
    "--yaml-file",
    "-y",
    required=True,
    help="Path to Exomiser analysis yaml file for phenopacket conversion.",
    type=Path,
)
@click.option("--diagnoses-file", "-d", required=True, help="Diagnoses file", type=Path)
@click.option(
    "--output-dir", "-o", required=True, help="Output directory to write phenopackets", type=Path
)
def convert_exomiser_analysis_yaml_to_phenopacket(
    output_dir: Path, yaml_file: Path, diagnoses_file: Path
):
    """Convert a single Exomiser YAML file to the phenopacket schema given a .tsv diagnoses file
    containing the following fields: ProbandId, Sex, Gene, Chr, Start, Ref/Alt, Genotype."""
    # Reuse the output directory when it already exists.
    output_dir.mkdir(exist_ok=True)
    diagnoses = read_diagnoses_file(diagnoses_file)
    exomiser_converter = ExomiserYamlToPhenopacketConverter(
        load_genotype_ontology(), load_ontology(), create_hgnc_dict()
    )
    phenopacket = create_phenopacket(yaml_file, diagnoses, exomiser_converter)
    # Write into the requested output directory, named after the job file with a
    # .json suffix, rather than into the current working directory.
    write_phenopacket(phenopacket, output_dir.joinpath(yaml_file.with_suffix(".json").name))