pheval-0.3.5-py3-none-any.whl → pheval-0.3.6-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

@@ -227,68 +227,7 @@ class ResultSorter:
227
227
  )
228
228
 
229
229
 
230
- class ScoreRanker:
231
- """
232
- Class for ranking scores based on a given sort order
233
-
234
- Attributes:
235
- rank (int): Represents the current rank, initialised with 0
236
- current_score (float): Represents the current score, initialised with positive infinity (float("inf"))
237
- count (int): Used for counting, initialised with 0
238
- """
239
-
240
- rank: int = 0
241
- current_score: float = float("inf")
242
- count: int = 0
243
-
244
- def __init__(self, sort_order: SortOrder):
245
- """
246
- Initialise ScoreRanker
247
-
248
- Args:
249
- sort_order (SortOrder): Sorting order to be applied
250
- """
251
- self.sort_order = sort_order
252
-
253
- def _check_rank_order(self, round_score: float) -> None:
254
- """
255
- Check if the results are correctly ordered
256
-
257
- Args:
258
- round_score (float): Score to be checked against the current score
259
-
260
- Raises:
261
- ValueError: If results are not correctly sorted.
262
- """
263
- if self.sort_order == SortOrder.ASCENDING and round_score < self.current_score != float(
264
- "inf"
265
- ):
266
- raise ValueError("Results are not correctly sorted!")
267
- elif self.sort_order == SortOrder.DESCENDING and round_score > self.current_score != float(
268
- "inf"
269
- ):
270
- raise ValueError("Results are not correctly sorted!")
271
-
272
- def rank_scores(self, round_score: float) -> int:
273
- """
274
- Add ranks to a result; equal scores are given the same rank, e.g., 1, 1, 3
275
-
276
- Args:
277
- round_score (float): Score to be ranked
278
-
279
- Returns:
280
- int: Rank assigned to the score
281
- """
282
- self._check_rank_order(round_score)
283
- self.count += 1
284
- if self.current_score == round_score:
285
- return self.rank
286
- self.current_score = round_score
287
- self.rank = self.count
288
- return self.rank
289
-
290
-
291
- def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> [PhEvalResult]:
230
+ def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) -> pd.DataFrame:
292
231
  """
293
232
  Rank PhEval results post-processed from tool-specific output, managing tied scores (ex aequo)
294
233
 
@@ -297,35 +236,17 @@ def _rank_pheval_result(pheval_result: [PhEvalResult], sort_order: SortOrder) ->
297
236
  sort_order (SortOrder): Sorting order based on which ranking is performed
298
237
 
299
238
  Returns:
300
- List[PhEvalResult]: Ranked PhEval results with tied scores managed
239
+ pd.DataFrame : Ranked PhEval results with tied scores managed
301
240
 
302
241
  Raises:
303
242
  ValueError: If an incompatible PhEval result type is encountered
304
243
  """
305
- score_ranker = ScoreRanker(sort_order)
306
- ranked_result = []
307
- for result in pheval_result:
308
- if type(result) == PhEvalGeneResult:
309
- ranked_result.append(
310
- RankedPhEvalGeneResult.from_gene_result(
311
- result, score_ranker.rank_scores(result.score)
312
- )
313
- )
314
- elif type(result) == PhEvalVariantResult:
315
- ranked_result.append(
316
- RankedPhEvalVariantResult.from_variant_result(
317
- result, score_ranker.rank_scores(result.score)
318
- )
319
- )
320
- elif type(result) == PhEvalDiseaseResult:
321
- ranked_result.append(
322
- RankedPhEvalDiseaseResult.from_disease_result(
323
- result, score_ranker.rank_scores(result.score)
324
- )
325
- )
326
- else:
327
- raise ValueError("Incompatible PhEval result type.")
328
- return ranked_result
244
+ pheval_result_df = pd.DataFrame([data.__dict__ for data in pheval_result])
245
+ if sort_order == SortOrder.ASCENDING:
246
+ pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=True)
247
+ elif sort_order == SortOrder.DESCENDING:
248
+ pheval_result_df["rank"] = pheval_result_df["score"].rank(method="max", ascending=False)
249
+ return pheval_result_df
329
250
 
330
251
 
331
252
  def _return_sort_order(sort_order_str: str) -> SortOrder:
@@ -347,7 +268,7 @@ def _return_sort_order(sort_order_str: str) -> SortOrder:
347
268
  raise ValueError("Incompatible ordering method specified.")
348
269
 
349
270
 
350
- def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) -> [PhEvalResult]:
271
+ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) -> pd.DataFrame:
351
272
  """
352
273
  Create PhEval results with corresponding ranks based on the specified sorting order.
353
274
 
@@ -356,7 +277,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
356
277
  sort_order_str (str): String representation of the desired sorting order.
357
278
 
358
279
  Returns:
359
- List[PhEvalResult]: PhEval results with ranks assigned.
280
+ pd.DataFrame: PhEval results with ranks assigned.
360
281
  """
361
282
  sort_order = _return_sort_order(sort_order_str)
362
283
  sorted_pheval_result = ResultSorter(pheval_result, sort_order).sort_pheval_results()
@@ -364,7 +285,7 @@ def _create_pheval_result(pheval_result: [PhEvalResult], sort_order_str: str) ->
364
285
 
365
286
 
366
287
  def _write_pheval_gene_result(
367
- ranked_pheval_result: [PhEvalResult], output_dir: Path, tool_result_path: Path
288
+ ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
368
289
  ) -> None:
369
290
  """
370
291
  Write ranked PhEval gene results to a TSV file
@@ -374,8 +295,9 @@ def _write_pheval_gene_result(
374
295
  output_dir (Path): Path to the output directory
375
296
  tool_result_path (Path): Path to the tool-specific result file
376
297
  """
377
- ranked_result = pd.DataFrame([data.__dict__ for data in ranked_pheval_result])
378
- pheval_gene_output = ranked_result.loc[:, ["rank", "score", "gene_symbol", "gene_identifier"]]
298
+ pheval_gene_output = ranked_pheval_result.loc[
299
+ :, ["rank", "score", "gene_symbol", "gene_identifier"]
300
+ ]
379
301
  pheval_gene_output.to_csv(
380
302
  output_dir.joinpath(
381
303
  "pheval_gene_results/" + tool_result_path.stem + "-pheval_gene_result.tsv"
@@ -386,7 +308,7 @@ def _write_pheval_gene_result(
386
308
 
387
309
 
388
310
  def _write_pheval_variant_result(
389
- ranked_pheval_result: [PhEvalResult], output_dir: Path, tool_result_path: Path
311
+ ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
390
312
  ) -> None:
391
313
  """
392
314
  Write ranked PhEval variant results to a TSV file
@@ -396,8 +318,7 @@ def _write_pheval_variant_result(
396
318
  output_dir (Path): Path to the output directory
397
319
  tool_result_path (Path): Path to the tool-specific result file
398
320
  """
399
- ranked_result = pd.DataFrame([data.__dict__ for data in ranked_pheval_result])
400
- pheval_variant_output = ranked_result.loc[
321
+ pheval_variant_output = ranked_pheval_result.loc[
401
322
  :, ["rank", "score", "chromosome", "start", "end", "ref", "alt"]
402
323
  ]
403
324
  pheval_variant_output.to_csv(
@@ -410,7 +331,7 @@ def _write_pheval_variant_result(
410
331
 
411
332
 
412
333
  def _write_pheval_disease_result(
413
- ranked_pheval_result: [RankedPhEvalDiseaseResult], output_dir: Path, tool_result_path: Path
334
+ ranked_pheval_result: pd.DataFrame, output_dir: Path, tool_result_path: Path
414
335
  ) -> None:
415
336
  """
416
337
  Write ranked PhEval disease results to a TSV file
@@ -420,8 +341,7 @@ def _write_pheval_disease_result(
420
341
  output_dir (Path): Path to the output directory
421
342
  tool_result_path (Path): Path to the tool-specific result file
422
343
  """
423
- ranked_result = pd.DataFrame([data.__dict__ for data in ranked_pheval_result])
424
- pheval_disease_output = ranked_result.loc[
344
+ pheval_disease_output = ranked_pheval_result.loc[
425
345
  :, ["rank", "score", "disease_name", "disease_identifier"]
426
346
  ]
427
347
  pheval_disease_output.to_csv(
@@ -328,22 +328,35 @@ class VcfSpiker:
328
328
  genotype_codes[proband_variant_data.genotype.lower()] + "\n",
329
329
  ]
330
330
 
331
- def construct_vcf_records(self) -> List[str]:
331
+ def construct_vcf_records(self, template_vcf_name: str) -> List[str]:
332
332
  """
333
333
  Construct updated VCF records by inserting spiked variants into the correct positions within the VCF.
334
334
 
335
+ Args:
336
+ template_vcf_name (str): Name of the template VCF file.
337
+
335
338
  Returns:
336
339
  List[str]: Updated VCF records containing the spiked variants.
337
340
  """
338
341
  updated_vcf_records = copy(self.vcf_contents)
339
342
  for variant in self.proband_causative_variants:
340
- variant = self.construct_variant_entry(variant)
341
- variant_entry_position = [
343
+ variant_entry = self.construct_variant_entry(variant)
344
+ matching_indices = [
342
345
  i
343
346
  for i, val in enumerate(updated_vcf_records)
344
- if val.split("\t")[0] == variant[0] and int(val.split("\t")[1]) < int(variant[1])
345
- ][-1] + 1
346
- updated_vcf_records.insert(variant_entry_position, "\t".join(variant))
347
+ if val.split("\t")[0] == variant_entry[0]
348
+ and int(val.split("\t")[1]) < int(variant_entry[1])
349
+ ]
350
+ if matching_indices:
351
+ variant_entry_position = matching_indices[-1] + 1
352
+ else:
353
+ info_log.warning(
354
+ f"Could not find entry position for {variant.variant.chrom}-{variant.variant.pos}-"
355
+ f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}, "
356
+ "inserting at end of VCF contents."
357
+ )
358
+ variant_entry_position = len(updated_vcf_records)
359
+ updated_vcf_records.insert(variant_entry_position, "\t".join(variant_entry))
347
360
  return updated_vcf_records
348
361
 
349
362
  def construct_header(self, updated_vcf_records: List[str]) -> List[str]:
@@ -358,21 +371,27 @@ class VcfSpiker:
358
371
  """
359
372
  updated_vcf_file = []
360
373
  for line in updated_vcf_records:
361
- text = line.replace(
362
- self.vcf_header.sample_id,
363
- self.proband_causative_variants[0].proband_id,
364
- )
374
+ if line.startswith("#"):
375
+ text = line.replace(
376
+ self.vcf_header.sample_id,
377
+ self.proband_causative_variants[0].proband_id,
378
+ )
379
+ else:
380
+ text = line
365
381
  updated_vcf_file.append(text)
366
382
  return updated_vcf_file
367
383
 
368
- def construct_vcf(self) -> List[str]:
384
+ def construct_vcf(self, template_vcf_name: str) -> List[str]:
369
385
  """
370
386
  Construct the entire spiked VCF file by incorporating the spiked variants into the VCF.
371
387
 
388
+ Args:
389
+ template_vcf_name (str): Name of the template VCF file.
390
+
372
391
  Returns:
373
392
  List[str]: The complete spiked VCF file content as a list of strings.
374
393
  """
375
- return self.construct_header(self.construct_vcf_records())
394
+ return self.construct_header(self.construct_vcf_records(template_vcf_name))
376
395
 
377
396
 
378
397
  class VcfWriter:
@@ -454,7 +473,7 @@ def spike_vcf_contents(
454
473
  chosen_template_vcf.vcf_contents,
455
474
  phenopacket_causative_variants,
456
475
  chosen_template_vcf.vcf_header,
457
- ).construct_vcf(),
476
+ ).construct_vcf(chosen_template_vcf.vcf_file_name),
458
477
  )
459
478
 
460
479
 
@@ -39,6 +39,11 @@ def prepare_corpus(
39
39
  output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
40
40
  for phenopacket_path in all_files(phenopacket_dir):
41
41
  phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
42
+ if not phenopacket_util.observed_phenotypic_features():
43
+ info_log.warning(
44
+ f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
45
+ )
46
+ continue
42
47
  if variant_analysis:
43
48
  if phenopacket_util.check_incomplete_variant_record():
44
49
  info_log.warning(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pheval
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
@@ -24,12 +24,12 @@ pheval/implementations/__init__.py,sha256=BMUTotjTdgy5j5xubWCIQgRXrSQ1ZIcjooer7r
24
24
  pheval/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  pheval/infra/exomiserdb.py,sha256=pM9-TfjrgurtH4OtM1Enk5oVhIxGQN3rKRlrxHuObTM,5080
26
26
  pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- pheval/post_processing/post_processing.py,sha256=2srdlw2D3qMh2B3PUSDvA6COYlbXINC08Wt4eccMZp8,16030
27
+ pheval/post_processing/post_processing.py,sha256=tjdk-LKj5TORwGDKNzEiLViy9oMRLVR9hG1b7E8RfkI,13368
28
28
  pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  pheval/prepare/create_noisy_phenopackets.py,sha256=UbBRWDD95BFHPv03VYx04v35AGwJ9ynLltYKqQJHbZ0,11236
30
- pheval/prepare/create_spiked_vcf.py,sha256=KZIyjtDDTqJj3hxL3u4YP6P0toA4RN1oPeDrzLMB2z4,20235
30
+ pheval/prepare/create_spiked_vcf.py,sha256=A_nIAhoU48nAeocpIu5UE41db4oBGj2cSoT-U-3qQ1Q,21111
31
31
  pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
32
- pheval/prepare/prepare_corpus.py,sha256=BzRra3VyPWHeT7zwH78vYEabo_hddBanJxVwPzw0YEU,3456
32
+ pheval/prepare/prepare_corpus.py,sha256=FweWoYMkS-kzi5RqgrSzkdp_8iWLyoGWMC_GF0szcUg,3692
33
33
  pheval/prepare/update_phenopacket.py,sha256=21fzUPbwKN6Ey5TSh9PFzjT2x86U19RAE6WmkjG8u28,4770
34
34
  pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
35
35
  pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
@@ -50,8 +50,8 @@ pheval/utils/file_utils.py,sha256=9HoCmtF73D3wY6bBhFLefMBI5uhvCe_meZeHXQzF_ts,46
50
50
  pheval/utils/phenopacket_utils.py,sha256=4inrnhZ4UjYgO0Y85ls_Nxq6voAIIXQV57_fMeIX-24,26792
51
51
  pheval/utils/semsim_utils.py,sha256=s7ZCR2VfPYnOh7ApX6rv66eGoVSm9QJaVYOWBEhlXpo,6151
52
52
  pheval/utils/utils.py,sha256=9V6vCT8l1g4O2-ZATYqsVyd7AYZdWGd-Ksy7_oIC3eE,2343
53
- pheval-0.3.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
54
- pheval-0.3.5.dist-info/METADATA,sha256=LB8Oe6wFj-iebn32q3kH5Yqk23ElXvF6My1w_cytWds,1810
55
- pheval-0.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
56
- pheval-0.3.5.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
57
- pheval-0.3.5.dist-info/RECORD,,
53
+ pheval-0.3.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
54
+ pheval-0.3.6.dist-info/METADATA,sha256=VC-lX9dK2KZUkh91WuDK4ygdu6tTw3WSBRkrVe-EZJ0,1810
55
+ pheval-0.3.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
56
+ pheval-0.3.6.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
57
+ pheval-0.3.6.dist-info/RECORD,,
File without changes