pheval 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

@@ -56,15 +56,11 @@ def prepare_corpus(
56
56
  for phenopacket_path in all_files(phenopacket_dir):
57
57
  phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
58
58
  if not phenopacket_util.observed_phenotypic_features():
59
- logger.warning(
60
- f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
61
- )
59
+ logger.warning(f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features.")
62
60
  continue
63
61
  if variant_analysis:
64
62
  if phenopacket_util.check_incomplete_variant_record():
65
- logger.warning(
66
- f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
67
- )
63
+ logger.warning(f"Removed {phenopacket_path.name} from the corpus due to missing variant fields.")
68
64
  continue
69
65
  elif phenopacket_util.check_variant_alleles():
70
66
  logger.warning(
@@ -73,15 +69,11 @@ def prepare_corpus(
73
69
  )
74
70
  if gene_analysis:
75
71
  if phenopacket_util.check_incomplete_gene_record():
76
- logger.warning(
77
- f"Removed {phenopacket_path.name} from the corpus due to missing gene fields."
78
- )
72
+ logger.warning(f"Removed {phenopacket_path.name} from the corpus due to missing gene fields.")
79
73
  continue
80
74
  if disease_analysis:
81
75
  if phenopacket_util.check_incomplete_disease_record():
82
- logger.warning(
83
- f"Removed {phenopacket_path.name} from the corpus due to missing disease fields."
84
- )
76
+ logger.warning(f"Removed {phenopacket_path.name} from the corpus due to missing disease fields.")
85
77
  continue
86
78
  logger.info(f"{phenopacket_path.name} OK!")
87
79
  if hg19_template_vcf or hg38_template_vcf:
@@ -107,13 +99,10 @@ def prepare_corpus(
107
99
  else:
108
100
  # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory
109
101
  (
110
- shutil.copy(
111
- phenopacket_path, output_dir.joinpath(f"phenopackets/{phenopacket_path.name}")
112
- )
102
+ shutil.copy(phenopacket_path, output_dir.joinpath(f"phenopackets/{phenopacket_path.name}"))
113
103
  if phenopacket_path != output_dir.joinpath(f"phenopackets/{phenopacket_path.name}")
114
104
  else None
115
105
  )
116
106
  logger.info(
117
- f"Finished preparing corpus for {phenopacket_dir}. "
118
- f"Total time: {time.perf_counter() - start_time:.2f} seconds."
107
+ f"Finished preparing corpus for {phenopacket_dir}. Total time: {time.perf_counter() - start_time:.2f} seconds."
119
108
  )
@@ -1,6 +1,5 @@
1
1
  import time
2
2
  from pathlib import Path
3
- from typing import Union
4
3
 
5
4
  import polars as pl
6
5
  from phenopackets import Family, Phenopacket
@@ -21,7 +20,7 @@ logger = get_logger()
21
20
 
22
21
  def update_outdated_gene_context(
23
22
  phenopacket_path: Path, gene_identifier: str, identifier_map: pl.DataFrame
24
- ) -> Union[Phenopacket, Family]:
23
+ ) -> Phenopacket | Family:
25
24
  """
26
25
  Update the gene context of the Phenopacket.
27
26
 
@@ -66,15 +65,11 @@ def create_updated_phenopacket(
66
65
  to describe the gene identifiers.
67
66
  """
68
67
  identifier_map = create_gene_identifier_map() if identifier_map is None else identifier_map
69
- updated_phenopacket = update_outdated_gene_context(
70
- phenopacket_path, gene_identifier, identifier_map
71
- )
68
+ updated_phenopacket = update_outdated_gene_context(phenopacket_path, gene_identifier, identifier_map)
72
69
  write_phenopacket(updated_phenopacket, output_dir.joinpath(phenopacket_path.name))
73
70
 
74
71
 
75
- def create_updated_phenopackets(
76
- gene_identifier: str, phenopacket_dir: Path, output_dir: Path
77
- ) -> None:
72
+ def create_updated_phenopackets(gene_identifier: str, phenopacket_dir: Path, output_dir: Path) -> None:
78
73
  """
79
74
  Update the gene context within the interpretations for a directory of Phenopackets
80
75
  and writes the updated Phenopackets.
@@ -91,15 +86,11 @@ def create_updated_phenopackets(
91
86
  identifier_map = create_gene_identifier_map()
92
87
  for phenopacket_path in all_files(phenopacket_dir):
93
88
  logger.info(f"Updating gene context for: {phenopacket_path.name}")
94
- updated_phenopacket = update_outdated_gene_context(
95
- phenopacket_path, gene_identifier, identifier_map
96
- )
89
+ updated_phenopacket = update_outdated_gene_context(phenopacket_path, gene_identifier, identifier_map)
97
90
  write_phenopacket(updated_phenopacket, output_dir.joinpath(phenopacket_path.name))
98
91
 
99
92
 
100
- def update_phenopackets(
101
- gene_identifier: str, phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path
102
- ) -> None:
93
+ def update_phenopackets(gene_identifier: str, phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path) -> None:
103
94
  """
104
95
  Update the gene identifiers in either a single phenopacket or a directory of phenopackets.
105
96
 
@@ -122,8 +113,6 @@ def update_phenopackets(
122
113
  logger.info(f"Updating {phenopacket_path}.")
123
114
  create_updated_phenopacket(gene_identifier, phenopacket_path, output_dir)
124
115
  elif phenopacket_dir is not None:
125
- logger.info(
126
- f"Updating {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
127
- )
116
+ logger.info(f"Updating {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}.")
128
117
  create_updated_phenopackets(gene_identifier, phenopacket_dir, output_dir)
129
118
  logger.info(f"Updating finished! Total time: {time.perf_counter() - start_time:.2f} seconds.")
pheval/utils/docs_gen.py CHANGED
@@ -13,7 +13,7 @@ def find_methods_in_python_file(file_path):
13
13
  file_path ([type]): [description]
14
14
  """
15
15
  methods = []
16
- with open(file_path, "r", encoding="utf-8") as file:
16
+ with open(file_path, encoding="utf-8") as file:
17
17
  text = file.read()
18
18
  parsed = ast.parse(text)
19
19
  for node in ast.walk(parsed):
@@ -73,8 +73,8 @@ def print_cli_doc(file_item):
73
73
  for method in methods:
74
74
  content = f"""
75
75
  ::: mkdocs-click
76
- :package: {file_item['folder'].replace("./", '').replace('/', '.')}.{file_item['basename']}
77
- :module: {file_item['folder'].replace("./", '').replace('/', '.').replace('src.', '')}.{file_item['basename']}
76
+ :package: {file_item["folder"].replace("./", "").replace("/", ".")}.{file_item["basename"]}
77
+ :module: {file_item["folder"].replace("./", "").replace("/", ".").replace("src.", "")}.{file_item["basename"]}
78
78
  :command: {method}
79
79
  :depth: 4
80
80
  :style: table
@@ -3,7 +3,6 @@ import re
3
3
  import unicodedata
4
4
  from os import path
5
5
  from pathlib import Path
6
- from typing import List
7
6
 
8
7
  import pandas as pd
9
8
  import yaml
@@ -80,7 +79,7 @@ def ensure_file_exists(*files: str):
80
79
  raise FileNotFoundError(f"File {file} not found")
81
80
 
82
81
 
83
- def ensure_columns_exists(cols: list, dataframes: List[pd.DataFrame], err_message: str = ""):
82
+ def ensure_columns_exists(cols: list, dataframes: list[pd.DataFrame], err_message: str = ""):
84
83
  """Ensures the columns exist in dataframes passed as argument (e.g)
85
84
 
86
85
  "
@@ -3,7 +3,6 @@ import os
3
3
  from copy import copy
4
4
  from dataclasses import dataclass
5
5
  from pathlib import Path
6
- from typing import List, Union
7
6
 
8
7
  import polars as pl
9
8
  from google.protobuf.json_format import MessageToJson, Parse
@@ -183,7 +182,7 @@ def create_gene_identifier_map() -> pl.DataFrame:
183
182
  )
184
183
 
185
184
 
186
- def phenopacket_reader(file: Path) -> Union[Phenopacket, Family]:
185
+ def phenopacket_reader(file: Path) -> Phenopacket | Family:
187
186
  """
188
187
  Read a Phenopacket file and returns its contents as a Phenopacket or Family object
189
188
 
@@ -194,7 +193,7 @@ def phenopacket_reader(file: Path) -> Union[Phenopacket, Family]:
194
193
  Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object
195
194
  """
196
195
  logger.info(f"Parsing Phenopacket: {file.name}")
197
- file = open(file, "r")
196
+ file = open(file)
198
197
  phenopacket = json.load(file)
199
198
  file.close()
200
199
  if "proband" in phenopacket:
@@ -206,7 +205,7 @@ def phenopacket_reader(file: Path) -> Union[Phenopacket, Family]:
206
205
  class PhenopacketUtil:
207
206
  """Class for retrieving data from a Phenopacket or Family object"""
208
207
 
209
- def __init__(self, phenopacket_contents: Union[Phenopacket, Family]):
208
+ def __init__(self, phenopacket_contents: Phenopacket | Family):
210
209
  """Initialise PhenopacketUtil
211
210
 
212
211
  Args:
@@ -226,7 +225,7 @@ class PhenopacketUtil:
226
225
  else:
227
226
  return self.phenopacket_contents.subject.id
228
227
 
229
- def phenotypic_features(self) -> List[PhenotypicFeature]:
228
+ def phenotypic_features(self) -> list[PhenotypicFeature]:
230
229
  """
231
230
  Retrieve a list of all HPO terms
232
231
 
@@ -238,7 +237,7 @@ class PhenopacketUtil:
238
237
  else:
239
238
  return self.phenopacket_contents.phenotypic_features
240
239
 
241
- def observed_phenotypic_features(self) -> List[PhenotypicFeature]:
240
+ def observed_phenotypic_features(self) -> list[PhenotypicFeature]:
242
241
  """
243
242
  Retrieve a list of all observed HPO terms
244
243
 
@@ -253,7 +252,7 @@ class PhenopacketUtil:
253
252
  phenotypic_features.append(p)
254
253
  return phenotypic_features
255
254
 
256
- def negated_phenotypic_features(self) -> List[PhenotypicFeature]:
255
+ def negated_phenotypic_features(self) -> list[PhenotypicFeature]:
257
256
  """
258
257
  Retrieve a list of all negated HPO terms
259
258
 
@@ -267,7 +266,7 @@ class PhenopacketUtil:
267
266
  negated_phenotypic_features.append(p)
268
267
  return negated_phenotypic_features
269
268
 
270
- def diseases(self) -> List[Disease]:
269
+ def diseases(self) -> list[Disease]:
271
270
  """
272
271
  Retrieve a list of Diseases associated with the proband
273
272
 
@@ -279,7 +278,7 @@ class PhenopacketUtil:
279
278
  else:
280
279
  return self.phenopacket_contents.diseases
281
280
 
282
- def _diagnosis_from_interpretations(self) -> List[ProbandDisease]:
281
+ def _diagnosis_from_interpretations(self) -> list[ProbandDisease]:
283
282
  """
284
283
  Retrieve a list of disease diagnoses associated with the proband from the interpretations object
285
284
 
@@ -301,7 +300,7 @@ class PhenopacketUtil:
301
300
  )
302
301
  return diagnoses
303
302
 
304
- def _diagnosis_from_disease(self) -> List[ProbandDisease]:
303
+ def _diagnosis_from_disease(self) -> list[ProbandDisease]:
305
304
  """
306
305
  Retrieve a list of disease diagnoses associated with the proband from the diseases object
307
306
 
@@ -310,12 +309,10 @@ class PhenopacketUtil:
310
309
  """
311
310
  diagnoses = []
312
311
  for disease in self.diseases():
313
- diagnoses.append(
314
- ProbandDisease(disease_name=disease.term.label, disease_identifier=disease.term.id)
315
- )
312
+ diagnoses.append(ProbandDisease(disease_name=disease.term.label, disease_identifier=disease.term.id))
316
313
  return diagnoses
317
314
 
318
- def diagnoses(self) -> List[ProbandDisease]:
315
+ def diagnoses(self) -> list[ProbandDisease]:
319
316
  """
320
317
  Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket
321
318
 
@@ -324,7 +321,7 @@ class PhenopacketUtil:
324
321
  """
325
322
  return list(set(self._diagnosis_from_interpretations() + self._diagnosis_from_disease()))
326
323
 
327
- def interpretations(self) -> List[Interpretation]:
324
+ def interpretations(self) -> list[Interpretation]:
328
325
  """
329
326
  Retrieve a list of interpretations from a Phenopacket
330
327
 
@@ -336,7 +333,7 @@ class PhenopacketUtil:
336
333
  else:
337
334
  return self.phenopacket_contents.interpretations
338
335
 
339
- def causative_variants(self) -> List[ProbandCausativeVariant]:
336
+ def causative_variants(self) -> list[ProbandCausativeVariant]:
340
337
  """
341
338
  Retrieve a list of causative variants listed in a Phenopacket
342
339
 
@@ -364,7 +361,7 @@ class PhenopacketUtil:
364
361
  all_variants.append(variant_data)
365
362
  return all_variants
366
363
 
367
- def files(self) -> List[File]:
364
+ def files(self) -> list[File]:
368
365
  """
369
366
  Retrieve a list of files associated with a phenopacket
370
367
 
@@ -394,15 +391,11 @@ class PhenopacketUtil:
394
391
  URI of the VCF file to the specified directory and returns the modified file object.
395
392
  """
396
393
  compatible_genome_assembly = ["GRCh37", "hg19", "GRCh38", "hg38"]
397
- vcf_data = [file for file in self.files() if file.file_attributes["fileFormat"] == "vcf"][0]
398
- if not Path(vcf_data.uri).name.endswith(".vcf") and not Path(vcf_data.uri).name.endswith(
399
- ".vcf.gz"
400
- ):
394
+ vcf_data = next(file for file in self.files() if file.file_attributes["fileFormat"] == "vcf")
395
+ if not Path(vcf_data.uri).name.endswith(".vcf") and not Path(vcf_data.uri).name.endswith(".vcf.gz"):
401
396
  raise IncorrectFileFormatError(Path(vcf_data.uri), ".vcf or .vcf.gz file")
402
397
  if vcf_data.file_attributes["genomeAssembly"] not in compatible_genome_assembly:
403
- raise IncompatibleGenomeAssemblyError(
404
- vcf_data.file_attributes["genomeAssembly"], phenopacket_path
405
- )
398
+ raise IncompatibleGenomeAssemblyError(vcf_data.file_attributes["genomeAssembly"], phenopacket_path)
406
399
  vcf_data.uri = str(vcf_dir.joinpath(Path(vcf_data.uri).name))
407
400
  return vcf_data
408
401
 
@@ -430,7 +423,7 @@ class PhenopacketUtil:
430
423
  gene_identifier=genomic_interpretation.gene.value_id,
431
424
  )
432
425
 
433
- def diagnosed_genes(self) -> List[ProbandCausativeGene]:
426
+ def diagnosed_genes(self) -> list[ProbandCausativeGene]:
434
427
  """
435
428
  Retrieve the disease causing genes from a phenopacket.
436
429
  Returns:
@@ -444,7 +437,7 @@ class PhenopacketUtil:
444
437
  genes = list({gene.gene_symbol: gene for gene in genes}.values())
445
438
  return genes
446
439
 
447
- def diagnosed_variants(self) -> List[GenomicVariant]:
440
+ def diagnosed_variants(self) -> list[GenomicVariant]:
448
441
  """
449
442
  Retrieve a list of all known causative variants from a phenopacket.
450
443
  Returns:
@@ -455,11 +448,7 @@ class PhenopacketUtil:
455
448
  for i in pheno_interpretation:
456
449
  for g in i.diagnosis.genomic_interpretations:
457
450
  variant = GenomicVariant(
458
- chrom=str(
459
- g.variant_interpretation.variation_descriptor.vcf_record.chrom.replace(
460
- "chr", ""
461
- )
462
- ),
451
+ chrom=str(g.variant_interpretation.variation_descriptor.vcf_record.chrom.replace("chr", "")),
463
452
  pos=int(g.variant_interpretation.variation_descriptor.vcf_record.pos),
464
453
  ref=g.variant_interpretation.variation_descriptor.vcf_record.ref,
465
454
  alt=g.variant_interpretation.variation_descriptor.vcf_record.alt,
@@ -480,13 +469,7 @@ class PhenopacketUtil:
480
469
  """
481
470
  variants = self.diagnosed_variants()
482
471
  for variant in variants:
483
- if (
484
- variant.chrom == ""
485
- or variant.pos == 0
486
- or variant.pos == ""
487
- or variant.ref == ""
488
- or variant.alt == ""
489
- ):
472
+ if variant.chrom == "" or variant.pos in (0, "") or variant.ref == "" or variant.alt == "":
490
473
  return True
491
474
  return False
492
475
 
@@ -537,7 +520,7 @@ class PhenopacketUtil:
537
520
  class PhenopacketRebuilder:
538
521
  """Class for rebuilding a Phenopacket"""
539
522
 
540
- def __init__(self, phenopacket: Union[Phenopacket, Family]):
523
+ def __init__(self, phenopacket: Phenopacket | Family):
541
524
  """Initialise PhenopacketUtil
542
525
 
543
526
  Attributes:
@@ -545,9 +528,7 @@ class PhenopacketRebuilder:
545
528
  """
546
529
  self.phenopacket = phenopacket
547
530
 
548
- def update_interpretations(
549
- self, interpretations: [Interpretation]
550
- ) -> Union[Phenopacket, Family]:
531
+ def update_interpretations(self, interpretations: [Interpretation]) -> Phenopacket | Family:
551
532
  """
552
533
  Add the updated interpretations to a Phenopacket or Family.
553
534
 
@@ -566,7 +547,7 @@ class PhenopacketRebuilder:
566
547
  phenopacket.interpretations.extend(interpretations)
567
548
  return phenopacket
568
549
 
569
- def add_randomised_hpo(self, randomised_hpo: [PhenotypicFeature]) -> Union[Phenopacket, Family]:
550
+ def add_randomised_hpo(self, randomised_hpo: [PhenotypicFeature]) -> Phenopacket | Family:
570
551
  """
571
552
  Add randomised phenotypic profiles to a Phenopacket or Family.
572
553
 
@@ -585,7 +566,7 @@ class PhenopacketRebuilder:
585
566
  phenopacket.phenotypic_features.extend(randomised_hpo)
586
567
  return phenopacket
587
568
 
588
- def add_spiked_vcf_path(self, spiked_vcf_file_data: File) -> Union[Phenopacket, Family]:
569
+ def add_spiked_vcf_path(self, spiked_vcf_file_data: File) -> Phenopacket | Family:
589
570
  """
590
571
  Add a spiked VCF path to a Phenopacket or Family.
591
572
 
@@ -597,16 +578,14 @@ class PhenopacketRebuilder:
597
578
  """
598
579
  logger.info(f"Adding spiked VCF path {spiked_vcf_file_data.uri} to phenopacket.")
599
580
  phenopacket = copy(self.phenopacket)
600
- phenopacket_files = [
601
- file for file in phenopacket.files if file.file_attributes["fileFormat"] != "vcf"
602
- ]
581
+ phenopacket_files = [file for file in phenopacket.files if file.file_attributes["fileFormat"] != "vcf"]
603
582
  phenopacket_files.append(spiked_vcf_file_data)
604
583
  del phenopacket.files[:]
605
584
  phenopacket.files.extend(phenopacket_files)
606
585
  return phenopacket
607
586
 
608
587
 
609
- def create_json_message(phenopacket: Union[Phenopacket, Family]) -> str:
588
+ def create_json_message(phenopacket: Phenopacket | Family) -> str:
610
589
  """
611
590
  Create a JSON message for writing to a file.
612
591
 
@@ -619,7 +598,7 @@ def create_json_message(phenopacket: Union[Phenopacket, Family]) -> str:
619
598
  return MessageToJson(phenopacket)
620
599
 
621
600
 
622
- def write_phenopacket(phenopacket: Union[Phenopacket, Family], output_file: Path) -> None:
601
+ def write_phenopacket(phenopacket: Phenopacket | Family, output_file: Path) -> None:
623
602
  """
624
603
  Write a Phenopacket or Family object to a file in JSON format.
625
604
 
@@ -667,15 +646,13 @@ class GeneIdentifierUpdater:
667
646
  str: The identified gene identifier.
668
647
  """
669
648
  matches = self.identifier_map.filter(
670
- (pl.col("gene_symbol") == gene_symbol)
671
- & (pl.col("identifier_type") == self.gene_identifier)
649
+ (pl.col("gene_symbol") == gene_symbol) & (pl.col("identifier_type") == self.gene_identifier)
672
650
  )
673
651
 
674
652
  if matches.height > 0:
675
653
  return matches["identifier"][0]
676
654
  prev_symbol_matches = self.identifier_map.filter(
677
- (pl.col("identifier_type") == self.gene_identifier)
678
- & (pl.col("prev_symbols").list.contains(gene_symbol))
655
+ (pl.col("identifier_type") == self.gene_identifier) & (pl.col("prev_symbols").list.contains(gene_symbol))
679
656
  )
680
657
  if prev_symbol_matches.height > 0:
681
658
  return prev_symbol_matches["identifier"][0]
@@ -692,11 +669,9 @@ class GeneIdentifierUpdater:
692
669
  Returns:
693
670
  str: The gene symbol corresponding to the identifier.
694
671
  """
695
- return self.identifier_map.filter(pl.col("identifier") == query_gene_identifier)[
696
- "gene_symbol"
697
- ][0]
672
+ return self.identifier_map.filter(pl.col("identifier") == query_gene_identifier)["gene_symbol"][0]
698
673
 
699
- def _find_alternate_ids(self, gene_symbol: str) -> List[str]:
674
+ def _find_alternate_ids(self, gene_symbol: str) -> list[str]:
700
675
  """
701
676
  Find the alternate IDs for a gene symbol.
702
677
 
@@ -706,24 +681,21 @@ class GeneIdentifierUpdater:
706
681
  Returns:
707
682
  List[str]: List of alternate IDs for the gene symbol.
708
683
  """
709
- matches = self.identifier_map.filter((pl.col("gene_symbol") == gene_symbol))
684
+ matches = self.identifier_map.filter(pl.col("gene_symbol") == gene_symbol)
710
685
  if matches.height > 0:
711
686
  return [f"{row['prefix']}{row['identifier']}" for row in matches.rows(named=True)] + [
712
687
  f"symbol:{gene_symbol}"
713
688
  ]
714
- prev_symbol_matches = self.identifier_map.filter(
715
- (pl.col("prev_symbols").list.contains(gene_symbol))
716
- )
689
+ prev_symbol_matches = self.identifier_map.filter(pl.col("prev_symbols").list.contains(gene_symbol))
717
690
  if prev_symbol_matches.height > 0:
718
- return [
719
- f"{row['prefix']}{row['identifier']}"
720
- for row in prev_symbol_matches.rows(named=True)
721
- ] + [f"symbol:{gene_symbol}"]
691
+ return [f"{row['prefix']}{row['identifier']}" for row in prev_symbol_matches.rows(named=True)] + [
692
+ f"symbol:{gene_symbol}"
693
+ ]
722
694
  return None
723
695
 
724
696
  def update_genomic_interpretations_gene_identifier(
725
- self, interpretations: List[Interpretation], phenopacket_path: Path
726
- ) -> List[Interpretation]:
697
+ self, interpretations: list[Interpretation], phenopacket_path: Path
698
+ ) -> list[Interpretation]:
727
699
  """
728
700
  Update the genomic interpretations of a Phenopacket.
729
701
 
@@ -745,13 +717,9 @@ class GeneIdentifierUpdater:
745
717
  f"{g.variant_interpretation.variation_descriptor.gene_context.value_id}"
746
718
  f" to {updated_gene_identifier}"
747
719
  )
748
- g.variant_interpretation.variation_descriptor.gene_context.value_id = (
749
- updated_gene_identifier
750
- )
720
+ g.variant_interpretation.variation_descriptor.gene_context.value_id = updated_gene_identifier
751
721
  del g.variant_interpretation.variation_descriptor.gene_context.alternate_ids[:]
752
722
  g.variant_interpretation.variation_descriptor.gene_context.alternate_ids.extend(
753
- self._find_alternate_ids(
754
- g.variant_interpretation.variation_descriptor.gene_context.symbol
755
- )
723
+ self._find_alternate_ids(g.variant_interpretation.variation_descriptor.gene_context.symbol)
756
724
  )
757
725
  return updated_interpretations
@@ -8,7 +8,7 @@ import numpy
8
8
  import pandas as pd
9
9
  import plotly.express as px
10
10
 
11
- import pheval.utils.file_utils as file_utils
11
+ from pheval.utils import file_utils
12
12
 
13
13
 
14
14
  def filter_non_0_score(data: pd.DataFrame, col: str) -> pd.DataFrame:
@@ -58,9 +58,7 @@ def diff_semsim(
58
58
  if absolute_diff:
59
59
  df["diff"] = df[f"{score_column}_x"] - df[f"{score_column}_y"]
60
60
  return df[["subject_id", "object_id", "diff"]]
61
- df["diff"] = df.apply(
62
- lambda row: get_percentage_diff(row[f"{score_column}_x"], row[f"{score_column}_y"]), axis=1
63
- )
61
+ df["diff"] = df.apply(lambda row: get_percentage_diff(row[f"{score_column}_x"], row[f"{score_column}_y"]), axis=1)
64
62
  return df[["subject_id", "object_id", f"{score_column}_x", f"{score_column}_y", "diff"]]
65
63
 
66
64
 
@@ -91,9 +89,7 @@ def semsim_heatmap_plot(semsim_left: Path, semsim_right: Path, score_column: str
91
89
  fig.show()
92
90
 
93
91
 
94
- def semsim_analysis(
95
- semsim_left: Path, semsim_right: Path, score_column: str, absolute_diff=True
96
- ) -> pd.DataFrame:
92
+ def semsim_analysis(semsim_left: Path, semsim_right: Path, score_column: str, absolute_diff=True) -> pd.DataFrame:
97
93
  """semsim_analysis
98
94
 
99
95
  Args:
@@ -147,11 +143,11 @@ def get_percentage_diff(current_number: float, previous_number: float) -> float:
147
143
  """
148
144
  try:
149
145
  if current_number == previous_number:
150
- return "{:.2%}".format(0)
146
+ return f"{0:.2%}"
151
147
  if current_number > previous_number:
152
- number = (1 - ((current_number / previous_number))) * 100
148
+ number = (1 - (current_number / previous_number)) * 100
153
149
  else:
154
150
  number = (100 - ((previous_number / current_number) * 100)) * -1
155
- return "{:.2%}".format(number / 100)
151
+ return f"{number / 100:.2%}"
156
152
  except ZeroDivisionError:
157
153
  return None
pheval/utils/utils.py CHANGED
@@ -4,7 +4,6 @@ import json
4
4
  import random
5
5
  from datetime import datetime
6
6
  from pathlib import Path
7
- from typing import List
8
7
 
9
8
  import pandas as pd
10
9
  import requests
@@ -42,7 +41,7 @@ def rand(df: pd.DataFrame, min_num: int, max_num: int, scramble_factor: float) -
42
41
  def semsim_scramble(
43
42
  input: Path,
44
43
  output: Path,
45
- columns_to_be_scrambled: List[str],
44
+ columns_to_be_scrambled: list[str],
46
45
  scramble_factor: float = 0.5,
47
46
  ) -> pd.DataFrame:
48
47
  """
@@ -66,7 +65,7 @@ def semsim_scramble(
66
65
 
67
66
  def semsim_scramble_df(
68
67
  dataframe: pd.DataFrame,
69
- columns_to_be_scrambled: List[str],
68
+ columns_to_be_scrambled: list[str],
70
69
  scramble_factor: float,
71
70
  ) -> pd.DataFrame:
72
71
  """scramble_semsim_df
@@ -136,6 +135,6 @@ def get_resource_timestamp(file_name: str) -> str | None:
136
135
  file_name (str): The file name.
137
136
  """
138
137
  if METADATA_PATH.exists():
139
- with open(METADATA_PATH, "r") as f:
138
+ with open(METADATA_PATH) as f:
140
139
  return json.load(f).get(file_name)
141
140
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pheval
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
@@ -0,0 +1,57 @@
1
+ pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ pheval/analyse/benchmark.py,sha256=Ktjovg3uEuaBi02AIiGX4OaZ73yAruwcsohhfBrDY44,6874
4
+ pheval/analyse/benchmark_db_manager.py,sha256=zS1TI76YuV2_YXLipHLSyh-XDR5kTxyOwhRhHRFHfjQ,764
5
+ pheval/analyse/benchmark_output_type.py,sha256=AG4HtEfscbDqESMBQ_M5Brnj8AmfrFxU6q7Gi2FOebw,1493
6
+ pheval/analyse/binary_classification_curves.py,sha256=b5YseLqv519DT7rsOweMRx7ElxYv9LcukXtLeAxflQE,4953
7
+ pheval/analyse/binary_classification_stats.py,sha256=oWkaj-A2-2MaUIsJjlehwLApx-wGLx-TQ49v9O4lMAs,6910
8
+ pheval/analyse/generate_plots.py,sha256=fyUMOgmbqYeYMPW843VR-CVArt3R75HgGbyq1i2XO7A,14489
9
+ pheval/analyse/generate_rank_comparisons.py,sha256=BmksHkvJhpR0Rcrnc-r2_OF5L3ROHB8o3HuDSXjgeK4,1660
10
+ pheval/analyse/rank_stats.py,sha256=io8UWTEUZfZQSnxXBjk2Z_1u6WWoxX0kSfEvxtpC-Kg,8241
11
+ pheval/analyse/run_data_parser.py,sha256=da8-J9sSwyOUow80A2ETKdVP7GUX-zuEiiBix-M18Mo,3601
12
+ pheval/cli.py,sha256=1kPhBYFSKjvPv9YcpknDj3Y7DZl5CA41ucqDRR7fAjk,1599
13
+ pheval/cli_pheval.py,sha256=N8xp3r8avYqLswPhakxtTQyemVdgHAvnpAIj_FmoN5k,3510
14
+ pheval/cli_pheval_utils.py,sha256=fLbskjHQSTN29qFXmjvsXYn7dE8-3OZuJUqlEQB-wyI,16481
15
+ pheval/config_parser.py,sha256=6wK8x9hXHg-kTDkkqfKshYvIdn91SGhucTYUul3fjNs,1353
16
+ pheval/implementations/__init__.py,sha256=BUTnw9urZOApRFVy6NYsq8TCLphHWsr3vhxvsx2RZ3E,1318
17
+ pheval/implementations/pheval_class_resolver.py,sha256=YNNk3PoQbSvbKmbihlt_bsHNxRM95O-VrtUIbQnfBcw,1567
18
+ pheval/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ pheval/infra/exomiserdb.py,sha256=0NXkqYT59Ueu2F5o7u4iCWJCAqTIL4KxShQuePFmpLo,5015
20
+ pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ pheval/post_processing/mondo_mapping.py,sha256=Vetk0acOtR__FXnx9wy2pcDDWFz1mQkc88alKUn0muI,937
22
+ pheval/post_processing/phenopacket_truth_set.py,sha256=rK_iIZm7OwdDEj_7SV7jMEROPHPe06baXLlHVO8wrSE,9889
23
+ pheval/post_processing/post_processing.py,sha256=mQWBpGmWd3ZPwZtDutekn_osJdGQFj4fPf6ibb7o8xA,10040
24
+ pheval/post_processing/validate_result_format.py,sha256=jg3HjvMwGI8rsLtOM0gpcVlGB3weGZiZek5JwFzw1zE,2862
25
+ pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ pheval/prepare/create_noisy_phenopackets.py,sha256=U-tYex4kbXT9OEkGnMl4p4pt4LRuqBKwKTgVBvhA7xU,12142
27
+ pheval/prepare/create_spiked_vcf.py,sha256=Po7WSQZAHaBObLa7SU50djUJ_XwAlUfTBeE9C0-7GA8,24299
28
+ pheval/prepare/custom_exceptions.py,sha256=8Bwi5SmDgWuvzLWPI7foUl2m5C6QdirlxPeeeC594AU,1637
29
+ pheval/prepare/prepare_corpus.py,sha256=726Ez5xLyvsibfEN3NqC1CFN0BgRl4Uswj4CaNTHS4o,5264
30
+ pheval/prepare/update_phenopacket.py,sha256=IeLKtp2i60LmBbGOh387ccq5J7Mk8y0hx4K55J7Mnbs,5550
31
+ pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
32
+ pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
33
+ pheval/resources/alternate_ouputs/OVA_results.txt,sha256=_5XFCR4W04D-W7DObpALLsa0-693g2kiIUB_uo79aHk,9845
34
+ pheval/resources/alternate_ouputs/Phen2Gene_results.json,sha256=xxKsuiHKW9qQOz2baFlLW9RYphA4kxjoTsg1weZkTY8,14148
35
+ pheval/resources/alternate_ouputs/Phenolyzer_results.txt,sha256=TltiEzYm2PY79u6EdZR3f4ZqadNDCUN_d4f0TFF-t5A,594
36
+ pheval/resources/alternate_ouputs/lirical_results.tsv,sha256=0juf5HY6ttg-w7aWgYJUmSP5zmoaooEQDY8xhOcerLk,431068
37
+ pheval/resources/alternate_ouputs/svanna_results.tsv,sha256=OpTamPhJwh12wkdAxoIGb0wWs_T7TcqNWgqkQzgOek4,714
38
+ pheval/resources/hgnc_complete_set.txt,sha256=9-aNcyGZzarD1DnO_780NK0r-ppwbyu9e4-cQDmtUC8,16593567
39
+ pheval/resources/metadata.json,sha256=aabSMPCwE-KR6cAxBCMRZmDD1fGD7qCeSvPLduvO3gA,112
40
+ pheval/resources/mondo.sssom.tsv,sha256=Egu8UqHPL6TbSRQKlRgQ7kNq5S4e1VElR5JWHWgc7F0,12700455
41
+ pheval/run_metadata.py,sha256=isEs63c-O6LZ6TBmk9wmAOmzjNE3Nf_k9V9uiYu0x8s,1122
42
+ pheval/runners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ pheval/runners/runner.py,sha256=zkS6yvbnC6UBCtV2VR3FEfr5naWRDdNcQFN1CoH1ha8,4902
44
+ pheval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
+ pheval/utils/docs_gen.py,sha256=8roEavgcVlEgOg54zTn9-xUOTu3qsQF-JfKSj3-mKMg,3188
46
+ pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
47
+ pheval/utils/exomiser.py,sha256=m2u0PH2z9lFPaB3LVkZCmPmH5e55q1NoTzNl46zRRP8,683
48
+ pheval/utils/file_utils.py,sha256=FY4B17NVS7tXeTso0OZYgde-H9KJdUbQbWYT6fMdnCM,3552
49
+ pheval/utils/logger.py,sha256=5DZl5uMltUDQorhkvg_B7_ZhFwApAmEkWneFIOKfRGQ,1566
50
+ pheval/utils/phenopacket_utils.py,sha256=qt816uYkJKAe2MATFoulqEr78zg0-Z2Z_FxfEO9iSFE,26740
51
+ pheval/utils/semsim_utils.py,sha256=tSDin3PwCdtMjtMXubIXTiGaCEFNz7iF4IngrjNHprI,6104
52
+ pheval/utils/utils.py,sha256=T9zzqMlzY2hrcYn9ObatYgOHtKWTpWbW5nU0tTTcYxI,4489
53
+ pheval-0.6.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
54
+ pheval-0.6.4.dist-info/METADATA,sha256=QNcXx4I83wCz7jEkPn1sbKC8phoFxYuil5Gzlb6JSIQ,6494
55
+ pheval-0.6.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
56
+ pheval-0.6.4.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
57
+ pheval-0.6.4.dist-info/RECORD,,