pheval 0.5.2__tar.gz → 0.5.3__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

Files changed (52)
  1. {pheval-0.5.2 → pheval-0.5.3}/PKG-INFO +1 -1
  2. {pheval-0.5.2 → pheval-0.5.3}/pyproject.toml +1 -1
  3. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/cli_pheval.py +11 -0
  4. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/config_parser.py +5 -0
  5. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/post_processing/post_processing.py +12 -0
  6. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/prepare/create_noisy_phenopackets.py +17 -1
  7. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/prepare/create_spiked_vcf.py +18 -6
  8. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/prepare/prepare_corpus.py +29 -9
  9. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/prepare/update_phenopacket.py +20 -2
  10. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/runners/runner.py +7 -0
  11. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/phenopacket_utils.py +10 -5
  12. {pheval-0.5.2 → pheval-0.5.3}/LICENSE +0 -0
  13. {pheval-0.5.2 → pheval-0.5.3}/README.md +0 -0
  14. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/__init__.py +0 -0
  15. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/__init__.py +0 -0
  16. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/benchmark.py +0 -0
  17. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/benchmark_db_manager.py +0 -0
  18. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/benchmark_output_type.py +0 -0
  19. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/binary_classification_curves.py +0 -0
  20. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/binary_classification_stats.py +0 -0
  21. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/generate_plots.py +0 -0
  22. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/generate_rank_comparisons.py +0 -0
  23. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/rank_stats.py +0 -0
  24. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/analyse/run_data_parser.py +0 -0
  25. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/cli.py +0 -0
  26. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/cli_pheval_utils.py +0 -0
  27. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/implementations/__init__.py +0 -0
  28. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/infra/__init__.py +0 -0
  29. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/infra/exomiserdb.py +0 -0
  30. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/post_processing/__init__.py +0 -0
  31. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/post_processing/phenopacket_truth_set.py +0 -0
  32. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/post_processing/validate_result_format.py +0 -0
  33. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/prepare/__init__.py +0 -0
  34. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/prepare/custom_exceptions.py +0 -0
  35. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/CADA_results.txt +0 -0
  36. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/DeepPVP_results.txt +0 -0
  37. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/OVA_results.txt +0 -0
  38. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/Phen2Gene_results.json +0 -0
  39. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/Phenolyzer_results.txt +0 -0
  40. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/lirical_results.tsv +0 -0
  41. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/alternate_ouputs/svanna_results.tsv +0 -0
  42. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/resources/hgnc_complete_set.txt +0 -0
  43. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/run_metadata.py +0 -0
  44. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/runners/__init__.py +0 -0
  45. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/__init__.py +0 -0
  46. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/docs_gen.py +0 -0
  47. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/docs_gen.sh +0 -0
  48. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/exomiser.py +0 -0
  49. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/file_utils.py +0 -0
  50. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/logger.py +0 -0
  51. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/semsim_utils.py +0 -0
  52. {pheval-0.5.2 → pheval-0.5.3}/src/pheval/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pheval
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary:
5
5
  Author: Yasemin Bridges
6
6
  Author-email: y.bridges@qmul.ac.uk
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pheval"
3
- version = "0.5.2"
3
+ version = "0.5.3"
4
4
  description = ""
5
5
  authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
6
6
  "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",
@@ -2,12 +2,16 @@
2
2
  Monarch Initiative
3
3
  """
4
4
 
5
+ import time
5
6
  from pathlib import Path
6
7
 
7
8
  import click
8
9
 
9
10
  from pheval.implementations import get_implementation_resolver
10
11
  from pheval.utils.file_utils import write_metadata
12
+ from pheval.utils.logger import get_logger
13
+
14
+ logger = get_logger()
11
15
 
12
16
 
13
17
  @click.command()
@@ -84,11 +88,18 @@ def run(
84
88
  config (Path): The path of the configuration file (optional e.g., config.yaml)
85
89
  version (str): The version of the tool implementation
86
90
  """
91
+ logger.info(f"Executing {runner}.")
92
+ start_time = time.perf_counter()
87
93
  runner_class = get_implementation_resolver().lookup(runner)
88
94
  runner_instance = runner_class(input_dir, testdata_dir, tmp_dir, output_dir, config, version)
89
95
  runner_instance.build_output_directory_structure()
96
+ logger.info("Executing prepare phase.")
90
97
  runner_instance.prepare()
98
+ logger.info("Executing run phase.")
91
99
  runner_instance.run()
100
+ logger.info("Executing post-processing phase.")
92
101
  runner_instance.post_process()
93
102
  run_metadata = runner_instance.construct_meta_data()
103
+ logger.info(f"Writing metadata for run to {output_dir}.")
94
104
  write_metadata(output_dir, run_metadata)
105
+ logger.info(f"Run completed! Total time: {time.perf_counter() - start_time:.2f} seconds.")
@@ -6,6 +6,10 @@ import yaml
6
6
  from serde import serde
7
7
  from serde.yaml import from_yaml
8
8
 
9
+ from pheval.utils.logger import get_logger
10
+
11
+ logger = get_logger()
12
+
9
13
 
10
14
  @serde
11
15
  @dataclass
@@ -34,6 +38,7 @@ class InputDirConfig:
34
38
 
35
39
  def parse_input_dir_config(input_dir: Path) -> InputDirConfig:
36
40
  """Reads the config file."""
41
+ logger.info(f"Parsing config.yaml located in {input_dir}.")
37
42
  with open(Path(input_dir).joinpath("config.yaml"), "r") as config_file:
38
43
  config = yaml.safe_load(config_file)
39
44
  config_file.close()
@@ -180,6 +180,10 @@ def generate_gene_result(
180
180
  phenopacket_dir (Path): Path to the Phenopacket directory
181
181
  """
182
182
  output_file = output_dir.joinpath(f"pheval_gene_results/{result_path.stem}-gene_result.parquet")
183
+ logger.info(
184
+ f"Writing classified results for {len(all_files(phenopacket_dir))} "
185
+ f"phenopackets to {output_dir.joinpath('pheval_gene_results')}"
186
+ )
183
187
  create_empty_pheval_result(
184
188
  phenopacket_dir, output_dir.joinpath("pheval_gene_results"), ResultType.GENE
185
189
  )
@@ -210,6 +214,10 @@ def generate_variant_result(
210
214
  output_file = output_dir.joinpath(
211
215
  f"pheval_variant_results/{result_path.stem}-variant_result.parquet"
212
216
  )
217
+ logger.info(
218
+ f"Writing classified results for {len(all_files(phenopacket_dir))} "
219
+ f"phenopackets to {output_dir.joinpath('pheval_variant_results')}"
220
+ )
213
221
  create_empty_pheval_result(
214
222
  phenopacket_dir, output_dir.joinpath("pheval_variant_results"), ResultType.VARIANT
215
223
  )
@@ -242,6 +250,10 @@ def generate_disease_result(
242
250
  output_file = output_dir.joinpath(
243
251
  f"pheval_disease_results/{result_path.stem}-disease_result.parquet"
244
252
  )
253
+ logger.info(
254
+ f"Writing classified results for {len(all_files(phenopacket_dir))} "
255
+ f"phenopackets to {output_dir.joinpath('pheval_disease_results')}"
256
+ )
245
257
  create_empty_pheval_result(
246
258
  phenopacket_dir, output_dir.joinpath("pheval_disease_results"), ResultType.DISEASE
247
259
  )
@@ -1,4 +1,5 @@
1
1
  import random
2
+ import time
2
3
  from pathlib import Path
3
4
  from typing import List, Union
4
5
 
@@ -6,7 +7,8 @@ from oaklib.implementations.pronto.pronto_implementation import ProntoImplementa
6
7
  from oaklib.resource import OntologyResource
7
8
  from phenopackets import Family, OntologyClass, Phenopacket, PhenotypicFeature
8
9
 
9
- from pheval.utils.file_utils import files_with_suffix
10
+ from pheval.utils.file_utils import all_files, files_with_suffix
11
+ from pheval.utils.logger import get_logger
10
12
  from pheval.utils.phenopacket_utils import (
11
13
  PhenopacketRebuilder,
12
14
  PhenopacketUtil,
@@ -14,6 +16,8 @@ from pheval.utils.phenopacket_utils import (
14
16
  write_phenopacket,
15
17
  )
16
18
 
19
+ logger = get_logger()
20
+
17
21
 
18
22
  def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
19
23
  """
@@ -24,9 +28,11 @@ def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
24
28
  ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO.
25
29
  """
26
30
  if local_cached_ontology is None:
31
+ logger.warning("No local cached ontology found, using default ontology.")
27
32
  resource = OntologyResource(slug="hp.obo", local=False)
28
33
  return ProntoImplementation(resource)
29
34
  else:
35
+ logger.info(f"Loading local ontology from {local_cached_ontology}.")
30
36
  resource = OntologyResource(slug=local_cached_ontology, local=True)
31
37
  return ProntoImplementation(resource)
32
38
 
@@ -241,6 +247,7 @@ class HpoRandomiser:
241
247
  """
242
248
  phenopacket_files = files_with_suffix(phenopacket_dir, ".json")
243
249
  for phenopacket_path in phenopacket_files:
250
+ logger.info(f"Scrambling {phenopacket_path.name}.")
244
251
  phenopacket = phenopacket_reader(phenopacket_path)
245
252
  created_noisy_phenopacket = self.add_noise_to_phenotypic_profile(phenopacket)
246
253
  write_phenopacket(
@@ -268,14 +275,23 @@ def scramble_phenopackets(
268
275
  scramble_factor (float): A factor determining the level of scrambling for phenotypic features.
269
276
  local_cached_ontology (Path): The path to the local cached ontology.
270
277
  """
278
+ start_time = time.perf_counter()
279
+ logger.info("Initiating scrambling.")
280
+ logger.info(f"Created directory {output_dir}.")
281
+ logger.info(f"Scramble factor set to {scramble_factor}.")
271
282
  output_dir.mkdir(exist_ok=True)
272
283
  ontology = load_ontology(local_cached_ontology)
273
284
  if phenopacket_path is not None:
285
+ logger.info(f"Scrambling {phenopacket_path}.")
274
286
  HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopacket(
275
287
  output_dir, phenopacket_path
276
288
  )
277
289
  elif phenopacket_dir is not None:
290
+ logger.info(
291
+ f"Scrambling {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
292
+ )
278
293
  HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopackets(
279
294
  output_dir,
280
295
  phenopacket_dir,
281
296
  )
297
+ logger.info(f"Finished scrambling! Total time: {time.perf_counter() - start_time:.2f} seconds.")
@@ -1,7 +1,7 @@
1
1
  import gzip
2
- import logging
3
2
  import random
4
3
  import re
4
+ import time
5
5
  import urllib.parse
6
6
  from copy import copy
7
7
  from dataclasses import dataclass
@@ -12,6 +12,7 @@ from phenopackets import Family, File, Phenopacket
12
12
 
13
13
  from pheval.prepare.custom_exceptions import InputError
14
14
  from pheval.utils.file_utils import all_files, files_with_suffix, is_gzipped
15
+ from pheval.utils.logger import get_logger
15
16
  from pheval.utils.phenopacket_utils import (
16
17
  IncompatibleGenomeAssemblyError,
17
18
  PhenopacketRebuilder,
@@ -21,8 +22,7 @@ from pheval.utils.phenopacket_utils import (
21
22
  write_phenopacket,
22
23
  )
23
24
 
24
- info_log = logging.getLogger("info")
25
-
25
+ logger = get_logger()
26
26
  genome_assemblies = {
27
27
  "GRCh38": {
28
28
  "1": 248956422,
@@ -357,9 +357,13 @@ class VcfSpiker:
357
357
  and int(val.split("\t")[1]) < int(variant_entry[1])
358
358
  ]
359
359
  if matching_indices:
360
+ logger.info(
361
+ f"Successfully spiked variant {variant.variant.chrom}-{variant.variant.pos}-"
362
+ f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}"
363
+ )
360
364
  variant_entry_position = matching_indices[-1] + 1
361
365
  else:
362
- info_log.warning(
366
+ logger.warning(
363
367
  f"Could not find entry position for {variant.variant.chrom}-{variant.variant.pos}-"
364
368
  f"{variant.variant.ref}-{variant.variant.alt} in {template_vcf_name}, "
365
369
  "inserting at end of VCF contents."
@@ -518,8 +522,6 @@ def generate_spiked_vcf_file(
518
522
  Returns:
519
523
  File: The generated File object representing the newly created spiked VCF file.
520
524
  """
521
- output_dir.mkdir(exist_ok=True)
522
- info_log.info(f" Created a directory {output_dir}")
523
525
  vcf_assembly, spiked_vcf = spike_vcf_contents(
524
526
  phenopacket, phenopacket_path, hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir
525
527
  )
@@ -633,6 +635,7 @@ def create_spiked_vcfs(
633
635
  hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
634
636
  hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
635
637
  for phenopacket_path in files_with_suffix(phenopacket_dir, ".json"):
638
+ logger.info(f"Creating spiked VCF for: {phenopacket_path.name}")
636
639
  spike_and_update_phenopacket(
637
640
  hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir, output_dir, phenopacket_path
638
641
  )
@@ -659,7 +662,12 @@ def spike_vcfs(
659
662
  hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional).
660
663
  hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional).
661
664
  """
665
+ start_time = time.perf_counter()
666
+ logger.info("Creating spiked VCFs.")
667
+ output_dir.mkdir(exist_ok=True)
668
+ logger.info(f" Created output directory: {output_dir}")
662
669
  if phenopacket_path is not None:
670
+ logger.info(f"Spiking variants from {phenopacket_path}.")
663
671
  create_spiked_vcf(
664
672
  output_dir,
665
673
  phenopacket_path,
@@ -669,6 +677,9 @@ def spike_vcfs(
669
677
  hg38_vcf_dir,
670
678
  )
671
679
  elif phenopacket_dir is not None:
680
+ logger.info(
681
+ f"Spiking variants from {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
682
+ )
672
683
  create_spiked_vcfs(
673
684
  output_dir,
674
685
  phenopacket_dir,
@@ -677,3 +688,4 @@ def spike_vcfs(
677
688
  hg19_vcf_dir,
678
689
  hg38_vcf_dir,
679
690
  )
691
+ logger.info(f"Finished spiking! Total time: {time.perf_counter() - start_time:.2f} seconds.")
@@ -1,13 +1,18 @@
1
- import logging
2
1
  import shutil
2
+ import time
3
3
  from pathlib import Path
4
4
 
5
5
  from pheval.prepare.create_spiked_vcf import create_spiked_vcf
6
6
  from pheval.prepare.update_phenopacket import create_updated_phenopacket
7
7
  from pheval.utils.file_utils import all_files
8
- from pheval.utils.phenopacket_utils import PhenopacketUtil, phenopacket_reader
8
+ from pheval.utils.logger import get_logger
9
+ from pheval.utils.phenopacket_utils import (
10
+ PhenopacketUtil,
11
+ create_gene_identifier_map,
12
+ phenopacket_reader,
13
+ )
9
14
 
10
- info_log = logging.getLogger("info")
15
+ logger = get_logger()
11
16
 
12
17
 
13
18
  def prepare_corpus(
@@ -43,39 +48,46 @@ def prepare_corpus(
43
48
  To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf,
44
49
  hg19_vcf_dir or hg38_vcf_dir is required.
45
50
  """
51
+ start_time = time.perf_counter()
52
+ logger.info(f"Preparing corpus for {phenopacket_dir}")
46
53
  output_dir.joinpath("phenopackets").mkdir(exist_ok=True, parents=True)
54
+ logger.info(f" Created output directory: {output_dir.joinpath('phenopackets')}")
55
+ identifier_map = create_gene_identifier_map()
47
56
  for phenopacket_path in all_files(phenopacket_dir):
48
57
  phenopacket_util = PhenopacketUtil(phenopacket_reader(phenopacket_path))
49
58
  if not phenopacket_util.observed_phenotypic_features():
50
- info_log.warning(
59
+ logger.warning(
51
60
  f"Removed {phenopacket_path.name} from the corpus due to no observed phenotypic features."
52
61
  )
53
62
  continue
54
63
  if variant_analysis:
55
64
  if phenopacket_util.check_incomplete_variant_record():
56
- info_log.warning(
65
+ logger.warning(
57
66
  f"Removed {phenopacket_path.name} from the corpus due to missing variant fields."
58
67
  )
59
68
  continue
60
69
  elif phenopacket_util.check_variant_alleles():
61
- info_log.warning(
70
+ logger.warning(
62
71
  f"Removed {phenopacket_path.name} from the corpus due to identical "
63
72
  "reference and alternate allele fields."
64
73
  )
65
74
  if gene_analysis:
66
75
  if phenopacket_util.check_incomplete_gene_record():
67
- info_log.warning(
76
+ logger.warning(
68
77
  f"Removed {phenopacket_path.name} from the corpus due to missing gene fields."
69
78
  )
70
79
  continue
71
80
  if disease_analysis:
72
81
  if phenopacket_util.check_incomplete_disease_record():
73
- info_log.warning(
82
+ logger.warning(
74
83
  f"Removed {phenopacket_path.name} from the corpus due to missing disease fields."
75
84
  )
76
85
  continue
86
+ logger.info(f"{phenopacket_path.name} OK!")
77
87
  if hg19_template_vcf or hg38_template_vcf:
78
88
  output_dir.joinpath("vcf").mkdir(exist_ok=True)
89
+ logger.info(f" Created output directory: {output_dir.joinpath('vcf')}")
90
+ logger.info(f"Spiking VCF for {phenopacket_path}.")
79
91
  create_spiked_vcf(
80
92
  output_dir.joinpath("vcf"),
81
93
  phenopacket_path,
@@ -85,8 +97,12 @@ def prepare_corpus(
85
97
  hg38_vcf_dir,
86
98
  )
87
99
  if gene_identifier:
100
+ logger.info(f"Updating gene identifiers to {gene_identifier} for {phenopacket_dir}")
88
101
  create_updated_phenopacket(
89
- gene_identifier, phenopacket_path, output_dir.joinpath("phenopackets")
102
+ gene_identifier,
103
+ phenopacket_path,
104
+ output_dir.joinpath("phenopackets"),
105
+ identifier_map,
90
106
  )
91
107
  else:
92
108
  # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory
@@ -97,3 +113,7 @@ def prepare_corpus(
97
113
  if phenopacket_path != output_dir.joinpath(f"phenopackets/{phenopacket_path.name}")
98
114
  else None
99
115
  )
116
+ logger.info(
117
+ f"Finished preparing corpus for {phenopacket_dir}. "
118
+ f"Total time: {time.perf_counter() - start_time:.2f} seconds."
119
+ )
@@ -1,3 +1,4 @@
1
+ import time
1
2
  from pathlib import Path
2
3
  from typing import Union
3
4
 
@@ -5,6 +6,7 @@ import polars as pl
5
6
  from phenopackets import Family, Phenopacket
6
7
 
7
8
  from pheval.utils.file_utils import all_files
9
+ from pheval.utils.logger import get_logger
8
10
  from pheval.utils.phenopacket_utils import (
9
11
  GeneIdentifierUpdater,
10
12
  PhenopacketRebuilder,
@@ -14,6 +16,8 @@ from pheval.utils.phenopacket_utils import (
14
16
  write_phenopacket,
15
17
  )
16
18
 
19
+ logger = get_logger()
20
+
17
21
 
18
22
  def update_outdated_gene_context(
19
23
  phenopacket_path: Path, gene_identifier: str, identifier_map: pl.DataFrame
@@ -43,7 +47,10 @@ def update_outdated_gene_context(
43
47
 
44
48
 
45
49
  def create_updated_phenopacket(
46
- gene_identifier: str, phenopacket_path: Path, output_dir: Path
50
+ gene_identifier: str,
51
+ phenopacket_path: Path,
52
+ output_dir: Path,
53
+ identifier_map: pl.DataFrame = None,
47
54
  ) -> None:
48
55
  """
49
56
  Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket.
@@ -52,12 +59,13 @@ def create_updated_phenopacket(
52
59
  gene_identifier (str): Identifier used to update the gene context.
53
60
  phenopacket_path (Path): The path to the input Phenopacket file.
54
61
  output_dir (Path): The directory where the updated Phenopacket will be written.
62
+ identifier_map (pl.DataFrame): The gene identifier map used for updating.
55
63
  Notes:
56
64
  The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
57
65
  to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
58
66
  to describe the gene identifiers.
59
67
  """
60
- identifier_map = create_gene_identifier_map()
68
+ identifier_map = create_gene_identifier_map() if identifier_map is None else identifier_map
61
69
  updated_phenopacket = update_outdated_gene_context(
62
70
  phenopacket_path, gene_identifier, identifier_map
63
71
  )
@@ -82,6 +90,7 @@ def create_updated_phenopackets(
82
90
  """
83
91
  identifier_map = create_gene_identifier_map()
84
92
  for phenopacket_path in all_files(phenopacket_dir):
93
+ logger.info(f"Updating gene context for: {phenopacket_path.name}")
85
94
  updated_phenopacket = update_outdated_gene_context(
86
95
  phenopacket_path, gene_identifier, identifier_map
87
96
  )
@@ -104,8 +113,17 @@ def update_phenopackets(
104
113
  to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
105
114
  to describe the gene identifiers.
106
115
  """
116
+ start_time = time.perf_counter()
117
+ logger.info("Updating phenopackets.")
107
118
  output_dir.mkdir(exist_ok=True)
119
+ logger.info(f"Created directory {output_dir}.")
120
+ logger.info(f"Gene identifier set to: {gene_identifier}.")
108
121
  if phenopacket_path is not None:
122
+ logger.info(f"Updating {phenopacket_path}.")
109
123
  create_updated_phenopacket(gene_identifier, phenopacket_path, output_dir)
110
124
  elif phenopacket_dir is not None:
125
+ logger.info(
126
+ f"Updating {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
127
+ )
111
128
  create_updated_phenopackets(gene_identifier, phenopacket_dir, output_dir)
129
+ logger.info(f"Updating finished! Total time: {time.perf_counter() - start_time:.2f} seconds.")
@@ -7,6 +7,9 @@ from pathlib import Path
7
7
 
8
8
  from pheval.config_parser import parse_input_dir_config
9
9
  from pheval.run_metadata import BasicOutputRunMetaData
10
+ from pheval.utils.logger import get_logger
11
+
12
+ logger = get_logger()
10
13
 
11
14
 
12
15
  @dataclass
@@ -86,6 +89,10 @@ class PhEvalRunner(ABC):
86
89
 
87
90
  def build_output_directory_structure(self):
88
91
  """build output directory structure"""
92
+ logger.info(
93
+ f"Building output directory structure for {self.input_dir_config.tool} "
94
+ f"version {self.input_dir_config.tool_version}"
95
+ )
89
96
  self.tool_input_commands_dir.mkdir(exist_ok=True)
90
97
  self.raw_results_dir.mkdir(exist_ok=True)
91
98
  if self._get_variant_analysis():
@@ -1,5 +1,4 @@
1
1
  import json
2
- import logging
3
2
  import os
4
3
  from copy import copy
5
4
  from dataclasses import dataclass
@@ -19,8 +18,9 @@ from phenopackets import (
19
18
  )
20
19
 
21
20
  from pheval.prepare.custom_exceptions import IncorrectFileFormatError
21
+ from pheval.utils.logger import get_logger
22
22
 
23
- info_log = logging.getLogger("info")
23
+ logger = get_logger()
24
24
 
25
25
 
26
26
  class IncompatibleGenomeAssemblyError(Exception):
@@ -161,6 +161,7 @@ def create_gene_identifier_map() -> pl.DataFrame:
161
161
  Returns:
162
162
  pl.DataFrame: A mapping of gene identifiers to gene symbols.
163
163
  """
164
+ logger.info("Creating gene identifier map.")
164
165
  hgnc_df = parse_hgnc_data()
165
166
  return hgnc_df.melt(
166
167
  id_vars=["gene_symbol", "prev_symbols"],
@@ -192,6 +193,7 @@ def phenopacket_reader(file: Path) -> Union[Phenopacket, Family]:
192
193
  Returns:
193
194
  Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object
194
195
  """
196
+ logger.info(f"Parsing Phenopacket: {file.name}")
195
197
  file = open(file, "r")
196
198
  phenopacket = json.load(file)
197
199
  file.close()
@@ -593,6 +595,7 @@ class PhenopacketRebuilder:
593
595
  Returns:
594
596
  - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path.
595
597
  """
598
+ logger.info(f"Adding spiked VCF path {spiked_vcf_file_data.uri} to phenopacket.")
596
599
  phenopacket = copy(self.phenopacket)
597
600
  phenopacket_files = [
598
601
  file for file in phenopacket.files if file.file_attributes["fileFormat"] != "vcf"
@@ -627,6 +630,7 @@ def write_phenopacket(phenopacket: Union[Phenopacket, Family], output_file: Path
627
630
  Returns:
628
631
  None
629
632
  """
633
+ logger.info(f"Writing Phenopacket to {output_file}.")
630
634
  phenopacket_json = create_json_message(phenopacket)
631
635
  with open(output_file, "w") as outfile:
632
636
  outfile.write(phenopacket_json)
@@ -675,6 +679,7 @@ class GeneIdentifierUpdater:
675
679
  )
676
680
  if prev_symbol_matches.height > 0:
677
681
  return prev_symbol_matches["identifier"][0]
682
+ logger.warn(f"Could not find {self.gene_identifier} for {gene_symbol}.")
678
683
  return None
679
684
 
680
685
  def obtain_gene_symbol_from_identifier(self, query_gene_identifier: str) -> str:
@@ -735,10 +740,10 @@ class GeneIdentifierUpdater:
735
740
  updated_gene_identifier = self.find_identifier(
736
741
  g.variant_interpretation.variation_descriptor.gene_context.symbol
737
742
  )
738
- info_log.info(
739
- f"Updating gene identifier in {phenopacket_path} from "
743
+ logger.info(
744
+ f"Updating gene identifier in {phenopacket_path.name} from "
740
745
  f"{g.variant_interpretation.variation_descriptor.gene_context.value_id}"
741
- f"to {updated_gene_identifier}"
746
+ f" to {updated_gene_identifier}"
742
747
  )
743
748
  g.variant_interpretation.variation_descriptor.gene_context.value_id = (
744
749
  updated_gene_identifier
File without changes
File without changes
File without changes
File without changes
File without changes