pheval 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic. Click here for more details.

@@ -1,5 +1,4 @@
1
1
  from pathlib import Path
2
- from typing import List
3
2
 
4
3
  import polars as pl
5
4
 
@@ -56,7 +55,7 @@ class PhenopacketTruthSet:
56
55
  phenopacket = phenopacket_reader(phenopacket_path)
57
56
  return PhenopacketUtil(phenopacket)
58
57
 
59
- def _get_causative_genes(self, phenopacket_name: str) -> List[ProbandCausativeGene]:
58
+ def _get_causative_genes(self, phenopacket_name: str) -> list[ProbandCausativeGene]:
60
59
  """
61
60
  Get the causative genes for a given phenopacket.
62
61
  Args:
@@ -67,7 +66,7 @@ class PhenopacketTruthSet:
67
66
  phenopacket_util = self._get_phenopacket_util(phenopacket_name)
68
67
  return phenopacket_util.diagnosed_genes()
69
68
 
70
- def _get_causative_variants(self, phenopacket_name: str) -> List[GenomicVariant]:
69
+ def _get_causative_variants(self, phenopacket_name: str) -> list[GenomicVariant]:
71
70
  """
72
71
  Get the causative variants for a given phenopacket.
73
72
  Args:
@@ -78,7 +77,7 @@ class PhenopacketTruthSet:
78
77
  phenopacket_util = self._get_phenopacket_util(phenopacket_name)
79
78
  return phenopacket_util.diagnosed_variants()
80
79
 
81
- def _get_causative_diseases(self, phenopacket_name: str) -> List[ProbandDisease]:
80
+ def _get_causative_diseases(self, phenopacket_name: str) -> list[ProbandDisease]:
82
81
  """
83
82
  Get the diseases for a given phenopacket.
84
83
  Args:
@@ -133,11 +132,7 @@ class PhenopacketTruthSet:
133
132
  )
134
133
  .with_columns(pl.col("rank").cast(pl.Int64))
135
134
  .select(classified_results.columns)
136
- .vstack(
137
- classified_results.filter(
138
- ~pl.col("gene_symbol").is_in(ranked_results["gene_symbol"])
139
- )
140
- )
135
+ .vstack(classified_results.filter(~pl.col("gene_symbol").is_in(ranked_results["gene_symbol"])))
141
136
  )
142
137
 
143
138
  def classified_variant(self, result_name: str) -> pl.DataFrame:
@@ -181,11 +176,7 @@ class PhenopacketTruthSet:
181
176
  ranked_results.with_columns(
182
177
  [
183
178
  pl.struct(["chrom", "start", "end", "ref", "alt"])
184
- .is_in(
185
- classified_results.select(
186
- pl.struct(["chrom", "start", "end", "ref", "alt"])
187
- ).to_series()
188
- )
179
+ .is_in(classified_results.select(pl.struct(["chrom", "start", "end", "ref", "alt"])).to_series())
189
180
  .alias("true_positive")
190
181
  ]
191
182
  )
@@ -194,17 +185,13 @@ class PhenopacketTruthSet:
194
185
  .vstack(
195
186
  classified_results.filter(
196
187
  ~pl.struct(["chrom", "start", "end", "ref", "alt"]).is_in(
197
- ranked_results.select(
198
- pl.struct(["chrom", "start", "end", "ref", "alt"])
199
- ).to_series()
188
+ ranked_results.select(pl.struct(["chrom", "start", "end", "ref", "alt"])).to_series()
200
189
  )
201
190
  )
202
191
  )
203
192
  )
204
193
 
205
- def classified_disease(
206
- self, result_name: str, mondo_mapping_table: pl.DataFrame
207
- ) -> pl.DataFrame:
194
+ def classified_disease(self, result_name: str, mondo_mapping_table: pl.DataFrame) -> pl.DataFrame:
208
195
  """
209
196
  Classify disease results for a given phenopacket.
210
197
  Args:
@@ -225,9 +212,7 @@ class PhenopacketTruthSet:
225
212
  pl.lit(0).cast(pl.Int64).alias("rank"),
226
213
  pl.lit(True).alias("true_positive"),
227
214
  pl.col("disease_identifier")
228
- .map_elements(
229
- lambda x: map_disease_id(x, mondo_mapping_table), return_dtype=pl.Utf8
230
- )
215
+ .map_elements(lambda x: map_disease_id(x, mondo_mapping_table), return_dtype=pl.Utf8)
231
216
  .alias("mondo_identifier"),
232
217
  ]
233
218
  )
@@ -260,15 +245,9 @@ class PhenopacketTruthSet:
260
245
  )
261
246
  return (
262
247
  ranked_results.with_columns(
263
- (
264
- pl.col("disease_identifier").is_in(classified_results["disease_identifier"])
265
- ).alias("true_positive")
248
+ (pl.col("mondo_identifier").is_in(classified_results["mondo_identifier"])).alias("true_positive")
266
249
  )
267
250
  .with_columns(pl.col("rank").cast(pl.Int64))
268
251
  .select(classified_results.columns)
269
- .vstack(
270
- classified_results.filter(
271
- ~pl.col("disease_identifier").is_in(ranked_results["disease_identifier"])
272
- )
273
- )
252
+ .vstack(classified_results.filter(~pl.col("mondo_identifier").is_in(ranked_results["mondo_identifier"])))
274
253
  )
@@ -1,6 +1,6 @@
1
+ from collections.abc import Callable
1
2
  from enum import Enum
2
3
  from pathlib import Path
3
- from typing import Callable, Tuple
4
4
 
5
5
  import polars as pl
6
6
 
@@ -57,7 +57,7 @@ def _rank_results(results: pl.DataFrame, sort_order: SortOrder) -> pl.DataFrame:
57
57
  results = (
58
58
  results.sort("score", descending=sort_descending)
59
59
  .with_columns(
60
- pl.struct(["score"] + group_by)
60
+ pl.struct(["score"] + group_by) # noqa
61
61
  .rank(method="dense", descending=sort_descending)
62
62
  .cast(pl.Int32)
63
63
  .alias("min_rank")
@@ -89,9 +89,7 @@ def _write_gene_result(ranked_results: pl.DataFrame, output_file: Path) -> None:
89
89
  ranked_results ([PhEvalResult]): List of ranked PhEval gene results.
90
90
  output_file (Path): Path to the output file.
91
91
  """
92
- gene_output = ranked_results.select(
93
- ["rank", "score", "gene_symbol", "gene_identifier", "true_positive"]
94
- )
92
+ gene_output = ranked_results.select(["rank", "score", "gene_symbol", "gene_identifier", "true_positive"])
95
93
  _write_results_file(output_file, gene_output)
96
94
 
97
95
 
@@ -127,15 +125,11 @@ def _write_disease_result(ranked_results: pl.DataFrame, output_file: Path) -> No
127
125
  ranked_results ([PhEvalResult]): List of ranked PhEval disease results.
128
126
  output_file (Path): Path to the output file.
129
127
  """
130
- disease_output = ranked_results.select(
131
- ["rank", "score", "disease_identifier", "mondo_identifier", "true_positive"]
132
- )
128
+ disease_output = ranked_results.select(["rank", "score", "disease_identifier", "mondo_identifier", "true_positive"])
133
129
  _write_results_file(output_file, disease_output)
134
130
 
135
131
 
136
- def _get_result_type(
137
- result_type: ResultType, phenopacket_truth_set: PhenopacketTruthSet
138
- ) -> Tuple[Callable, Callable]:
132
+ def _get_result_type(result_type: ResultType, phenopacket_truth_set: PhenopacketTruthSet) -> tuple[Callable, Callable]:
139
133
  """
140
134
  Get the methods for extracting the entity and writing the result for a given result type.
141
135
  Args:
@@ -156,9 +150,7 @@ def _get_result_type(
156
150
  )
157
151
 
158
152
 
159
- def create_empty_pheval_result(
160
- phenopacket_dir: Path, output_dir: Path, result_type: ResultType
161
- ) -> None:
153
+ def create_empty_pheval_result(phenopacket_dir: Path, output_dir: Path, result_type: ResultType) -> None:
162
154
  """
163
155
  Create an empty PhEval result for a given result type (gene, variant, or disease).
164
156
 
@@ -176,10 +168,7 @@ def create_empty_pheval_result(
176
168
  """
177
169
  if result_type in executed_results:
178
170
  return
179
- logger.info(
180
- f"Writing classified results for {len(all_files(phenopacket_dir))} "
181
- f"phenopackets to {output_dir}"
182
- )
171
+ logger.info(f"Writing classified results for {len(all_files(phenopacket_dir))} phenopackets to {output_dir}")
183
172
  executed_results.add(result_type)
184
173
  phenopacket_truth_set = PhenopacketTruthSet(phenopacket_dir)
185
174
  classify_method, write_method = _get_result_type(result_type, phenopacket_truth_set)
@@ -209,13 +198,9 @@ def generate_gene_result(
209
198
  phenopacket_dir (Path): Path to the Phenopacket directory
210
199
  """
211
200
  output_file = output_dir.joinpath(f"pheval_gene_results/{result_path.stem}-gene_result.parquet")
212
- create_empty_pheval_result(
213
- phenopacket_dir, output_dir.joinpath("pheval_gene_results"), ResultType.GENE
214
- )
201
+ create_empty_pheval_result(phenopacket_dir, output_dir.joinpath("pheval_gene_results"), ResultType.GENE)
215
202
  ranked_results = _rank_results(results, sort_order)
216
- classified_results = PhenopacketTruthSet(phenopacket_dir).merge_gene_results(
217
- ranked_results, output_file
218
- )
203
+ classified_results = PhenopacketTruthSet(phenopacket_dir).merge_gene_results(ranked_results, output_file)
219
204
  _write_gene_result(classified_results, output_file)
220
205
 
221
206
 
@@ -236,9 +221,7 @@ def generate_variant_result(
236
221
  result_path (Path): Path to the tool-specific result file.
237
222
  phenopacket_dir (Path): Path to the Phenopacket directory
238
223
  """
239
- output_file = output_dir.joinpath(
240
- f"pheval_variant_results/{result_path.stem}-variant_result.parquet"
241
- )
224
+ output_file = output_dir.joinpath(f"pheval_variant_results/{result_path.stem}-variant_result.parquet")
242
225
  create_empty_pheval_result(
243
226
  phenopacket_dir,
244
227
  output_dir.joinpath("pheval_variant_results"),
@@ -247,9 +230,7 @@ def generate_variant_result(
247
230
  ranked_results = _rank_results(results, sort_order).with_columns(
248
231
  pl.concat_str(["chrom", "start", "ref", "alt"], separator="-").alias("variant_id")
249
232
  )
250
- classified_results = PhenopacketTruthSet(phenopacket_dir).merge_variant_results(
251
- ranked_results, output_file
252
- )
233
+ classified_results = PhenopacketTruthSet(phenopacket_dir).merge_variant_results(ranked_results, output_file)
253
234
  _write_variant_result(classified_results, output_file)
254
235
 
255
236
 
@@ -270,9 +251,7 @@ def generate_disease_result(
270
251
  result_path (Path): Path to the tool-specific result file.
271
252
  phenopacket_dir (Path): Path to the Phenopacket directory
272
253
  """
273
- output_file = output_dir.joinpath(
274
- f"pheval_disease_results/{result_path.stem}-disease_result.parquet"
275
- )
254
+ output_file = output_dir.joinpath(f"pheval_disease_results/{result_path.stem}-disease_result.parquet")
276
255
  create_empty_pheval_result(
277
256
  phenopacket_dir,
278
257
  output_dir.joinpath("pheval_disease_results"),
@@ -1,6 +1,6 @@
1
+ from collections.abc import Callable
1
2
  from enum import Enum
2
3
  from functools import wraps
3
- from typing import Callable
4
4
 
5
5
  import polars as pl
6
6
 
@@ -63,9 +63,7 @@ class ResultSchema(Enum):
63
63
  raise ValueError(f"Missing required column: {col_name}")
64
64
 
65
65
  if results.schema[col_name] != expected_type:
66
- raise TypeError(
67
- f"Column '{col_name}' has type {results.schema[col_name]}, expected {expected_type}"
68
- )
66
+ raise TypeError(f"Column '{col_name}' has type {results.schema[col_name]}, expected {expected_type}")
69
67
 
70
68
  return True
71
69
 
@@ -1,7 +1,6 @@
1
1
  import random
2
2
  import time
3
3
  from pathlib import Path
4
- from typing import List, Union
5
4
 
6
5
  from oaklib.implementations.pronto.pronto_implementation import ProntoImplementation
7
6
  from oaklib.resource import OntologyResource
@@ -19,7 +18,7 @@ from pheval.utils.phenopacket_utils import (
19
18
  logger = get_logger()
20
19
 
21
20
 
22
- def load_ontology(local_cached_ontology: Path = None) -> ProntoImplementation:
21
+ def load_ontology(local_cached_ontology: Path | None = None) -> ProntoImplementation:
23
22
  """
24
23
  Load the Human Phenotype Ontology (HPO).
25
24
  Args:
@@ -78,14 +77,14 @@ class HpoRandomiser:
78
77
  PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term.
79
78
  """
80
79
  rels = self.hpo_ontology.entity_alias_map(hpo_id)
81
- hpo_term = "".join(rels[(list(rels.keys())[0])])
80
+ hpo_term = "".join(rels[next(iter(rels))])
82
81
  return PhenotypicFeature(type=OntologyClass(id=hpo_id, label=hpo_term))
83
82
 
84
83
  @staticmethod
85
84
  def retain_real_patient_terms(
86
- phenotypic_features: List[PhenotypicFeature],
85
+ phenotypic_features: list[PhenotypicFeature],
87
86
  number_of_scrambled_terms: int,
88
- ) -> List[PhenotypicFeature]:
87
+ ) -> list[PhenotypicFeature]:
89
88
  """
90
89
  Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms.
91
90
 
@@ -104,10 +103,10 @@ class HpoRandomiser:
104
103
 
105
104
  def convert_patient_terms_to_parent(
106
105
  self,
107
- phenotypic_features: List[PhenotypicFeature],
108
- retained_phenotypic_features: List[PhenotypicFeature],
106
+ phenotypic_features: list[PhenotypicFeature],
107
+ retained_phenotypic_features: list[PhenotypicFeature],
109
108
  number_of_scrambled_terms: int,
110
- ) -> List[PhenotypicFeature]:
109
+ ) -> list[PhenotypicFeature]:
111
110
  """
112
111
  Convert a subset of patient HPO terms to their respective parent terms.
113
112
 
@@ -133,7 +132,7 @@ class HpoRandomiser:
133
132
  for term in hpo_terms_to_be_changed:
134
133
  if self.hpo_ontology.label(term.type.id).startswith("obsolete"):
135
134
  obsolete_term = self.hpo_ontology.entity_metadata_map(term.type.id)
136
- updated_term = list(obsolete_term.values())[0][0]
135
+ updated_term = next(iter(obsolete_term.values()))[0]
137
136
  parents = self.hpo_ontology.hierarchical_parents(updated_term)
138
137
  else:
139
138
  parents = self.hpo_ontology.hierarchical_parents(term.type.id)
@@ -143,7 +142,7 @@ class HpoRandomiser:
143
142
  parent_terms.append(self.retrieve_hpo_term(random.choice(parents)))
144
143
  return parent_terms
145
144
 
146
- def create_random_hpo_terms(self, number_of_scrambled_terms: int) -> List[PhenotypicFeature]:
145
+ def create_random_hpo_terms(self, number_of_scrambled_terms: int) -> list[PhenotypicFeature]:
147
146
  """
148
147
  Generate a list of random HPO terms.
149
148
 
@@ -153,15 +152,13 @@ class HpoRandomiser:
153
152
  Returns:
154
153
  List[PhenotypicFeature]: A list of randomly selected HPO terms.
155
154
  """
156
- random_ids = list(
157
- random.sample(sorted(self.phenotypic_abnormalities), number_of_scrambled_terms)
158
- )
155
+ random_ids = list(random.sample(sorted(self.phenotypic_abnormalities), number_of_scrambled_terms))
159
156
  return [self.retrieve_hpo_term(random_id) for random_id in random_ids]
160
157
 
161
158
  def randomise_hpo_terms(
162
159
  self,
163
- phenotypic_features: List[PhenotypicFeature],
164
- ) -> List[PhenotypicFeature]:
160
+ phenotypic_features: list[PhenotypicFeature],
161
+ ) -> list[PhenotypicFeature]:
165
162
  """
166
163
  Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms.
167
164
 
@@ -181,9 +178,7 @@ class HpoRandomiser:
181
178
  of randomised HPO terms to be used in the phenotypic features.
182
179
  """
183
180
  number_of_scrambled_terms = self.scramble_factor_proportions(phenotypic_features)
184
- retained_patient_terms = self.retain_real_patient_terms(
185
- phenotypic_features, number_of_scrambled_terms
186
- )
181
+ retained_patient_terms = self.retain_real_patient_terms(phenotypic_features, number_of_scrambled_terms)
187
182
  return (
188
183
  retained_patient_terms
189
184
  + self.convert_patient_terms_to_parent(
@@ -194,8 +189,8 @@ class HpoRandomiser:
194
189
 
195
190
  def add_noise_to_phenotypic_profile(
196
191
  self,
197
- phenopacket: Union[Phenopacket, Family],
198
- ) -> Union[Phenopacket, Family]:
192
+ phenopacket: Phenopacket | Family,
193
+ ) -> Phenopacket | Family:
199
194
  """
200
195
  Randomise the phenotypic profile of a Phenopacket or Family.
201
196
 
@@ -207,9 +202,7 @@ class HpoRandomiser:
207
202
  """
208
203
  phenotypic_features = PhenopacketUtil(phenopacket).observed_phenotypic_features()
209
204
  random_phenotypes = self.randomise_hpo_terms(phenotypic_features)
210
- randomised_phenopacket = PhenopacketRebuilder(phenopacket).add_randomised_hpo(
211
- random_phenotypes
212
- )
205
+ randomised_phenopacket = PhenopacketRebuilder(phenopacket).add_randomised_hpo(random_phenotypes)
213
206
  return randomised_phenopacket
214
207
 
215
208
  def create_scrambled_phenopacket(
@@ -283,13 +276,9 @@ def scramble_phenopackets(
283
276
  ontology = load_ontology(local_cached_ontology)
284
277
  if phenopacket_path is not None:
285
278
  logger.info(f"Scrambling {phenopacket_path}.")
286
- HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopacket(
287
- output_dir, phenopacket_path
288
- )
279
+ HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopacket(output_dir, phenopacket_path)
289
280
  elif phenopacket_dir is not None:
290
- logger.info(
291
- f"Scrambling {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
292
- )
281
+ logger.info(f"Scrambling {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}.")
293
282
  HpoRandomiser(ontology, scramble_factor).create_scrambled_phenopackets(
294
283
  output_dir,
295
284
  phenopacket_dir,
@@ -6,7 +6,6 @@ import urllib.parse
6
6
  from copy import copy
7
7
  from dataclasses import dataclass
8
8
  from pathlib import Path
9
- from typing import List, Union
10
9
 
11
10
  from phenopackets import Family, File, Phenopacket
12
11
 
@@ -90,7 +89,7 @@ class VcfHeader:
90
89
  chr_status: bool
91
90
 
92
91
 
93
- def read_vcf(vcf_file: Path) -> List[str]:
92
+ def read_vcf(vcf_file: Path) -> list[str]:
94
93
  """
95
94
  Read the contents of a VCF file into memory, handling both uncompressed and gzipped files.
96
95
 
@@ -102,9 +101,7 @@ def read_vcf(vcf_file: Path) -> List[str]:
102
101
  """
103
102
  open_fn = gzip.open if is_gzipped(vcf_file) else open
104
103
  vcf = open_fn(vcf_file)
105
- vcf_contents = (
106
- [line.decode() for line in vcf.readlines()] if is_gzipped(vcf_file) else vcf.readlines()
107
- )
104
+ vcf_contents = [line.decode() for line in vcf.readlines()] if is_gzipped(vcf_file) else vcf.readlines()
108
105
  vcf.close()
109
106
  return vcf_contents
110
107
 
@@ -133,20 +130,14 @@ class VcfHeaderParser:
133
130
  for line in self.vcf_contents:
134
131
  if line.startswith("##contig=<ID"):
135
132
  tokens = line.split(",")
136
- chromosome = re.sub(
137
- r"^.*?ID=", "", [token for token in tokens if "ID=" in token][0]
138
- )
133
+ chromosome = re.sub(r"^.*?ID=", "", next(token for token in tokens if "ID=" in token))
139
134
  if "chr" in chromosome:
140
135
  chr_status = True
141
136
  chromosome = chromosome.replace("chr", "")
142
- contig_length = re.sub(
143
- "[^0-9]+",
144
- "",
145
- [token for token in tokens if "length=" in token][0],
146
- )
137
+ contig_length = re.sub("[^0-9]+", "", next(token for token in tokens if "length=" in token))
147
138
  vcf_assembly[chromosome] = int(contig_length)
148
139
  vcf_assembly = {i: vcf_assembly[i] for i in vcf_assembly if i.isdigit()}
149
- assembly = [k for k, v in genome_assemblies.items() if v == vcf_assembly][0]
140
+ assembly = next(k for k, v in genome_assemblies.items() if v == vcf_assembly)
150
141
  return assembly, chr_status
151
142
 
152
143
  def parse_sample_id(self) -> str:
@@ -184,7 +175,7 @@ class VcfFile:
184
175
  """
185
176
 
186
177
  vcf_file_name: str = None
187
- vcf_contents: List[str] = None
178
+ vcf_contents: list[str] = None
188
179
  vcf_header: VcfHeader = None
189
180
 
190
181
  @staticmethod
@@ -205,7 +196,7 @@ class VcfFile:
205
196
 
206
197
  def select_vcf_template(
207
198
  phenopacket_path: Path,
208
- proband_causative_variants: List[ProbandCausativeVariant],
199
+ proband_causative_variants: list[ProbandCausativeVariant],
209
200
  hg19_vcf_info: VcfFile,
210
201
  hg38_vcf_info: VcfFile,
211
202
  hg19_vcf_dir: Path,
@@ -241,9 +232,7 @@ def select_vcf_template(
241
232
  else:
242
233
  raise InputError("Must specify hg38 template VCF!")
243
234
  else:
244
- raise IncompatibleGenomeAssemblyError(
245
- proband_causative_variants[0].assembly, phenopacket_path
246
- )
235
+ raise IncompatibleGenomeAssemblyError(proband_causative_variants[0].assembly, phenopacket_path)
247
236
 
248
237
 
249
238
  def check_variant_assembly(
@@ -269,16 +258,10 @@ def check_variant_assembly(
269
258
  raise ValueError("Too many genome assemblies!")
270
259
  if phenopacket_assembly[0] not in compatible_genome_assembly:
271
260
  raise IncompatibleGenomeAssemblyError(phenopacket_assembly, phenopacket_path)
272
- if (
273
- phenopacket_assembly[0] in {"hg19", "GRCh37"}
274
- and vcf_header.assembly not in {"hg19", "GRCh37"}
275
- ) or (
276
- phenopacket_assembly[0] in {"hg38", "GRCh38"}
277
- and vcf_header.assembly not in {"hg38", "GRCh38"}
261
+ if (phenopacket_assembly[0] in {"hg19", "GRCh37"} and vcf_header.assembly not in {"hg19", "GRCh37"}) or (
262
+ phenopacket_assembly[0] in {"hg38", "GRCh38"} and vcf_header.assembly not in {"hg38", "GRCh38"}
278
263
  ):
279
- raise IncompatibleGenomeAssemblyError(
280
- assembly=phenopacket_assembly, phenopacket=phenopacket_path
281
- )
264
+ raise IncompatibleGenomeAssemblyError(assembly=phenopacket_assembly, phenopacket=phenopacket_path)
282
265
 
283
266
 
284
267
  class VcfSpiker:
@@ -302,7 +285,7 @@ class VcfSpiker:
302
285
  self.proband_causative_variants = proband_causative_variants
303
286
  self.vcf_header = vcf_header
304
287
 
305
- def construct_variant_entry(self, proband_variant_data: ProbandCausativeVariant) -> List[str]:
288
+ def construct_variant_entry(self, proband_variant_data: ProbandCausativeVariant) -> list[str]:
306
289
  """
307
290
  Construct variant entries.
308
291
 
@@ -337,7 +320,7 @@ class VcfSpiker:
337
320
  genotype_codes[proband_variant_data.genotype.lower()] + "\n",
338
321
  ]
339
322
 
340
- def construct_vcf_records(self, template_vcf_name: str) -> List[str]:
323
+ def construct_vcf_records(self, template_vcf_name: str) -> list[str]:
341
324
  """
342
325
  Construct updated VCF records by inserting spiked variants into the correct positions within the VCF.
343
326
 
@@ -353,8 +336,7 @@ class VcfSpiker:
353
336
  matching_indices = [
354
337
  i
355
338
  for i, val in enumerate(updated_vcf_records)
356
- if val.split("\t")[0] == variant_entry[0]
357
- and int(val.split("\t")[1]) < int(variant_entry[1])
339
+ if val.split("\t")[0] == variant_entry[0] and int(val.split("\t")[1]) < int(variant_entry[1])
358
340
  ]
359
341
  if matching_indices:
360
342
  logger.info(
@@ -372,7 +354,7 @@ class VcfSpiker:
372
354
  updated_vcf_records.insert(variant_entry_position, "\t".join(variant_entry))
373
355
  return updated_vcf_records
374
356
 
375
- def construct_header(self, updated_vcf_records: List[str]) -> List[str]:
357
+ def construct_header(self, updated_vcf_records: list[str]) -> list[str]:
376
358
  """
377
359
  Construct the header of the VCF.
378
360
 
@@ -394,7 +376,7 @@ class VcfSpiker:
394
376
  updated_vcf_file.append(text)
395
377
  return updated_vcf_file
396
378
 
397
- def construct_vcf(self, template_vcf_name: str) -> List[str]:
379
+ def construct_vcf(self, template_vcf_name: str) -> list[str]:
398
380
  """
399
381
  Construct the entire spiked VCF file by incorporating the spiked variants into the VCF.
400
382
 
@@ -412,7 +394,7 @@ class VcfWriter:
412
394
 
413
395
  def __init__(
414
396
  self,
415
- vcf_contents: List[str],
397
+ vcf_contents: list[str],
416
398
  spiked_vcf_file_path: Path,
417
399
  ):
418
400
  """
@@ -454,13 +436,13 @@ class VcfWriter:
454
436
 
455
437
 
456
438
  def spike_vcf_contents(
457
- phenopacket: Union[Phenopacket, Family],
439
+ phenopacket: Phenopacket | Family,
458
440
  phenopacket_path: Path,
459
441
  hg19_vcf_info: VcfFile,
460
442
  hg38_vcf_info: VcfFile,
461
443
  hg19_vcf_dir: Path,
462
444
  hg38_vcf_dir: Path,
463
- ) -> tuple[str, List[str]]:
445
+ ) -> tuple[str, list[str]]:
464
446
  """
465
447
  Spike VCF records with variants obtained from a Phenopacket or Family.
466
448
 
@@ -486,9 +468,7 @@ def spike_vcf_contents(
486
468
  hg19_vcf_dir,
487
469
  hg38_vcf_dir,
488
470
  )
489
- check_variant_assembly(
490
- phenopacket_causative_variants, chosen_template_vcf.vcf_header, phenopacket_path
491
- )
471
+ check_variant_assembly(phenopacket_causative_variants, chosen_template_vcf.vcf_header, phenopacket_path)
492
472
  return (
493
473
  chosen_template_vcf.vcf_header.assembly,
494
474
  VcfSpiker(
@@ -501,7 +481,7 @@ def spike_vcf_contents(
501
481
 
502
482
  def generate_spiked_vcf_file(
503
483
  output_dir: Path,
504
- phenopacket: Union[Phenopacket, Family],
484
+ phenopacket: Phenopacket | Family,
505
485
  phenopacket_path: Path,
506
486
  hg19_vcf_info: VcfFile,
507
487
  hg38_vcf_info: VcfFile,
@@ -566,9 +546,7 @@ def spike_and_update_phenopacket(
566
546
  hg19_vcf_dir,
567
547
  hg38_vcf_dir,
568
548
  )
569
- updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(
570
- spiked_vcf_file_message
571
- )
549
+ updated_phenopacket = PhenopacketRebuilder(phenopacket).add_spiked_vcf_path(spiked_vcf_file_message)
572
550
  write_phenopacket(updated_phenopacket, phenopacket_path)
573
551
 
574
552
 
@@ -598,9 +576,7 @@ def create_spiked_vcf(
598
576
  raise InputError("Either a hg19 template vcf or hg38 template vcf must be specified")
599
577
  hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
600
578
  hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
601
- spike_and_update_phenopacket(
602
- hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir, output_dir, phenopacket_path
603
- )
579
+ spike_and_update_phenopacket(hg19_vcf_info, hg38_vcf_info, hg19_vcf_dir, hg38_vcf_dir, output_dir, phenopacket_path)
604
580
 
605
581
 
606
582
  def create_spiked_vcfs(
@@ -625,12 +601,7 @@ def create_spiked_vcfs(
625
601
  Raises:
626
602
  InputError: If both hg19_template_vcf and hg38_template_vcf are None.
627
603
  """
628
- if (
629
- hg19_template_vcf is None
630
- and hg38_template_vcf is None
631
- and hg19_vcf_dir is None
632
- and hg38_vcf_dir is None
633
- ):
604
+ if hg19_template_vcf is None and hg38_template_vcf is None and hg19_vcf_dir is None and hg38_vcf_dir is None:
634
605
  raise InputError("Need to specify a VCF!")
635
606
  hg19_vcf_info = VcfFile.populate_fields(hg19_template_vcf) if hg19_template_vcf else None
636
607
  hg38_vcf_info = VcfFile.populate_fields(hg38_template_vcf) if hg38_template_vcf else None
@@ -677,9 +648,7 @@ def spike_vcfs(
677
648
  hg38_vcf_dir,
678
649
  )
679
650
  elif phenopacket_dir is not None:
680
- logger.info(
681
- f"Spiking variants from {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}."
682
- )
651
+ logger.info(f"Spiking variants from {len(all_files(phenopacket_dir))} phenopackets in {phenopacket_dir}.")
683
652
  create_spiked_vcfs(
684
653
  output_dir,
685
654
  phenopacket_dir,
@@ -21,19 +21,18 @@ class MutuallyExclusiveOptionError(Option):
21
21
  help_ = kwargs.get("help", "")
22
22
  if self.mutually_exclusive:
23
23
  ex_str = ", ".join(self.mutually_exclusive)
24
- kwargs["help"] = help_ + (
25
- " NOTE: This argument is mutually exclusive with " " arguments: [" + ex_str + "]."
26
- )
27
- super(MutuallyExclusiveOptionError, self).__init__(*args, **kwargs)
24
+ kwargs["help"] = help_ + (" NOTE: This argument is mutually exclusive with arguments: [" + ex_str + "].")
25
+ super().__init__(*args, **kwargs)
28
26
 
29
27
  def handle_parse_result(self, ctx, opts, args):
30
28
  if self.mutually_exclusive.intersection(opts) and self.name in opts:
31
29
  raise UsageError(
32
- "Illegal usage: `{}` is mutually exclusive with "
33
- "arguments `{}`.".format(self.name, ", ".join(self.mutually_exclusive))
30
+ "Illegal usage: `{}` is mutually exclusive with arguments `{}`.".format(
31
+ self.name, ", ".join(self.mutually_exclusive)
32
+ )
34
33
  )
35
34
 
36
- return super(MutuallyExclusiveOptionError, self).handle_parse_result(ctx, opts, args)
35
+ return super().handle_parse_result(ctx, opts, args)
37
36
 
38
37
 
39
38
  class IncorrectFileFormatError(Exception):