pheval 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pheval might be problematic.

Files changed (42)
  1. pheval/__init__.py +0 -5
  2. pheval/analyse/__init__.py +0 -0
  3. pheval/analyse/analysis.py +703 -0
  4. pheval/analyse/generate_plots.py +312 -0
  5. pheval/analyse/generate_summary_outputs.py +186 -0
  6. pheval/analyse/rank_stats.py +61 -0
  7. pheval/cli.py +22 -7
  8. pheval/cli_pheval.py +37 -12
  9. pheval/cli_pheval_utils.py +225 -8
  10. pheval/config_parser.py +36 -0
  11. pheval/constants.py +1 -0
  12. pheval/implementations/__init__.py +1 -3
  13. pheval/post_processing/__init__.py +0 -0
  14. pheval/post_processing/post_processing.py +210 -0
  15. pheval/prepare/__init__.py +0 -0
  16. pheval/prepare/create_noisy_phenopackets.py +173 -0
  17. pheval/prepare/create_spiked_vcf.py +366 -0
  18. pheval/prepare/custom_exceptions.py +47 -0
  19. pheval/prepare/update_phenopacket.py +53 -0
  20. pheval/resources/alternate_ouputs/CADA_results.txt +11 -0
  21. pheval/resources/alternate_ouputs/DeepPVP_results.txt +22 -0
  22. pheval/resources/alternate_ouputs/OVA_results.txt +11 -0
  23. pheval/resources/alternate_ouputs/Phen2Gene_results.json +814 -0
  24. pheval/resources/alternate_ouputs/Phenolyzer_results.txt +12 -0
  25. pheval/resources/alternate_ouputs/lirical_results.tsv +152 -0
  26. pheval/resources/alternate_ouputs/svanna_results.tsv +9 -0
  27. pheval/resources/hgnc_complete_set_2022-10-01.txt +43222 -0
  28. pheval/run_metadata.py +27 -0
  29. pheval/runners/runner.py +92 -11
  30. pheval/utils/__init__.py +0 -0
  31. pheval/utils/docs_gen.py +105 -0
  32. pheval/utils/docs_gen.sh +18 -0
  33. pheval/utils/file_utils.py +88 -0
  34. pheval/utils/phenopacket_utils.py +356 -0
  35. pheval/utils/semsim_utils.py +156 -0
  36. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/METADATA +12 -4
  37. pheval-0.2.0.dist-info/RECORD +41 -0
  38. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/WHEEL +1 -1
  39. pheval/utils.py +0 -7
  40. pheval-0.1.0.dist-info/RECORD +0 -13
  41. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/LICENSE +0 -0
  42. {pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/entry_points.txt +0 -0
pheval/utils/phenopacket_utils.py ADDED
@@ -0,0 +1,356 @@
+ import json
+
+ # import logging
+ import os
+ from collections import defaultdict
+ from copy import copy
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ import pandas as pd
+ from google.protobuf.json_format import MessageToJson, Parse
+ from phenopackets import (
+     Family,
+     File,
+     GenomicInterpretation,
+     Interpretation,
+     Phenopacket,
+     PhenotypicFeature,
+ )
+
+ from pheval.prepare.custom_exceptions import IncorrectFileFormatError
+
+
+ class IncompatibleGenomeAssemblyError(Exception):
+     """Exception raised for incompatible genome assembly."""
+
+     def __init__(self, assembly, phenopacket, message="Incompatible Genome Assembly"):
+         self.assembly: str = assembly
+         self.phenopacket: Path = phenopacket
+         self.message: str = message
+         super().__init__(self.message)
+
+     def __str__(self):
+         return f"{self.message} -> {self.assembly} in {self.phenopacket}"
+
+
+ @dataclass
+ class GenomicVariant:
+     chrom: str
+     pos: int
+     ref: str
+     alt: str
+
+
+ @dataclass
+ class ProbandCausativeVariant:
+     proband_id: str
+     assembly: str
+     variant: GenomicVariant
+     genotype: str
+     info: str = None
+
+
+ @dataclass
+ class ProbandCausativeGene:
+     gene_symbol: str
+     gene_identifier: str
+
+
+ def read_hgnc_data() -> pd.DataFrame:
+     return pd.read_csv(
+         os.path.dirname(__file__).replace("utils", "resources/hgnc_complete_set_2022-10-01.txt"),
+         delimiter="\t",
+         dtype=str,
+     )
+
+
+ def create_hgnc_dict() -> defaultdict:
+     """Creates reference for updating gene symbols and identifiers."""
+     hgnc_df = read_hgnc_data()
+     hgnc_data = defaultdict(dict)
+     for _index, row in hgnc_df.iterrows():
+         previous_names = []
+         hgnc_data[row["symbol"]]["ensembl_id"] = row["ensembl_gene_id"]
+         hgnc_data[row["symbol"]]["hgnc_id"] = row["hgnc_id"]
+         hgnc_data[row["symbol"]]["entrez_id"] = row["entrez_id"]
+         hgnc_data[row["symbol"]]["refseq_accession"] = row["refseq_accession"]
+         previous = str(row["prev_symbol"]).split("|")
+         for p in previous:
+             previous_names.append(p.strip('"'))
+         hgnc_data[row["symbol"]]["previous_symbol"] = previous_names
+
+     return hgnc_data
+
+
+ def create_gene_identifier_map() -> dict:
+     hgnc_df = read_hgnc_data()
+     identifier_map = {}
+     for _index, row in hgnc_df.iterrows():
+         identifier_map[row["ensembl_gene_id"]] = row["symbol"]
+         identifier_map[row["hgnc_id"]] = row["symbol"]
+         identifier_map[row["entrez_id"]] = row["symbol"]
+         identifier_map[row["refseq_accession"]] = row["symbol"]
+     return identifier_map
+
+
+ def phenopacket_reader(file: Path):
+     """Reads a phenopacket file, returning its contents."""
+     file = open(file, "r")
+     phenopacket = json.load(file)
+     file.close()
+     if "proband" in phenopacket:
+         return Parse(json.dumps(phenopacket), Family())
+     else:
+         return Parse(json.dumps(phenopacket), Phenopacket())
+
+
+ class PhenopacketUtil:
+     """Retrieves relevant data from a phenopacket."""
+
+     def __init__(self, phenopacket_contents: Phenopacket):
+         self.phenopacket_contents = phenopacket_contents
+
+     def sample_id(self) -> str:
+         """Retrieve the sample ID from a phenopacket or proband of a family."""
+         if hasattr(self.phenopacket_contents, "proband"):
+             return self.phenopacket_contents.proband.subject.id
+         else:
+             return self.phenopacket_contents.subject.id
+
+     def phenotypic_features(self) -> list[PhenotypicFeature]:
+         """Retrieves a list of all HPO terms."""
+         if hasattr(self.phenopacket_contents, "proband"):
+             return self.phenopacket_contents.proband.phenotypic_features
+         else:
+             return self.phenopacket_contents.phenotypic_features
+
+     def observed_phenotypic_features(self) -> list[PhenotypicFeature]:
+         """Removes any HPO terms labelled as excluded."""
+         phenotypic_features = []
+         all_phenotypic_features = self.phenotypic_features()
+         for p in all_phenotypic_features:
+             if p.excluded:
+                 continue
+             phenotypic_features.append(p)
+         return phenotypic_features
+
+     def negated_phenotypic_features(self) -> [PhenotypicFeature]:
+         """Retrieve negated phenotypic features."""
+         negated_phenotypic_features = []
+         all_phenotypic_features = self.phenotypic_features()
+         for p in all_phenotypic_features:
+             if p.excluded:
+                 negated_phenotypic_features.append(p)
+         return negated_phenotypic_features
+
+     def interpretations(self) -> list[Interpretation]:
+         """Returns all interpretations of a phenopacket."""
+         if hasattr(self.phenopacket_contents, "proband"):
+             return self.phenopacket_contents.proband.interpretations
+         else:
+             return self.phenopacket_contents.interpretations
+
+     def causative_variants(self) -> list[ProbandCausativeVariant]:
+         """Returns a list of all causative variants listed in a phenopacket."""
+         all_variants = []
+         interpretation = self.interpretations()
+         for i in interpretation:
+             for g in i.diagnosis.genomic_interpretations:
+                 vcf_record = g.variant_interpretation.variation_descriptor.vcf_record
+                 genotype = g.variant_interpretation.variation_descriptor.allelic_state
+                 variant_data = ProbandCausativeVariant(
+                     self.phenopacket_contents.subject.id,
+                     vcf_record.genome_assembly,
+                     GenomicVariant(
+                         vcf_record.chrom,
+                         vcf_record.pos,
+                         vcf_record.ref,
+                         vcf_record.alt,
+                     ),
+                     genotype.label,
+                     vcf_record.info,
+                 )
+                 all_variants.append(variant_data)
+         return all_variants
+
+     def files(self) -> list:
+         """Returns all files associated with a phenopacket."""
+         return self.phenopacket_contents.files
+
+     def vcf_file_data(self, phenopacket_path: Path, vcf_dir: Path) -> File:
+         """Retrieves the genome assembly and vcf name from a phenopacket."""
+         compatible_genome_assembly = ["GRCh37", "hg19", "GRCh38", "hg38"]
+         vcf_data = [file for file in self.files() if file.file_attributes["fileFormat"] == "vcf"][0]
+         if not Path(vcf_data.uri).name.endswith(".vcf") and not Path(vcf_data.uri).name.endswith(
+             ".vcf.gz"
+         ):
+             raise IncorrectFileFormatError(Path(vcf_data.uri), ".vcf or .vcf.gz file")
+         if vcf_data.file_attributes["genomeAssembly"] not in compatible_genome_assembly:
+             raise IncompatibleGenomeAssemblyError(
+                 vcf_data.file_attributes["genomeAssembly"], phenopacket_path
+             )
+         vcf_data.uri = str(vcf_dir.joinpath(Path(vcf_data.uri).name))
+         return vcf_data
+
+     @staticmethod
+     def _extract_diagnosed_gene(
+         genomic_interpretation: GenomicInterpretation,
+     ) -> ProbandCausativeGene:
+         """Returns the disease causative gene from the variant descriptor field if not empty,
+         otherwise, returns from the gene descriptor from a phenopacket."""
+         if genomic_interpretation.variant_interpretation.ByteSize() != 0:
+             return ProbandCausativeGene(
+                 genomic_interpretation.variant_interpretation.variation_descriptor.gene_context.symbol,
+                 genomic_interpretation.variant_interpretation.variation_descriptor.gene_context.value_id,
+             )
+
+         else:
+             return ProbandCausativeGene(
+                 gene_symbol=genomic_interpretation.gene.symbol,
+                 gene_identifier=genomic_interpretation.gene.value_id,
+             )
+
+     def diagnosed_genes(self) -> list[ProbandCausativeGene]:
+         """Returns a unique list of all causative genes and the corresponding gene identifiers from a phenopacket."""
+         pheno_interpretation = self.interpretations()
+         genes = []
+         for i in pheno_interpretation:
+             for g in i.diagnosis.genomic_interpretations:
+                 genes.append(self._extract_diagnosed_gene(g))
+         genes = list({gene.gene_symbol: gene for gene in genes}.values())
+         return genes
+
+     def diagnosed_variants(self) -> list[GenomicVariant]:
+         """Returns a list of all variants from a phenopacket - for use in assess-prioritisation."""
+         variants = []
+         pheno_interpretation = self.interpretations()
+         for i in pheno_interpretation:
+             for g in i.diagnosis.genomic_interpretations:
+                 variant = GenomicVariant(
+                     chrom=g.variant_interpretation.variation_descriptor.vcf_record.chrom,
+                     pos=g.variant_interpretation.variation_descriptor.vcf_record.pos,
+                     ref=g.variant_interpretation.variation_descriptor.vcf_record.ref,
+                     alt=g.variant_interpretation.variation_descriptor.vcf_record.alt,
+                 )
+                 variants.append(variant)
+         return variants
+
+
+ class PhenopacketRebuilder:
+     """Rebuilds a Phenopacket."""
+
+     def __init__(self, phenopacket: Phenopacket or Family):
+         self.phenopacket = phenopacket
+
+     def update_interpretations(self, interpretations) -> Phenopacket or Family:
+         """Adds the updated interpretations to a phenopacket."""
+         phenopacket = copy(self.phenopacket)
+         if hasattr(phenopacket, "proband"):
+             del phenopacket.proband.interpretations[:]
+             phenopacket.proband.interpretations.extend(interpretations)
+         else:
+             del phenopacket.interpretations[:]
+             phenopacket.interpretations.extend(interpretations)
+         return phenopacket
+
+     def add_randomised_hpo(self, randomised_hpo) -> Phenopacket or Family:
+         """Adds randomised phenotypic profile to phenopacket."""
+         phenopacket = copy(self.phenopacket)
+         if hasattr(phenopacket, "proband"):
+             del phenopacket.proband.phenotypic_features[:]
+             phenopacket.proband.phenotypic_features.extend(randomised_hpo)
+         else:
+             del phenopacket.phenotypic_features[:]
+             phenopacket.phenotypic_features.extend(randomised_hpo)
+         return phenopacket
+
+     def add_spiked_vcf_path(self, spiked_vcf_file_data: File) -> Phenopacket or Family:
+         """Adds spiked vcf path to phenopacket."""
+         phenopacket = copy(self.phenopacket)
+         phenopacket_files = [
+             file for file in phenopacket.files if file.file_attributes["fileFormat"] != "vcf"
+         ]
+         phenopacket_files.append(spiked_vcf_file_data)
+         del phenopacket.files[:]
+         phenopacket.files.extend(phenopacket_files)
+         return phenopacket
+
+
+ def create_json_message(phenopacket: Phenopacket or Family) -> str:
+     """Creates json message for writing to file."""
+     return MessageToJson(phenopacket)
+
+
+ def write_phenopacket(phenopacket: Phenopacket or Family, output_file: Path) -> None:
+     """Writes a phenopacket."""
+     phenopacket_json = create_json_message(phenopacket)
+     with open(output_file, "w") as outfile:
+         outfile.write(phenopacket_json)
+     outfile.close()
+
+
+ class GeneIdentifierUpdater:
+     def __init__(self, gene_identifier: str, hgnc_data: dict = None, identifier_map: dict = None):
+         self.hgnc_data = hgnc_data
+         self.gene_identifier = gene_identifier
+         self.identifier_map = identifier_map
+
+     def find_identifier(self, gene_symbol: str) -> str:
+         """Finds the specified gene identifier for a gene symbol."""
+         if gene_symbol in self.hgnc_data.keys():
+             return self.hgnc_data[gene_symbol][self.gene_identifier]
+         else:
+             for _symbol, data in self.hgnc_data.items():
+                 for prev_symbol in data["previous_symbol"]:
+                     if prev_symbol == gene_symbol:
+                         return data[self.gene_identifier]
+
+     def obtain_gene_symbol_from_identifier(self, query_gene_identifier: str) -> str:
+         """
+         Obtain gene symbol from a gene identifier. (e.g.)
+         "
+         obtain_gene_symbol_from_identifier(query_gene_identifier="HGNC:5")
+         "
+         """
+         return self.identifier_map[query_gene_identifier]
+
+     def _find_alternate_ids(self, gene_symbol: str) -> list[str]:
+         """Finds the alternate IDs for a gene symbol."""
+         if gene_symbol in self.hgnc_data.keys():
+             return [
+                 self.hgnc_data[gene_symbol]["hgnc_id"],
+                 "ncbigene:" + self.hgnc_data[gene_symbol]["entrez_id"],
+                 "ensembl:" + self.hgnc_data[gene_symbol]["ensembl_id"],
+                 "symbol:" + gene_symbol,
+             ]
+         else:
+             for symbol, data in self.hgnc_data.items():
+                 for prev_symbol in data["previous_symbol"]:
+                     if prev_symbol == gene_symbol:
+                         return [
+                             data["hgnc_id"],
+                             "ncbigene:" + data["entrez_id"],
+                             "ensembl:" + data["ensembl_id"],
+                             "symbol:" + symbol,
+                         ]
+
+     def update_genomic_interpretations_gene_identifier(
+         self, interpretations: list[Interpretation]
+     ) -> list[Interpretation]:
+         """Updates the genomic interpretations of a phenopacket."""
+         updated_interpretations = copy(list(interpretations))
+         for updated_interpretation in updated_interpretations:
+             for g in updated_interpretation.diagnosis.genomic_interpretations:
+                 g.variant_interpretation.variation_descriptor.gene_context.value_id = (
+                     self.find_identifier(
+                         g.variant_interpretation.variation_descriptor.gene_context.symbol
+                     )
+                 )
+                 del g.variant_interpretation.variation_descriptor.gene_context.alternate_ids[:]
+                 g.variant_interpretation.variation_descriptor.gene_context.alternate_ids.extend(
+                     self._find_alternate_ids(
+                         g.variant_interpretation.variation_descriptor.gene_context.symbol
+                     )
+                 )
+         return updated_interpretations
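The new pheval/utils/phenopacket_utils.py module above centres on PhenopacketUtil, which pulls phenotypic features, interpretations, and causative genes and variants out of a phenopacket or family message, and GeneIdentifierUpdater, which maps gene symbols to identifiers via the bundled HGNC table. A minimal usage sketch follows; the input path, the chosen gene symbol, and the printed fields are illustrative assumptions, not part of the package:

from pathlib import Path

from pheval.utils.phenopacket_utils import (
    GeneIdentifierUpdater,
    PhenopacketUtil,
    create_hgnc_dict,
    phenopacket_reader,
)

# Parse a phenopacket (or family) JSON into a protobuf message (hypothetical path).
phenopacket = phenopacket_reader(Path("phenopackets/patient_1.json"))
util = PhenopacketUtil(phenopacket)

# HPO terms that are not flagged as excluded.
for feature in util.observed_phenotypic_features():
    print(feature.type.id, feature.type.label)

# Unique causative genes recorded in the interpretations.
for gene in util.diagnosed_genes():
    print(gene.gene_symbol, gene.gene_identifier)

# Look up a gene identifier from the bundled HGNC reference table.
updater = GeneIdentifierUpdater(gene_identifier="ensembl_id", hgnc_data=create_hgnc_dict())
print(updater.find_identifier("BRCA1"))  # Ensembl gene id, assuming BRCA1 is in the table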
pheval/utils/semsim_utils.py ADDED
@@ -0,0 +1,156 @@
+ """
+ Contains all pheval utility methods
+ """
+ from pathlib import Path
+
+ import numpy
+ import pandas as pd
+ import plotly.express as px
+
+ import pheval.utils.file_utils as file_utils
+
+
+ def filter_non_0_score(data: pd.DataFrame, col: str) -> pd.DataFrame:
+     """Removes rows that have value equal to 0 based on the given column passed by col parameter
+
+     Args:
+         data (pd.DataFrame): Dirty dataframe
+         col (str): Column to be filtered
+
+     Returns:
+         pd.DataFrame: Filtered dataframe
+     """
+     return data[data[col] != 0]
+
+
+ def parse_semsim(df: pd.DataFrame, cols: list) -> pd.DataFrame:
+     """Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones
+
+     Args:
+         df (pd.DataFrame): semantic similarity profile dataframe
+         cols (list): list of columns that will be selected on semsim data
+
+     Returns:
+         pd.Dataframe: parsed semantic similarity dataframe
+     """
+     df[cols[-1]] = pd.to_numeric(df[cols[-1]], errors="coerce")
+     df.replace("None", numpy.nan).dropna(subset=cols[-1], inplace=True)
+     return df
+
+
+ def diff_semsim(
+     semsim_left: pd.DataFrame, semsim_right: pd.DataFrame, score_column: str, absolute_diff: bool
+ ) -> pd.DataFrame:
+     """Calculates score difference between two semantic similarity profiles
+
+     Args:
+         semsim_left (pd.DataFrame): first semantic similarity dataframe
+         semsim_right (pd.DataFrame): second semantic similarity dataframe
+         score_column (str): Score column that will be computed (e.g. jaccard_similarity)
+         absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False).
+             Defaults to True.
+
+     Returns:
+         pd.DataFrame: A dataframe with terms and its scores differences
+     """
+     df = pd.merge(semsim_left, semsim_right, on=["subject_id", "object_id"], how="outer")
+     if absolute_diff:
+         df["diff"] = df[f"{score_column}_x"] - df[f"{score_column}_y"]
+         return df[["subject_id", "object_id", "diff"]]
+     df["diff"] = df.apply(
+         lambda row: get_percentage_diff(row[f"{score_column}_x"], row[f"{score_column}_y"]), axis=1
+     )
+     return df[["subject_id", "object_id", f"{score_column}_x", f"{score_column}_y", "diff"]]
+
+
+ def percentage_diff(semsim_left: Path, semsim_right: Path, score_column: str, output: Path):
+     """Compares two semantic similarity profiles
+
+     Args:
+         semsim_left (Path): File path of the first semantic similarity profile
+         semsim_right (Path): File path of the second semantic similarity profile
+         score_column (str): Score column that will be computed (e.g. jaccard_similarity)
+         output (Path): Output path for the difference tsv file
+     """
+     clean_df = semsim_analysis(semsim_left, semsim_right, score_column, absolute_diff=False)
+     clean_df.sort_values(by="diff", ascending=False).to_csv(output, sep="\t", index=False)
+
+
+ def semsim_heatmap_plot(semsim_left: Path, semsim_right: Path, score_column: str):
+     """Plots semantic similarity profiles heatmap
+
+     Args:
+         semsim_left (Path): File path of the first semantic similarity profile
+         semsim_right (Path): File path of the second semantic similarity profile
+         score_column (str): Score column that will be computed (e.g. jaccard_similarity)
+     """
+     clean_df = semsim_analysis(semsim_left, semsim_right, score_column)
+     df = clean_df.pivot(index="subject_id", columns="object_id", values="diff")
+     fig = px.imshow(df, text_auto=True)
+     fig.show()
+
+
+ def semsim_analysis(
+     semsim_left: Path, semsim_right: Path, score_column: str, absolute_diff=True
+ ) -> pd.DataFrame:
+     """semsim_analysis
+
+     Args:
+         semsim_left (Path): File path of the first semantic similarity profile
+         semsim_right (Path): File path of the second semantic similarity profile
+         score_column (str): Score column that will be computed (e.g. jaccard_similarity)
+         absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False).
+             Defaults to True.
+
+     Returns:
+         [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles
+     """
+     validate_semsim_file_comparison(semsim_left, semsim_right)
+     cols = ["subject_id", "object_id", score_column]
+     semsim_left = pd.read_csv(semsim_left, sep="\t")
+     semsim_right = pd.read_csv(semsim_right, sep="\t")
+     file_utils.ensure_columns_exists(
+         cols=cols,
+         err_message="must exist in semsim dataframes",
+         dataframes=[semsim_left, semsim_right],
+     )
+     semsim_left = parse_semsim(semsim_left, cols)
+     semsim_right = parse_semsim(semsim_right, cols)
+     diff_df = diff_semsim(semsim_left, semsim_right, score_column, absolute_diff)
+     return filter_non_0_score(diff_df, "diff")
+
+
+ def validate_semsim_file_comparison(semsim_left: Path, semsim_right: Path):
+     """Checks if files exist and whether they're different
+     Args:
+         semsim_left (Path): File path of the first semantic similarity profile
+         semsim_right (Path): File path of the second semantic similarity profile
+     Raises:
+         Exception: FileNotFoundException
+     """
+     if semsim_left == semsim_right:
+         errmsg = "Semantic similarity profiles are equal. Make sure you have selected different files to analyze"
+         raise Exception(errmsg)
+     file_utils.ensure_file_exists(semsim_left, semsim_right)
+
+
+ def get_percentage_diff(current_number: float, previous_number: float) -> float:
+     """Gets the percentage difference between two numbers
+
+     Args:
+         current_number (float): second number in comparison
+         previous_number (float): first number in comparison
+
+     Returns:
+         float: percentage difference between two numbers
+     """
+     try:
+         if current_number == previous_number:
+             return "{:.2%}".format(0)
+         if current_number > previous_number:
+             number = (1 - ((current_number / previous_number))) * 100
+         else:
+             number = (100 - ((previous_number / current_number) * 100)) * -1
+         return "{:.2%}".format(number / 100)
+     except ZeroDivisionError:
+         return None
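The new pheval/utils/semsim_utils.py module above joins two semantic similarity profiles on their subject_id/object_id pairs and reports either absolute or percentage score differences. A minimal usage sketch follows, assuming two hypothetical TSV profiles that contain subject_id, object_id and jaccard_similarity columns (the file names are illustrative):

from pathlib import Path

from pheval.utils.semsim_utils import percentage_diff, semsim_heatmap_plot

left = Path("semsim/profile_a.tsv")   # hypothetical input profile
right = Path("semsim/profile_b.tsv")  # hypothetical input profile

# Write a TSV of per-pair percentage differences, sorted by the "diff" column.
percentage_diff(left, right, score_column="jaccard_similarity", output=Path("semsim_diff.tsv"))

# Render an interactive plotly heatmap of the absolute score differences.
semsim_heatmap_plot(left, right, score_column="jaccard_similarity")

Note that, as written, get_percentage_diff(0.8, 0.4) evaluates (1 - 0.8/0.4) * 100 = -100 and is formatted as "-100.00%", so a higher score in the first profile comes out as a negative percentage.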
{pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/METADATA
@@ -1,10 +1,10 @@
  Metadata-Version: 2.1
  Name: pheval
- Version: 0.1.0
+ Version: 0.2.0
  Summary:
- Author: Nico Matentzoglu
- Author-email: nicolas.matentzoglu@gmail.com
- Requires-Python: >=3.9
+ Author: Yasemin Bridges
+ Author-email: y.bridges@qmul.ac.uk
+ Requires-Python: >=3.9,<4.0.0
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
@@ -12,8 +12,16 @@ Classifier: Programming Language :: Python :: 3.11
  Requires-Dist: class-resolver (>=0.3.10,<0.4.0)
  Requires-Dist: click (>=8.1.3)
  Requires-Dist: deprecation (>=2.1.0)
+ Requires-Dist: google (>=3.0.0,<4.0.0)
  Requires-Dist: jaydebeapi (>=1.2.3)
+ Requires-Dist: matplotlib (>=3.7.0,<4.0.0)
+ Requires-Dist: oaklib (>=0.1.55,<0.2.0)
  Requires-Dist: pandas (>=1.5.1)
+ Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
+ Requires-Dist: plotly (>=5.13.0,<6.0.0)
+ Requires-Dist: pyaml (>=21.10.1,<22.0.0)
+ Requires-Dist: pyserde (>=0.9.8,<0.10.0)
+ Requires-Dist: seaborn (>=0.12.2,<0.13.0)
  Requires-Dist: tqdm (>=4.64.1)
  Description-Content-Type: text/markdown
pheval-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,41 @@
+ pheval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pheval/analyse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pheval/analyse/analysis.py,sha256=4Yhlkokx3pezXrslJDZtYfF2Y-BrP6y4_mCetpegkT4,24825
+ pheval/analyse/generate_plots.py,sha256=KDV1x7JnS9hX5cwMEUI63TVndC59-Fm0HNfaLtP8tJ4,14483
+ pheval/analyse/generate_summary_outputs.py,sha256=HGpg916t5MthSpTSjKQI3sS5Y7jjO1QVqzn4TdR0veE,7266
+ pheval/analyse/rank_stats.py,sha256=HMmLECGGCJrCdNstmCbEkODoYgqYdJRfv3NgqaUaA94,1933
+ pheval/cli.py,sha256=Ubw4Rup_hF18UszJIFTUB7_dhnr2P88dD33T0WzLblc,1412
+ pheval/cli_pheval.py,sha256=aP7UAvmNZdj74raSANF6uSxHk_wCQ9ckHOVOySaW4dE,2423
+ pheval/cli_pheval_utils.py,sha256=_rmgFcee8-u4A5eQg4TtY1KbduERSHsm2DQGboYb91E,7307
+ pheval/config_parser.py,sha256=R_ivbMBVHMOuctQxVUIl9ojQTE0cX-X6v6YToLlwh64,1030
+ pheval/constants.py,sha256=07xfY0nVEkHeDiZXfo5X7TTCOV0GrsERkm2mx6-JiiI,45
+ pheval/implementations/__init__.py,sha256=2mkbEaA7o-NAkfoLYkn2q50xp82cavu_qGcjt3k8m-I,1227
+ pheval/post_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pheval/post_processing/post_processing.py,sha256=clrWnGds8QXcOTeKt3bl4FuqyyuYeM19GjScYpQrA04,7244
+ pheval/prepare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pheval/prepare/create_noisy_phenopackets.py,sha256=405f_kAOFlBN6J9ClB5ZyJ4Z094JE6XEQgrwPZK7LyQ,6920
+ pheval/prepare/create_spiked_vcf.py,sha256=1cYMcaU66lnsWoF7PRJ5SZXLZy0Ao3Myux2T_4YIXNU,12988
+ pheval/prepare/custom_exceptions.py,sha256=_G3_95dPtHIs1SviYBV1j7cYc-hxlhuw8hhnYdzByYY,1719
+ pheval/prepare/update_phenopacket.py,sha256=ZC-i8VVQbzAP_pebat9_Xy13c4MRWSdVplRdUigdXrM,2252
+ pheval/resources/alternate_ouputs/CADA_results.txt,sha256=Rinn2TtfwFNsx0aEWegKJOkjKnBm-Mf54gdaT3bWP0k,547
+ pheval/resources/alternate_ouputs/DeepPVP_results.txt,sha256=MF9MZJYa4r4PEvFzALpi-lNGLxjENOnq_YgrgFMn-oQ,1508
+ pheval/resources/alternate_ouputs/OVA_results.txt,sha256=_5XFCR4W04D-W7DObpALLsa0-693g2kiIUB_uo79aHk,9845
+ pheval/resources/alternate_ouputs/Phen2Gene_results.json,sha256=xxKsuiHKW9qQOz2baFlLW9RYphA4kxjoTsg1weZkTY8,14148
+ pheval/resources/alternate_ouputs/Phenolyzer_results.txt,sha256=TltiEzYm2PY79u6EdZR3f4ZqadNDCUN_d4f0TFF-t5A,594
+ pheval/resources/alternate_ouputs/lirical_results.tsv,sha256=0juf5HY6ttg-w7aWgYJUmSP5zmoaooEQDY8xhOcerLk,431068
+ pheval/resources/alternate_ouputs/svanna_results.tsv,sha256=OpTamPhJwh12wkdAxoIGb0wWs_T7TcqNWgqkQzgOek4,714
+ pheval/resources/hgnc_complete_set_2022-10-01.txt,sha256=PLD2-FJizl0detUtjvgeC1qc1FNq2jnykRvfw7ahF2w,16274884
+ pheval/run_metadata.py,sha256=lDiLNFSRueX2pfyuRwNRbcRo_XxWQbSTLy45Yhgicsc,919
+ pheval/runners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pheval/runners/runner.py,sha256=FOpFxUuEeV2-2vYQkaDVye8BTfN9WqSJHIpBF0X14Os,3774
+ pheval/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pheval/utils/docs_gen.py,sha256=rYP_76SSRx-G95r25aJcGtEEHCYgfi6-1hR0aV2UZXA,3192
+ pheval/utils/docs_gen.sh,sha256=LyKLKjaZuf4UJ962CWfM-XqkxtvM8O2N9wHZS5mcb9A,477
+ pheval/utils/file_utils.py,sha256=n3GKOOkd1mENpWOWcWHIUFvhh1iV2TCp1daMPOP0f_c,3068
+ pheval/utils/phenopacket_utils.py,sha256=uZ_SGtrctZgg6iJqyOaseV9bqQ5paYN8FJvnTSg1qMg,14245
+ pheval/utils/semsim_utils.py,sha256=jDqSUYBP6Q5yPNq024kDe2fpqWmwwRzv41o2fP1q-vA,6150
+ pheval-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ pheval-0.2.0.dist-info/METADATA,sha256=2Elv0CtIfOBLW79u8EtQtbgmWjTqa-QCyX1Hyzwt5Gw,1688
+ pheval-0.2.0.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
+ pheval-0.2.0.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
+ pheval-0.2.0.dist-info/RECORD,,
{pheval-0.1.0.dist-info → pheval-0.2.0.dist-info}/WHEEL
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.3.2
+ Generator: poetry-core 1.6.0
  Root-Is-Purelib: true
  Tag: py3-none-any
pheval/utils.py DELETED
@@ -1,7 +0,0 @@
- """
- Contains all pheval utility methods
- """
-
-
- def example():
-     return True
pheval-0.1.0.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
- pheval/__init__.py,sha256=amR2KDYWGlaKIqzfpeFlXYvDacBV-zCV_KH-Po9SQVk,44
- pheval/cli.py,sha256=FAj7ImzjoAMGu41L6odPUamtLLYli9sbWPYGioQSCnk,970
- pheval/cli_pheval.py,sha256=ThAKnnodHwrDxTeb0NLysZJHL1fxN3TRUGKJgpB90zw,1851
- pheval/cli_pheval_utils.py,sha256=tmFzWhQMZYrhQml8XC7HP8dcIYwCDvFlzrzmzGxUgo0,606
- pheval/implementations/__init__.py,sha256=rZoxRBHzQWTUlNeofu_gZcVCYZNF9kEbiw3BKaeMHso,1241
- pheval/runners/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pheval/runners/runner.py,sha256=A9m_ZfAKvFhWvrj7dRml8JsDIotc6HR-8krTxV2IP44,942
- pheval/utils.py,sha256=M63N6EkO_-AajNWy1YgxxgdE1bqdaFLy5nD47DBYhE4,77
- pheval-0.1.0.dist-info/entry_points.txt,sha256=o9gSwDkvT4-lqKy4mlsftd1nzP9WUOXQCfnbqycURd0,81
- pheval-0.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- pheval-0.1.0.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
- pheval-0.1.0.dist-info/METADATA,sha256=CBwt3Ufa7WM9-ffr_lTp-asefXOmbXc4KinIA81sj1Y,1360
- pheval-0.1.0.dist-info/RECORD,,