pg-sui 0.2.3__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (127)
  1. {pg_sui-0.2.3.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +99 -77
  2. pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
  3. {pg_sui-0.2.3.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
  5. {pg_sui-0.2.3.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
  6. pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
  7. pgsui/__init__.py +35 -54
  8. pgsui/_version.py +34 -0
  9. pgsui/cli.py +909 -0
  10. pgsui/data_processing/__init__.py +0 -0
  11. pgsui/data_processing/config.py +565 -0
  12. pgsui/data_processing/containers.py +1424 -0
  13. pgsui/data_processing/transformers.py +557 -907
  14. pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  15. pgsui/electron/app/__main__.py +5 -0
  16. pgsui/electron/app/extra-resources/.gitkeep +1 -0
  17. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  18. pgsui/electron/app/icons/icons/128x128.png +0 -0
  19. pgsui/electron/app/icons/icons/16x16.png +0 -0
  20. pgsui/electron/app/icons/icons/24x24.png +0 -0
  21. pgsui/electron/app/icons/icons/256x256.png +0 -0
  22. pgsui/electron/app/icons/icons/32x32.png +0 -0
  23. pgsui/electron/app/icons/icons/48x48.png +0 -0
  24. pgsui/electron/app/icons/icons/512x512.png +0 -0
  25. pgsui/electron/app/icons/icons/64x64.png +0 -0
  26. pgsui/electron/app/icons/icons/icon.icns +0 -0
  27. pgsui/electron/app/icons/icons/icon.ico +0 -0
  28. pgsui/electron/app/main.js +227 -0
  29. pgsui/electron/app/package-lock.json +6894 -0
  30. pgsui/electron/app/package.json +51 -0
  31. pgsui/electron/app/preload.js +15 -0
  32. pgsui/electron/app/server.py +157 -0
  33. pgsui/electron/app/ui/logo.png +0 -0
  34. pgsui/electron/app/ui/renderer.js +131 -0
  35. pgsui/electron/app/ui/styles.css +59 -0
  36. pgsui/electron/app/ui/ui_shim.js +72 -0
  37. pgsui/electron/bootstrap.py +43 -0
  38. pgsui/electron/launch.py +57 -0
  39. pgsui/electron/package.json +14 -0
  40. pgsui/example_data/__init__.py +0 -0
  41. pgsui/example_data/phylip_files/__init__.py +0 -0
  42. pgsui/example_data/phylip_files/test.phy +0 -0
  43. pgsui/example_data/popmaps/__init__.py +0 -0
  44. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  45. pgsui/example_data/structure_files/__init__.py +0 -0
  46. pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
  47. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  48. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  49. pgsui/impute/__init__.py +0 -0
  50. pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
  51. pgsui/impute/deterministic/imputers/mode.py +844 -0
  52. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  53. pgsui/impute/deterministic/imputers/phylo.py +973 -0
  54. pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
  55. pgsui/impute/supervised/__init__.py +0 -0
  56. pgsui/impute/supervised/base.py +343 -0
  57. pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  58. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
  59. pgsui/impute/supervised/imputers/random_forest.py +291 -0
  60. pgsui/impute/unsupervised/__init__.py +0 -0
  61. pgsui/impute/unsupervised/base.py +1118 -0
  62. pgsui/impute/unsupervised/callbacks.py +92 -262
  63. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
  64. pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
  65. pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
  66. pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
  67. pgsui/impute/unsupervised/imputers/vae.py +1228 -0
  68. pgsui/impute/unsupervised/loss_functions.py +261 -0
  69. pgsui/impute/unsupervised/models/__init__.py +0 -0
  70. pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
  71. pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
  72. pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
  73. pgsui/impute/unsupervised/models/vae_model.py +269 -630
  74. pgsui/impute/unsupervised/nn_scorers.py +255 -0
  75. pgsui/utils/__init__.py +0 -0
  76. pgsui/utils/classification_viz.py +608 -0
  77. pgsui/utils/logging_utils.py +22 -0
  78. pgsui/utils/misc.py +35 -480
  79. pgsui/utils/plotting.py +996 -829
  80. pgsui/utils/pretty_metrics.py +290 -0
  81. pgsui/utils/scorers.py +213 -666
  82. pg_sui-0.2.3.dist-info/RECORD +0 -75
  83. pg_sui-0.2.3.dist-info/top_level.txt +0 -3
  84. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  85. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  86. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  87. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  88. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  89. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  90. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  91. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  92. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  93. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  94. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  95. pgsui/example_data/trees/test.iqtree +0 -376
  96. pgsui/example_data/trees/test.qmat +0 -5
  97. pgsui/example_data/trees/test.rate +0 -2033
  98. pgsui/example_data/trees/test.tre +0 -1
  99. pgsui/example_data/trees/test_n10.rate +0 -19
  100. pgsui/example_data/trees/test_n100.rate +0 -109
  101. pgsui/example_data/trees/test_n500.rate +0 -509
  102. pgsui/example_data/trees/test_siterates.txt +0 -2024
  103. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  104. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  105. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  106. pgsui/example_data/vcf_files/test.vcf +0 -244
  107. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  108. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  109. pgsui/impute/estimators.py +0 -1268
  110. pgsui/impute/impute.py +0 -1463
  111. pgsui/impute/simple_imputers.py +0 -1431
  112. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
  113. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
  114. pgsui/impute/unsupervised/keras_classifiers.py +0 -697
  115. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  116. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
  117. pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
  118. pgsui/pg_sui.py +0 -261
  119. pgsui/utils/sequence_tools.py +0 -407
  120. simulation/sim_benchmarks.py +0 -333
  121. simulation/sim_treeparams.py +0 -475
  122. test/__init__.py +0 -0
  123. test/pg_sui_simtest.py +0 -215
  124. test/pg_sui_testing.py +0 -523
  125. test/test.py +0 -151
  126. test/test_pgsui.py +0 -374
  127. test/test_tkc.py +0 -185
pgsui/impute/unsupervised/nn_scorers.py ADDED
@@ -0,0 +1,255 @@
+ from typing import TYPE_CHECKING, Dict, Literal
+
+ import numpy as np
+ import pandas as pd
+ from sklearn.metrics import (
+     accuracy_score,
+     average_precision_score,
+     f1_score,
+     precision_score,
+     recall_score,
+     roc_auc_score,
+ )
+ from snpio.utils.logging import LoggerManager
+ from torch import Tensor
+
+ from pgsui.utils.logging_utils import configure_logger
+ from pgsui.utils.misc import validate_input_type
+
+
+ class Scorer:
+     """Class for evaluating the performance of a model using various metrics.
+
+     This class provides a unified interface for computing common evaluation metrics. It supports accuracy, F1 score, precision, recall, ROC AUC, average precision, and macro-average precision. The class can handle both raw and one-hot encoded labels and includes options for logging and averaging methods.
+     """
+
+     def __init__(
+         self,
+         prefix: str,
+         average: Literal["weighted", "macro", "micro"] = "macro",
+         verbose: bool = False,
+         debug: bool = False,
+     ) -> None:
+         """Initialize a Scorer object.
+
+         This class provides a unified interface for computing common evaluation metrics. It supports accuracy, F1 score, precision, recall, ROC AUC, average precision, and macro-average precision. The class can handle both raw and one-hot encoded labels and includes options for logging and averaging methods.
+
+         Args:
+             prefix (str): The prefix to use for logging.
+             average (Literal["weighted", "macro", "micro"]): The averaging method to use for metrics. Must be one of 'micro', 'macro', or 'weighted'. Defaults to 'macro'.
+             verbose (bool): If True, enable verbose logging. Defaults to False.
+             debug (bool): If True, enable debug logging. Defaults to False.
+         """
+         logman = LoggerManager(
+             name=__name__, prefix=prefix, debug=debug, verbose=verbose
+         )
+         self.logger = configure_logger(
+             logman.get_logger(), verbose=verbose, debug=debug
+         )
+
+         if average not in {"weighted", "micro", "macro"}:
+             msg = f"Invalid average parameter: {average}. Must be one of 'micro', 'macro', or 'weighted'."
+             self.logger.error(msg)
+             raise ValueError(msg)
+
+         self.average: Literal["micro", "macro", "weighted"] = average
+
+     def accuracy(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+         """Compute the accuracy score.
+
+         Args:
+             y_true (np.ndarray): Ground truth (correct) target values.
+             y_pred (np.ndarray): Estimated target values.
+
+         Returns:
+             float: The accuracy score.
+         """
+         return float(accuracy_score(y_true, y_pred))
+
+     def f1(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+         """Compute the F1 score.
+
+         Args:
+             y_true (np.ndarray): Ground truth (correct) target values.
+             y_pred (np.ndarray): Estimated target values.
+
+         Returns:
+             float: The F1 score.
+         """
+         return float(f1_score(y_true, y_pred, average=self.average, zero_division=0))
+
+     def precision(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+         """Compute the precision score.
+
+         Args:
+             y_true (np.ndarray): Ground truth (correct) target values.
+             y_pred (np.ndarray): Estimated target values.
+
+         Returns:
+             float: The precision score.
+         """
+         return float(
+             precision_score(y_true, y_pred, average=self.average, zero_division=0)
+         )
+
+     def recall(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
+         """Compute the recall score.
+
+         Args:
+             y_true (np.ndarray): Ground truth (correct) target values.
+             y_pred (np.ndarray): Estimated target values.
+
+         Returns:
+             float: The recall score.
+         """
+         return float(
+             recall_score(y_true, y_pred, average=self.average, zero_division=0)
+         )
+
+     def roc_auc(self, y_true: np.ndarray, y_pred_proba: np.ndarray) -> float:
+         """Compute the ROC AUC score.
+
+         Args:
+             y_true (np.ndarray): Ground truth (correct) target values.
+             y_pred_proba (np.ndarray): Predicted probabilities.
+
+         Returns:
+             float: The ROC AUC score.
+         """
+         if len(np.unique(y_true)) < 2:
+             return 0.5
+
+         if y_pred_proba.shape[-1] == 2:
+             # Binary classification case
+             # Use probabilities for the positive class
+             # Otherwise it throws an error.
+             y_pred_proba = y_pred_proba[:, 1]
+
+         try:
+             return float(
+                 roc_auc_score(
+                     y_true, y_pred_proba, average=self.average, multi_class="ovr"
+                 )
+             )
+         except Exception:
+             return float(roc_auc_score(y_true, y_pred_proba, average=self.average))
+
+     # This method now correctly expects one-hot encoded true labels
+     def average_precision(
+         self, y_true_ohe: np.ndarray, y_pred_proba: np.ndarray
+     ) -> float:
+         """Compute the average precision score.
+
+         Args:
+             y_true_ohe (np.ndarray): One-hot encoded ground truth target values.
+             y_pred_proba (np.ndarray): Predicted probabilities.
+
+         Returns:
+             float: The average precision score.
+         """
+         if y_pred_proba.shape[-1] == 2:
+             # Binary classification case
+             # Use probabilities for the positive class
+             y_pred_proba = y_pred_proba[:, 1]
+
+         if y_true_ohe.shape[1] == 2:
+             # Binary classification case
+             y_true_ohe = y_true_ohe[:, 1]
+
+         return float(
+             average_precision_score(y_true_ohe, y_pred_proba, average=self.average)
+         )
+
+     def pr_macro(self, y_true_ohe: np.ndarray, y_pred_proba: np.ndarray) -> float:
+         """Compute the macro-average precision score.
+
+         Args:
+             y_true_ohe (np.ndarray): One-hot encoded ground truth target values.
+             y_pred_proba (np.ndarray): Predicted probabilities.
+
+         Returns:
+             float: The macro-average precision score.
+         """
+         if y_pred_proba.shape[-1] == 2:
+             # Binary classification case
+             # Use probabilities for the positive class
+             y_pred_proba = y_pred_proba[:, 1]
+
+         if y_true_ohe.shape[1] == 2:
+             # Binary classification case
+             y_true_ohe = y_true_ohe[:, 1]
+
+         return float(average_precision_score(y_true_ohe, y_pred_proba, average="macro"))
+
+     def evaluate(
+         self,
+         y_true: pd.DataFrame | np.ndarray | Tensor | list,
+         y_pred: pd.DataFrame | np.ndarray | Tensor | list,
+         y_true_ohe: pd.DataFrame | np.ndarray | Tensor | list,
+         y_pred_proba: pd.DataFrame | np.ndarray | Tensor | list,
+         objective_mode: bool = False,
+         tune_metric: Literal[
+             "pr_macro",
+             "roc_auc",
+             "average_precision",
+             "accuracy",
+             "f1",
+             "precision",
+             "recall",
+         ] = "pr_macro",
+     ) -> Dict[str, float]:
+         """Evaluate the model using various metrics.
+
+         Args:
+             y_true: Ground truth (correct) target values.
+             y_pred: Estimated target values.
+             y_true_ohe: One-hot encoded ground truth target values.
+             y_pred_proba: Predicted probabilities.
+             objective_mode: If True, only compute the metric specified by ``tune_metric``. Defaults to False.
+             tune_metric: The metric to optimize during tuning. Defaults to "pr_macro".
+         """
+         y_true, y_pred, y_true_ohe, y_pred_proba = [
+             validate_input_type(x) for x in (y_true, y_pred, y_true_ohe, y_pred_proba)
+         ]
+
+         if objective_mode:
+             metric_calculators = {
+                 "pr_macro": lambda: self.pr_macro(
+                     np.asarray(y_true_ohe), np.asarray(y_pred_proba)
+                 ),
+                 "roc_auc": lambda: self.roc_auc(
+                     np.asarray(y_true), np.asarray(y_pred_proba)
+                 ),
+                 "average_precision": lambda: self.average_precision(
+                     np.asarray(y_true_ohe), np.asarray(y_pred_proba)
+                 ),
+                 "accuracy": lambda: self.accuracy(
+                     np.asarray(y_true), np.asarray(y_pred)
+                 ),
+                 "f1": lambda: self.f1(np.asarray(y_true), np.asarray(y_pred)),
+                 "precision": lambda: self.precision(
+                     np.asarray(y_true), np.asarray(y_pred)
+                 ),
+                 "recall": lambda: self.recall(np.asarray(y_true), np.asarray(y_pred)),
+             }
+             if tune_metric not in metric_calculators:
+                 msg = f"Invalid tune_metric provided: '{tune_metric}'."
+                 self.logger.error(msg)
+                 raise ValueError(msg)
+
+             metrics = {tune_metric: metric_calculators[tune_metric]()}
+         else:
+             metrics = {
+                 "accuracy": self.accuracy(np.asarray(y_true), np.asarray(y_pred)),
+                 "f1": self.f1(np.asarray(y_true), np.asarray(y_pred)),
+                 "precision": self.precision(np.asarray(y_true), np.asarray(y_pred)),
+                 "recall": self.recall(np.asarray(y_true), np.asarray(y_pred)),
+                 "roc_auc": self.roc_auc(np.asarray(y_true), np.asarray(y_pred_proba)),
+                 "average_precision": self.average_precision(
+                     np.asarray(y_true_ohe), np.asarray(y_pred_proba)
+                 ),
+                 "pr_macro": self.pr_macro(
+                     np.asarray(y_true_ohe), np.asarray(y_pred_proba)
+                 ),
+             }
+         return {k: float(v) for k, v in metrics.items()}
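
A minimal usage sketch of the new Scorer class follows (not part of the diff; the toy three-class data, the seed, and the "example" prefix are illustrative assumptions, and the import path is inferred from file 74 in the list above):

    import numpy as np
    from pgsui.impute.unsupervised.nn_scorers import Scorer

    # Hypothetical 3-class problem with random labels and probabilities.
    rng = np.random.default_rng(42)
    y_true = rng.integers(0, 3, size=100)               # integer class labels
    y_pred_proba = rng.dirichlet(np.ones(3), size=100)  # each row sums to 1
    y_pred = y_pred_proba.argmax(axis=1)                # hard class predictions
    y_true_ohe = np.eye(3)[y_true]                      # one-hot encoded labels

    scorer = Scorer(prefix="example", average="macro")

    # Full metric suite: accuracy, f1, precision, recall, roc_auc,
    # average_precision, and pr_macro.
    metrics = scorer.evaluate(y_true, y_pred, y_true_ohe, y_pred_proba)

    # During hyperparameter tuning, objective_mode=True computes only the
    # metric named by tune_metric.
    tuned = scorer.evaluate(
        y_true, y_pred, y_true_ohe, y_pred_proba,
        objective_mode=True,
        tune_metric="pr_macro",
    )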
pgsui/utils/__init__.py CHANGED
File without changes