pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
- pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
- pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +909 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1424 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1118 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
- pgsui/impute/unsupervised/imputers/vae.py +1228 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.0.dist-info/RECORD +0 -75
- pg_sui-0.2.0.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from typing import Any, Iterable, List, Mapping, Optional, Sequence, Tuple
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
import numpy as np
|
|
8
|
+
except Exception:
|
|
9
|
+
np = None # type: ignore
|
|
10
|
+
|
|
11
|
+
# Optional Rich console; falls back to ASCII if not installed.
|
|
12
|
+
try:
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from rich.table import Table
|
|
15
|
+
from rich.text import Text
|
|
16
|
+
|
|
17
|
+
_HAS_RICH = True
|
|
18
|
+
_CONSOLE = Console()
|
|
19
|
+
except Exception:
|
|
20
|
+
_HAS_RICH = False
|
|
21
|
+
_CONSOLE = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class PrettyMetrics:
    """Pretty-print and export nested metric dictionaries.

    Handles scalars, 1D sequences, and nested dicts. Summarizes sequences with
    mean ± std and last value. Uses `rich` colors if available.

    Attributes:
        metrics (Mapping[str, Any]): Metrics payload.
        precision (int): Decimal precision for numeric formatting.
        title (Optional[str]): Optional table title.
    """

    # Substrings (matched case-insensitively) marking a metric where a larger
    # value is better. Checked before the lower-is-better list.
    _HIGHER_IS_BETTER = (
        "acc",
        "f1",
        "auc",
        "precision",
        "recall",
        "specificity",
        "r2",
        "matthews",
        "iou",
        "dice",
    )

    # Substrings (matched case-insensitively) marking a metric where a smaller
    # value is better.
    _LOWER_IS_BETTER = (
        "loss",
        "mae",
        "mse",
        "rmse",
        "nll",
        "perplexity",
        "ece",
        "brier",
        "cross-entropy",
    )

    def __init__(
        self,
        metrics: Mapping[str, Any],
        *,
        precision: int = 4,
        title: Optional[str] = "Metrics",
    ) -> None:
        """Initialize the printer.

        Args:
            metrics (Mapping[str, Any]): Mapping of metric names to values. Values can be scalars, 1D sequences (lists or 1D numpy arrays), or nested dicts.
            precision (int): Decimal places for numeric formatting.
            title (Optional[str]): Optional table title shown when rendering.
        """
        self.metrics = metrics
        self.precision = precision
        self.title = title

    # ------------------------- Public API ---------------------------------

    def render(self) -> None:
        """Print the table to stdout.

        Uses a Rich table if Rich is installed. Otherwise prints a clean ASCII table.
        """
        rows = self._rows()

        if _HAS_RICH:
            table = self._build_rich_table(rows)
            if _CONSOLE is not None:
                _CONSOLE.print(table)
            return

        # ASCII fallback
        print("\n".join(self._ascii_lines(rows)))

    def to_text(self) -> str:
        """Return the rendered table as plain text.

        Uses a recording Rich console when available, else builds the ASCII
        table used by render(). Nothing is written to stdout.

        Returns:
            str: Pretty-printed metrics table.
        """
        rows = self._rows()
        if _HAS_RICH:
            import io

            # A recording Console still writes to its `file` (stdout by
            # default); send that output to a throwaway buffer so this method
            # only returns the text instead of also printing it.
            console = Console(record=True, file=io.StringIO())
            console.print(self._build_rich_table(rows))
            return console.export_text(clear=False)

        return "\n".join(self._ascii_lines(rows))

    def to_dataframe(self):
        """Return a tidy pandas DataFrame of flattened metrics.

        Finite numeric scalars become floats, numeric sequences are expanded to
        one row per element (named ``metric[i]``), and everything else is
        stored as its ``str()`` representation.

        Returns:
            pandas.DataFrame: Columns ['metric', 'value'] with scalars and sequence elements.

        Raises:
            ImportError: If pandas is not installed.
        """
        try:
            import pandas as pd
        except Exception as e:  # pragma: no cover
            raise ImportError("pandas is required for to_dataframe()") from e

        out: List[Tuple[str, Any]] = []
        for k, v in self._flatten(self.metrics):
            if self._is_numeric(v):
                out.append((k, float(v)))  # type: ignore[arg-type]
            elif self._is_num_seq(v):
                seq = self._to_float_seq(v)
                out.extend((f"{k}[{i}]", float(x)) for i, x in enumerate(seq))
            else:
                out.append((k, str(v)))
        return pd.DataFrame(out, columns=["metric", "value"])

    def to_json(self) -> str:
        """Return a compact JSON string of the metrics.

        Objects exposing ``tolist()`` (numpy arrays and numpy scalars) are
        converted to plain Python values so the payload types accepted by the
        constructor can be serialized.

        Returns:
            str: Compact JSON representation, suitable for logging artifacts.

        Raises:
            TypeError: If a value is neither JSON-serializable nor convertible
                through ``tolist()``.
        """
        import json

        return json.dumps(
            self.metrics,
            separators=(",", ":"),
            ensure_ascii=False,
            default=self._json_default,
        )

    # ----------------------- Internal helpers -----------------------------

    @staticmethod
    def _json_default(obj: Any) -> Any:
        """Fallback encoder for ``json.dumps``: unwrap objects via ``tolist()``."""
        tolist = getattr(obj, "tolist", None)
        if callable(tolist):
            return tolist()
        raise TypeError(
            f"Object of type {type(obj).__name__} is not JSON serializable"
        )

    def _rows(self) -> List[Tuple[str, str, Optional[float]]]:
        """Build rows as (metric_name, formatted_value, last_numeric_for_coloring)."""
        rows: List[Tuple[str, str, Optional[float]]] = []
        for name, val in self._flatten(self.metrics):
            if self._is_numeric(val):
                val_num = float(val)
                rows.append((name, self._format_scalar(val_num), val_num))
            elif self._is_num_seq(val):
                seq = self._to_float_seq(val)
                summary = self._fmt_mean_std(seq)
                last_val = seq[-1] if seq else None
                rows.append((name, summary, last_val))
            else:
                rows.append((name, str(val), None))
        return rows

    def _build_rich_table(
        self, rows: Sequence[Tuple[str, str, Optional[float]]]
    ) -> "Table":
        """Build the two-column Rich table shared by render() and to_text()."""
        table = Table(title=self.title or None, header_style="bold", show_lines=False)
        table.add_column("Metric", no_wrap=True)
        table.add_column("Value", justify="right")
        for metric, value, last_val in rows:
            table.add_row(metric, self._color_val_rich(metric, value, last_val))
        return table

    def _ascii_lines(
        self, rows: Sequence[Tuple[str, str, Optional[float]]]
    ) -> List[str]:
        """Build the lines of the ASCII fallback table shared by render() and to_text()."""
        # Column widths fit the widest cell but never shrink below the header.
        m_w = max(len("Metric"), max((len(r[0]) for r in rows), default=0))
        v_w = max(len("Value"), max((len(r[1]) for r in rows), default=0))
        title = (self.title or "Metrics").strip()
        line = "=" * (m_w + v_w + 5)
        parts = [
            title,
            line,
            f"{'Metric':<{m_w}} | {'Value':>{v_w}}",
            "-" * (m_w + v_w + 5),
        ]
        parts += [f"{metric:<{m_w}} | {value:>{v_w}}" for metric, value, _ in rows]
        parts.append(line)
        return parts

    @staticmethod
    def _flatten(d: Mapping[str, Any], prefix: str = "") -> Iterable[Tuple[str, Any]]:
        """Yield (name, value) leaves, joining nested keys with ' → '."""
        for k, v in d.items():
            name = f"{prefix} → {k}" if prefix else str(k)
            if isinstance(v, Mapping):
                yield from PrettyMetrics._flatten(v, name)
            else:
                yield name, v

    def _fmt_mean_std(self, seq: Sequence[float]) -> str:
        """Format mean ± std and last value for a numeric sequence.

        The standard deviation is the sample estimate (ddof=1); it is reported
        as 0.0 for a single element. An empty sequence yields NaN fields.
        """
        if np is not None:
            arr = np.asarray(seq, dtype=float)
            mean = float(arr.mean()) if arr.size else float("nan")
            std = float(arr.std(ddof=1)) if arr.size > 1 else 0.0
            last = float(arr[-1]) if arr.size else float("nan")
        else:
            n = len(seq)
            if n == 0:
                mean = std = last = float("nan")
            else:
                mean = sum(seq) / n
                var = sum((x - mean) ** 2 for x in seq) / (n - 1) if n > 1 else 0.0
                std = var**0.5
                last = seq[-1]
        return f"{mean:.{self.precision}f} ± {std:.{self.precision}f} (last {last:.{self.precision}f})"

    @staticmethod
    def _to_float_seq(val: Any) -> List[float]:
        """Convert a numeric sequence or 1D numpy array to a list of floats."""
        if np is not None and hasattr(val, "tolist"):
            return list(map(float, val.tolist()))
        return list(map(float, val))

    def _format_scalar(self, v: float) -> str:
        """Format a scalar, using scientific notation for very large or very small magnitudes."""
        if abs(v) >= 1000 or (0 < abs(v) < 1e-3):
            return f"{v:.{self.precision}e}"
        return f"{v:.{self.precision}f}"

    @staticmethod
    def _is_real(x: Any) -> bool:
        """Return True for int/float and numpy integer/floating scalars, never for bools."""
        if isinstance(x, bool):
            return False
        if isinstance(x, (int, float)):
            return True
        # numpy scalars such as float32/int64 do not subclass int/float.
        return np is not None and isinstance(x, (np.integer, np.floating))

    @staticmethod
    def _is_numeric(x: Any) -> bool:
        """Return True if x is a finite real scalar (bools excluded)."""
        if not PrettyMetrics._is_real(x):
            return False
        try:
            return math.isfinite(float(x))
        except OverflowError:
            # An int too large for a float cannot be formatted numerically.
            return False

    @staticmethod
    def _is_num_seq(x: Any) -> bool:
        """Return True if x is a non-empty 1D sequence of real numbers.

        Accepts 1D numpy arrays with an integer or floating dtype and generic
        sequences whose elements are all real numbers. Strings, bytes-like
        objects and sequences containing bools are rejected, matching the
        scalar rule in ``_is_numeric``.
        """
        if np is not None and isinstance(x, np.ndarray):
            return bool(
                x.ndim == 1
                and x.size > 0
                and (
                    np.issubdtype(x.dtype, np.integer)
                    or np.issubdtype(x.dtype, np.floating)
                )
            )
        return (
            isinstance(x, Sequence)
            # bytes/bytearray iterate as ints but are not metric series.
            and not isinstance(x, (str, bytes, bytearray))
            and len(x) > 0
            and all(PrettyMetrics._is_real(v) for v in x)
        )

    @staticmethod
    def _better_is_higher(metric_name: str) -> Optional[bool]:
        """Guess the preferred direction of a metric from its name.

        Returns:
            Optional[bool]: True if higher is better, False if lower is better,
            None when the name matches neither keyword list.
        """
        name = metric_name.lower()
        if any(k in name for k in PrettyMetrics._HIGHER_IS_BETTER):
            return True
        if any(k in name for k in PrettyMetrics._LOWER_IS_BETTER):
            return False
        return None

    def _color_val_rich(
        self, metric: str, value_text: str, value_num: Optional[float]
    ) -> "Text | str":
        """Wrap the formatted value in a Rich Text, colored by how good it looks.

        Args:
            metric (str): Metric name, used to infer the preferred direction.
            value_text (str): Already formatted value to display.
            value_num (Optional[float]): Numeric value used for coloring, or None.

        Returns:
            Text | str: Styled Rich Text, or the plain string when Rich is unavailable.
        """
        if not _HAS_RICH:
            return value_text
        t = Text(value_text)
        pref = self._better_is_higher(metric)
        if value_num is None or pref is None:
            return t
        if 0.0 <= value_num <= 1.0:
            # Map onto a "goodness" score in [0, 1] regardless of direction.
            good = value_num if pref else 1.0 - value_num
            if good >= 0.8:
                t.stylize("bold green")
            elif good >= 0.6:
                t.stylize("green")
            elif good <= 0.3:
                t.stylize("red")
        else:
            # Outside [0, 1] only lower-is-better metrics at or below 0.1 are
            # highlighted; given the range check above these are negative values.
            if pref is False and value_num <= 0.1:
                t.stylize("bold green")
        return t
|