jarvisplot-1.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvisplot/Figure/adapters.py +773 -0
- jarvisplot/Figure/cards/std_axes_adapter_config.json +23 -0
- jarvisplot/Figure/data_pipelines.py +87 -0
- jarvisplot/Figure/figure.py +1573 -0
- jarvisplot/Figure/helper.py +217 -0
- jarvisplot/Figure/load_data.py +252 -0
- jarvisplot/__init__.py +0 -0
- jarvisplot/cards/a4paper/1x1/ternary.json +6 -0
- jarvisplot/cards/a4paper/2x1/rect.json +106 -0
- jarvisplot/cards/a4paper/2x1/rect5x1.json +344 -0
- jarvisplot/cards/a4paper/2x1/rect_cmap.json +181 -0
- jarvisplot/cards/a4paper/2x1/ternary.json +139 -0
- jarvisplot/cards/a4paper/2x1/ternary_cmap.json +189 -0
- jarvisplot/cards/a4paper/4x1/rect.json +106 -0
- jarvisplot/cards/a4paper/4x1/rect_cmap.json +174 -0
- jarvisplot/cards/a4paper/4x1/ternary.json +139 -0
- jarvisplot/cards/a4paper/4x1/ternary_cmap.json +189 -0
- jarvisplot/cards/args.json +50 -0
- jarvisplot/cards/colors/colormaps.json +140 -0
- jarvisplot/cards/default/output.json +11 -0
- jarvisplot/cards/gambit/1x1/ternary.json +6 -0
- jarvisplot/cards/gambit/2x1/rect_cmap.json +200 -0
- jarvisplot/cards/gambit/2x1/ternary.json +139 -0
- jarvisplot/cards/gambit/2x1/ternary_cmap.json +205 -0
- jarvisplot/cards/icons/JarvisHEP.png +0 -0
- jarvisplot/cards/icons/gambit.png +0 -0
- jarvisplot/cards/icons/gambit_small.png +0 -0
- jarvisplot/cards/style_preference.json +23 -0
- jarvisplot/cli.py +64 -0
- jarvisplot/client.py +6 -0
- jarvisplot/config.py +69 -0
- jarvisplot/core.py +237 -0
- jarvisplot/data_loader.py +441 -0
- jarvisplot/inner_func.py +162 -0
- jarvisplot/utils/__init__.py +0 -0
- jarvisplot/utils/cmaps.py +258 -0
- jarvisplot/utils/interpolator.py +377 -0
- jarvisplot-1.0.1.dist-info/METADATA +80 -0
- jarvisplot-1.0.1.dist-info/RECORD +42 -0
- jarvisplot-1.0.1.dist-info/WHEEL +5 -0
- jarvisplot-1.0.1.dist-info/entry_points.txt +2 -0
- jarvisplot-1.0.1.dist-info/top_level.txt +1 -0
jarvisplot/core.py
ADDED
@@ -0,0 +1,237 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+from pathlib import Path
+from typing import Optional, Any, Dict
+from .cli import CLI
+from loguru import logger
+import os, sys
+from .config import ConfigLoader
+from .data_loader import DataSet
+import io
+from contextlib import redirect_stdout
+jppwd = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+import json
+from .Figure.data_pipelines import SharedContent, DataContext
+
+class JarvisPLOT():
+    def __init__(self) -> None:
+        self.dataset = {}
+        self.variables = {}
+        self.yaml = ConfigLoader()
+        self.style = {}
+        self.profiles = {}
+        self.cli = CLI()
+        self.logger = None
+        self.dataset: Optional[Dict[DataSet]] = []
+        self.shared = None
+        self.ctx = None
+        self.interpolators = None
+
+    def init(self):
+        self.args = self.cli.args.parse_args()
+
+        # Initialize logger early
+        self.init_logger()
+
+        self.load_cmaps()
+
+        self.load_yaml()
+
+        # sys.exit()
+        if self.args.parse_data:
+            if self.args.out is None and not self.args.inplace:
+                self.args.out = self.yaml.path
+            elif self.args.out is None and self.args.inplace:
+                self.args.out = self.yaml.path
+            elif self.args.out is not None and self.args.inplace:
+                self.logger.error("Conflicting arguments: --out and --inplace. Please choose only one.")
+                sys.exit(2)
+            self.load_dataset()
+            self.rename_hdf5_and_renew_yaml()
+        else:
+            self.load_dataset()
+            if self.shared is None:
+                self.shared = SharedContent(logger=self.logger)
+            self.ctx = DataContext(self.shared)
+            for dts in self.dataset:
+                self.ctx.update(dts.name, dts.data)
+
+            # Register external functions (e.g. lazy-loaded interpolators) into the expression runtime.
+            self.load_interpolators()
+
+            self.load_styles()
+            self.plot()
+
+    def load_cmaps(self):
+        """Load and register JarvisPLOT colormaps from the internal JSON bundle."""
+        try:
+            # Prefer the project's colormap setup helper
+            from .utils import cmaps
+
+            json_path = "&JP/jarvisplot/cards/colors/colormaps.json"
+            cmap_summary = cmaps.setup(self.load_path(json_path), force=True)
+
+            if self.logger:
+                self.logger.debug(f"JarvisPLOT: colormaps registered: {cmap_summary}")
+                try:
+                    self.logger.debug(
+                        f"JarvisPLOT: available colormaps sample: {cmaps.list_available()}"
+                    )
+                except Exception:
+                    pass
+        except Exception as e:
+            if self.logger:
+                self.logger.warning(f"JarvisPLOT: failed to initialize colormaps: {e}")
+
+    def load_interpolators(self):
+        """Parse YAML interpolator specs and register them for lazy use in expressions."""
+        cfg = self.yaml.config.get("Functions", None)
+        if cfg is not None:
+            from .inner_func import set_external_funcs_getter
+            from .utils.interpolator import InterpolatorManager
+            mgr = InterpolatorManager.from_yaml(
+                cfg,
+                yaml_dir=self.yaml.dir,
+                shared=self.shared,
+                logger=self.logger,
+            )
+            self.interpolators = mgr
+            set_external_funcs_getter(lambda: (mgr.as_eval_funcs() or {}))
+            if self.interpolators:
+                self.logger.debug(f"JarvisPLOT: Functions registered: {mgr.summary()}")
+
+    def load_styles(self):
+        spp = "&JP/jarvisplot/cards/style_preference.json"
+        self.logger.debug("Loading internal Format set -> {}".format(self.load_path(spp)))
+        with open(self.load_path(spp), 'r') as f1:
+            stl = json.load(f1)
+            for sty, boudle in stl.items():
+                self.style[sty] = {}
+                for kk, vv in boudle.items():
+                    vpath = self.load_path(vv)
+                    if os.path.exists(vpath):
+                        self.logger.debug("Loading '{}' boudle, {} Style \n\t-> {}".format(sty, kk, vpath))
+                        with open(vpath, 'r') as f2:
+                            self.style[sty][kk] = json.load(f2)
+                    else:
+                        self.logger.error("Style Not Found: '{}' boudle, {} Style \n\t-> {}".format(sty, kk, vpath))
+
+
+    def load_path(self, path):
+        if "&JP/" == path[0:4]:
+            path = os.path.abspath( os.path.join(jppwd, path[4:]) )
+        else:
+            path = Path(path).expanduser().resolve()
+        return path
+
+    def plot(self):
+        for fig in self.yaml.config["Figures"]:
+            from .Figure.figure import Figure
+            figobj = Figure()
+            figobj._yaml_dir = self.yaml.dir
+            figobj.config = self.yaml.config
+            figobj.logger = self.logger
+            figobj.jpstyles = self.style
+            figobj.context = self.ctx
+            if getattr(self.args, "no_logo", False):
+                figobj.print = True
+
+            try:
+                if figobj.set(fig):
+                    self.logger.warning(f"Succefully loading figure -> {figobj.name} setting")
+                    figobj.plot()
+            except Exception as e:
+                self.logger.warning(f"Figure {fig.get('name', '<noname>')} failed: {e}")
+                continue
+
+
+
+
+
+            # print(fig)
+
+
+
+    def load_yaml(self):
+        # If no YAML file provided, show a friendly message and help, then return gracefully
+        yaml_path = getattr(self.args, 'file', None)
+        if not yaml_path:
+            self.logger.error("No input YAML file specified. Please provide one.\n")
+            try:
+                buf = io.StringIO()
+                with redirect_stdout(buf):
+                    self.cli.args.print_help()
+                help_text = buf.getvalue()
+                self.logger.warning("JarvisPLOT " + help_text)
+            except Exception:
+                pass
+            return
+        self.parser_yaml(os.path.abspath(yaml_path))
+
+    def init_logger(self) -> None:
+        from datetime import datetime
+        current_time = datetime.now().strftime("%Y-%m-%d[%H:%M:%S]")
+
+        # Remove Loguru's default handler to avoid duplicate console lines
+        try:
+            logger.remove()
+        except Exception:
+            pass
+
+        def global_log_filter(record):
+            return record["extra"].get("JPlot", False)
+
+        def stream_filter(record):
+            return record["extra"].get("to_console", False)
+
+        def custom_format(record):
+            module = record["extra"].get("module", "No module")
+            return f"\n\n<cyan>{module}</cyan> \n\t-> <green>{record['time']:MM-DD HH:mm:ss.SSS}</green> - [<level>{record['level']}</level>] >>> \n<level>{record['message']}</level> "
+
+        logger.add(
+            sys.stdout,
+            filter=stream_filter,
+            format=custom_format,
+            colorize=True,
+            enqueue=True,
+            level="DEBUG" if self.args.debug else "WARNING"
+        )
+        self.logger = logger.bind(module="JarvisPLOT", to_console=True, JPlot=True)
+        self.logger.warning("JarvisPLOT logging system initialized successful!")
+        if self.args.debug:
+            self.logger.debug("JarvisPLOT run in debug mode!")
+
+    def parser_yaml(self, file):
+        self.yaml.file = os.path.abspath(file)
+        self.yaml.load()
+        self.logger.debug("Resolved YAML file -> {}".format(self.yaml.path))
+
+    def load_dataset(self):
+        dts = self.yaml.config['DataSet']
+        for dt in dts:
+            dataset = DataSet()
+            dataset.logger = self.logger
+            dataset.setinfo(dt, self.yaml.dir)
+            self.dataset.append(dataset)
+
+    def rename_hdf5_and_renew_yaml(self):
+        for dt in self.dataset:
+            self.logger.warning("DataSet -> {}, type -> {}".format(dt.name, dt.type))
+            vmap_dict = {}
+            vmap_list = []
+            if dt.type == "hdf5":
+                for ii, kk in enumerate(dt.keys):
+                    vname = "Var{}@{}".format(ii, dt.name)
+                    vmap_dict[kk] = vname
+                    vmap_list.append({
+                        "source_name": r"{}".format(kk),
+                        "new_name": vname
+                    })
+                self.yaml.update_dataset(dt.name, {"columnmap": {"list": vmap_list}})
+                dt.rename_columns(vmap_dict)
+                print(dt.keys)
+
+        import yaml
+        with open(self.args.out, 'w', encoding='utf-8') as f1:
+            yaml.dump(self.yaml.config, f1, sort_keys=False, default_flow_style=False, indent=2)
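For orientation between the two file diffs: core.py drives everything from a single YAML file whose DataSet, Figures, and optional Functions sections it reads, resolving "&JP/"-prefixed paths against the installed package directory and dataset paths against the YAML file's own directory. The sketch below is a hypothetical, minimal configuration written as a Python dict (the package itself reads it from YAML); only the key names mirror what core.py and data_loader.py look up, while every concrete value (file names, the HDF5 group, column names) is invented for illustration.

    # Hypothetical configuration sketch; keys mirror what core.py / data_loader.py read,
    # all concrete values are made up for illustration.
    import yaml

    config = {
        "DataSet": [
            {
                "name": "scan",                  # used to tag renamed columns ("Var0@scan", ...)
                "path": "results/scan.hdf5",     # resolved relative to the YAML file's directory
                "type": "hdf5",
                "dataset": "/data",              # HDF5 group whose datasets are merged into one DataFrame
                "is_gambit": True,               # enables *_isvalid filtering in gambit_filtering()
                "columnmap": {"list": [
                    {"source_name": "/data/LogLike", "new_name": "Var0@scan"},
                ]},
            },
            {"name": "table", "path": "results/points.csv", "type": "csv"},
        ],
        "Functions": [],   # optional; handed to InterpolatorManager.from_yaml when present
        "Figures": [],     # each entry is passed to Figure.set() / Figure.plot()
    }

    print(yaml.dump(config, sort_keys=False))

Dumping the dict with PyYAML, as above, gives a file shaped like what load_yaml()/parser_yaml() expect, assuming the referenced data files actually exist.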
jarvisplot/data_loader.py
ADDED
@@ -0,0 +1,441 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+from pathlib import Path
+from typing import Optional, Any, Dict, List
+import yaml
+import os, sys
+import pandas as pd
+import h5py
+import numpy as np
+
+class DataSet():
+    def __init__(self):
+        self._file: Optional[str] = None
+        self.path: Optional[str] = None
+        self._type: Optional[str] = None
+        self.base: Optional[str] = None
+        self.keys: Optional[List[str]] = None
+        self._logger = None
+        self.data = None
+        self.group = None
+        self.is_gambit = False
+
+    def setinfo(self, dtinfo, rootpath):
+        self.file = os.path.join(rootpath, dtinfo['path'])
+        self.name = dtinfo['name']
+        self.type = dtinfo['type'].lower()
+        if self.type == "csv":
+            self.load_csv()
+        if self.type == "hdf5" and dtinfo.get('dataset'):
+            self.group = dtinfo['dataset']
+            self.is_gambit = dtinfo.get('is_gambit', False)
+            self.columnmap = dtinfo.get('columnmap', {})
+            self.load_hdf5()
+
+
+
+
+    @property
+    def file(self) -> Optional[str]:
+        return self._file
+
+    @property
+    def type(self) -> Optional[str]:
+        return self._type
+
+    @property
+    def logger(self):
+        return self._logger
+
+    @logger.setter
+    def logger(self, logger) -> None:
+        if logger is None:
+            self._logger = None
+        self._logger = logger
+
+    @file.setter
+    def file(self, value: Optional[str]) -> None:
+        if value is None:
+            self._file = None
+            self.path = None
+            self.base = None
+
+        p = Path(value).expanduser().resolve()
+        self._file = str(p)
+        self.path = os.path.abspath(p)
+        self.base = os.path.basename(p)
+
+    @type.setter
+    def type(self, value: Optional[str]) -> None:
+        if value is None:
+            self._type = None
+
+        self._type = str(value).lower()
+        self.logger.debug("Dataset -> {} is assigned as \n\t-> {}\ttype".format(self.base, self.type))
+
+    def load_csv(self):
+        if self.type == "csv":
+            if self.logger:
+                self.logger.debug("Loading CSV from {}".format(self.path))
+
+            self.data = pd.read_csv(self.path)
+            self.keys = list(self.data.columns)
+
+            # Emit the same pretty summary used for HDF5 datasets
+            summary_name = f" CSV loaded!\n\t name -> {self.name}\n\t path -> {self.path}"
+            try:
+                summary_msg = dataframe_summary(self.data, name=summary_name)
+            except Exception:
+                # Fallback minimal summary if something goes wrong
+                summary_msg = f"CSV loaded {summary_name}\nDataFrame shape: {self.data.shape}"
+
+            if self.logger:
+                self.logger.warning("\n" + summary_msg)
+            else:
+                print(summary_msg)
+
+    def load_hdf5(self):
+        def _iter_datasets(hobj, prefix=""):
+            for k, v in hobj.items():
+                path = f"{prefix}/{k}" if prefix else k
+                if isinstance(v, h5py.Dataset):
+                    yield path, v
+                elif isinstance(v, h5py.Group):
+                    yield from _iter_datasets(v, path)
+
+        def _pick_dataset(hfile: h5py.File):
+            # Heuristic: prefer structured arrays, then 2D arrays
+            best = None
+            for path, ds in _iter_datasets(hfile):
+                shape = getattr(ds, "shape", ())
+                dt = getattr(ds, "dtype", None)
+                score = 0
+                if dt is not None and getattr(dt, "names", None):
+                    score += 10  # structured array → good for DataFrame
+                if len(shape) == 2:
+                    score += 5
+                    if shape[1] >= 2:
+                        score += 1
+                if best is None or score > best[0]:
+                    best = (score, path, ds)
+            if best is None:
+                raise RuntimeError("No datasets found in HDF5 file.")
+            _, path, ds = best
+            return path, ds[()]
+
+        def _to_dataframe(arr, name=""):
+            if isinstance(arr, np.ndarray) and getattr(arr.dtype, "names", None):
+                df = pd.DataFrame.from_records(arr)
+                # prefix columns to keep dataset origin
+                if name:
+                    df.columns = [f"{name}:{c}" for c in df.columns]
+                return df
+            elif hasattr(arr, "ndim") and arr.ndim == 2:
+                cols = [f"col{i}" for i in range(arr.shape[1])]
+                if name:
+                    cols = [f"{name}:{c}" for c in cols]
+                return pd.DataFrame(arr, columns=cols)
+            else:
+                col = name if name else "value"
+                return pd.DataFrame({col: np.ravel(arr)})
+
+        def _collect_group_datasets(g: h5py.Group, prefix: str=""):
+            """Recursively collect (path, ndarray) for all datasets under a group."""
+            items = []
+            for k, v in g.items():
+                path = f"{prefix}/{k}" if prefix else k
+                if isinstance(v, h5py.Dataset):
+                    items.append((path, v[()]))
+                elif isinstance(v, h5py.Group):
+                    items.extend(_collect_group_datasets(v, path))
+            return items
+
+        with h5py.File(self.path, "r") as f1:
+            # Log top-level keys to help the user
+            print_hdf5_tree_ascii(f1[self.group], root_name=self.group, logger=self.logger)
+
+            if self.group in f1 and isinstance(f1[self.group], h5py.Group):
+                group = f1[self.group]
+                self.logger.debug("Loading HDF5 group '{}' from {}".format(self.group, self.path))
+                if self.is_gambit:
+                    self.logger.debug("GAMBIT Standard Output")
+
+                # Collect all datasets under the group (recursively)
+                items = _collect_group_datasets(group, prefix=self.group)
+                if not items:
+                    raise RuntimeError(f"HDF5 group '{self.group}' contains no datasets.")
+
+                # If there is only one dataset, behave like before
+                kkeys = []
+                if len(items) == 1:
+                    path, arr = items[0]
+                    dfs = [(path, _to_dataframe(arr, name=path))]
+                    kkeys.append(path)
+                else:
+                    # Build a dataframe per dataset
+                    dfs = [(p, _to_dataframe(arr, name=p)) for p, arr in items]
+                    kkeys = [p for p, arr in items]
+
+                # Try to concatenate along columns; all datasets must have identical row counts
+                lengths = {len(df) for _, df in dfs}
+                if len(lengths) == 1:
+                    # safe to concat by columns → single merged DataFrame only
+                    self.data = pd.concat([df for _, df in dfs], axis=1)
+
+                    self.keys = list(self.data.columns)
+
+                    # Deal GAMBIT filtering
+                    if self.is_gambit:
+                        self.gambit_filtering(kkeys)
+                    if self.columnmap.get("list", False):
+                        self.logger.warning("{}: Loading Column Maps".format(self.name))
+                        cmap = {}
+                        for item in self.columnmap.get("list", False):
+                            cmap[item['source_name']] = item['new_name']
+                        self.rename_columns(cmap)
+
+                    # Emit a pretty summary BEFORE returning
+                    summary_name = f" HDF5 loaded!\n\t name -> {self.name}\n\t group -> {self.group}\n\t path -> {self.path}"
+                    summary_msg = dataframe_summary(self.data, name=summary_name)
+                    if self.logger:
+                        self.logger.warning("\n" + summary_msg)
+                    else:
+                        print(summary_msg)
+
+                    return  # IMPORTANT: stop here; avoid falling through to single-dataset path
+                else:
+                    # Not mergeable → print tree for diagnostics and raise a hard error
+                    try:
+                        print_hdf5_tree_ascii(group, root_name=self.group, logger=self.logger)
+                    except Exception:
+                        pass
+                    shapes = {p: df.shape for p, df in dfs}
+                    raise ValueError(
+                        "HDF5 group '{grp}' is invalid for merging: datasets have different row counts. "
+                        "Please fix the input or choose a different dataset/group. Details: {details}".format(
+                            grp=self.group,
+                            details=shapes,
+                        )
+                    )
+            else:
+                path, arr = _pick_dataset(f1)
+
+    def gambit_filtering(self, kkeys):
+        isvalids = []
+        for kk in kkeys:
+            if "_isvalid" == kk[-8:] and kk[:-8] in self.keys:
+                isvalids.append(kk)
+        self.logger.warning("Filtering Invalid Data from GAMBIT Output")
+        sps = self.data.shape
+        mask = self.data[isvalids].all(axis=1)
+        self.data = self.data[mask].drop(columns=isvalids)
+        self.logger.warning("DataSet Shape: \n\t Before filtering -> {}\n\t After filtering -> {}".format(sps, self.data.shape))
+        self.keys = list(self.data.columns)
+
+    def rename_columns(self, vdict):
+        self.data = self.data.rename(columns=vdict)
+        self.keys = list(self.data.columns)
+
+
+def dataframe_summary(df: pd.DataFrame, name: str = "") -> str:
+    """Pretty, compact multi-line summary for a DataFrame.
+
+    Sections:
+      • header: dataset path (if any) and shape
+      • columns table (first max_cols): name | dtype | non-null% | unique (for small card.) | min..max (numeric)
+      • tiny preview of first rows/cols
+    """
+    import pandas as _pd
+    import numpy as _np
+    import shutil
+
+    def term_width(default=120):
+        try:
+            return shutil.get_terminal_size().columns
+        except Exception:
+            return default
+
+    def trunc(s: str, width: int) -> str:
+        if len(s) <= width:
+            return s
+        # keep both ends
+        head = max(0, width // 2 - 2)
+        tail = max(0, width - head - 3)
+        return s[:head] + "..." + s[-tail:]
+
+    nrows, ncols = df.shape
+    cols = list(df.columns)
+    show_cols = cols[:]
+
+    # Compute per-column stats for the shown columns
+    dtypes = df[show_cols].dtypes.astype(str)
+    non_null_pct = (df[show_cols].notna().sum() / max(1, nrows) * 100.0).round(1)
+
+    # numeric min/max; categorical unique count (cap at 20)
+    is_num = [_pd.api.types.is_numeric_dtype(df[c]) for c in show_cols]
+    num_cols = [c for c, ok in zip(show_cols, is_num) if ok]
+    cat_cols = [c for c, ok in zip(show_cols, is_num) if not ok]
+
+    num_min = {}
+    num_max = {}
+    if num_cols:
+        try:
+            desc = df[num_cols].agg(["min", "max"]).T
+            for c in num_cols:
+                mn = desc.loc[c, "min"]
+                mx = desc.loc[c, "max"]
+                num_min[c] = mn
+                num_max[c] = mx
+        except Exception:
+            pass
+
+    uniques = {}
+    if cat_cols:
+        for c in cat_cols:
+            try:
+                u = df[c].nunique(dropna=True)
+                uniques[c] = int(u)
+            except Exception:
+                pass
+
+    # Build a compact table
+    tw = term_width()
+    name_w = 34 if tw < 120 else 48
+    dtype_w = 10
+    nn_w = 8
+    stat_w = max(12, tw - (name_w + dtype_w + nn_w + 8))  # 8 for separators/padding
+
+    def fmt_stat(c: str) -> str:
+        if c in num_min and c in num_max:
+            try:
+                mn = num_min[c]
+                mx = num_max[c]
+                return f"{mn:>10.4g} .. {mx:>10.4g}"
+            except Exception:
+                return f"{str(num_min[c]):>10} .. {str(num_max[c]):>10}"
+        if c in uniques:
+            return f"uniq={uniques[c]}"
+        return ""
+
+    head_lines = []
+    if name:
+        head_lines.append(f"Selected dataset:{name}")
+    head_lines.append(f"DataFrame shape:\n\t {nrows}\t rows × {ncols} \tcols\n")
+    head_lines.append("=== DataFrame Summary Table ===")
+
+    # Column table header
+    rows = []
+    header = f"{'name':<{name_w}} {'dtype':<{dtype_w}} {'nonnull%':>{nn_w}} {' [min] .. [max]':<{stat_w}}"
+    rows.append("-" * len(header))
+    rows.append(header)
+    rows.append("-" * len(header))
+
+    for c in show_cols:
+        c_name = trunc(str(c), name_w)
+        c_dtype = trunc(dtypes[c], dtype_w)
+        c_nn = f"{non_null_pct[c]:.1f}%" if nrows else "n/a"
+        c_stat = trunc(fmt_stat(c), stat_w)
+        rows.append(f"{c_name:<{name_w}} {c_dtype:<{dtype_w}} {c_nn:>{nn_w}} {c_stat:<{stat_w}}")
+    rows.append("-" * len(header))
+
+
+    parts = []
+    parts.extend(head_lines)
+    if show_cols:
+        parts.extend(rows)
+
+    return "\n".join(parts)
+
+
+def print_hdf5_tree_ascii(hobj, root_name='/', logger=None, max_depth=None):
+    """
+    Pretty-print an ASCII tree of an h5py.File or Group.
+
+    Example output:
+        /
+        ├── data (Group)
+        │   ├── samples (Dataset, shape=(1000, 3), dtype=float64)
+        │   └── extra (Group)
+        │       ├── X (Dataset, shape=(..., ...), dtype=...)
+        │       └── Y (Dataset, shape=(..., ...), dtype=...)
+        └── metadata (Group)
+            └── attrs (Dataset, shape=(...,), dtype=...)
+
+    Parameters
+    ----------
+    hobj : h5py.File or h5py.Group
+    root_name : str
+        Name shown at the root.
+    logger : logging-like object (optional)
+        If provided, uses logger.debug(...) instead of print.
+    max_depth : int or None
+        Limit recursion depth (0=only root). None = unlimited.
+    """
+    try:
+        import h5py  # noqa: F401
+    except Exception:
+        raise RuntimeError("h5py is required for HDF5 tree printing.")
+
+    def emit(msg):
+        if logger is None:
+            print(msg)
+        else:
+            try:
+                logger.debug(msg)
+            except Exception:
+                print(msg)
+
+    def is_dataset(x):
+        import h5py
+        return isinstance(x, h5py.Dataset)
+
+    def is_group(x):
+        import h5py
+        return isinstance(x, h5py.Group)
+
+    def fmt_leaf(name, obj):
+        # maxlen = 60
+        def shorten(n):
+            if len(n) > 50:
+                return f"{n[:15]}...{n[-30:]}"
+            else:
+                return "{:48}".format(n)
+            # return n
+        if is_dataset(obj):
+            shp = getattr(obj, "shape", None)
+            # dt = getattr(obj, "dtype", None)
+            extra = []
+            if shp is not None:
+                extra.append(f"shape -> {shp}")
+            # if dt is not None:
+            #     extra.append(f"dtype={dt}")
+            suffix = f"(Dataset), {', '.join(extra)}" if extra else "(Dataset)"
+            return f"{shorten(name)}{suffix:>40}"
+        elif is_group(obj):
+            return f"{shorten(name)} (Group)"
+        return shorten(name)
+
+    def walk(group, prefix="", depth=0, last=True):
+        lines = []
+        if depth == 0:
+            lines.append("│ {} (Group)".format(root_name))
+        if max_depth is not None and depth >= max_depth:
+            return
+
+        keys = list(group.keys())
+        keys.sort()
+        n = len(keys)
+        for i, key in enumerate(keys):
+            child = group[key]
+            is_last = (i == n - 1)
+            connector = "└── " if is_last else "├── "
+            line = prefix + connector + fmt_leaf(key, child)
+            lines.append(line)
+
+            if is_group(child):
+                extension = "    " if is_last else "│   "
+                walk(child, prefix + extension, depth + 1, is_last)
+        emit("\n".join(lines))
+
+    walk(hobj, "", 0, True)
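As a rough stand-alone usage sketch (not shipped with the package): DataSet can be exercised directly by attaching a logger first (the type setter logs through it unconditionally, so passing None would fail), then calling setinfo with an entry shaped like the YAML DataSet items; the loaded pandas DataFrame ends up in .data and its column names in .keys. The CSV file name and its contents below are invented for illustration.

    # Hypothetical stand-alone use of DataSet; "points.csv" and its contents are made up.
    import pandas as pd
    from loguru import logger
    from jarvisplot.data_loader import DataSet, dataframe_summary

    # Create a tiny CSV so the example is self-contained.
    pd.DataFrame({"m0": [100.0, 250.0], "loglike": [-12.3, -9.8]}).to_csv("points.csv", index=False)

    ds = DataSet()
    ds.logger = logger                    # the type setter calls logger.debug(), so a logger is required
    ds.setinfo({"name": "demo", "path": "points.csv", "type": "csv"}, rootpath=".")

    print(ds.keys)                        # ['m0', 'loglike']
    print(dataframe_summary(ds.data, name="demo"))

Within the full pipeline the same calls are made by JarvisPLOT.load_dataset() in core.py, with rootpath taken from the YAML file's directory.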