jarvisplot-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of jarvisplot might be problematic.

Files changed (42)
  1. jarvisplot/Figure/adapters.py +773 -0
  2. jarvisplot/Figure/cards/std_axes_adapter_config.json +23 -0
  3. jarvisplot/Figure/data_pipelines.py +87 -0
  4. jarvisplot/Figure/figure.py +1573 -0
  5. jarvisplot/Figure/helper.py +217 -0
  6. jarvisplot/Figure/load_data.py +252 -0
  7. jarvisplot/__init__.py +0 -0
  8. jarvisplot/cards/a4paper/1x1/ternary.json +6 -0
  9. jarvisplot/cards/a4paper/2x1/rect.json +106 -0
  10. jarvisplot/cards/a4paper/2x1/rect5x1.json +344 -0
  11. jarvisplot/cards/a4paper/2x1/rect_cmap.json +181 -0
  12. jarvisplot/cards/a4paper/2x1/ternary.json +139 -0
  13. jarvisplot/cards/a4paper/2x1/ternary_cmap.json +189 -0
  14. jarvisplot/cards/a4paper/4x1/rect.json +106 -0
  15. jarvisplot/cards/a4paper/4x1/rect_cmap.json +174 -0
  16. jarvisplot/cards/a4paper/4x1/ternary.json +139 -0
  17. jarvisplot/cards/a4paper/4x1/ternary_cmap.json +189 -0
  18. jarvisplot/cards/args.json +50 -0
  19. jarvisplot/cards/colors/colormaps.json +140 -0
  20. jarvisplot/cards/default/output.json +11 -0
  21. jarvisplot/cards/gambit/1x1/ternary.json +6 -0
  22. jarvisplot/cards/gambit/2x1/rect_cmap.json +200 -0
  23. jarvisplot/cards/gambit/2x1/ternary.json +139 -0
  24. jarvisplot/cards/gambit/2x1/ternary_cmap.json +205 -0
  25. jarvisplot/cards/icons/JarvisHEP.png +0 -0
  26. jarvisplot/cards/icons/gambit.png +0 -0
  27. jarvisplot/cards/icons/gambit_small.png +0 -0
  28. jarvisplot/cards/style_preference.json +23 -0
  29. jarvisplot/cli.py +64 -0
  30. jarvisplot/client.py +6 -0
  31. jarvisplot/config.py +69 -0
  32. jarvisplot/core.py +237 -0
  33. jarvisplot/data_loader.py +441 -0
  34. jarvisplot/inner_func.py +162 -0
  35. jarvisplot/utils/__init__.py +0 -0
  36. jarvisplot/utils/cmaps.py +258 -0
  37. jarvisplot/utils/interpolator.py +377 -0
  38. jarvisplot-1.0.0.dist-info/METADATA +93 -0
  39. jarvisplot-1.0.0.dist-info/RECORD +42 -0
  40. jarvisplot-1.0.0.dist-info/WHEEL +5 -0
  41. jarvisplot-1.0.0.dist-info/entry_points.txt +2 -0
  42. jarvisplot-1.0.0.dist-info/top_level.txt +1 -0
jarvisplot/core.py ADDED
@@ -0,0 +1,237 @@
+ #!/usr/bin/env python3
+
+ from __future__ import annotations
+ from pathlib import Path
+ from typing import Optional, Any, Dict, List
+ from .cli import CLI
+ from loguru import logger
+ import os, sys
+ from .config import ConfigLoader
+ from .data_loader import DataSet
+ import io
+ from contextlib import redirect_stdout
+ jppwd = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+ import json
+ from .Figure.data_pipelines import SharedContent, DataContext
+
+ class JarvisPLOT():
+     def __init__(self) -> None:
+         self.variables = {}
+         self.yaml = ConfigLoader()
+         self.style = {}
+         self.profiles = {}
+         self.cli = CLI()
+         self.logger = None
+         self.dataset: List[DataSet] = []
+         self.shared = None
+         self.ctx = None
+         self.interpolators = None
+
+     def init(self):
+         self.args = self.cli.args.parse_args()
+
+         # Initialize logger early
+         self.init_logger()
+
+         self.load_cmaps()
+
+         self.load_yaml()
+
+         if self.args.parse_data:
+             if self.args.out is None and not self.args.inplace:
+                 self.args.out = self.yaml.path
+             elif self.args.out is None and self.args.inplace:
+                 self.args.out = self.yaml.path
+             elif self.args.out is not None and self.args.inplace:
+                 self.logger.error("Conflicting arguments: --out and --inplace. Please choose only one.")
+                 sys.exit(2)
+             self.load_dataset()
+             self.rename_hdf5_and_renew_yaml()
+         else:
+             self.load_dataset()
+             if self.shared is None:
+                 self.shared = SharedContent(logger=self.logger)
+                 self.ctx = DataContext(self.shared)
+             for dts in self.dataset:
+                 self.ctx.update(dts.name, dts.data)
+
+             # Register external functions (e.g. lazy-loaded interpolators) into the expression runtime.
+             self.load_interpolators()
+
+             self.load_styles()
+             self.plot()
+
+     def load_cmaps(self):
+         """Load and register JarvisPLOT colormaps from the internal JSON bundle."""
+         try:
+             # Prefer the project's colormap setup helper
+             from .utils import cmaps
+
+             json_path = "&JP/jarvisplot/cards/colors/colormaps.json"
+             cmap_summary = cmaps.setup(self.load_path(json_path), force=True)
+
+             if self.logger:
+                 self.logger.debug(f"JarvisPLOT: colormaps registered: {cmap_summary}")
+                 try:
+                     self.logger.debug(
+                         f"JarvisPLOT: available colormaps sample: {cmaps.list_available()}"
+                     )
+                 except Exception:
+                     pass
+         except Exception as e:
+             if self.logger:
+                 self.logger.warning(f"JarvisPLOT: failed to initialize colormaps: {e}")
+
+     def load_interpolators(self):
+         """Parse YAML interpolator specs and register them for lazy use in expressions."""
+         cfg = self.yaml.config.get("Functions", None)
+         if cfg is not None:
+             from .inner_func import set_external_funcs_getter
+             from .utils.interpolator import InterpolatorManager
+             mgr = InterpolatorManager.from_yaml(
+                 cfg,
+                 yaml_dir=self.yaml.dir,
+                 shared=self.shared,
+                 logger=self.logger,
+             )
+             self.interpolators = mgr
+             set_external_funcs_getter(lambda: (mgr.as_eval_funcs() or {}))
+         if self.interpolators:
+             self.logger.debug(f"JarvisPLOT: Functions registered: {mgr.summary()}")
+
+     def load_styles(self):
+         spp = "&JP/jarvisplot/cards/style_preference.json"
+         self.logger.debug("Loading internal Format set -> {}".format(self.load_path(spp)))
+         with open(self.load_path(spp), 'r') as f1:
+             stl = json.load(f1)
+         for sty, bundle in stl.items():
+             self.style[sty] = {}
+             for kk, vv in bundle.items():
+                 vpath = self.load_path(vv)
+                 if os.path.exists(vpath):
+                     self.logger.debug("Loading '{}' bundle, {} Style \n\t-> {}".format(sty, kk, vpath))
+                     with open(vpath, 'r') as f2:
+                         self.style[sty][kk] = json.load(f2)
+                 else:
+                     self.logger.error("Style Not Found: '{}' bundle, {} Style \n\t-> {}".format(sty, kk, vpath))
+
+
+     def load_path(self, path):
+         if "&JP/" == path[0:4]:
+             path = os.path.abspath(os.path.join(jppwd, path[4:]))
+         else:
+             path = Path(path).expanduser().resolve()
+         return path
+
+     def plot(self):
+         for fig in self.yaml.config["Figures"]:
+             from .Figure.figure import Figure
+             figobj = Figure()
+             figobj._yaml_dir = self.yaml.dir
+             figobj.config = self.yaml.config
+             figobj.logger = self.logger
+             figobj.jpstyles = self.style
+             figobj.context = self.ctx
+             if getattr(self.args, "no_logo", False):
+                 figobj.print = True
+
+             try:
+                 if figobj.set(fig):
+                     self.logger.warning(f"Successfully loaded figure setting -> {figobj.name}")
+                     figobj.plot()
+             except Exception as e:
+                 self.logger.warning(f"Figure {fig.get('name', '<noname>')} failed: {e}")
+                 continue
+
+     def load_yaml(self):
+         # If no YAML file is provided, show a friendly message and the CLI help, then return gracefully
+         yaml_path = getattr(self.args, 'file', None)
+         if not yaml_path:
+             self.logger.error("No input YAML file specified. Please provide one.\n")
+             try:
+                 buf = io.StringIO()
+                 with redirect_stdout(buf):
+                     self.cli.args.print_help()
+                 help_text = buf.getvalue()
+                 self.logger.warning("JarvisPLOT " + help_text)
+             except Exception:
+                 pass
+             return
+         self.parser_yaml(os.path.abspath(yaml_path))
+
+     def init_logger(self) -> None:
+         from datetime import datetime
+         current_time = datetime.now().strftime("%Y-%m-%d[%H:%M:%S]")
+
+         # Remove Loguru's default handler to avoid duplicate console lines
+         try:
+             logger.remove()
+         except Exception:
+             pass
+
+         def global_log_filter(record):
+             return record["extra"].get("JPlot", False)
+
+         def stream_filter(record):
+             return record["extra"].get("to_console", False)
+
+         def custom_format(record):
+             module = record["extra"].get("module", "No module")
+             return f"\n\n<cyan>{module}</cyan> \n\t-> <green>{record['time']:MM-DD HH:mm:ss.SSS}</green> - [<level>{record['level']}</level>] >>> \n<level>{record['message']}</level> "
+
+         logger.add(
+             sys.stdout,
+             filter=stream_filter,
+             format=custom_format,
+             colorize=True,
+             enqueue=True,
+             level="DEBUG" if self.args.debug else "WARNING"
+         )
+         self.logger = logger.bind(module="JarvisPLOT", to_console=True, JPlot=True)
+         self.logger.warning("JarvisPLOT logging system initialized successfully!")
+         if self.args.debug:
+             self.logger.debug("JarvisPLOT running in debug mode!")
+
+     def parser_yaml(self, file):
+         self.yaml.file = os.path.abspath(file)
+         self.yaml.load()
+         self.logger.debug("Resolved YAML file -> {}".format(self.yaml.path))
+
+     def load_dataset(self):
+         dts = self.yaml.config['DataSet']
+         for dt in dts:
+             dataset = DataSet()
+             dataset.logger = self.logger
+             dataset.setinfo(dt, self.yaml.dir)
+             self.dataset.append(dataset)
+
+     def rename_hdf5_and_renew_yaml(self):
+         for dt in self.dataset:
+             self.logger.warning("DataSet -> {}, type -> {}".format(dt.name, dt.type))
+             vmap_dict = {}
+             vmap_list = []
+             if dt.type == "hdf5":
+                 for ii, kk in enumerate(dt.keys):
+                     vname = "Var{}@{}".format(ii, dt.name)
+                     vmap_dict[kk] = vname
+                     vmap_list.append({
+                         "source_name": r"{}".format(kk),
+                         "new_name": vname
+                     })
+                 self.yaml.update_dataset(dt.name, {"columnmap": {"list": vmap_list}})
+                 dt.rename_columns(vmap_dict)
+                 print(dt.keys)
+
+         import yaml
+         with open(self.args.out, 'w', encoding='utf-8') as f1:
+             yaml.dump(self.yaml.config, f1, sort_keys=False, default_flow_style=False, indent=2)
jarvisplot/data_loader.py ADDED
@@ -0,0 +1,441 @@
+ #!/usr/bin/env python3
+
+ from __future__ import annotations
+ from pathlib import Path
+ from typing import Optional, Any, Dict, List
+ import yaml
+ import os, sys
+ import pandas as pd
+ import h5py
+ import numpy as np
+
+ class DataSet():
+     def __init__(self):
+         self._file: Optional[str] = None
+         self.path: Optional[str] = None
+         self._type: Optional[str] = None
+         self.base: Optional[str] = None
+         self.keys: Optional[List[str]] = None
+         self._logger = None
+         self.data = None
+         self.group = None
+         self.is_gambit = False
+
+     def setinfo(self, dtinfo, rootpath):
+         self.file = os.path.join(rootpath, dtinfo['path'])
+         self.name = dtinfo['name']
+         self.type = dtinfo['type'].lower()
+         if self.type == "csv":
+             self.load_csv()
+         if self.type == "hdf5" and dtinfo.get('dataset'):
+             self.group = dtinfo['dataset']
+             self.is_gambit = dtinfo.get('is_gambit', False)
+             self.columnmap = dtinfo.get('columnmap', {})
+             self.load_hdf5()
+
+     @property
+     def file(self) -> Optional[str]:
+         return self._file
+
+     @property
+     def type(self) -> Optional[str]:
+         return self._type
+
+     @property
+     def logger(self):
+         return self._logger
+
+     @logger.setter
+     def logger(self, logger) -> None:
+         if logger is None:
+             self._logger = None
+             return
+         self._logger = logger
+
+     @file.setter
+     def file(self, value: Optional[str]) -> None:
+         if value is None:
+             self._file = None
+             self.path = None
+             self.base = None
+             return
+
+         p = Path(value).expanduser().resolve()
+         self._file = str(p)
+         self.path = os.path.abspath(p)
+         self.base = os.path.basename(p)
+
+     @type.setter
+     def type(self, value: Optional[str]) -> None:
+         if value is None:
+             self._type = None
+             return
+
+         self._type = str(value).lower()
+         self.logger.debug("Dataset -> {} is assigned as \n\t-> {}\ttype".format(self.base, self.type))
+
+     def load_csv(self):
+         if self.type == "csv":
+             if self.logger:
+                 self.logger.debug("Loading CSV from {}".format(self.path))
+
+             self.data = pd.read_csv(self.path)
+             self.keys = list(self.data.columns)
+
+             # Emit the same pretty summary used for HDF5 datasets
+             summary_name = f" CSV loaded!\n\t name -> {self.name}\n\t path -> {self.path}"
+             try:
+                 summary_msg = dataframe_summary(self.data, name=summary_name)
+             except Exception:
+                 # Fallback minimal summary if something goes wrong
+                 summary_msg = f"CSV loaded {summary_name}\nDataFrame shape: {self.data.shape}"
+
+             if self.logger:
+                 self.logger.warning("\n" + summary_msg)
+             else:
+                 print(summary_msg)
+
+     def load_hdf5(self):
+         def _iter_datasets(hobj, prefix=""):
+             for k, v in hobj.items():
+                 path = f"{prefix}/{k}" if prefix else k
+                 if isinstance(v, h5py.Dataset):
+                     yield path, v
+                 elif isinstance(v, h5py.Group):
+                     yield from _iter_datasets(v, path)
+
+         def _pick_dataset(hfile: h5py.File):
+             # Heuristic: prefer structured arrays, then 2D arrays
+             best = None
+             for path, ds in _iter_datasets(hfile):
+                 shape = getattr(ds, "shape", ())
+                 dt = getattr(ds, "dtype", None)
+                 score = 0
+                 if dt is not None and getattr(dt, "names", None):
+                     score += 10  # structured array → good for DataFrame
+                 if len(shape) == 2:
+                     score += 5
+                     if shape[1] >= 2:
+                         score += 1
+                 if best is None or score > best[0]:
+                     best = (score, path, ds)
+             if best is None:
+                 raise RuntimeError("No datasets found in HDF5 file.")
+             _, path, ds = best
+             return path, ds[()]
+
+         def _to_dataframe(arr, name=""):
+             if isinstance(arr, np.ndarray) and getattr(arr.dtype, "names", None):
+                 df = pd.DataFrame.from_records(arr)
+                 # prefix columns to keep dataset origin
+                 if name:
+                     df.columns = [f"{name}:{c}" for c in df.columns]
+                 return df
+             elif hasattr(arr, "ndim") and arr.ndim == 2:
+                 cols = [f"col{i}" for i in range(arr.shape[1])]
+                 if name:
+                     cols = [f"{name}:{c}" for c in cols]
+                 return pd.DataFrame(arr, columns=cols)
+             else:
+                 col = name if name else "value"
+                 return pd.DataFrame({col: np.ravel(arr)})
+
+         def _collect_group_datasets(g: h5py.Group, prefix: str = ""):
+             """Recursively collect (path, ndarray) for all datasets under a group."""
+             items = []
+             for k, v in g.items():
+                 path = f"{prefix}/{k}" if prefix else k
+                 if isinstance(v, h5py.Dataset):
+                     items.append((path, v[()]))
+                 elif isinstance(v, h5py.Group):
+                     items.extend(_collect_group_datasets(v, path))
+             return items
+
+         with h5py.File(self.path, "r") as f1:
+             # Print the group tree to help the user locate datasets
+             print_hdf5_tree_ascii(f1[self.group], root_name=self.group, logger=self.logger)
+
+             if self.group in f1 and isinstance(f1[self.group], h5py.Group):
+                 group = f1[self.group]
+                 self.logger.debug("Loading HDF5 group '{}' from {}".format(self.group, self.path))
+                 if self.is_gambit:
+                     self.logger.debug("GAMBIT Standard Output")
+
+                 # Collect all datasets under the group (recursively)
+                 items = _collect_group_datasets(group, prefix=self.group)
+                 if not items:
+                     raise RuntimeError(f"HDF5 group '{self.group}' contains no datasets.")
+
+                 # If there is only one dataset, behave like before
+                 kkeys = []
+                 if len(items) == 1:
+                     path, arr = items[0]
+                     dfs = [(path, _to_dataframe(arr, name=path))]
+                     kkeys.append(path)
+                 else:
+                     # Build a dataframe per dataset
+                     dfs = [(p, _to_dataframe(arr, name=p)) for p, arr in items]
+                     kkeys = [p for p, arr in items]
+
+                 # Try to concatenate along columns; all datasets must have identical row counts
+                 lengths = {len(df) for _, df in dfs}
+                 if len(lengths) == 1:
+                     # safe to concat by columns → single merged DataFrame only
+                     self.data = pd.concat([df for _, df in dfs], axis=1)
+
+                     self.keys = list(self.data.columns)
+
+                     # Deal with GAMBIT filtering
+                     if self.is_gambit:
+                         self.gambit_filtering(kkeys)
+                     if self.columnmap.get("list", False):
+                         self.logger.warning("{}: Loading Column Maps".format(self.name))
+                         cmap = {}
+                         for item in self.columnmap.get("list", False):
+                             cmap[item['source_name']] = item['new_name']
+                         self.rename_columns(cmap)
+
+                     # Emit a pretty summary BEFORE returning
+                     summary_name = f" HDF5 loaded!\n\t name -> {self.name}\n\t group -> {self.group}\n\t path -> {self.path}"
+                     summary_msg = dataframe_summary(self.data, name=summary_name)
+                     if self.logger:
+                         self.logger.warning("\n" + summary_msg)
+                     else:
+                         print(summary_msg)
+
+                     return  # IMPORTANT: stop here; avoid falling through to the single-dataset path
+                 else:
+                     # Not mergeable → print tree for diagnostics and raise a hard error
+                     try:
+                         print_hdf5_tree_ascii(group, root_name=self.group, logger=self.logger)
+                     except Exception:
+                         pass
+                     shapes = {p: df.shape for p, df in dfs}
+                     raise ValueError(
+                         "HDF5 group '{grp}' is invalid for merging: datasets have different row counts. "
+                         "Please fix the input or choose a different dataset/group. Details: {details}".format(
+                             grp=self.group,
+                             details=shapes,
+                         )
+                     )
+             else:
+                 # Fall back to the best single dataset in the file
+                 path, arr = _pick_dataset(f1)
+                 self.data = _to_dataframe(arr, name=path)
+                 self.keys = list(self.data.columns)
+
+     def gambit_filtering(self, kkeys):
+         isvalids = []
+         for kk in kkeys:
+             if "_isvalid" == kk[-8:] and kk[:-8] in self.keys:
+                 isvalids.append(kk)
+         self.logger.warning("Filtering Invalid Data from GAMBIT Output")
+         sps = self.data.shape
+         mask = self.data[isvalids].all(axis=1)
+         self.data = self.data[mask].drop(columns=isvalids)
+         self.logger.warning("DataSet Shape: \n\t Before filtering -> {}\n\t After filtering -> {}".format(sps, self.data.shape))
+         self.keys = list(self.data.columns)
+
+     def rename_columns(self, vdict):
+         self.data = self.data.rename(columns=vdict)
+         self.keys = list(self.data.columns)
+
+
+ def dataframe_summary(df: pd.DataFrame, name: str = "") -> str:
+     """Pretty, compact multi-line summary for a DataFrame.
+
+     Sections:
+     • header: dataset path (if any) and shape
+     • columns table: name | dtype | non-null% | unique count (non-numeric) | min..max (numeric)
+     """
+     import pandas as _pd
+     import numpy as _np
+     import shutil
+
+     def term_width(default=120):
+         try:
+             return shutil.get_terminal_size().columns
+         except Exception:
+             return default
+
+     def trunc(s: str, width: int) -> str:
+         if len(s) <= width:
+             return s
+         # keep both ends
+         head = max(0, width // 2 - 2)
+         tail = max(0, width - head - 3)
+         return s[:head] + "..." + s[-tail:]
+
+     nrows, ncols = df.shape
+     cols = list(df.columns)
+     show_cols = cols[:]
+
+     # Compute per-column stats for the shown columns
+     dtypes = df[show_cols].dtypes.astype(str)
+     non_null_pct = (df[show_cols].notna().sum() / max(1, nrows) * 100.0).round(1)
+
+     # numeric min/max; unique count for non-numeric columns
+     is_num = [_pd.api.types.is_numeric_dtype(df[c]) for c in show_cols]
+     num_cols = [c for c, ok in zip(show_cols, is_num) if ok]
+     cat_cols = [c for c, ok in zip(show_cols, is_num) if not ok]
+
+     num_min = {}
+     num_max = {}
+     if num_cols:
+         try:
+             desc = df[num_cols].agg(["min", "max"]).T
+             for c in num_cols:
+                 mn = desc.loc[c, "min"]
+                 mx = desc.loc[c, "max"]
+                 num_min[c] = mn
+                 num_max[c] = mx
+         except Exception:
+             pass
+
+     uniques = {}
+     if cat_cols:
+         for c in cat_cols:
+             try:
+                 u = df[c].nunique(dropna=True)
+                 uniques[c] = int(u)
+             except Exception:
+                 pass
+
+     # Build a compact table
+     tw = term_width()
+     name_w = 34 if tw < 120 else 48
+     dtype_w = 10
+     nn_w = 8
+     stat_w = max(12, tw - (name_w + dtype_w + nn_w + 8))  # 8 for separators/padding
+
+     def fmt_stat(c: str) -> str:
+         if c in num_min and c in num_max:
+             try:
+                 mn = num_min[c]
+                 mx = num_max[c]
+                 return f"{mn:>10.4g} .. {mx:>10.4g}"
+             except Exception:
+                 return f"{str(num_min[c]):>10} .. {str(num_max[c]):>10}"
+         if c in uniques:
+             return f"uniq={uniques[c]}"
+         return ""
+
+     head_lines = []
+     if name:
+         head_lines.append(f"Selected dataset:{name}")
+     head_lines.append(f"DataFrame shape:\n\t {nrows}\t rows × {ncols} \tcols\n")
+     head_lines.append("=== DataFrame Summary Table ===")
+
+     # Column table header
+     rows = []
+     header = f"{'name':<{name_w}} {'dtype':<{dtype_w}} {'nonnull%':>{nn_w}} {' [min] .. [max]':<{stat_w}}"
+     rows.append("-" * len(header))
+     rows.append(header)
+     rows.append("-" * len(header))
+
+     for c in show_cols:
+         c_name = trunc(str(c), name_w)
+         c_dtype = trunc(dtypes[c], dtype_w)
+         c_nn = f"{non_null_pct[c]:.1f}%" if nrows else "n/a"
+         c_stat = trunc(fmt_stat(c), stat_w)
+         rows.append(f"{c_name:<{name_w}} {c_dtype:<{dtype_w}} {c_nn:>{nn_w}} {c_stat:<{stat_w}}")
+     rows.append("-" * len(header))
+
+     parts = []
+     parts.extend(head_lines)
+     if show_cols:
+         parts.extend(rows)
+
+     return "\n".join(parts)
+
+
+ def print_hdf5_tree_ascii(hobj, root_name='/', logger=None, max_depth=None):
+     """
+     Pretty-print an ASCII tree of an h5py.File or Group.
+
+     Example output:
+         /
+         ├── data (Group)
+         │   ├── samples (Dataset, shape=(1000, 3), dtype=float64)
+         │   └── extra (Group)
+         │       ├── X (Dataset, shape=(..., ...), dtype=...)
+         │       └── Y (Dataset, shape=(..., ...), dtype=...)
+         └── metadata (Group)
+             └── attrs (Dataset, shape=(...,), dtype=...)
+
+     Parameters
+     ----------
+     hobj : h5py.File or h5py.Group
+     root_name : str
+         Name shown at the root.
+     logger : logging-like object (optional)
+         If provided, uses logger.debug(...) instead of print.
+     max_depth : int or None
+         Limit recursion depth (0=only root). None = unlimited.
+     """
+     try:
+         import h5py  # noqa: F401
+     except Exception:
+         raise RuntimeError("h5py is required for HDF5 tree printing.")
+
+     def emit(msg):
+         if logger is None:
+             print(msg)
+         else:
+             try:
+                 logger.debug(msg)
+             except Exception:
+                 print(msg)
+
+     def is_dataset(x):
+         import h5py
+         return isinstance(x, h5py.Dataset)
+
+     def is_group(x):
+         import h5py
+         return isinstance(x, h5py.Group)
+
+     def fmt_leaf(name, obj):
+         def shorten(n):
+             if len(n) > 50:
+                 return f"{n[:15]}...{n[-30:]}"
+             else:
+                 return "{:48}".format(n)
+         if is_dataset(obj):
+             shp = getattr(obj, "shape", None)
+             extra = []
+             if shp is not None:
+                 extra.append(f"shape -> {shp}")
+             suffix = f"(Dataset), {', '.join(extra)}" if extra else "(Dataset)"
+             return f"{shorten(name)}{suffix:>40}"
+         elif is_group(obj):
+             return f"{shorten(name)} (Group)"
+         return shorten(name)
+
+     def walk(group, prefix="", depth=0, last=True):
+         # Collect the lines for this subtree and return them, so nested groups
+         # are printed in order within a single emit at the top level.
+         lines = []
+         if depth == 0:
+             lines.append("│ {} (Group)".format(root_name))
+         if max_depth is not None and depth >= max_depth:
+             return lines
+
+         keys = list(group.keys())
+         keys.sort()
+         n = len(keys)
+         for i, key in enumerate(keys):
+             child = group[key]
+             is_last = (i == n - 1)
+             connector = "└── " if is_last else "├── "
+             lines.append(prefix + connector + fmt_leaf(key, child))
+
+             if is_group(child):
+                 extension = "    " if is_last else "│   "
+                 lines.extend(walk(child, prefix + extension, depth + 1, is_last))
+         return lines
+
+     emit("\n".join(walk(hobj, "", 0, True)))