sift-stack-py 0.17.0.dev1__py3-none-any.whl → 0.17.0.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sift_client/_internal/low_level_wrappers/_test_results_log.py +6 -2
- sift_client/_internal/low_level_wrappers/test_results.py +11 -3
- sift_client/_internal/pyproject_config.py +84 -0
- sift_client/_internal/util/hdf5.py +253 -78
- sift_client/pytest_plugin.py +681 -209
- sift_client/resources/data_imports.py +87 -14
- sift_client/resources/sync_stubs/__init__.pyi +29 -10
- sift_client/resources/test_results.py +2 -1
- sift_client/scripts/import_test_result_log.py +2 -1
- sift_client/sift_types/_mixins/metadata.py +19 -0
- sift_client/sift_types/asset.py +2 -0
- sift_client/sift_types/data_import.py +54 -16
- sift_client/sift_types/report.py +2 -0
- sift_client/sift_types/run.py +2 -0
- sift_client/sift_types/test_report.py +6 -0
- sift_client/util/test_results/context_manager.py +16 -2
- {sift_stack_py-0.17.0.dev1.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/METADATA +2 -1
- {sift_stack_py-0.17.0.dev1.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/RECORD +22 -20
- {sift_stack_py-0.17.0.dev1.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/LICENSE +0 -0
- {sift_stack_py-0.17.0.dev1.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/WHEEL +0 -0
- {sift_stack_py-0.17.0.dev1.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/entry_points.txt +0 -0
- {sift_stack_py-0.17.0.dev1.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/top_level.txt +0 -0
|
@@ -143,9 +143,13 @@ class _ReplayState:
|
|
|
143
143
|
|
|
144
144
|
@dataclass
|
|
145
145
|
class ReplayResult:
|
|
146
|
-
"""Result of replaying a log file.
|
|
146
|
+
"""Result of replaying a log file.
|
|
147
147
|
|
|
148
|
-
report
|
|
148
|
+
``report`` is None on an incremental resume tick that uploaded only steps or
|
|
149
|
+
measurements; the report itself was created on an earlier tick.
|
|
150
|
+
"""
|
|
151
|
+
|
|
152
|
+
report: TestReport | None = None
|
|
149
153
|
steps: list[TestStep] = field(default_factory=list)
|
|
150
154
|
measurements: list[TestMeasurement] = field(default_factory=list)
|
|
151
155
|
|
|
@@ -1072,13 +1072,17 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
|
|
|
1072
1072
|
id_map: dict[str, str],
|
|
1073
1073
|
state: _ReplayState,
|
|
1074
1074
|
) -> None:
|
|
1075
|
-
if state.report is None:
|
|
1076
|
-
raise ValueError("UpdateTestReport found before CreateTestReport")
|
|
1077
1075
|
request = UpdateTestReportRequest()
|
|
1078
1076
|
json_format.Parse(json_str, request)
|
|
1079
1077
|
request.test_report.test_report_id = self._map_id(
|
|
1080
1078
|
id_map, request.test_report.test_report_id
|
|
1081
1079
|
)
|
|
1080
|
+
# Batch/simulate replays the whole log in order, so a missing report means
|
|
1081
|
+
# the log is malformed. Incremental replay may have created the report on an
|
|
1082
|
+
# earlier tick (its real ID lives in id_map), so state.report is legitimately
|
|
1083
|
+
# None here -- the mapped ID is enough to issue the update.
|
|
1084
|
+
if simulate and state.report is None:
|
|
1085
|
+
raise ValueError("UpdateTestReport found before CreateTestReport")
|
|
1082
1086
|
state.report = await self.update_test_report(
|
|
1083
1087
|
request=request, simulate=simulate, existing=state.report
|
|
1084
1088
|
)
|
|
@@ -1203,6 +1207,7 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
|
|
|
1203
1207
|
next tick.
|
|
1204
1208
|
"""
|
|
1205
1209
|
tracking = LogTracking.load(log_path)
|
|
1210
|
+
resuming = tracking.last_uploaded_line > 0
|
|
1206
1211
|
id_map = tracking.id_map
|
|
1207
1212
|
state = _ReplayState()
|
|
1208
1213
|
|
|
@@ -1221,7 +1226,10 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
|
|
|
1221
1226
|
tracking.last_uploaded_line += 1
|
|
1222
1227
|
tracking.save(log_path)
|
|
1223
1228
|
|
|
1224
|
-
|
|
1229
|
+
# On a resume tick the CreateTestReport line was consumed on an earlier
|
|
1230
|
+
# tick, so state.report is expected to be None; the report already exists
|
|
1231
|
+
# on the server. Only a genuine first pass over an empty log is an error.
|
|
1232
|
+
if state.report is None and not resuming:
|
|
1225
1233
|
raise ValueError("No CreateTestReport found in log file")
|
|
1226
1234
|
|
|
1227
1235
|
return ReplayResult(
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Loader for the ``[tool.sift]`` table in a project's ``pyproject.toml``.
|
|
2
|
+
|
|
3
|
+
The pytest plugin consumes this loader to resolve report-content config (under
|
|
4
|
+
``[tool.sift.pytest.report]``) and SDK-level fallbacks (URIs under
|
|
5
|
+
``[tool.sift]``). A malformed or missing ``pyproject.toml`` returns ``{}`` so a
|
|
6
|
+
bad config file never aborts the session — the plugin falls back to its
|
|
7
|
+
built-in defaults and surfaces a single warning.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import warnings
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
# ``tomllib`` landed in 3.11; ``tomli`` is the same parser packaged for older
|
|
17
|
+
# interpreters and is declared as a conditional install dep on 3.8-3.10.
|
|
18
|
+
try:
|
|
19
|
+
import tomllib # type: ignore[import-not-found,unused-ignore]
|
|
20
|
+
except ImportError: # pragma: no cover - exercised on 3.8-3.10 only
|
|
21
|
+
import tomli as tomllib # type: ignore[no-redef,import-not-found,unused-ignore]
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
import pytest
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Bound the upward walk so a misconfigured environment can't trigger an
|
|
28
|
+
# unbounded filesystem traversal looking for a project root that isn't there.
|
|
29
|
+
_MAX_PARENT_WALK = 3
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _find_pyproject(config: pytest.Config) -> Path | None:
    """Find the ``pyproject.toml`` that belongs to the active project.

    Candidates are tried in order and the first one that is an existing file
    wins:

    1. ``config.inipath``, when it is itself named ``pyproject.toml`` (the
       usual case for projects using ``[tool.pytest.ini_options]``).
    2. ``pyproject.toml`` directly inside the resolved ``config.rootpath``.
    3. ``pyproject.toml`` in each of the next ``_MAX_PARENT_WALK`` parent
       directories, for monorepo layouts where pytest's rootdir sits below
       the directory holding the project file.

    Returns ``None`` when no candidate exists.
    """
    ini_file = config.inipath
    if ini_file is not None and ini_file.name == "pyproject.toml" and ini_file.is_file():
        return ini_file

    directory = Path(config.rootpath).resolve()
    # Step 0 inspects rootpath itself; every later step moves one level up.
    for step in range(_MAX_PARENT_WALK + 1):
        if step:
            directory = directory.parent
        pyproject = directory / "pyproject.toml"
        if pyproject.is_file():
            return pyproject
    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def load_tool_sift(config: pytest.Config) -> dict[str, Any]:
    """Return the parsed ``[tool.sift]`` table from the project's pyproject.toml.

    Args:
        config: The active pytest config, used to locate ``pyproject.toml``.

    Returns:
        The ``[tool.sift]`` table as a dict. ``{}`` is returned when no
        pyproject is discoverable, when ``tool`` or ``tool.sift`` is missing
        or is not a table, or when the file cannot be read, decoded, or
        parsed.

    A read / decode / parse failure emits a single
    :class:`SiftPytestPluginWarning` so the session continues with defaults
    rather than aborting on a malformed file.
    """
    pyproject = _find_pyproject(config)
    if pyproject is None:
        return {}
    try:
        with pyproject.open("rb") as fh:
            data = tomllib.load(fh)
    except (OSError, UnicodeDecodeError, tomllib.TOMLDecodeError) as exc:
        # ``tomllib.load`` decodes the raw bytes as UTF-8 before parsing, so a
        # badly encoded file raises ``UnicodeDecodeError`` (not
        # ``TOMLDecodeError``); treat it like any other unreadable config.
        #
        # Deferred import: ``pytest_plugin`` imports this loader, so a
        # top-level import here would close the cycle at module load time.
        from sift_client.pytest_plugin import SiftPytestPluginWarning

        warnings.warn(
            f"Failed to read {pyproject} for [tool.sift]: {type(exc).__name__}: {exc}",
            SiftPytestPluginWarning,
            stacklevel=2,
        )
        return {}

    # Syntactically valid TOML can still define ``tool`` or ``tool.sift`` as a
    # scalar or array. Treat anything that is not a table as "not configured"
    # so the declared ``dict`` return type holds and no AttributeError escapes.
    tool = data.get("tool")
    if not isinstance(tool, dict):
        return {}
    sift = tool.get("sift")
    return sift if isinstance(sift, dict) else {}
|
|
@@ -1,96 +1,271 @@
|
|
|
1
|
+
"""HDF5 schema detection.
|
|
2
|
+
|
|
3
|
+
HDF5 files have no single canonical layout, so detection is parameterized
|
|
4
|
+
by an ``Hdf5Schema``: ``ONE_D`` (per-group time dataset + sibling 1D values,
|
|
5
|
+
with an ancestor walk-up), ``TWO_D`` (``[N, 2]`` datasets where col 0 is
|
|
6
|
+
time), or ``COMPOUND`` (struct-like datasets whose first field is time).
|
|
7
|
+
Each detector walks every dataset in the file recursively; datasets that
|
|
8
|
+
don't fit the chosen schema are not included in the resulting config."""
|
|
9
|
+
|
|
1
10
|
from __future__ import annotations
|
|
2
11
|
|
|
3
12
|
from pathlib import Path
|
|
13
|
+
from typing import Callable
|
|
4
14
|
|
|
5
15
|
import h5py
|
|
16
|
+
import numpy as np
|
|
6
17
|
|
|
7
18
|
from sift_client._internal.util.numpy_types import numpy_to_sift_type
|
|
8
|
-
from sift_client.sift_types.data_import import
|
|
19
|
+
from sift_client.sift_types.data_import import (
|
|
20
|
+
DataTypeKey,
|
|
21
|
+
Hdf5DataColumn,
|
|
22
|
+
Hdf5ImportConfig,
|
|
23
|
+
)
|
|
9
24
|
|
|
10
|
-
#
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
25
|
+
# Heuristic attribute names for channel metadata, in priority order. The
|
|
26
|
+
# first non-empty value found on a dataset wins; missing attributes resolve
|
|
27
|
+
# to empty strings.
|
|
28
|
+
_NAME_ATTRS = ("Name", "name", "Title", "title", "Sensor", "sensor", "Channel", "channel")
|
|
29
|
+
_UNIT_ATTRS = ("Unit", "unit", "Units", "units")
|
|
30
|
+
_DESCRIPTION_ATTRS = ("Description", "description")
|
|
14
31
|
|
|
32
|
+
# Per-group time dataset names, case-insensitive, in priority order.
|
|
33
|
+
_TIME_DATASET_NAMES = ("time", "timestamp", "timestamps", "ts")
|
|
15
34
|
|
|
16
|
-
def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "") -> str:
|
|
17
|
-
"""Return the first matching HDF5 attribute value, or *default*."""
|
|
18
|
-
possible = [dataset.attrs.get(attr) for attr in candidates if dataset.attrs.get(attr)]
|
|
19
|
-
return str(possible[0]) if possible else default
|
|
20
35
|
|
|
36
|
+
def _read_string_attr(dataset: h5py.Dataset, candidates: tuple[str, ...]) -> str:
|
|
37
|
+
"""Return the first non-empty string attribute among `candidates`."""
|
|
38
|
+
for name in candidates:
|
|
39
|
+
if name not in dataset.attrs:
|
|
40
|
+
continue
|
|
41
|
+
value = dataset.attrs[name]
|
|
42
|
+
if isinstance(value, bytes):
|
|
43
|
+
value = value.decode("utf-8", errors="replace")
|
|
44
|
+
if isinstance(value, str) and value:
|
|
45
|
+
return value
|
|
46
|
+
# h5py returns multi-element string attrs as ndarrays; take the first.
|
|
47
|
+
if isinstance(value, np.ndarray) and value.size > 0:
|
|
48
|
+
first = value.flat[0]
|
|
49
|
+
if isinstance(first, bytes):
|
|
50
|
+
first = first.decode("utf-8", errors="replace")
|
|
51
|
+
if isinstance(first, str) and first:
|
|
52
|
+
return first
|
|
53
|
+
return ""
|
|
21
54
|
|
|
22
|
-
def detect_hdf5_config(file_path: str | Path) -> Hdf5ImportConfig:
|
|
23
|
-
"""Detect an HDF5 import config by inspecting the file's datasets.
|
|
24
55
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
56
|
+
def _read_channel_metadata(dataset: h5py.Dataset) -> tuple[str, str, str]:
    """Look up a dataset's ``(name, units, description)`` from its HDF5 attributes.

    Each element is resolved from its own list of candidate attribute names
    and is ``""`` when none of them holds a non-empty string.
    """
    name = _read_string_attr(dataset, _NAME_ATTRS)
    units = _read_string_attr(dataset, _UNIT_ATTRS)
    description = _read_string_attr(dataset, _DESCRIPTION_ATTRS)
    return name, units, description
|
|
31
63
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
64
|
+
|
|
65
|
+
def _is_compound(dataset: h5py.Dataset) -> bool:
|
|
66
|
+
return dataset.dtype.names is not None and len(dataset.dtype.names) > 1
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _is_1d_non_compound(dataset: h5py.Dataset) -> bool:
    """Return True for a non-compound dataset with exactly one dimension."""
    if _is_compound(dataset):
        return False
    return len(dataset.shape) == 1
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _is_2d_n_by_2(dataset: h5py.Dataset) -> bool:
    """Return True for a non-compound dataset of shape ``[N, 2]``."""
    if _is_compound(dataset):
        return False
    dims = dataset.shape
    return len(dims) == 2 and dims[1] == 2
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _path_to_channel_name(path: str) -> str:
|
|
78
|
+
"""Sift renders dotted names hierarchically, so ``group1/current`` becomes
|
|
79
|
+
``group1.current``, with ``current`` shown under a ``group1`` folder."""
|
|
80
|
+
return path.replace("/", ".")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _make_name_deduper() -> Callable[[str, str], str]:
|
|
84
|
+
"""Return a callable that resolves duplicate channel names by appending
|
|
85
|
+
the dataset's dotted path. First claim of a name wins; later claims of
|
|
86
|
+
the same name get the fallback suffix appended."""
|
|
87
|
+
used: set[str] = set()
|
|
88
|
+
|
|
89
|
+
def dedupe(base_name: str, fallback_suffix: str) -> str:
|
|
90
|
+
name = f"{base_name}.{fallback_suffix}" if base_name in used else base_name
|
|
91
|
+
used.add(name)
|
|
92
|
+
return name
|
|
93
|
+
|
|
94
|
+
return dedupe
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _collect_datasets(h5file: h5py.File) -> list[h5py.Dataset]:
    """Gather every dataset in the file, at any nesting depth.

    Objects visited that are not ``h5py.Dataset`` instances are ignored.
    """
    found: list[h5py.Dataset] = []

    def keep_if_dataset(_path: str, node: object) -> None:
        if not isinstance(node, h5py.Dataset):
            return
        found.append(node)

    h5file.visititems(keep_if_dataset)
    return found
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _group_by_parent(datasets: list[h5py.Dataset]) -> dict[str, list[h5py.Dataset]]:
|
|
110
|
+
"""Group datasets by their parent group path (``""`` for root-level)."""
|
|
111
|
+
out: dict[str, list[h5py.Dataset]] = {}
|
|
112
|
+
for ds in datasets:
|
|
113
|
+
out.setdefault(ds.name.lstrip("/").rpartition("/")[0], []).append(ds)
|
|
114
|
+
return out
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _resolve_ancestor_time(group_path: str, per_group_time: dict[str, str]) -> str:
|
|
118
|
+
"""Return the closest-ancestor time dataset path for ``group_path``,
|
|
119
|
+
walking up to the root. Empty string if no ancestor has one."""
|
|
120
|
+
cursor = group_path
|
|
121
|
+
while True:
|
|
122
|
+
found = per_group_time.get(cursor)
|
|
123
|
+
if found:
|
|
124
|
+
return found
|
|
125
|
+
if cursor == "":
|
|
126
|
+
return ""
|
|
127
|
+
slash = cursor.rfind("/")
|
|
128
|
+
cursor = "" if slash < 0 else cursor[:slash]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _build_one_d_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
    """Build channel columns for the 1D non-compound schema.

    Only 1D non-compound datasets take part. Inside each parent group a time
    dataset is chosen by leaf name, and every other dataset of that group
    becomes a value channel paired with it. A group with no time dataset of
    its own uses the closest ancestor group's; ``time_dataset`` is ``""``
    when no ancestor has one either.
    """

    def pick_time_dataset(members: list[h5py.Dataset]) -> h5py.Dataset | None:
        """Return the member whose leaf name (case-insensitive) matches the
        highest-priority entry of ``_TIME_DATASET_NAMES``, or ``None``."""
        for wanted in _TIME_DATASET_NAMES:
            match = next(
                (m for m in members if m.name.rsplit("/", 1)[-1].lower() == wanted),
                None,
            )
            if match is not None:
                return match
        return None

    by_group = _group_by_parent([ds for ds in datasets if _is_1d_non_compound(ds)])

    # Pass 1: record the time dataset owned by each group, where there is one.
    time_by_group: dict[str, str] = {}
    for group_path, members in by_group.items():
        chosen = pick_time_dataset(members)
        if chosen is None:
            continue
        time_by_group[group_path] = chosen.name.lstrip("/")

    # Pass 2: emit one column per value dataset.
    unique_name = _make_name_deduper()
    result: list[Hdf5DataColumn] = []
    for group_path, members in by_group.items():
        local_time = time_by_group.get(group_path)
        if local_time:
            time_path = local_time
        else:
            time_path = _resolve_ancestor_time(group_path, time_by_group)
        for member in members:
            member_path = member.name.lstrip("/")
            # The group's own time dataset is not a value channel.
            if local_time and member_path == local_time:
                continue
            label, units, description = _read_channel_metadata(member)
            dotted = _path_to_channel_name(member_path)
            column = Hdf5DataColumn(
                name=unique_name(label or dotted, dotted),
                data_type=numpy_to_sift_type(member.dtype),
                units=units,
                description=description,
                time_dataset=time_path,
                value_dataset=member_path,
                time_index=0,
                value_index=0,
            )
            result.append(column)

    return result
|
|
91
182
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
183
|
+
|
|
184
|
+
def _build_two_d_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
    """Build channel columns for the 2D schema.

    Each non-compound dataset shaped ``[N, 2]`` yields exactly one channel
    that reads time from column 0 and the value from column 1 of that same
    dataset. Datasets of any other shape are left out.
    """
    unique_name = _make_name_deduper()

    def to_column(dataset: h5py.Dataset) -> Hdf5DataColumn:
        dataset_path = dataset.name.lstrip("/")
        label, units, description = _read_channel_metadata(dataset)
        dotted = _path_to_channel_name(dataset_path)
        return Hdf5DataColumn(
            name=unique_name(label or dotted, dotted),
            data_type=numpy_to_sift_type(dataset.dtype),
            units=units,
            description=description,
            time_dataset=dataset_path,
            value_dataset=dataset_path,
            time_index=0,
            value_index=1,
        )

    return [to_column(ds) for ds in datasets if _is_2d_n_by_2(ds)]
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _build_compound_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
    """Build channel columns for the compound schema.

    For every compound dataset the first field is taken as time and each
    remaining field becomes its own channel. With several value fields the
    channel name is ``"<dataset name>.<field>"``; with a single value field it
    is just the dataset name. Units and description come from the dataset's
    attributes and are shared by all of its channels. Non-compound datasets
    are left out.
    """
    unique_name = _make_name_deduper()
    result: list[Hdf5DataColumn] = []

    for dataset in datasets:
        if not _is_compound(dataset):
            continue
        field_names = dataset.dtype.names
        assert field_names is not None  # guaranteed by _is_compound
        time_field, *value_fields = field_names

        dataset_path = dataset.name.lstrip("/")
        dotted = _path_to_channel_name(dataset_path)
        label, units, description = _read_channel_metadata(dataset)
        dataset_name = label or dotted
        qualify_with_field = len(value_fields) > 1

        for value_field in value_fields:
            if qualify_with_field:
                base_name = f"{dataset_name}.{value_field}"
            else:
                base_name = dataset_name
            column = Hdf5DataColumn(
                name=unique_name(base_name, f"{dotted}.{value_field}"),
                data_type=numpy_to_sift_type(dataset.dtype[value_field]),
                units=units,
                description=description,
                time_dataset=dataset_path,
                value_dataset=dataset_path,
                time_index=0,
                value_index=0,
                time_field=time_field,
                value_field=value_field,
            )
            result.append(column)

    return result
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# Dispatch table: HDF5 schema variant -> function that turns the file's
# datasets into columns for that variant. Only the keys listed here are
# accepted by ``detect_hdf5_config``.
_BUILDERS: dict[DataTypeKey, Callable[[list[h5py.Dataset]], list[Hdf5DataColumn]]] = {
    DataTypeKey.HDF5_ONE_D: _build_one_d_configs,
    DataTypeKey.HDF5_TWO_D: _build_two_d_configs,
    DataTypeKey.HDF5_COMPOUND: _build_compound_configs,
}
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def detect_hdf5_config(file_path: str | Path, data_type_key: DataTypeKey) -> Hdf5ImportConfig:
    """Detect an HDF5 import config for the requested schema variant.

    The file is opened read-only and all of its datasets are handed to the
    builder registered for ``data_type_key``; datasets that do not fit that
    variant are not included. ``asset_name`` is returned empty and
    ``time_format`` is left unset: HDF5 timestamps aren't self-describing, so
    the caller must set ``config.time_format`` before importing.

    Raises:
        ValueError: if ``data_type_key`` is not one of the HDF5 variants.
    """
    builder = _BUILDERS.get(data_type_key)
    if builder is None:
        raise ValueError(
            f"detect_hdf5_config requires an HDF5 DataTypeKey variant "
            f"(HDF5_ONE_D, HDF5_TWO_D, or HDF5_COMPOUND); got {data_type_key}."
        )

    with h5py.File(Path(file_path), "r") as h5file:
        columns = builder(_collect_datasets(h5file))

    return Hdf5ImportConfig(asset_name="", data=columns)
|