sift-stack-py 0.17.0.dev0__py3-none-any.whl → 0.17.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. sift_client/_internal/grpc_transport/transport.py +5 -20
  2. sift_client/_internal/low_level_wrappers/_test_results_log.py +6 -2
  3. sift_client/_internal/low_level_wrappers/test_results.py +11 -3
  4. sift_client/_internal/pyproject_config.py +84 -0
  5. sift_client/_internal/rest.py +2 -2
  6. sift_client/_internal/urls.py +55 -0
  7. sift_client/_internal/util/hdf5.py +253 -78
  8. sift_client/client.py +27 -0
  9. sift_client/pytest_plugin.py +989 -177
  10. sift_client/resources/data_imports.py +87 -14
  11. sift_client/resources/sync_stubs/__init__.pyi +29 -10
  12. sift_client/resources/test_results.py +2 -1
  13. sift_client/scripts/import_test_result_log.py +2 -1
  14. sift_client/sift_types/_mixins/metadata.py +19 -0
  15. sift_client/sift_types/asset.py +2 -0
  16. sift_client/sift_types/data_import.py +54 -16
  17. sift_client/sift_types/report.py +2 -0
  18. sift_client/sift_types/run.py +2 -0
  19. sift_client/sift_types/test_report.py +38 -0
  20. sift_client/transport/base_connection.py +6 -0
  21. sift_client/util/test_results/context_manager.py +72 -6
  22. {sift_stack_py-0.17.0.dev0.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/METADATA +2 -1
  23. {sift_stack_py-0.17.0.dev0.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/RECORD +27 -24
  24. {sift_stack_py-0.17.0.dev0.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/LICENSE +0 -0
  25. {sift_stack_py-0.17.0.dev0.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/WHEEL +0 -0
  26. {sift_stack_py-0.17.0.dev0.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/entry_points.txt +0 -0
  27. {sift_stack_py-0.17.0.dev0.dist-info → sift_stack_py-0.17.0.dev2.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,6 @@ from __future__ import annotations
8
8
 
9
9
  from importlib.metadata import PackageNotFoundError, version
10
10
  from typing import TYPE_CHECKING, Any, TypedDict, cast
11
- from urllib.parse import ParseResult, urlparse
12
11
 
13
12
  import grpc
14
13
  import grpc.aio as grpc_aio
@@ -21,6 +20,7 @@ from sift_client._internal.grpc_transport._interceptors.metadata import (
21
20
  Metadata,
22
21
  MetadataInterceptor,
23
22
  )
23
+ from sift_client._internal.urls import parse_host
24
24
 
25
25
  if TYPE_CHECKING:
26
26
  from sift_client._internal.grpc_transport._async_interceptors.base import ClientAsyncInterceptor
@@ -78,7 +78,7 @@ def use_sift_channel(
78
78
 
79
79
  credentials = get_ssl_credentials(cert_via_openssl)
80
80
  options = _compute_channel_options(config)
81
- api_uri = _clean_uri(config["uri"], use_ssl)
81
+ api_uri = parse_host(config["uri"])
82
82
  channel = grpc.secure_channel(api_uri, credentials, options)
83
83
  interceptors = _compute_sift_interceptors(config, metadata)
84
84
  return grpc.intercept_channel(channel, *interceptors)
@@ -98,7 +98,7 @@ def use_sift_async_channel(
98
98
  return _use_insecure_sift_async_channel(config, metadata)
99
99
 
100
100
  return grpc_aio.secure_channel(
101
- target=_clean_uri(config["uri"], use_ssl),
101
+ target=parse_host(config["uri"]),
102
102
  credentials=get_ssl_credentials(cert_via_openssl),
103
103
  options=_compute_channel_options(config),
104
104
  interceptors=_compute_sift_async_interceptors(config, metadata),
@@ -112,7 +112,7 @@ def _use_insecure_sift_channel(
112
112
  FOR DEVELOPMENT PURPOSES ONLY
113
113
  """
114
114
  options = _compute_channel_options(config)
115
- api_uri = _clean_uri(config["uri"], False)
115
+ api_uri = parse_host(config["uri"])
116
116
  channel = grpc.insecure_channel(api_uri, options)
117
117
  interceptors = _compute_sift_interceptors(config, metadata)
118
118
  return grpc.intercept_channel(channel, *interceptors)
@@ -125,7 +125,7 @@ def _use_insecure_sift_async_channel(
125
125
  FOR DEVELOPMENT PURPOSES ONLY
126
126
  """
127
127
  return grpc_aio.insecure_channel(
128
- target=_clean_uri(config["uri"], False),
128
+ target=parse_host(config["uri"]),
129
129
  options=_compute_channel_options(config),
130
130
  interceptors=_compute_sift_async_interceptors(config, metadata),
131
131
  )
@@ -205,21 +205,6 @@ def _metadata_async_interceptor(
205
205
  return MetadataAsyncInterceptor(md)
206
206
 
207
207
 
208
- def _clean_uri(uri: str, use_ssl: bool) -> str:
209
- """
210
- This will automatically transform the URI to an acceptable form regardless of whether or not
211
- users included the scheme in the URL or included trailing slashes.
212
- """
213
-
214
- if "http://" in uri or "https://" in uri:
215
- parsed: ParseResult = urlparse(uri)
216
- return parsed.netloc
217
-
218
- full_uri = f"https://{uri}" if use_ssl else f"http://{uri}"
219
- parsed_res: ParseResult = urlparse(full_uri)
220
- return parsed_res.netloc
221
-
222
-
223
208
  def _compute_user_agent() -> str:
224
209
  try:
225
210
  return f"sift_stack_py/{version('sift_stack_py')}"
@@ -143,9 +143,13 @@ class _ReplayState:
143
143
 
144
144
  @dataclass
145
145
  class ReplayResult:
146
- """Result of replaying a log file."""
146
+ """Result of replaying a log file.
147
147
 
148
- report: TestReport
148
+ ``report`` is None on an incremental resume tick that uploaded only steps or
149
+ measurements; the report itself was created on an earlier tick.
150
+ """
151
+
152
+ report: TestReport | None = None
149
153
  steps: list[TestStep] = field(default_factory=list)
150
154
  measurements: list[TestMeasurement] = field(default_factory=list)
151
155
 
@@ -1072,13 +1072,17 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
1072
1072
  id_map: dict[str, str],
1073
1073
  state: _ReplayState,
1074
1074
  ) -> None:
1075
- if state.report is None:
1076
- raise ValueError("UpdateTestReport found before CreateTestReport")
1077
1075
  request = UpdateTestReportRequest()
1078
1076
  json_format.Parse(json_str, request)
1079
1077
  request.test_report.test_report_id = self._map_id(
1080
1078
  id_map, request.test_report.test_report_id
1081
1079
  )
1080
+ # Batch/simulate replays the whole log in order, so a missing report means
1081
+ # the log is malformed. Incremental replay may have created the report on an
1082
+ # earlier tick (its real ID lives in id_map), so state.report is legitimately
1083
+ # None here -- the mapped ID is enough to issue the update.
1084
+ if simulate and state.report is None:
1085
+ raise ValueError("UpdateTestReport found before CreateTestReport")
1082
1086
  state.report = await self.update_test_report(
1083
1087
  request=request, simulate=simulate, existing=state.report
1084
1088
  )
@@ -1203,6 +1207,7 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
1203
1207
  next tick.
1204
1208
  """
1205
1209
  tracking = LogTracking.load(log_path)
1210
+ resuming = tracking.last_uploaded_line > 0
1206
1211
  id_map = tracking.id_map
1207
1212
  state = _ReplayState()
1208
1213
 
@@ -1221,7 +1226,10 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
1221
1226
  tracking.last_uploaded_line += 1
1222
1227
  tracking.save(log_path)
1223
1228
 
1224
- if state.report is None:
1229
+ # On a resume tick the CreateTestReport line was consumed on an earlier
1230
+ # tick, so state.report is expected to be None; the report already exists
1231
+ # on the server. Only a genuine first pass over an empty log is an error.
1232
+ if state.report is None and not resuming:
1225
1233
  raise ValueError("No CreateTestReport found in log file")
1226
1234
 
1227
1235
  return ReplayResult(
@@ -0,0 +1,84 @@
1
+ """Loader for the ``[tool.sift]`` table in a project's ``pyproject.toml``.
2
+
3
+ The pytest plugin consumes this loader to resolve report-content config (under
4
+ ``[tool.sift.pytest.report]``) and SDK-level fallbacks (URIs under
5
+ ``[tool.sift]``). A malformed or missing ``pyproject.toml`` returns ``{}`` so a
6
+ bad config file never aborts the session — the plugin falls back to its
7
+ built-in defaults and surfaces a single warning.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import warnings
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING, Any
15
+
16
+ # ``tomllib`` landed in 3.11; ``tomli`` is the same parser packaged for older
17
+ # interpreters and is declared as a conditional install dep on 3.8-3.10.
18
+ try:
19
+ import tomllib # type: ignore[import-not-found,unused-ignore]
20
+ except ImportError: # pragma: no cover - exercised on 3.8-3.10 only
21
+ import tomli as tomllib # type: ignore[no-redef,import-not-found,unused-ignore]
22
+
23
+ if TYPE_CHECKING:
24
+ import pytest
25
+
26
+
27
+ # Bound the upward walk so a misconfigured environment can't trigger an
28
+ # unbounded filesystem traversal looking for a project root that isn't there.
29
+ _MAX_PARENT_WALK = 3
30
+
31
+
32
def _find_pyproject(config: pytest.Config) -> Path | None:
    """Find the ``pyproject.toml`` that belongs to the running pytest session.

    Lookup order:
      1. ``config.inipath`` if that file is named ``pyproject.toml`` and exists.
      2. ``pyproject.toml`` directly inside the resolved ``config.rootpath``.
      3. ``pyproject.toml`` in each of the next ``_MAX_PARENT_WALK`` parent
         directories, nearest first.

    Returns:
        The first matching path, or ``None`` when nothing was found.
    """
    ini_file = config.inipath
    if ini_file is not None and ini_file.name == "pyproject.toml" and ini_file.is_file():
        return ini_file

    directory = Path(config.rootpath).resolve()
    # Step 0 inspects rootpath itself; steps 1.._MAX_PARENT_WALK climb one
    # parent each, which keeps the search bounded.
    for step in range(_MAX_PARENT_WALK + 1):
        if step:
            directory = directory.parent
        pyproject = directory / "pyproject.toml"
        if pyproject.is_file():
            return pyproject
    return None
57
+
58
+
59
def load_tool_sift(config: pytest.Config) -> dict[str, Any]:
    """Return the parsed ``[tool.sift]`` table from the project's pyproject.toml.

    Args:
        config: The active pytest config, used to locate ``pyproject.toml``.

    Returns:
        The ``[tool.sift]`` table as a dict. ``{}`` is returned when no
        pyproject is discoverable, when the file omits the table, or when the
        file cannot be used (IO error, invalid UTF-8, invalid TOML, or a
        ``tool`` / ``tool.sift`` value that is not a table). Every "cannot be
        used" case emits a single ``SiftPytestPluginWarning`` so the session
        continues with defaults rather than aborting.
    """
    pyproject = _find_pyproject(config)
    if pyproject is None:
        return {}

    try:
        with pyproject.open("rb") as fh:
            data = tomllib.load(fh)
    except (OSError, UnicodeDecodeError, tomllib.TOMLDecodeError) as exc:
        # ``tomllib.load`` decodes the bytes itself, so a non-UTF-8 file raises
        # UnicodeDecodeError rather than TOMLDecodeError; catch both.
        problem = f"{type(exc).__name__}: {exc}"
    else:
        tool = data.get("tool", {})
        sift = tool.get("sift", {}) if isinstance(tool, dict) else tool
        if isinstance(sift, dict):
            return sift
        # e.g. ``tool = 1`` or ``[tool] sift = "x"``: valid TOML, but not a
        # table we can hand back to callers expecting a dict.
        problem = f"TypeError: expected a table but found {type(sift).__name__}"

    # Deferred import: ``pytest_plugin`` imports this loader, so a top-level
    # import here would close the cycle at module load time.
    from sift_client.pytest_plugin import SiftPytestPluginWarning

    warnings.warn(
        f"Failed to read {pyproject} for [tool.sift]: {problem}",
        SiftPytestPluginWarning,
        stacklevel=2,
    )
    return {}
@@ -6,7 +6,7 @@ from requests.adapters import HTTPAdapter
6
6
  from typing_extensions import NotRequired
7
7
  from urllib3.util import Retry
8
8
 
9
- from sift_client._internal.grpc_transport.transport import _clean_uri
9
+ from sift_client._internal.urls import parse_host
10
10
 
11
11
  _DEFAULT_REST_RETRY = Retry(total=3, status_forcelist=[500, 502, 503, 504], backoff_factor=1)
12
12
 
@@ -33,7 +33,7 @@ class SiftRestConfig(TypedDict):
33
33
  def compute_uri(restconf: SiftRestConfig) -> str:
34
34
  uri = restconf["uri"]
35
35
  use_ssl = restconf.get("use_ssl", True)
36
- clean_uri = _clean_uri(uri, use_ssl)
36
+ clean_uri = parse_host(uri)
37
37
 
38
38
  if use_ssl:
39
39
  return f"https://{clean_uri}"
@@ -0,0 +1,55 @@
1
+ """Helpers for turning Sift API endpoints into web-app (frontend) URLs.
2
+
3
+ The Sift frontend can be hosted on several domains and the backend exposes no
4
+ field for its own URL, so the frontend origin is derived client-side from the
5
+ API host. This table mirrors the canonical mapping used by the Grafana
6
+ datasource (sift-stack/sift-grafana-datasource,
7
+ ``src/components/sharelink/getFrontendHostnameDefaults.ts``). Hosts outside the
8
+ table (on-prem and custom deployments) require an explicit override.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from urllib.parse import urlparse
14
+
15
+ # API host (host[:port], no scheme) -> frontend origin (with scheme).
16
+ _API_HOST_TO_FRONTEND_ORIGIN: dict[str, str] = {
17
+ "api.siftstack.com": "https://app.siftstack.com",
18
+ "gov.api.siftstack.com": "https://gov.siftstack.com",
19
+ }
20
+
21
+
22
def parse_origin(url: str) -> str:
    """Reduce a URL or bare host to its ``scheme://host[:port]`` origin.

    Input without a ``://`` separator is treated as a bare host and given the
    ``https`` scheme. Any path, query, or fragment is dropped.
    """
    if "://" not in url:
        url = f"https://{url}"
    parts = urlparse(url)
    origin = parts.scheme + "://" + parts.netloc
    return origin.rstrip("/")
30
+
31
+
32
def parse_host(url: str) -> str:
    """Return the ``host[:port]`` part of a URL or bare host string.

    Input without a ``://`` separator is parsed as if it were prefixed with
    ``https://``; the scheme itself is never part of the result.
    """
    if "://" not in url:
        url = f"https://{url}"
    return urlparse(url).netloc
36
+
37
+
38
def frontend_origin_for_api(api_base_url: str, override: str | None = None) -> str | None:
    """Return the Sift web-app origin for a given API base URL.

    Args:
        api_base_url: The REST API base URL (e.g. ``https://api.siftstack.com``).
        override: An explicit frontend origin (host or full URL) to use instead
            of the derived value. Set this for on-prem or custom deployments
            whose API host isn't in the built-in mapping.

    Returns:
        The frontend origin (e.g. ``https://app.siftstack.com``), or ``None``
        when no override is given and the API host isn't recognized.
    """
    if override:
        return parse_origin(override)
    if not api_base_url:
        return None
    # Hostnames are case-insensitive and the table keys are lowercase, so
    # normalize before the lookup; otherwise ``https://API.siftstack.com``
    # would be treated as an unknown deployment.
    host = parse_host(api_base_url).lower()
    return _API_HOST_TO_FRONTEND_ORIGIN.get(host)
@@ -1,96 +1,271 @@
1
+ """HDF5 schema detection.
2
+
3
+ HDF5 files have no single canonical layout, so detection is parameterized
4
+ by an ``Hdf5Schema``: ``ONE_D`` (per-group time dataset + sibling 1D values,
5
+ with an ancestor walk-up), ``TWO_D`` (``[N, 2]`` datasets where col 0 is
6
+ time), or ``COMPOUND`` (struct-like datasets whose first field is time).
7
+ Each detector walks every dataset in the file recursively; datasets that
8
+ don't fit the chosen schema are not included in the resulting config."""
9
+
1
10
  from __future__ import annotations
2
11
 
3
12
  from pathlib import Path
13
+ from typing import Callable
4
14
 
5
15
  import h5py
16
+ import numpy as np
6
17
 
7
18
  from sift_client._internal.util.numpy_types import numpy_to_sift_type
8
- from sift_client.sift_types.data_import import Hdf5DataColumn, Hdf5ImportConfig, TimeFormat
19
+ from sift_client.sift_types.data_import import (
20
+ DataTypeKey,
21
+ Hdf5DataColumn,
22
+ Hdf5ImportConfig,
23
+ )
9
24
 
10
- # Common HDF5 attribute names used to detect channel metadata.
11
- _NAME_ATTRS = ["Name", "name", "Title", "title", "Sensor", "sensor", "Channel", "channel"]
12
- _UNIT_ATTRS = ["Unit", "unit", "Units", "units"]
13
- _DESCRIPTION_ATTRS = ["Description", "description"]
25
+ # Heuristic attribute names for channel metadata, in priority order. The
26
+ # first non-empty value found on a dataset wins; missing attributes resolve
27
+ # to empty strings.
28
+ _NAME_ATTRS = ("Name", "name", "Title", "title", "Sensor", "sensor", "Channel", "channel")
29
+ _UNIT_ATTRS = ("Unit", "unit", "Units", "units")
30
+ _DESCRIPTION_ATTRS = ("Description", "description")
14
31
 
32
+ # Per-group time dataset names, case-insensitive, in priority order.
33
+ _TIME_DATASET_NAMES = ("time", "timestamp", "timestamps", "ts")
15
34
 
16
- def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "") -> str:
17
- """Return the first matching HDF5 attribute value, or *default*."""
18
- possible = [dataset.attrs.get(attr) for attr in candidates if dataset.attrs.get(attr)]
19
- return str(possible[0]) if possible else default
20
35
 
36
+ def _read_string_attr(dataset: h5py.Dataset, candidates: tuple[str, ...]) -> str:
37
+ """Return the first non-empty string attribute among `candidates`."""
38
+ for name in candidates:
39
+ if name not in dataset.attrs:
40
+ continue
41
+ value = dataset.attrs[name]
42
+ if isinstance(value, bytes):
43
+ value = value.decode("utf-8", errors="replace")
44
+ if isinstance(value, str) and value:
45
+ return value
46
+ # h5py returns multi-element string attrs as ndarrays; take the first.
47
+ if isinstance(value, np.ndarray) and value.size > 0:
48
+ first = value.flat[0]
49
+ if isinstance(first, bytes):
50
+ first = first.decode("utf-8", errors="replace")
51
+ if isinstance(first, str) and first:
52
+ return first
53
+ return ""
21
54
 
22
- def detect_hdf5_config(file_path: str | Path) -> Hdf5ImportConfig:
23
- """Detect an HDF5 import config by inspecting the file's datasets.
24
55
 
25
- Traverses the HDF5 file and produces (time dataset, value dataset) pairs.
26
- For compound datasets with multiple fields, the first field is assumed to
27
- be time and remaining fields become value channels. For simple datasets,
28
- a root-level ``time`` dataset is used if present.
29
- """
30
- path = Path(file_path)
56
def _read_channel_metadata(dataset: h5py.Dataset) -> tuple[str, str, str]:
    """Read the channel's ``(name, units, description)`` from `dataset` attributes.

    Each element comes from ``_read_string_attr`` with the matching candidate
    list, so an element is ``""`` when none of its candidates is usable.
    """
    name = _read_string_attr(dataset, _NAME_ATTRS)
    units = _read_string_attr(dataset, _UNIT_ATTRS)
    description = _read_string_attr(dataset, _DESCRIPTION_ATTRS)
    return name, units, description
31
63
 
32
- with h5py.File(path, "r") as h5file:
33
- columns: list[Hdf5DataColumn] = []
34
- seen_names: set[str] = set()
35
- has_root_time = "time" in h5file
36
-
37
- def _visit(dataset_name: str, obj: object) -> None:
38
- if not isinstance(obj, h5py.Dataset):
39
- return
40
-
41
- # Skip root "time" dataset it's used as the time source, not a value channel.
42
- if dataset_name == "time" and obj.parent == h5file:
43
- return
44
-
45
- n_fields = len(obj.dtype.names) if obj.dtype.names else 0
46
-
47
- if n_fields > 1:
48
- # Compound type: first field is time, remaining are value channels.
49
- for value_index in range(1, n_fields):
50
- channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name)
51
- if channel_name in seen_names:
52
- channel_name = f"{channel_name}.{dataset_name}.{value_index}"
53
-
54
- columns.append(
55
- Hdf5DataColumn(
56
- name=channel_name,
57
- data_type=numpy_to_sift_type(obj.dtype[value_index]),
58
- units=_detect_attr(obj, _UNIT_ATTRS),
59
- description=_detect_attr(obj, _DESCRIPTION_ATTRS),
60
- time_dataset=dataset_name,
61
- value_dataset=dataset_name,
62
- time_index=0,
63
- value_index=0,
64
- time_field=obj.dtype.names[0],
65
- value_field=obj.dtype.names[value_index],
66
- )
67
- )
68
- seen_names.add(channel_name)
69
-
70
- elif n_fields in (0, 1):
71
- # Single column. Use root "time" as time dataset if available.
72
- channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name)
73
- if channel_name in seen_names:
74
- channel_name = f"{channel_name}.{dataset_name}"
75
-
76
- columns.append(
77
- Hdf5DataColumn(
78
- name=channel_name,
79
- data_type=numpy_to_sift_type(obj.dtype),
80
- units=_detect_attr(obj, _UNIT_ATTRS),
81
- description=_detect_attr(obj, _DESCRIPTION_ATTRS),
82
- time_dataset="time" if has_root_time else "",
83
- value_dataset=dataset_name,
84
- time_index=0,
85
- value_index=0,
86
- )
64
+
65
+ def _is_compound(dataset: h5py.Dataset) -> bool:
66
+ return dataset.dtype.names is not None and len(dataset.dtype.names) > 1
67
+
68
+
69
def _is_1d_non_compound(dataset: h5py.Dataset) -> bool:
    """Return True for a dataset with exactly one dimension that is not compound."""
    if _is_compound(dataset):
        return False
    return len(dataset.shape) == 1
71
+
72
+
73
def _is_2d_n_by_2(dataset: h5py.Dataset) -> bool:
    """Return True for a non-compound dataset whose shape is ``[N, 2]``."""
    if _is_compound(dataset):
        return False
    if len(dataset.shape) != 2:
        return False
    return dataset.shape[1] == 2
75
+
76
+
77
+ def _path_to_channel_name(path: str) -> str:
78
+ """Sift renders dotted names hierarchically, so ``group1/current`` becomes
79
+ ``group1.current``, with ``current`` shown under a ``group1`` folder."""
80
+ return path.replace("/", ".")
81
+
82
+
83
+ def _make_name_deduper() -> Callable[[str, str], str]:
84
+ """Return a callable that resolves duplicate channel names by appending
85
+ the dataset's dotted path. First claim of a name wins; later claims of
86
+ the same name get the fallback suffix appended."""
87
+ used: set[str] = set()
88
+
89
+ def dedupe(base_name: str, fallback_suffix: str) -> str:
90
+ name = f"{base_name}.{fallback_suffix}" if base_name in used else base_name
91
+ used.add(name)
92
+ return name
93
+
94
+ return dedupe
95
+
96
+
97
def _collect_datasets(h5file: h5py.File) -> list[h5py.Dataset]:
    """Return every ``h5py.Dataset`` that ``h5file.visititems`` yields, in visit order."""
    found: list[h5py.Dataset] = []

    def keep_if_dataset(_path: str, node: object) -> None:
        # Anything that is not a dataset is ignored. Returning None keeps
        # the traversal going.
        if not isinstance(node, h5py.Dataset):
            return
        found.append(node)

    h5file.visititems(keep_if_dataset)
    return found
107
+
108
+
109
+ def _group_by_parent(datasets: list[h5py.Dataset]) -> dict[str, list[h5py.Dataset]]:
110
+ """Group datasets by their parent group path (``""`` for root-level)."""
111
+ out: dict[str, list[h5py.Dataset]] = {}
112
+ for ds in datasets:
113
+ out.setdefault(ds.name.lstrip("/").rpartition("/")[0], []).append(ds)
114
+ return out
115
+
116
+
117
+ def _resolve_ancestor_time(group_path: str, per_group_time: dict[str, str]) -> str:
118
+ """Return the closest-ancestor time dataset path for ``group_path``,
119
+ walking up to the root. Empty string if no ancestor has one."""
120
+ cursor = group_path
121
+ while True:
122
+ found = per_group_time.get(cursor)
123
+ if found:
124
+ return found
125
+ if cursor == "":
126
+ return ""
127
+ slash = cursor.rfind("/")
128
+ cursor = "" if slash < 0 else cursor[:slash]
129
+
130
+
131
+ def _build_one_d_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
132
+ """1D non-compound schema: at each group, pick a time dataset (by name)
133
+ and pair every other 1D dataset in that group as a value channel.
134
+ Datasets that aren't 1D non-compound are not included."""
135
+
136
+ def identify_time_dataset(group: list[h5py.Dataset]) -> h5py.Dataset | None:
137
+ """Pick the group's time dataset by leaf name, case-insensitive, in
138
+ priority order. Returns ``None`` if no candidate matches; callers
139
+ fall back to an ancestor group's time before giving up."""
140
+ for candidate in _TIME_DATASET_NAMES:
141
+ for ds in group:
142
+ if ds.name.rsplit("/", 1)[-1].lower() == candidate:
143
+ return ds
144
+ return None
145
+
146
+ columns: list[Hdf5DataColumn] = []
147
+ dedupe = _make_name_deduper()
148
+
149
+ one_d = [ds for ds in datasets if _is_1d_non_compound(ds)]
150
+ grouped = _group_by_parent(one_d)
151
+
152
+ # First pass: each group's own time dataset (if any).
153
+ per_group_time: dict[str, str] = {}
154
+ for group_path, group in grouped.items():
155
+ time_ds = identify_time_dataset(group)
156
+ if time_ds is not None:
157
+ per_group_time[group_path] = time_ds.name.lstrip("/")
158
+
159
+ for group_path, group in grouped.items():
160
+ own_time_path = per_group_time.get(group_path)
161
+ time_path = own_time_path or _resolve_ancestor_time(group_path, per_group_time)
162
+ for ds in group:
163
+ ds_path = ds.name.lstrip("/")
164
+ if own_time_path and ds_path == own_time_path:
165
+ continue
166
+ name, units, description = _read_channel_metadata(ds)
167
+ fallback = _path_to_channel_name(ds_path)
168
+ columns.append(
169
+ Hdf5DataColumn(
170
+ name=dedupe(name or fallback, fallback),
171
+ data_type=numpy_to_sift_type(ds.dtype),
172
+ units=units,
173
+ description=description,
174
+ time_dataset=time_path,
175
+ value_dataset=ds_path,
176
+ time_index=0,
177
+ value_index=0,
87
178
  )
88
- seen_names.add(channel_name)
179
+ )
89
180
 
90
- h5file.visititems(_visit)
181
+ return columns
91
182
 
92
- return Hdf5ImportConfig(
93
- asset_name="",
94
- time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS,
95
- data=columns,
183
+
184
+ def _build_two_d_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
185
+ """2D schema: every dataset with shape ``[N, 2]`` becomes one channel
186
+ (col 0 = time, col 1 = value). Other shapes are not included."""
187
+ columns: list[Hdf5DataColumn] = []
188
+ dedupe = _make_name_deduper()
189
+
190
+ for ds in datasets:
191
+ if not _is_2d_n_by_2(ds):
192
+ continue
193
+ ds_path = ds.name.lstrip("/")
194
+ name, units, description = _read_channel_metadata(ds)
195
+ fallback = _path_to_channel_name(ds_path)
196
+ columns.append(
197
+ Hdf5DataColumn(
198
+ name=dedupe(name or fallback, fallback),
199
+ data_type=numpy_to_sift_type(ds.dtype),
200
+ units=units,
201
+ description=description,
202
+ time_dataset=ds_path,
203
+ value_dataset=ds_path,
204
+ time_index=0,
205
+ value_index=1,
206
+ )
96
207
  )
208
+
209
+ return columns
210
+
211
+
212
def _build_compound_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
    """Build columns for the compound schema.

    Only compound datasets contribute. The first dtype member is used as the
    time field and every remaining member becomes its own channel, reading
    time and value from the same dataset. Channel names carry a
    ``.<member>`` suffix only when the dataset has more than one value member.
    """
    unique_name = _make_name_deduper()
    result: list[Hdf5DataColumn] = []

    for dataset in filter(_is_compound, datasets):
        members = dataset.dtype.names
        assert members is not None  # guaranteed by _is_compound
        time_member, *value_members = members

        h5_path = dataset.name.lstrip("/")
        dotted_path = _path_to_channel_name(h5_path)
        label, units, description = _read_channel_metadata(dataset)
        if not label:
            # No name-like attribute on the dataset: fall back to its path.
            label = dotted_path
        qualify_with_member = len(value_members) > 1

        for member in value_members:
            preferred = f"{label}.{member}" if qualify_with_member else label
            column = Hdf5DataColumn(
                name=unique_name(preferred, f"{dotted_path}.{member}"),
                data_type=numpy_to_sift_type(dataset.dtype[member]),
                units=units,
                description=description,
                time_dataset=h5_path,
                value_dataset=h5_path,
                time_index=0,
                value_index=0,
                time_field=time_member,
                value_field=member,
            )
            result.append(column)

    return result
248
+
249
+
250
+ _BUILDERS: dict[DataTypeKey, Callable[[list[h5py.Dataset]], list[Hdf5DataColumn]]] = {
251
+ DataTypeKey.HDF5_ONE_D: _build_one_d_configs,
252
+ DataTypeKey.HDF5_TWO_D: _build_two_d_configs,
253
+ DataTypeKey.HDF5_COMPOUND: _build_compound_configs,
254
+ }
255
+
256
+
257
def detect_hdf5_config(file_path: str | Path, data_type_key: DataTypeKey) -> Hdf5ImportConfig:
    """Detect an HDF5 import config for the requested schema variant.

    The file is opened read-only and every dataset in it is passed to the
    builder registered for ``data_type_key``; datasets that don't fit that
    variant are left out. The returned config has an empty ``asset_name`` and
    no ``time_format`` set: HDF5 timestamps aren't self-describing, so the
    caller must set ``config.time_format`` before importing.

    Raises:
        ValueError: If ``data_type_key`` has no registered HDF5 builder.
    """
    builder = _BUILDERS.get(data_type_key)
    if builder is None:
        raise ValueError(
            f"detect_hdf5_config requires an HDF5 DataTypeKey variant "
            f"(HDF5_ONE_D, HDF5_TWO_D, or HDF5_COMPOUND); got {data_type_key}."
        )

    with h5py.File(Path(file_path), "r") as handle:
        detected = builder(_collect_datasets(handle))

    return Hdf5ImportConfig(asset_name="", data=detected)