PyPI - sift-stack-py - Versions diffs - 0.17.0.dev1__py3-none-any.whl → 0.17.0.dev2__py3-none-any.whl - Mend

sift-stack-py 0.17.0.dev1__py3-none-any.whl → 0.17.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

sift_client/_internal/low_level_wrappers/_test_results_log.py CHANGED Viewed

@@ -143,9 +143,13 @@ class _ReplayState:
 @dataclass
 class ReplayResult:
-    """Result of replaying a log file."""
+    """Result of replaying a log file.
-    report: TestReport
+    ``report`` is None on an incremental resume tick that uploaded only steps or
+    measurements; the report itself was created on an earlier tick.
+    """
+    report: TestReport | None = None
     steps: list[TestStep] = field(default_factory=list)
     measurements: list[TestMeasurement] = field(default_factory=list)

sift_client/_internal/low_level_wrappers/test_results.py CHANGED Viewed

@@ -1072,13 +1072,17 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
         id_map: dict[str, str],
         state: _ReplayState,
     ) -> None:
-        if state.report is None:
-            raise ValueError("UpdateTestReport found before CreateTestReport")
         request = UpdateTestReportRequest()
         json_format.Parse(json_str, request)
         request.test_report.test_report_id = self._map_id(
             id_map, request.test_report.test_report_id
         )
+        # Batch/simulate replays the whole log in order, so a missing report means
+        # the log is malformed. Incremental replay may have created the report on an
+        # earlier tick (its real ID lives in id_map), so state.report is legitimately
+        # None here -- the mapped ID is enough to issue the update.
+        if simulate and state.report is None:
+            raise ValueError("UpdateTestReport found before CreateTestReport")
         state.report = await self.update_test_report(
             request=request, simulate=simulate, existing=state.report
         )
@@ -1203,6 +1207,7 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
         next tick.
         """
         tracking = LogTracking.load(log_path)
+        resuming = tracking.last_uploaded_line > 0
         id_map = tracking.id_map
         state = _ReplayState()
@@ -1221,7 +1226,10 @@ class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
             tracking.last_uploaded_line += 1
             tracking.save(log_path)
-        if state.report is None:
+        # On a resume tick the CreateTestReport line was consumed on an earlier
+        # tick, so state.report is expected to be None; the report already exists
+        # on the server. Only a genuine first pass over an empty log is an error.
+        if state.report is None and not resuming:
             raise ValueError("No CreateTestReport found in log file")
         return ReplayResult(

sift_client/_internal/pyproject_config.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""Loader for the ``[tool.sift]`` table in a project's ``pyproject.toml``.
+The pytest plugin consumes this loader to resolve report-content config (under
+``[tool.sift.pytest.report]``) and SDK-level fallbacks (URIs under
+``[tool.sift]``). A malformed or missing ``pyproject.toml`` returns ``{}`` so a
+bad config file never aborts the session — the plugin falls back to its
+built-in defaults and surfaces a single warning.
+"""
+from __future__ import annotations
+import warnings
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+# ``tomllib`` landed in 3.11; ``tomli`` is the same parser packaged for older
+# interpreters and is declared as a conditional install dep on 3.8-3.10.
+try:
+    import tomllib  # type: ignore[import-not-found,unused-ignore]
+except ImportError:  # pragma: no cover - exercised on 3.8-3.10 only
+    import tomli as tomllib  # type: ignore[no-redef,import-not-found,unused-ignore]
+if TYPE_CHECKING:
+    import pytest
+# Bound the upward walk so a misconfigured environment can't trigger an
+# unbounded filesystem traversal looking for a project root that isn't there.
+_MAX_PARENT_WALK = 3
+def _find_pyproject(config: pytest.Config) -> Path | None:
+    """Return the ``pyproject.toml`` that governs this pytest session.
+    Candidates are tried in this order and the first existing file wins:
+    1. ``config.inipath``, when pytest loaded its ini settings from a file
+       named ``pyproject.toml`` (projects using ``[tool.pytest.ini_options]``).
+    2. ``pyproject.toml`` directly inside the resolved ``config.rootpath``.
+    3. ``pyproject.toml`` in each of the next ``_MAX_PARENT_WALK`` directories
+       above ``rootpath``, nearest first (monorepo layouts where pytest's
+       rootdir is a subdirectory of the project).
+    Returns ``None`` when no candidate is a regular file.
+    """
+    ini_file = config.inipath
+    if ini_file is not None and ini_file.name == "pyproject.toml" and ini_file.is_file():
+        return ini_file
+    root = Path(config.rootpath).resolve()
+    # ``root`` itself, then at most ``_MAX_PARENT_WALK`` ancestors, nearest first.
+    search_dirs = [root, *list(root.parents)[:_MAX_PARENT_WALK]]
+    for directory in search_dirs:
+        pyproject = directory / "pyproject.toml"
+        if pyproject.is_file():
+            return pyproject
+    return None
+def load_tool_sift(config: pytest.Config) -> dict[str, Any]:
+    """Return the parsed ``[tool.sift]`` table from the project's pyproject.toml.
+    Returns ``{}`` when no pyproject is discoverable, when the file has no
+    ``[tool.sift]`` table (including ``tool`` or ``tool.sift`` being present
+    but not a table), or when reading / parsing fails. A read or parse failure
+    emits a single :class:`SiftPytestPluginWarning` so the session continues
+    with defaults rather than aborting on a malformed file.
+    """
+    pyproject = _find_pyproject(config)
+    if pyproject is None:
+        return {}
+    try:
+        with pyproject.open("rb") as fh:
+            data = tomllib.load(fh)
+    except (OSError, UnicodeDecodeError, tomllib.TOMLDecodeError) as exc:
+        # ``tomllib.load`` decodes the bytes as UTF-8 before parsing, so a file
+        # with invalid UTF-8 raises ``UnicodeDecodeError`` rather than
+        # ``TOMLDecodeError``; both mean "malformed file" here.
+        # Deferred import: ``pytest_plugin`` imports this loader, so a
+        # top-level import here would close the cycle at module load time.
+        from sift_client.pytest_plugin import SiftPytestPluginWarning
+        warnings.warn(
+            f"Failed to read {pyproject} for [tool.sift]: {type(exc).__name__}: {exc}",
+            SiftPytestPluginWarning,
+            stacklevel=2,
+        )
+        return {}
+    # ``tool = 1`` and ``[tool]`` with ``sift = "x"`` are valid TOML, so check
+    # for tables explicitly instead of calling ``.get`` on an arbitrary value.
+    tool_table = data.get("tool")
+    if not isinstance(tool_table, dict):
+        return {}
+    sift_table = tool_table.get("sift")
+    return sift_table if isinstance(sift_table, dict) else {}

sift_client/_internal/util/hdf5.py CHANGED Viewed

@@ -1,96 +1,271 @@
+"""HDF5 schema detection.
+HDF5 files have no single canonical layout, so detection is parameterized
+by an ``Hdf5Schema``: ``ONE_D`` (per-group time dataset + sibling 1D values,
+with an ancestor walk-up), ``TWO_D`` (``[N, 2]`` datasets where col 0 is
+time), or ``COMPOUND`` (struct-like datasets whose first field is time).
+Each detector walks every dataset in the file recursively; datasets that
+don't fit the chosen schema are not included in the resulting config."""
 from __future__ import annotations
 from pathlib import Path
+from typing import Callable
 import h5py
+import numpy as np
 from sift_client._internal.util.numpy_types import numpy_to_sift_type
-from sift_client.sift_types.data_import import Hdf5DataColumn, Hdf5ImportConfig, TimeFormat
+from sift_client.sift_types.data_import import (
+    DataTypeKey,
+    Hdf5DataColumn,
+    Hdf5ImportConfig,
+)
-# Common HDF5 attribute names used to detect channel metadata.
-_NAME_ATTRS = ["Name", "name", "Title", "title", "Sensor", "sensor", "Channel", "channel"]
-_UNIT_ATTRS = ["Unit", "unit", "Units", "units"]
-_DESCRIPTION_ATTRS = ["Description", "description"]
+# Heuristic attribute names for channel metadata, in priority order. The
+# first non-empty value found on a dataset wins; missing attributes resolve
+# to empty strings.
+_NAME_ATTRS = ("Name", "name", "Title", "title", "Sensor", "sensor", "Channel", "channel")
+_UNIT_ATTRS = ("Unit", "unit", "Units", "units")
+_DESCRIPTION_ATTRS = ("Description", "description")
+# Per-group time dataset names, case-insensitive, in priority order.
+_TIME_DATASET_NAMES = ("time", "timestamp", "timestamps", "ts")
-def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "") -> str:
-    """Return the first matching HDF5 attribute value, or *default*."""
-    possible = [dataset.attrs.get(attr) for attr in candidates if dataset.attrs.get(attr)]
-    return str(possible[0]) if possible else default
+def _read_string_attr(dataset: h5py.Dataset, candidates: tuple[str, ...]) -> str:
+    """Look up ``candidates`` on ``dataset.attrs`` in order and return the first
+    one holding a non-empty string; return ``""`` when none does.
+    ``bytes`` values are decoded as UTF-8 with undecodable bytes replaced. For
+    a non-empty ``ndarray`` value only its first element is considered."""
+    def as_text(raw: object) -> str:
+        # Normalise bytes to str; anything that is not text counts as empty.
+        if isinstance(raw, bytes):
+            raw = raw.decode("utf-8", errors="replace")
+        return raw if isinstance(raw, str) else ""
+    attrs = dataset.attrs
+    for key in candidates:
+        if key not in attrs:
+            continue
+        raw = attrs[key]
+        if isinstance(raw, np.ndarray):
+            # h5py returns multi-element string attrs as ndarrays; take the first.
+            text = as_text(raw.flat[0]) if raw.size > 0 else ""
+        else:
+            text = as_text(raw)
+        if text:
+            return text
+    return ""
-def detect_hdf5_config(file_path: str | Path) -> Hdf5ImportConfig:
-    """Detect an HDF5 import config by inspecting the file's datasets.
-    Traverses the HDF5 file and produces (time dataset, value dataset) pairs.
-    For compound datasets with multiple fields, the first field is assumed to
-    be time and remaining fields become value channels. For simple datasets,
-    a root-level ``time`` dataset is used if present.
-    """
-    path = Path(file_path)
+def _read_channel_metadata(dataset: h5py.Dataset) -> tuple[str, str, str]:
+    """Read the channel ``(name, units, description)`` from the dataset's HDF5
+    attributes, using the module's candidate attribute-name tuples."""
+    name = _read_string_attr(dataset, _NAME_ATTRS)
+    units = _read_string_attr(dataset, _UNIT_ATTRS)
+    description = _read_string_attr(dataset, _DESCRIPTION_ATTRS)
+    return name, units, description
-    with h5py.File(path, "r") as h5file:
-        columns: list[Hdf5DataColumn] = []
-        seen_names: set[str] = set()
-        has_root_time = "time" in h5file
-        def _visit(dataset_name: str, obj: object) -> None:
-            if not isinstance(obj, h5py.Dataset):
-                return
-            # Skip root "time" dataset — it's used as the time source, not a value channel.
-            if dataset_name == "time" and obj.parent == h5file:
-                return
-            n_fields = len(obj.dtype.names) if obj.dtype.names else 0
-            if n_fields > 1:
-                # Compound type: first field is time, remaining are value channels.
-                for value_index in range(1, n_fields):
-                    channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name)
-                    if channel_name in seen_names:
-                        channel_name = f"{channel_name}.{dataset_name}.{value_index}"
-                    columns.append(
-                        Hdf5DataColumn(
-                            name=channel_name,
-                            data_type=numpy_to_sift_type(obj.dtype[value_index]),
-                            units=_detect_attr(obj, _UNIT_ATTRS),
-                            description=_detect_attr(obj, _DESCRIPTION_ATTRS),
-                            time_dataset=dataset_name,
-                            value_dataset=dataset_name,
-                            time_index=0,
-                            value_index=0,
-                            time_field=obj.dtype.names[0],
-                            value_field=obj.dtype.names[value_index],
-                        )
-                    )
-                    seen_names.add(channel_name)
-            elif n_fields in (0, 1):
-                # Single column. Use root "time" as time dataset if available.
-                channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name)
-                if channel_name in seen_names:
-                    channel_name = f"{channel_name}.{dataset_name}"
-                columns.append(
-                    Hdf5DataColumn(
-                        name=channel_name,
-                        data_type=numpy_to_sift_type(obj.dtype),
-                        units=_detect_attr(obj, _UNIT_ATTRS),
-                        description=_detect_attr(obj, _DESCRIPTION_ATTRS),
-                        time_dataset="time" if has_root_time else "",
-                        value_dataset=dataset_name,
-                        time_index=0,
-                        value_index=0,
-                    )
+def _is_compound(dataset: h5py.Dataset) -> bool:
+    """True when the dataset's dtype has named fields and more than one of them."""
+    field_names = dataset.dtype.names
+    if field_names is None:
+        return False
+    return len(field_names) > 1
+def _is_1d_non_compound(dataset: h5py.Dataset) -> bool:
+    """True for a rank-1 dataset that ``_is_compound`` does not classify as compound."""
+    if _is_compound(dataset):
+        return False
+    return len(dataset.shape) == 1
+def _is_2d_n_by_2(dataset: h5py.Dataset) -> bool:
+    """True for a non-compound rank-2 dataset whose second dimension has length 2."""
+    if _is_compound(dataset):
+        return False
+    shape = dataset.shape
+    return len(shape) == 2 and shape[1] == 2
+def _path_to_channel_name(path: str) -> str:
+    """Turn a slash-separated HDF5 path into a dotted channel name.
+    Sift renders dotted names hierarchically, so ``group1/current`` becomes
+    ``group1.current``, with ``current`` shown under a ``group1`` folder."""
+    segments = path.split("/")
+    return ".".join(segments)
+def _make_name_deduper() -> Callable[[str, str], str]:
+    """Return a callable ``dedupe(base_name, fallback_suffix)`` that hands out
+    channel names that are unique within this deduper.
+    First claim of a name wins and is returned unchanged. A later claim of the
+    same name gets ``.{fallback_suffix}`` appended; if that name is taken too,
+    a numeric ``.2``, ``.3``, ... is appended until the result is unused."""
+    used: set[str] = set()
+    def dedupe(base_name: str, fallback_suffix: str) -> str:
+        name = base_name
+        if name in used:
+            name = f"{base_name}.{fallback_suffix}"
+            # The suffixed form can itself collide with an earlier claim, so
+            # keep extending it rather than returning a duplicate name.
+            counter = 2
+            while name in used:
+                name = f"{base_name}.{fallback_suffix}.{counter}"
+                counter += 1
+        used.add(name)
+        return name
+    return dedupe
+def _collect_datasets(h5file: h5py.File) -> list[h5py.Dataset]:
+    """Return every ``h5py.Dataset`` that ``h5file.visititems`` reports, in visit order."""
+    found: list[h5py.Dataset] = []
+    def keep_if_dataset(_path: str, node: object) -> None:
+        # Groups and other node kinds are ignored; only datasets are kept.
+        if not isinstance(node, h5py.Dataset):
+            return
+        found.append(node)
+    h5file.visititems(keep_if_dataset)
+    return found
+def _group_by_parent(datasets: list[h5py.Dataset]) -> dict[str, list[h5py.Dataset]]:
+    """Bucket datasets by the path of their parent group, without the leading
+    slash; root-level datasets land under ``""``. Input order is kept per bucket."""
+    grouped: dict[str, list[h5py.Dataset]] = {}
+    for dataset in datasets:
+        parent_path, _, _leaf = dataset.name.lstrip("/").rpartition("/")
+        if parent_path not in grouped:
+            grouped[parent_path] = []
+        grouped[parent_path].append(dataset)
+    return grouped
+def _resolve_ancestor_time(group_path: str, per_group_time: dict[str, str]) -> str:
+    """Return the time dataset path registered for ``group_path`` or, failing
+    that, for its nearest ancestor group, walking up to the root (``""``).
+    Returns an empty string when no group on that walk has a non-empty entry."""
+    current = group_path
+    while True:
+        time_path = per_group_time.get(current)
+        if time_path:
+            return time_path
+        if not current:
+            # The root was just checked and had nothing: give up.
+            return ""
+        # Drop the last path segment; a path without "/" collapses to the root.
+        current = current.rpartition("/")[0]
+def _build_one_d_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
+    """1D non-compound schema: at each group, pick a time dataset (by name)
+    and pair every other 1D dataset in that group as a value channel.
+    Datasets that aren't 1D non-compound are not included.
+    A group with no time dataset of its own uses the one found by
+    ``_resolve_ancestor_time``; when that finds nothing, the column's
+    ``time_dataset`` is the empty string."""
+    def identify_time_dataset(group: list[h5py.Dataset]) -> h5py.Dataset | None:
+        """Pick the group's time dataset by leaf name, case-insensitive, in
+        priority order. Returns ``None`` if no candidate matches; callers
+        fall back to an ancestor group's time before giving up."""
+        # Outer loop is over candidate names so that priority order, not
+        # dataset order, decides which dataset wins.
+        for candidate in _TIME_DATASET_NAMES:
+            for ds in group:
+                if ds.name.rsplit("/", 1)[-1].lower() == candidate:
+                    return ds
+        return None
+    columns: list[Hdf5DataColumn] = []
+    dedupe = _make_name_deduper()
+    # Only 1D non-compound datasets take part, both as time sources and values.
+    one_d = [ds for ds in datasets if _is_1d_non_compound(ds)]
+    grouped = _group_by_parent(one_d)
+    # First pass: each group's own time dataset (if any).
+    per_group_time: dict[str, str] = {}
+    for group_path, group in grouped.items():
+        time_ds = identify_time_dataset(group)
+        if time_ds is not None:
+            per_group_time[group_path] = time_ds.name.lstrip("/")
+    # Second pass: emit one column per remaining dataset, paired with the
+    # group's own time dataset or, failing that, an ancestor's.
+    for group_path, group in grouped.items():
+        own_time_path = per_group_time.get(group_path)
+        time_path = own_time_path or _resolve_ancestor_time(group_path, per_group_time)
+        for ds in group:
+            ds_path = ds.name.lstrip("/")
+            # The group's own time dataset is the time source, not a value channel.
+            if own_time_path and ds_path == own_time_path:
+                continue
+            name, units, description = _read_channel_metadata(ds)
+            # Without a name attribute the dotted dataset path is the channel name.
+            fallback = _path_to_channel_name(ds_path)
+            columns.append(
+                Hdf5DataColumn(
+                    name=dedupe(name or fallback, fallback),
+                    data_type=numpy_to_sift_type(ds.dtype),
+                    units=units,
+                    description=description,
+                    time_dataset=time_path,
+                    value_dataset=ds_path,
+                    time_index=0,
+                    value_index=0,
                 )
-                seen_names.add(channel_name)
+            )
-        h5file.visititems(_visit)
+    return columns
-        return Hdf5ImportConfig(
-            asset_name="",
-            time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS,
-            data=columns,
+def _build_two_d_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
+    """2D schema: every dataset with shape ``[N, 2]`` becomes one channel
+    (col 0 = time, col 1 = value). Other shapes are not included.
+    The channel name is the dataset's name attribute when present, otherwise
+    its dotted path; duplicate names are resolved by the name deduper."""
+    columns: list[Hdf5DataColumn] = []
+    dedupe = _make_name_deduper()
+    for ds in datasets:
+        if not _is_2d_n_by_2(ds):
+            continue
+        ds_path = ds.name.lstrip("/")
+        name, units, description = _read_channel_metadata(ds)
+        fallback = _path_to_channel_name(ds_path)
+        columns.append(
+            Hdf5DataColumn(
+                name=dedupe(name or fallback, fallback),
+                data_type=numpy_to_sift_type(ds.dtype),
+                units=units,
+                description=description,
+                # Time and value come from the same dataset; the two indexes
+                # select column 0 and column 1 respectively.
+                time_dataset=ds_path,
+                value_dataset=ds_path,
+                time_index=0,
+                value_index=1,
+            )
         )
+    return columns
+def _build_compound_configs(datasets: list[h5py.Dataset]) -> list[Hdf5DataColumn]:
+    """Compound schema: emit one channel for each member after the first of
+    every compound dataset; the first member is the time field. Datasets that
+    are not compound are skipped.
+    Channel names start from the dataset's name attribute (or its dotted path
+    when there is none); ``.{member}`` is appended only when the dataset has
+    more than one value member. Units and description are shared by all
+    channels of a dataset."""
+    unique_name = _make_name_deduper()
+    result: list[Hdf5DataColumn] = []
+    for dataset in (ds for ds in datasets if _is_compound(ds)):
+        members = dataset.dtype.names
+        assert members is not None  # guaranteed by _is_compound
+        time_member, *value_members = members
+        path = dataset.name.lstrip("/")
+        dotted_path = _path_to_channel_name(path)
+        attr_name, units, description = _read_channel_metadata(dataset)
+        channel_root = attr_name or dotted_path
+        multiple_values = len(value_members) > 1
+        for member in value_members:
+            base = f"{channel_root}.{member}" if multiple_values else channel_root
+            result.append(
+                Hdf5DataColumn(
+                    name=unique_name(base, f"{dotted_path}.{member}"),
+                    data_type=numpy_to_sift_type(dataset.dtype[member]),
+                    units=units,
+                    description=description,
+                    # Time and value are members of the same compound dataset.
+                    time_dataset=path,
+                    value_dataset=path,
+                    time_index=0,
+                    value_index=0,
+                    time_field=time_member,
+                    value_field=member,
+                )
+            )
+    return result
+# Dispatch table: HDF5 ``DataTypeKey`` variant -> builder that turns a list of
+# datasets into the ``Hdf5DataColumn`` entries for that schema.
+_BUILDERS: dict[DataTypeKey, Callable[[list[h5py.Dataset]], list[Hdf5DataColumn]]] = {
+    DataTypeKey.HDF5_ONE_D: _build_one_d_configs,
+    DataTypeKey.HDF5_TWO_D: _build_two_d_configs,
+    DataTypeKey.HDF5_COMPOUND: _build_compound_configs,
+}
+def detect_hdf5_config(file_path: str | Path, data_type_key: DataTypeKey) -> Hdf5ImportConfig:
+    """Open ``file_path`` read-only and detect an HDF5 import config for the
+    schema variant named by ``data_type_key``. Datasets that don't fit the
+    chosen variant are not included. ``time_format`` is always left unset:
+    HDF5 timestamps aren't self-describing, so the caller must set
+    ``config.time_format`` before importing.
+    Raises ``ValueError`` when ``data_type_key`` is not an HDF5 variant."""
+    builder = _BUILDERS.get(data_type_key)
+    if builder is None:
+        raise ValueError(
+            f"detect_hdf5_config requires an HDF5 DataTypeKey variant "
+            f"(HDF5_ONE_D, HDF5_TWO_D, or HDF5_COMPOUND); got {data_type_key}."
+        )
+    # Columns are built while the file is open; the config is assembled after.
+    with h5py.File(Path(file_path), "r") as h5file:
+        detected = builder(_collect_datasets(h5file))
+    return Hdf5ImportConfig(asset_name="", data=detected)

sift-stack-py 0.17.0.dev1__py3-none-any.whl → 0.17.0.dev2__py3-none-any.whl

sift-stack-py 0.17.0.dev1__py3-none-any.whl → 0.17.0.dev2__py3-none-any.whl