PyPI - cineon-format - Versions diffs - 3.0.0__tar.gz - Mend

cineon-format 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

cineon_format-3.0.0/PKG-INFO +73 -0
cineon_format-3.0.0/README.md +60 -0
cineon_format-3.0.0/pyproject.toml +40 -0
cineon_format-3.0.0/src/cineon_format/__init__.py +18 -0
cineon_format-3.0.0/src/cineon_format/cineon_data.py +313 -0
cineon_format-3.0.0/src/cineon_format/random_data.py +146 -0
cineon_format-3.0.0/src/cineon_format/rolling_data.py +102 -0
cineon_format-3.0.0/src/cineon_format/utils.py +46 -0
cineon_format-3.0.0/src/cineon_format/version.py +3 -0

cineon_format-3.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,73 @@
+Metadata-Version: 2.3
+Name: cineon-format
+Version: 3.0.0
+Summary: Definitions for Cineon data formats
+Author: Cineon
+Author-email: Cineon <info@cineon.ai>
+Requires-Dist: numpy>=2.3.2
+Requires-Dist: pandas>=2.3.1
+Requires-Dist: pydantic>=2.11.7
+Requires-Dist: typeguard>=4.4.4
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+# Cineon Format
+![Latest Release](badges/version.svg) ![Coverage Status](badges/coverage.svg) ![Code complexity](badges/complexity.svg)
+This package contains a definition of a common data format to be used across Cineon repositories.
+The `CineonData` format is a class that inherits from `pydantic`'s `BaseModel` class, and as such the data validation is performed on class instantiation. If the data being ingested has a problem then class instantiation will fail. Simple. This is useful because downstream processing tasks can make valid assumptions about the data and therefore do not have to perform any explicit checks. This means that lots of downstream data-validation code can be deleted.
+## Prerequisites
+To use the example script in the package you need to install `uv`, but you do not need `uv` in order to use the package in other Python code.
+## Usage
+Add the `cineon_format` package into your project, with either:
+```bash
+poetry add git+https://github.com/cineon-ai/cineon_format.git
+uv add git+https://github.com/cineon-ai/cineon_format.git
+```
+Then:
+```python
+from cineon_format import CineonData
+# Load from CSV
+cineon_data = CineonData.from_csv("path/to/csv")
+# Alternatively, load from JSON
+cineon_data = CineonData.from_json("path/to/json")
+```
+## Examples
+Running the example script with either:
+```bash
+uv run scripts/example.py --csv=data/good.csv
+uv run scripts/example.py --json=data/good.json
+```
+will load a file, convert that file into the `CineonData` format and then spit out a dictionary representation and a `DataFrame` representation to the terminal.
+If you run the script with either:
+```bash
+uv run scripts/example.py --csv=data/bad.csv
+uv run scripts/example.py --json=data/bad.json
+```
+you can see examples of data that does not conform to the format; the script therefore errors out, with helpful error messages printed to the terminal.
+"Random" `CineonData` can be generated via the script:
+```bash
+uv run scripts/generate.py
+```
+This will print a summary of the `CineonData` to the terminal.

cineon_format-3.0.0/README.md ADDED Viewed

@@ -0,0 +1,60 @@
+# Cineon Format
+![Latest Release](badges/version.svg) ![Coverage Status](badges/coverage.svg) ![Code complexity](badges/complexity.svg)
+This package contains a definition of a common data format to be used across Cineon repositories.
+The `CineonData` format is a class that inherits from `pydantic`'s `BaseModel` class, and as such the data validation is performed on class instantiation. If the data being ingested has a problem then class instantiation will fail. Simple. This is useful because downstream processing tasks can make valid assumptions about the data and therefore do not have to perform any explicit checks. This means that lots of downstream data-validation code can be deleted.
+## Prerequisites
+To use the example script in the package you need to install `uv`, but you do not need `uv` in order to use the package in other Python code.
+## Usage
+Add the `cineon_format` package into your project, with either:
+```bash
+poetry add git+https://github.com/cineon-ai/cineon_format.git
+uv add git+https://github.com/cineon-ai/cineon_format.git
+```
+Then:
+```python
+from cineon_format import CineonData
+# Load from CSV
+cineon_data = CineonData.from_csv("path/to/csv")
+# Alternatively, load from JSON
+cineon_data = CineonData.from_json("path/to/json")
+```
+## Examples
+Running the example script with either:
+```bash
+uv run scripts/example.py --csv=data/good.csv
+uv run scripts/example.py --json=data/good.json
+```
+will load a file, convert that file into the `CineonData` format and then spit out a dictionary representation and a `DataFrame` representation to the terminal.
+If you run the script with either:
+```bash
+uv run scripts/example.py --csv=data/bad.csv
+uv run scripts/example.py --json=data/bad.json
+```
+you can see examples of data that does not conform to the format; the script therefore errors out, with helpful error messages printed to the terminal.
+"Random" `CineonData` can be generated via the script:
+```bash
+uv run scripts/generate.py
+```
+This will print a summary of the `CineonData` to the terminal.

cineon_format-3.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,40 @@
+[project]
+name = "cineon-format"
+version = "3.0.0"
+description = "Definitions for Cineon data formats"
+readme = "README.md"
+authors = [{ name = "Cineon", email = "info@cineon.ai" }]
+requires-python = ">=3.11"
+dependencies = [
+    "numpy>=2.3.2",
+    "pandas>=2.3.1",
+    "pydantic>=2.11.7",
+    "typeguard>=4.4.4",
+]
+[build-system]
+requires = ["uv_build"]
+build-backend = "uv_build"
+[tool.isort]
+profile = "black"
+[dependency-groups]
+dev = [
+    "coverage>=7.10.6",
+    "genbadge[coverage]>=1.1.2",
+    "ipykernel>=7.1.0",
+    "isort>=6.0.1",
+    "mypy>=1.17.1",
+    "numpy>=2.2.6",
+    "pandas-stubs>=2.3.2.250827",
+    "pytest>=8.4.1",
+    "radon>=6.0.1",
+    "ruff>=0.12.11",
+]
+[[tool.uv.index]]
+name = "cineon-registry"
+url = "https://europe-west2-python.pkg.dev/elengine-463909/cineon-registry/simple/"
+publish-url = "https://europe-west2-python.pkg.dev/elengine-463909/cineon-registry/"
+explicit = true

cineon_format-3.0.0/src/cineon_format/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+from .cineon_data import CineonData
+from .random_data import (
+    create_random_data,
+    create_random_dataframe,
+    create_random_dictionary,
+)
+from .rolling_data import get_window, get_windowed_cineon_data
+from .version import __version__
+__all__ = [
+    "CineonData",
+    "create_random_dictionary",
+    "create_random_dataframe",
+    "create_random_data",
+    "get_windowed_cineon_data",
+    "get_window",
+    "__version__",
+]

cineon_format-3.0.0/src/cineon_format/cineon_data.py ADDED Viewed

@@ -0,0 +1,313 @@
+import json
+from datetime import datetime, timedelta
+from typing import Any, Optional
+import numpy as np
+import pandas as pd
+from pandas.testing import assert_frame_equal
+from pydantic import BaseModel, field_validator, model_validator
+from .utils import flatten_dictionary, json_serialiser, unflatten_dictionary
+# Constants
+# Largest pupil diameter (in millimetres) accepted by the pupil-diameter validator.
+MAX_PUPIL_DIAMETER_MM = 13.0
+# Absolute tolerance used when checking that a vector's *squared* magnitude equals 1.
+ATOL_UNIT_VECTOR = 1e-5
+class CartesianVector(BaseModel, extra="forbid"):
+    """Data model for a 3D Cartesian vector.
+    Each component is stored as its own list of floats, so one instance holds a whole series of vectors (entry `i` of `x`, `y` and `z` together form vector `i`). Unknown fields are rejected because of `extra="forbid"`.
+    """
+    x: list[float]  # x components of the vector series
+    y: list[float]  # y components of the vector series
+    z: list[float]  # z components of the vector series
+def get_squared_magnitudes(v: CartesianVector) -> np.ndarray:
+    x = np.array(v.x)
+    y = np.array(v.y)
+    z = np.array(v.z)
+    return x**2 + y**2 + z**2
+class Eye(BaseModel, extra="forbid"):
+    """Data model for a cyclops eye gaze data in the head coordinate system."""
+    gaze_direction: CartesianVector
+    gaze_depth: Optional[list[float]] = None
+    gaze_object: Optional[list[str | None]] = None
+    pupil_diameter: Optional[list[float | None]] = None
+    openness: Optional[list[float]] = None
+    # Ensure direction vectors are normalized
+    @field_validator("gaze_direction")
+    @classmethod
+    def validate_unit_vector(cls, vector: CartesianVector):
+        squared_magnitude = get_squared_magnitudes(vector)
+        if not np.allclose(squared_magnitude, 1.0, atol=ATOL_UNIT_VECTOR):
+            idxs = np.where(~np.isclose(squared_magnitude, 1.0, atol=ATOL_UNIT_VECTOR))[
+                0
+            ]
+            bad = squared_magnitude[idxs]
+            raise ValueError(
+                f"Gaze direction at indices {idxs.tolist()} are not normalized: {bad.tolist()!r}"
+            )
+        return vector
+    @field_validator("gaze_direction")
+    @classmethod
+    def validate_local_coordinates(cls, vector: CartesianVector):
+        for idx, value in enumerate(vector.z):
+            if value < 0.0:
+                raise ValueError(
+                    f"Gaze direction z component at index {idx} has invalid value {value!r}: must be non-negative in local coordinates"
+                )
+        return vector
+    @field_validator("gaze_depth")
+    @classmethod
+    def validate_gaze_depth(cls, list_of_values):
+        if list_of_values is None:
+            return list_of_values
+        else:
+            for idx, value in enumerate(list_of_values):
+                if value is not None and value < 0.0:
+                    raise ValueError(
+                        f"Gaze depth at index {idx} has invalid value {value!r}: must be non-negative"
+                    )
+            return list_of_values
+    @field_validator("openness")
+    @classmethod
+    def validate_eye_openness(cls, list_of_values):
+        if list_of_values is None:
+            return list_of_values
+        else:
+            for idx, value in enumerate(list_of_values):
+                if (value is not None) and not (0.0 <= value <= 1.0):
+                    raise ValueError(
+                        f"Eye openness at index {idx} is normalised and must be between 0 and 1"
+                    )
+            return list_of_values
+    @field_validator("pupil_diameter")
+    @classmethod
+    def validate_pupil_diameter(cls, list_of_values):
+        if list_of_values is None:
+            return list_of_values
+        else:
+            for idx, value in enumerate(list_of_values):
+                if value is not None and not (0.0 <= value <= MAX_PUPIL_DIAMETER_MM):
+                    raise ValueError(
+                        f"Pupil_diameter at index {idx} has invalid value {value!r}: must be non-negative and less than {MAX_PUPIL_DIAMETER_MM}mm"
+                    )
+            return list_of_values
+class Head(BaseModel, extra="forbid"):
+    """Data model for head position data in the world coordinate system."""
+    direction: Optional[CartesianVector] = None
+    position: Optional[CartesianVector] = None
+    acceleration: Optional[CartesianVector] = None
+    # Ensure direction vectors are normalized
+    @field_validator("direction")
+    @classmethod
+    def validate_unit_vector(cls, vector):
+        if vector is None:
+            return vector
+        else:
+            squared_magnitudes = get_squared_magnitudes(vector)
+            if not np.allclose(squared_magnitudes, 1.0, atol=ATOL_UNIT_VECTOR):
+                idxs = np.where(
+                    ~np.isclose(squared_magnitudes, 1.0, atol=ATOL_UNIT_VECTOR)
+                )[0]
+                bad = squared_magnitudes[idxs]
+                raise ValueError(
+                    f"Directions at indices {idxs.tolist()} are not normalized: magnitudes are {bad.tolist()!r}"
+                )
+            return vector
+class CineonData(BaseModel, extra="forbid"):
+    """
+    Data model for Cineon eye-tracking data.
+    The coordinate system in which the data is represented is left-handed. The y axis points upwards (gravity accelerates in the negative y direction).
+    In the case of data in the Eye object, the coordinate system is non-inertial and local to the head. The z axis points "forwards", and the x axis points to the "right". The y axis always points upwards. In this local coordinate system, gaze direction vectors should have a non-negative z component (you cannot look backwards through your own head).
+    In the case of the data in the Head object, the coordinate system is inertial and world-based. The y axis always points upwards. The orientation of the other axes is arbitrary but should be consistent within a dataset. Often (e.g., in virtual reality) the z axis points "forwards" from the initial location of the head, and the x axis points to the "right" from the initial location of the head. Otherwise z might point North and x East (e.g., if the data-collection device has a magnetometer). In any case, x and z must be orthogonal and left-handed with respect to y.
+    Some columns are optional and may be omitted if not available. A `None` value for an entire column indicates data the was not available to be collected (e.g., some hardware does not have the ability to detect eye openness). Missing data within a column is indicated by the value `None` (e.g., the pupil diameter could not be measured while the eye was closed).
+    Attributes
+    ----------
+    timestamp : list[datetime]
+        List of timestamps for each sample.
+    eye : Eye
+        Eye gaze data.
+    head : Head
+        Head position data.
+    participant_id : Optional[list[int]]
+        List of participant IDs.
+    event : Optional[list[str | None]]
+        List of event labels.
+    stress_report : Optional[list[float]]
+        List of stress report values.
+    stress_certainty_report : Optional[list[float]]
+        List of stress certainty report values.
+    workload_report : Optional[list[float]]
+        List of workload report values.
+    workload_certainty_report : Optional[list[float]]
+        List of workload certainty report values.
+    fatigue_report : Optional[list[float]]
+        List of fatigue report values.
+    fatigue_certainty_report : Optional[list[float]]
+        List of fatigue certainty report values.
+    """
+    timestamp: list[datetime]
+    eye: Eye
+    head: Optional[Head] = None
+    participant_id: Optional[list[int]] = None
+    event: Optional[list[str | None]] = None
+    shard_id: Optional[list[int]] = None
+    stress_report: Optional[list[float]] = None
+    stress_certainty_report: Optional[list[float]] = None
+    workload_report: Optional[list[float]] = None
+    workload_certainty_report: Optional[list[float]] = None
+    fatigue_report: Optional[list[float]] = None
+    fatigue_certainty_report: Optional[list[float]] = None
+    # Check that all lists have the same length
+    @model_validator(mode="after")
+    def ensure_equal_lengths(self) -> "CineonData":
+        flat_dict = flatten_dictionary(self.to_dict())
+        list_lengths = {  # Pick out only those attributes that are lists (not None)
+            name: len(value)
+            for name, value in flat_dict.items()
+            if isinstance(value, list)
+        }
+        lengths_set = set(
+            list_lengths.values()
+        )  # A set that should have only one element
+        if len(lengths_set) > 1:  # Show which fields disagree to help debugging
+            raise ValueError(f"List fields must all have same length: {list_lengths}")
+        return self
+    # Check that timestamps are in ascending order
+    @field_validator("timestamp")
+    def validate_timestamps(cls, list_of_timestamps):
+        """Validate that timestamps are in strictly ascending order."""
+        for i in range(1, len(list_of_timestamps)):
+            if list_of_timestamps[i] <= list_of_timestamps[i - 1]:  # Early exit
+                raise ValueError("Timestamps must be in strictly ascending order")
+        return list_of_timestamps
+    @classmethod
+    def from_dict(cls, data_dict: dict[str, Any]):
+        return cls.model_validate(data_dict)
+    @classmethod
+    def from_json(cls, filepath):
+        with open(filepath, "r") as f:
+            data_dict = json.load(f)
+        return cls.from_dict(data_dict)
+    @classmethod
+    def from_dataframe(cls, df: pd.DataFrame):
+        # Convert NaN to None for Pydantic compatibility
+        df = df.astype(object).where(pd.notnull(df), None)
+        flat_dict = df.to_dict(orient="list")
+        data_dict = unflatten_dictionary(flat_dict)
+        return cls.model_validate(data_dict)
+    @classmethod
+    def from_csv(cls, filepath):
+        df = pd.read_csv(
+            filepath,
+            dtype={  # Ensure correct types for optional string columns
+                "event": "string",
+                "eye.gaze_object": "string",
+            },
+            keep_default_na=True,
+            na_values=[""],
+            parse_dates=["timestamp"],
+            date_format="%Y-%m-%d %H:%M:%S.%f",
+        )
+        return cls.from_dataframe(df)
+    def to_dict(self) -> dict[str, Any]:
+        # "exclude_none" removes any keys with value None from the nested structure
+        return self.model_dump(exclude_none=True)
+    def to_json(self, filepath: str) -> None:
+        with open(filepath, "w") as f:
+            json.dump(self.to_dict(), f, indent=2, default=json_serialiser)
+    def to_dataframe(self) -> pd.DataFrame:
+        """Convert the CineonData to a Pandas DataFrame."""
+        flat_dict = flatten_dictionary(self.to_dict())
+        return pd.DataFrame(flat_dict)
+    def to_csv(self, filepath: str) -> None:
+        df = self.to_dataframe()
+        df.to_csv(filepath, index=False)
+    def get_times(self) -> list[float]:
+        """
+        Get timestamps as a list of float seconds since the first timestamp.
+        The first entry in this list is always 0.0.
+        The last entry is the total duration in seconds.
+        """
+        times = [(t - self.timestamp[0]).total_seconds() for t in self.timestamp]
+        return times
+    def duration(self) -> timedelta:
+        return self.timestamp[-1] - self.timestamp[0]
+    def sampling(self) -> tuple[float, float]:
+        """
+        Get the mean and standard deviation of the sampling times in seconds.
+        """
+        t = self.get_times()
+        dt = np.diff(t)
+        return float(np.mean(dt)), float(np.std(dt))
+    def frequency(self) -> tuple[float, float]:
+        """
+        Get the mean and standard deviation of the sampling frequencies in Hz.
+        """
+        t = self.get_times()
+        dt = np.diff(t)
+        freqs = 1 / dt
+        return float(np.mean(freqs)), float(np.std(freqs))
+    def __len__(self) -> int:
+        return len(self.timestamp)
+    def __str__(self) -> str:
+        t_mean, t_std = self.sampling()
+        f_mean, f_std = self.frequency()
+        return (
+            f"CineonData object with\n"
+            f"Start time: {self.timestamp[0].isoformat()}\n"
+            f"End time: {self.timestamp[-1].isoformat()}\n"
+            f"Duration: {self.duration()}\n"
+            f"Data sampling: {t_mean:.4f}±{t_std:.4f}s\n"
+            f"Data frequency: {f_mean:.2f}±{f_std:.2f}Hz\n"
+            f"Number of samples: {len(self)}"
+        )
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, CineonData):
+            return False
+        else:
+            df1 = self.to_dataframe()
+            df2 = other.to_dataframe()
+            try:
+                assert_frame_equal(df1, df2, check_dtype=False)
+            except AssertionError:
+                return False
+            return True

cineon_format-3.0.0/src/cineon_format/random_data.py ADDED Viewed

@@ -0,0 +1,146 @@
+import random
+from datetime import datetime, timedelta
+from typing import Any, Optional
+import numpy as np
+import pandas as pd
+from typeguard import typechecked
+from .cineon_data import CineonData
+from .utils import flatten_dictionary
+DURATION_EPS = 1e-9  # Small epsilon for topping up durations (seconds)
+@typechecked
+def _create_random_unit_vectors(n: int, forward=False) -> np.ndarray:
+    if forward:
+        azimin, azimax = -np.pi / 2.0, np.pi / 2.0
+    else:
+        azimin, azimax = -np.pi, np.pi
+    azimuth = np.random.uniform(azimin, azimax, n)
+    sin_elevation = np.random.uniform(-1.0, 1.0, n)
+    cos_elevation = np.sqrt(1.0 - sin_elevation**2)
+    unit_vectors = np.zeros((n, 3))
+    unit_vectors[:, 0] = cos_elevation * np.sin(azimuth)
+    unit_vectors[:, 1] = sin_elevation
+    unit_vectors[:, 2] = cos_elevation * np.cos(azimuth)
+    return unit_vectors
+@typechecked
+def create_random_dictionary(
+    duration: float,
+    frequency: float = 60.0,
+    include_gaze_depth: bool = False,
+    include_pupil: bool = False,
+    include_eye_openness: bool = False,
+    include_target_objects: bool = False,
+    include_head_direction: bool = False,
+    include_head_position: bool = False,
+    include_head_acceleration: bool = False,
+    include_events: bool = False,
+    include_shards: bool = False,
+    include_reports: bool = False,
+    seed: Optional[int] = None,
+) -> dict[str, Any]:
+    # Seed the random number generators for reproducibility
+    if seed is not None:
+        np.random.seed(seed)
+        random.seed(seed)
+    # Create timestamps based on duration and frequency
+    n = int(np.floor((duration + DURATION_EPS) * frequency)) + 1
+    t = np.arange(n, dtype=float) / frequency
+    t = t[t <= duration + DURATION_EPS]
+    n = len(t)
+    timestamp_start = datetime.now()
+    timestamp = [(timestamp_start + timedelta(seconds=float(ts))) for ts in t]
+    # Eye data
+    gaze_direction = _create_random_unit_vectors(n, forward=True)
+    eye: dict[str, Any] = {
+        "gaze_direction": {
+            "x": gaze_direction[:, 0].tolist(),
+            "y": gaze_direction[:, 1].tolist(),
+            "z": gaze_direction[:, 2].tolist(),
+        }
+    }
+    if include_gaze_depth:
+        eye["gaze_depth"] = np.random.uniform(0, 10.0, n).tolist()
+    if include_target_objects:
+        target_objects = random.choices(["A", "B", "C", None], k=n)
+        eye["gaze_object"] = target_objects
+    if include_pupil:
+        eye["pupil_diameter"] = np.random.uniform(0.0, 13.0, n).tolist()
+    if include_eye_openness:
+        eye["openness"] = np.random.uniform(0.0, 1.0, n).tolist()
+    # Head data
+    head: dict[str, Any] = {}
+    if include_head_direction:
+        head_direction = _create_random_unit_vectors(n)
+        head["direction"] = {
+            "x": head_direction[:, 0].tolist(),
+            "y": head_direction[:, 1].tolist(),
+            "z": head_direction[:, 2].tolist(),
+        }
+    if include_head_position:
+        head["position"] = {
+            "x": np.random.normal(0.0, 1.0, n).tolist(),
+            "y": np.random.normal(0.0, 1.0, n).tolist(),
+            "z": np.random.normal(0.0, 1.0, n).tolist(),
+        }
+    if include_head_acceleration:
+        head["acceleration"] = {
+            "x": np.random.normal(0.0, 1.0, n).tolist(),
+            "y": np.random.normal(0.0, 1.0, n).tolist(),
+            "z": np.random.normal(0.0, 1.0, n).tolist(),
+        }
+    # Create full object
+    data: dict[str, Any] = {
+        "timestamp": timestamp,
+        "eye": eye,
+    }
+    if len(head) > 0:
+        data["head"] = head
+    if include_events:
+        data["event"] = random.choices(["A", "B", "C", None], k=n)
+    if include_shards:
+        data["shard_id"] = np.random.randint(0, 10, size=n).tolist()
+    if include_reports:
+        data["stress_report"] = np.random.randint(0, 11, size=n).tolist()
+        data["workload_report"] = np.random.randint(0, 11, size=n).tolist()
+        data["fatigue_report"] = np.random.randint(0, 11, size=n).tolist()
+    return data
+@typechecked
+def create_random_data(
+    duration: float,
+    frequency: float = 60.0,
+    **kwargs: Any,
+) -> CineonData:
+    data_dict = create_random_dictionary(
+        duration=duration,
+        frequency=frequency,
+        **kwargs,
+    )
+    return CineonData.from_dict(data_dict)
+@typechecked
+def create_random_dataframe(
+    duration: float,
+    frequency: float = 60.0,
+    **kwargs: Any,
+) -> pd.DataFrame:
+    data_dict = create_random_dictionary(
+        duration=duration,
+        frequency=frequency,
+        **kwargs,
+    )
+    flat_dict = flatten_dictionary(data_dict)
+    return pd.DataFrame(flat_dict)

cineon_format-3.0.0/src/cineon_format/rolling_data.py ADDED Viewed

@@ -0,0 +1,102 @@
+from datetime import timedelta
+from typing import Generator
+import numpy as np
+from typeguard import typechecked
+from .cineon_data import CineonData
+@typechecked
+def get_window(data: CineonData, tmin: timedelta, tmax: timedelta) -> CineonData:
+    """
+    Trim the input data to a window defined by `tmin` (inclusive) and `tmax` (exclusive) from the start of the data.
+    The returned `CineonData` will only include data points whose timestamps fall within this window.
+    """
+    if tmin < timedelta(0):
+        raise ValueError(f"`tmin` = {tmin} and must be non-negative.")
+    if tmax <= timedelta(0):
+        raise ValueError(f"`tmax` = {tmax} and must be greater than 0.")
+    if tmax <= tmin:
+        raise ValueError(f"`tmax` = {tmax} must be greater than `tmin` = {tmin}.")
+    df = data.to_dataframe()
+    if df.empty:
+        raise ValueError("The provided CineonData is empty and cannot be windowed.")
+    start_timestamp = df["timestamp"].iloc[0]
+    window_start = start_timestamp + tmin
+    window_end = start_timestamp + tmax
+    df = df[(df["timestamp"] >= window_start) & (df["timestamp"] < window_end)]
+    return CineonData.from_dataframe(df)
+@typechecked
+def get_windowed_cineon_data(
+    cineon_data: CineonData, window_size: timedelta, step: timedelta | None = None
+) -> Generator[CineonData, None, None]:
+    """
+    Returns a generator of time-based `CineonData` windows.
+    Each yielded `CineonData` contains all data points whose timestamps fall
+    within a time span of approximately `window_size` duration (from the
+    first to the last timestamp in that window); it does not guarantee a fixed
+    number of samples per window.
+    If `step` is specified, each window starts `step` after the previous
+    window's start time. This function will raise an error if `window_size < step`,
+    so all produced windows will overlap at least slightly.
+    When `step` is not specified, it defaults to the provided `window_size`,
+    which typically results in consecutive, non-overlapping windows."""
+    if step is None:
+        step = window_size
+    data = cineon_data.to_dict()
+    timestamps = np.array(cineon_data.timestamp)
+    if window_size < step:
+        raise ValueError(
+            f"window_size must be >= step. Currently {window_size} < {step}"
+        )
+    if window_size.total_seconds() <= 0:
+        raise ValueError(f"`window_size` = {window_size} and must be greater than 0.")
+    if step.total_seconds() <= 0:
+        raise ValueError(f"`step` = {step} and must be greater than 0.")
+    if len(timestamps) < 2:
+        raise ValueError("At least two data points must be included in the CineonData")
+    time_difference = timestamps[-1] - timestamps[0]
+    if time_difference < window_size:
+        raise ValueError(
+            f"The provided CineonData only has {time_difference}s of data in it, which isn't enough for a window_size of {window_size}"
+        )
+    def split_columns(start: int, stop: int) -> CineonData:
+        # For hierarchical format, recursively slice nested dicts/lists
+        def recursive_slice(obj):
+            if isinstance(obj, dict):
+                return {k: recursive_slice(v) for k, v in obj.items()}
+            elif isinstance(obj, list) or isinstance(obj, np.ndarray):
+                return obj[start:stop]
+            else:
+                return obj  # Non-indexable, return as is
+        return CineonData.from_dict(recursive_slice(data))
+    def find_timestamp_index(t: timedelta) -> int | None:
+        if t > time_difference:
+            return None
+        return timestamps.searchsorted(t + timestamps[0])
+    # Calculate num_steps using timedelta arithmetic to avoid floating-precision issues
+    max_start_offset = time_difference - window_size
+    num_steps = (max_start_offset // step) + 1
+    for i in range(num_steps):
+        td = i * step
+        start = find_timestamp_index(td)
+        stop = find_timestamp_index(td + window_size)
+        if start is None or stop is None:
+            break
+        yield split_columns(start, stop)

cineon_format-3.0.0/src/cineon_format/utils.py ADDED Viewed

@@ -0,0 +1,46 @@
+from datetime import datetime
+from typing import Any
+from typeguard import typechecked
+@typechecked
+def json_serialiser(obj: Any) -> str:
+    if isinstance(obj, (datetime,)):
+        return obj.isoformat()
+    raise TypeError("Type not serializable")
+@typechecked
+def flatten_dictionary(data_dict: dict, separator: str = ".") -> dict[str, Any]:
+    """Convert a nested dictionary to a flat dictionary."""
+    flat_dict: dict[str, Any] = {}
+    def _flatten(prefix: str, value: Any):
+        if isinstance(value, dict):
+            for k, v in value.items():
+                _flatten(f"{prefix}{separator}{k}", v)
+        else:
+            flat_dict[prefix] = value
+    for key, val in data_dict.items():
+        _flatten(key, val)
+    return flat_dict
+@typechecked
+def unflatten_dictionary(flat_dict: dict, separator: str = ".") -> dict[str, Any]:
+    """Convert a flat dictionary to a nested dictionary."""
+    nested_dict: dict[str, Any] = {}
+    for flat_key, value in flat_dict.items():
+        keys = flat_key.split(separator)
+        d = nested_dict
+        for key in keys[:-1]:
+            if key not in d:
+                d[key] = {}
+            d = d[key]
+        d[keys[-1]] = value
+    return nested_dict

cineon_format-3.0.0/src/cineon_format/version.py ADDED Viewed

@@ -0,0 +1,3 @@
+from importlib import metadata
+# Read the version from the installed distribution's metadata; this raises
+# importlib.metadata.PackageNotFoundError if "cineon-format" is not installed.
+__version__ = metadata.version("cineon-format")