open-earable-python 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ from .dataset import (
2
+ SensorDataset,
3
+ load_recordings,
4
+ )
5
+
6
# Public API of the package: star-imports expose exactly these names.
__all__ = [
    "SensorDataset",
    "load_recordings",
]
@@ -0,0 +1,362 @@
1
+ import os
2
+ import tempfile
3
+ from collections import defaultdict
4
+ from typing import Dict, List, Optional, Sequence
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from open_earable_python import parser
9
+ import open_earable_python.scheme as scheme
10
+ from IPython.display import Audio, display
11
+ from scipy.io.wavfile import write
12
+
13
# Column labels per sensor name. Two-level "group.field" labels are split by
# _SensorAccessor into per-group sub-DataFrames with short column names.
LABELS: Dict[str, List[str]] = {
    "imu": [
        "acc.x", "acc.y", "acc.z",
        "gyro.x", "gyro.y", "gyro.z",
        "mag.x", "mag.y", "mag.z",
    ],
    "barometer": ["barometer.temperature", "barometer.pressure"],
    "ppg": ["ppg.red", "ppg.ir", "ppg.green", "ppg.ambient"],
    "bone_acc": ["bone_acc.x", "bone_acc.y", "bone_acc.z"],
    "optical_temp": ["optical_temp"],
}

# Suggested plot colors per sensor, parallel to the LABELS channel order.
# NOTE(review): not referenced anywhere in this module — presumably consumed
# by plotting code elsewhere; confirm before removing.
COLORS: Dict[str, List[str]] = {
    "ppg": ["red", "darkred", "green", "gray"],
}
28
+
29
+
30
class _SensorAccessor:
    """Convenience wrapper around a pandas DataFrame to provide grouped access
    to sensor channels.

    For IMU data with columns:
    - acc.x, acc.y, acc.z
    - gyro.x, gyro.y, gyro.z
    - mag.x, mag.y, mag.z

    Access patterns:

    - accessor["imu"] or accessor.imu -> sub-DataFrame
    - accessor.acc["x"] or accessor.acc.x -> Series

    Unknown attribute lookups fall through to the wrapped DataFrame, so the
    accessor can also be used like a read-only DataFrame.
    """

    def __init__(self, df: pd.DataFrame, labels: Sequence[str]):
        """Wrap *df* and group its columns.

        Parameters
        ----------
        df:
            Source DataFrame; only columns listed in *labels* participate
            in grouping.
        labels:
            Expected column names. ``group.field`` labels are collected into
            per-group sub-DataFrames with short column names; single-level
            labels are exposed directly as Series.
        """
        self._df = df
        self._data: Dict[str, pd.DataFrame] = {}

        groups: Dict[str, List[str]] = defaultdict(list)

        for label in labels:
            parts = label.split(".")
            if len(parts) == 2:
                group, _field = parts
                if label in df:
                    groups[group].append(label)
            elif label in df:
                # Single-level column names are exposed directly.
                self._data[label] = df[label]

        for group, columns in groups.items():
            # Short names drop the "group." prefix (acc.x -> x).
            short_names = [label.split(".")[1] for label in columns]
            subdf = df[columns].copy()
            subdf.columns = short_names
            self._data[group] = subdf

        # Preserve the original column names to avoid collisions between groups
        # with identical short names (e.g., acc.x vs gyro.x).
        self._full_df = df.copy()

    @property
    def df(self) -> pd.DataFrame:
        """Return the underlying full DataFrame view."""
        return self._full_df

    def to_dataframe(self) -> pd.DataFrame:
        """Alias for :attr:`df` for convenience."""
        return self._full_df

    def __getitem__(self, key):
        """Return a sensor group sub-DataFrame or a full-name column Series."""
        if key in self._data:
            return self._data[key]

        if key in self._full_df.columns:
            return self._full_df[key]

        raise KeyError(f"{key!r} not found in available sensor groups or channels")

    def __getattr__(self, name):
        """Resolve sensor groups first, then delegate to the wrapped DataFrame."""
        if name in self._data:
            return self._data[name]

        if hasattr(self._full_df, name):
            return getattr(self._full_df, name)

        # Fix: report the real class name (the message previously hard-coded
        # 'SensorAccessor', which does not match this class).
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __repr__(self) -> str:
        return repr(self._full_df)
100
+
101
+
102
class SensorDataset:
    """High-level representation of an OpenEarable sensor recording file.

    The binary file is parsed eagerly on construction. Per-sensor,
    timestamp-indexed DataFrames are exposed through ``_SensorAccessor``
    attributes (``imu``, ``barometer``, ``ppg``, ``bone_acc``,
    ``optical_temp``); the combined DataFrame over all sensors is built
    lazily by :meth:`get_dataframe`.
    """

    # Sensor name -> stream ID (SID) as used in the binary packet headers.
    SENSOR_SID: Dict[str, int] = {
        "imu": 0,
        "barometer": 1,
        "microphone": 2,
        "ppg": 4,
        "optical_temp": 6,
        "bone_acc": 7,
    }

    # Inverse of SENSOR_SID for SID -> name lookups.
    SID_NAMES: Dict[int, str] = {
        0: "imu",
        1: "barometer",
        2: "microphone",
        4: "ppg",
        6: "optical_temp",
        7: "bone_acc",
    }

    # Little-endian struct layouts of one payload packet per SID.
    sensor_formats: Dict[int, str] = {
        SENSOR_SID["imu"]: "<9f",
        SENSOR_SID["barometer"]: "<2f",
        SENSOR_SID["ppg"]: "<4I",
        SENSOR_SID["optical_temp"]: "<f",
        SENSOR_SID["bone_acc"]: "<3h",
    }

    @staticmethod
    def _xyz_group(name: str, parse_type: "scheme.ParseType") -> "scheme.SensorComponentGroupScheme":
        """Build a 3-axis (x, y, z) component group with a uniform wire type."""
        return scheme.SensorComponentGroupScheme(
            name=name,
            components=[
                scheme.SensorComponentScheme(axis, parse_type)
                for axis in ("x", "y", "z")
            ],
        )

    def __init__(self, filename: str, verbose: bool = False):
        """Parse *filename* (binary .oe recording) into sensor DataFrames.

        Parameters
        ----------
        filename:
            Path to the recording file.
        verbose:
            Forwarded to the packet parser for per-packet logging.
        """
        self.filename = filename
        self.verbose = verbose
        self.parse_result: Dict[int, List] = defaultdict(list)
        # Per-SID dataframes built in _build_accessors
        self.sensor_dfs: Dict[int, pd.DataFrame] = {}
        self.audio_stereo: Optional[np.ndarray] = None
        # NOTE(review): bone_sound is never populated in this module —
        # presumably reserved for bone-conduction audio; confirm before use.
        self.bone_sound: Optional[np.ndarray] = None
        self.df: pd.DataFrame = pd.DataFrame()

        # Empty placeholder accessors; replaced with real data once parsing
        # has finished (see _build_accessors).
        self.imu = _SensorAccessor(pd.DataFrame(columns=LABELS["imu"]), LABELS["imu"])
        self.barometer = _SensorAccessor(pd.DataFrame(columns=LABELS["barometer"]), LABELS["barometer"])
        self.ppg = _SensorAccessor(pd.DataFrame(columns=LABELS["ppg"]), LABELS["ppg"])
        self.bone_acc = _SensorAccessor(pd.DataFrame(columns=LABELS["bone_acc"]), LABELS["bone_acc"])
        self.optical_temp = _SensorAccessor(pd.DataFrame(columns=LABELS["optical_temp"]), LABELS["optical_temp"])

        float_t = scheme.ParseType.FLOAT
        self.parser: parser.Parser = parser.Parser({
            self.SENSOR_SID["imu"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='imu',
                sid=self.SENSOR_SID["imu"],
                groups=[
                    self._xyz_group('acc', float_t),
                    self._xyz_group('gyro', float_t),
                    self._xyz_group('mag', float_t),
                ])),
            self.SENSOR_SID["barometer"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='barometer',
                sid=self.SENSOR_SID["barometer"],
                groups=[
                    scheme.SensorComponentGroupScheme(
                        name='barometer',
                        components=[
                            scheme.SensorComponentScheme('temperature', float_t),
                            scheme.SensorComponentScheme('pressure', float_t),
                        ]
                    ),
                ])),
            self.SENSOR_SID["ppg"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='ppg',
                sid=self.SENSOR_SID["ppg"],
                groups=[
                    scheme.SensorComponentGroupScheme(
                        name='ppg',
                        components=[
                            scheme.SensorComponentScheme(channel, scheme.ParseType.UINT32)
                            for channel in ('red', 'ir', 'green', 'ambient')
                        ]
                    ),
                ])),
            self.SENSOR_SID["optical_temp"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='optical_temp',
                sid=self.SENSOR_SID["optical_temp"],
                groups=[
                    scheme.SensorComponentGroupScheme(
                        name='optical_temp',
                        components=[
                            scheme.SensorComponentScheme('optical_temp', float_t),
                        ]
                    ),
                ])),
            self.SENSOR_SID["bone_acc"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='bone_acc',
                sid=self.SENSOR_SID["bone_acc"],
                groups=[
                    self._xyz_group('bone_acc', scheme.ParseType.INT16),
                ])),
            self.SENSOR_SID["microphone"]: parser.MicPayloadParser(
                sample_count=48000,
            ),
        }, verbose=verbose)

        self.parse()
        self._build_accessors()

    def parse(self) -> None:
        """Parse the binary recording file into structured sensor data."""
        with open(self.filename, "rb") as f:
            self.parse_result = self.parser.parse(f)

    def _build_accessors(self) -> None:
        """Construct per-sensor accessors and per-SID DataFrames.

        Each sensor's data is stored in its own DataFrame in ``self.sensor_dfs``.
        The combined DataFrame over all sensors is built lazily in
        :meth:`get_dataframe`.
        """
        data_dict = self.parse_result.sensor_dfs
        for name, sid in self.SENSOR_SID.items():
            # Sensors without declared labels (e.g. microphone) fall back to
            # an empty label list.
            labels = LABELS.get(name, [])
            if sid in data_dict and isinstance(data_dict[sid], pd.DataFrame):
                df = data_dict[sid]
                # Drop duplicated timestamps, keeping the first occurrence.
                df = df[~df.index.duplicated(keep="first")]
            else:
                df = pd.DataFrame(columns=labels)

            # Store per-SID dataframe
            self.sensor_dfs[sid] = df

            # Create/update SensorAccessor for this sensor name
            setattr(self, name, _SensorAccessor(df, labels))

        # Clear combined dataframe; it will be built lazily on demand
        self.df = pd.DataFrame()

        self.audio_stereo = self.parse_result.audio_stereo

    def list_sensors(self) -> List[str]:
        """Return a list of available (non-empty) sensor names in the dataset."""
        available_sensors = []
        for name, sid in self.SENSOR_SID.items():
            accessor = getattr(self, name, None)
            if isinstance(accessor, _SensorAccessor) and not accessor.df.empty:
                available_sensors.append(name)
        return available_sensors

    def get_sensor_dataframe(self, name: str) -> pd.DataFrame:
        """Return the DataFrame for a single sensor.

        Parameters
        ----------
        name:
            Sensor name, e.g. "imu", "barometer", "ppg", "bone_acc", "optical_temp".

        Returns
        -------
        pandas.DataFrame
            The time-indexed DataFrame for the requested sensor.

        Raises
        ------
        KeyError
            If *name* is not a known sensor.
        """
        if name not in self.SENSOR_SID:
            raise KeyError(f"Unknown sensor name: {name!r}. "
                           f"Known sensors: {sorted(self.SENSOR_SID.keys())}")

        accessor = getattr(self, name, None)
        if isinstance(accessor, _SensorAccessor):
            return accessor.to_dataframe()

        # Fallback: should not normally happen, but return an empty DataFrame
        # instead of crashing.
        return pd.DataFrame()

    def get_dataframe(self) -> pd.DataFrame:
        """Return the combined, time-indexed DataFrame of all sensors.

        The merged DataFrame is built lazily from the per-SID DataFrames in
        :attr:`sensor_dfs` and cached in :attr:`df`.
        """
        # If we've already built a non-empty combined DataFrame, reuse it
        if not self.df.empty:
            return self.df

        # If per-SID dataframes are not available, nothing to merge
        if not getattr(self, "sensor_dfs", None):
            return self.df

        # Collect all non-empty per-SID dataframes
        dfs = [df for df in self.sensor_dfs.values() if not df.empty]
        if not dfs:
            return self.df

        # Build a common time index over all sensors
        common_index = pd.Index([])
        for df in dfs:
            common_index = common_index.union(df.index)
        common_index = common_index.sort_values()

        # Reindex each DataFrame to the common index and concatenate;
        # sensors without a sample at a given timestamp get NaN.
        reindexed_dfs = [df.reindex(common_index) for df in dfs]
        self.df = pd.concat(reindexed_dfs, axis=1)

        return self.df

    def export_csv(self) -> None:
        """Write the combined DataFrame next to the recording as ``<name>.csv``."""
        base_filename, _ = os.path.splitext(self.filename)
        self.save_csv(base_filename + ".csv")

    def save_csv(self, path: str) -> None:
        """Write the combined sensor DataFrame to *path* as CSV.

        Fix: the combined DataFrame is now built on demand — previously this
        silently wrote nothing unless :meth:`get_dataframe` had already been
        called. Still a no-op when the recording holds no sensor data.
        """
        df = self.get_dataframe()
        if not df.empty:
            df.to_csv(path)

    def play_audio(self, sampling_rate: int = 48000) -> None:
        """Play the stereo microphone track in a Jupyter environment."""
        if self.audio_stereo is None:
            print("❌ No microphone data available.")
            return

        # delete=False so the player can re-open the file after this block
        # closes it (required on platforms that forbid concurrent opens).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            write(tmp.name, sampling_rate, self.audio_stereo)
            display(Audio(tmp.name))

    def save_audio(self, path: str, sampling_rate: int = 48000) -> None:
        """Export the stereo microphone track as a WAV file at *path*."""
        if self.audio_stereo is None:
            print("❌ No microphone data available to save.")
            return
        try:
            write(path, sampling_rate, self.audio_stereo)
            print(f"✅ Audio saved successfully to {path}")
        except Exception as e:
            print(f"❌ Error saving audio to {path}: {e}")
359
+
360
+
361
def load_recordings(file_paths: Sequence[str]) -> List[SensorDataset]:
    """Load every existing file in *file_paths* as a :class:`SensorDataset`.

    Paths that do not point to a regular file are silently skipped, so the
    returned list may be shorter than the input.
    """
    datasets: List[SensorDataset] = []
    for path in file_paths:
        if os.path.isfile(path):
            datasets.append(SensorDataset(path))
    return datasets
@@ -0,0 +1,451 @@
1
+ import struct
2
+ from open_earable_python.scheme import SensorScheme, ParseType
3
+ import pandas as pd
4
+ from typing import BinaryIO, Dict, List, Optional
5
+ from dataclasses import dataclass
6
+ import numpy as np
7
+
8
class PayloadParser:
    """Abstract base class for payload parsers.

    Concrete subclasses set ``expected_size`` (payload bytes of one packet)
    and implement :meth:`parse`.
    """

    # Payload size in bytes of a single packet for this parser.
    expected_size: int

    def parse(self, data: bytes, **kwargs) -> List[dict]:
        """Decode a raw payload into one or more sample dicts.

        Parameters
        ----------
        data:
            Raw payload bytes (without header).
        """
        raise NotImplementedError

    def should_build_df(self) -> bool:
        """Tell whether this parser's output belongs in the final DataFrame.

        All parsers are included unless a subclass opts out (microphone
        parsers, for example, accumulate samples separately).
        """
        return True
33
+
34
+
35
+ # MARK: - ParseResult dataclass
36
+
37
@dataclass
class ParseResult:
    """Result of parsing a stream.

    - ``sensor_dfs``: per-SID DataFrames (timestamp-indexed)
    - ``mic_samples``: interleaved int16 samples accumulated across mic packets
    - ``audio_stereo``: (N, 2) int16 array [inner, outer] if microphone data
      was present
    """

    sensor_dfs: Dict[int, pd.DataFrame]
    mic_samples: List[int]
    audio_stereo: Optional[np.ndarray] = None

    @staticmethod
    def mic_samples_to_stereo(mic_samples: List[int]) -> Optional[np.ndarray]:
        """Fold interleaved mic samples into an (N, 2) int16 stereo array.

        Returns ``None`` when no samples were collected. A trailing unpaired
        sample is dropped so the data splits cleanly into pairs.
        """
        if not mic_samples:
            return None
        interleaved = np.asarray(mic_samples, dtype=np.int16)
        # Keep only an even number of samples so each pair is complete.
        usable = len(interleaved) - (len(interleaved) % 2)
        interleaved = interleaved[:usable]
        # Original behavior: [inner, outer] = [odd, even] indices.
        return np.column_stack((interleaved[1::2], interleaved[0::2]))
60
+
61
class Parser:
    """Streaming packet parser.

    Reads a binary packet stream, parses fixed 10-byte headers, and
    dispatches each payload to the :class:`PayloadParser` registered for its
    SID. Corrupted regions are skipped via a forward-scan resync heuristic.
    """

    def __init__(self, parsers: dict[int, PayloadParser], verbose: bool = False):
        """Create a Parser from a mapping of SID -> PayloadParser."""
        self.parsers = parsers
        self.verbose = verbose

    @classmethod
    def from_sensor_schemes(
        cls,
        sensor_schemes: dict[int, SensorScheme],
        verbose: bool = False,
    ) -> "Parser":
        """Construct a Parser where each SID uses a SchemePayloadParser.

        This does **not** add a special microphone parser; callers can
        override or extend the parser mapping for microphone SIDs as needed.
        """
        parsers: dict[int, PayloadParser] = {
            sid: SchemePayloadParser(scheme) for sid, scheme in sensor_schemes.items()
        }
        return cls(parsers=parsers, verbose=verbose)

    def parse(
        self,
        data_stream: BinaryIO,
        *,
        chunk_size: int = 4096,
        max_resync_scan_bytes: int = 256,
    ) -> ParseResult:
        """Parse a binary byte stream into per-SID DataFrames.

        This function reads from `data_stream` incrementally in chunks and keeps an
        internal buffer so the entire stream does not need to be loaded into memory.

        Parameters
        ----------
        data_stream:
            A binary stream (file-like object) positioned at the beginning of packet data.
            Note: If this is an .oe file, the caller should have already consumed the
            file header before passing the stream here.
        chunk_size:
            Number of bytes to read per chunk.
        max_resync_scan_bytes:
            How many bytes ahead to scan when attempting to resynchronize after a corrupted
            header/payload.

        Returns
        -------
        ParseResult
            Contains per-SID DataFrames, microphone samples, and stereo PCM audio if present.
        """
        # Decoded rows per SID; converted to DataFrames at the end.
        rows_by_sid: dict[int, list[dict]] = {}

        # Fixed header layout "<BBQ": 1-byte SID + 1-byte size + 8-byte time.
        header_size = 10
        buffer = bytearray()
        packet_idx = 0
        # Interleaved int16 microphone samples accumulated across packets.
        mic_samples: List[int] = []

        def flush_to_dataframes() -> Dict[int, pd.DataFrame]:
            # Turn the accumulated row dicts into timestamp-indexed frames.
            result: Dict[int, pd.DataFrame] = {}
            for sid, rows in rows_by_sid.items():
                df = pd.DataFrame(rows)
                if not df.empty and "timestamp" in df.columns:
                    df.set_index("timestamp", inplace=True)
                result[sid] = df
            return result

        # Main read/parse loop
        while True:
            # Ensure we have enough data for at least a header; if not, read more
            if len(buffer) < header_size:
                chunk = data_stream.read(chunk_size)
                if not chunk:
                    # End of stream
                    if self.verbose and buffer:
                        print(
                            f"End of stream with {len(buffer)} leftover bytes (incomplete header/payload)."
                        )
                    break
                buffer.extend(chunk)
                continue

            # We have at least a header
            header = bytes(buffer[:header_size])
            sid, size, time = self._parse_header(header)

            # Header time divided by 1e6 to yield seconds (see `_s` suffix).
            timestamp_s = time / 1e6

            if self.verbose:
                print(
                    f"Packet #{packet_idx}: SID={sid}, size={size}, time={timestamp_s:.6f}s "
                    f"(buffer_len={len(buffer)})"
                )

            # Basic sanity checks
            if sid not in self.parsers:
                if self.verbose:
                    print(f"Warning: No parser registered for SID={sid}. Attempting resync...")
                # Scan forward for a plausible header; if none is found,
                # drop a single byte and retry on the next iteration.
                new_offset = self._attempt_resync(bytes(buffer), 0, packet_idx, max_scan_bytes=max_resync_scan_bytes)
                if new_offset is None:
                    del buffer[:1]
                else:
                    del buffer[:new_offset]
                continue

            if size <= 0:
                if self.verbose:
                    print(f"Invalid size={size} for SID={sid}. Attempting resync...")
                new_offset = self._attempt_resync(bytes(buffer), 0, packet_idx, max_scan_bytes=max_resync_scan_bytes)
                if new_offset is None:
                    del buffer[:1]
                else:
                    del buffer[:new_offset]
                continue

            parser = self.parsers[sid]

            # Wait for the full payload before decoding.
            needed = header_size + size
            if len(buffer) < needed:
                chunk = data_stream.read(chunk_size)
                if not chunk:
                    if self.verbose:
                        print(
                            f"Truncated payload at packet #{packet_idx}: need {needed} bytes, "
                            f"have {len(buffer)} bytes and stream ended."
                        )
                    break
                buffer.extend(chunk)
                continue

            payload = bytes(buffer[header_size:needed])
            try:
                values_list = parser.parse(payload)
                # Accumulate microphone samples in a single interleaved buffer
                if isinstance(parser, MicPayloadParser):
                    for item in values_list:
                        samples = item.get("samples")
                        if samples is None:
                            continue
                        # `samples` is a tuple of int16; extend global list
                        mic_samples.extend(list(samples))
                if self.verbose:
                    if isinstance(parser, MicPayloadParser):
                        print(
                            f"Parsed mic packet #{packet_idx} (SID={sid}) successfully: "
                            f"{len(values_list[0].get('samples', [])) if values_list else 0} samples"
                        )
                    else:
                        print(
                            f"Parsed packet #{packet_idx} (SID={sid}) successfully: {values_list}"
                        )
            except Exception as e:
                # NOTE: catches any parse failure, not just struct.error —
                # the message text below is legacy.
                if self.verbose:
                    print(
                        f"struct.error while parsing payload at packet #{packet_idx} "
                        f"(SID={sid}, size={size}): {e}. Attempting resync..."
                    )
                # Resync within the current buffer
                new_offset = self._attempt_resync(bytes(buffer), 0, packet_idx, max_scan_bytes=max_resync_scan_bytes)
                if new_offset is None:
                    del buffer[:1]
                else:
                    del buffer[:new_offset]
                continue

            if parser.should_build_df():
                for values in values_list:
                    # Flatten nested group structure (group.component -> value)
                    flat_values: dict[str, object] = {}
                    for key, val in values.items():
                        if key == "t_delta":
                            # Buffered batches: each packet advances the
                            # timestamp by t_delta/1e6 seconds (cumulative).
                            timestamp_s += val / 1e6
                            continue
                        if isinstance(val, dict):
                            for sub_key, sub_val in val.items():
                                flat_values[f"{key}.{sub_key}"] = sub_val
                        else:
                            flat_values[key] = val

                    row = {
                        "timestamp": timestamp_s,
                        **flat_values,
                    }
                    rows_by_sid.setdefault(sid, []).append(row)

            # Consume this packet from the buffer
            del buffer[:needed]
            packet_idx += 1

        sensor_dfs = flush_to_dataframes()
        audio_stereo = ParseResult.mic_samples_to_stereo(mic_samples)
        return ParseResult(sensor_dfs=sensor_dfs, mic_samples=mic_samples, audio_stereo=audio_stereo)

    def _parse_header(self, header: bytes) -> tuple[int, int, int]:
        """Parse a 10-byte packet header into (sid, size, time)."""
        # Little-endian: uint8 SID, uint8 payload size, uint64 timestamp.
        sid, size, time = struct.unpack("<BBQ", header)
        return sid, size, time

    def _is_plausible_header(self, sid: int, size: int, remaining: int) -> bool:
        """Heuristic check whether a (sid, size) looks like a valid header.

        - SID must have a registered PayloadParser
        - size must be positive, not exceed remaining bytes, and match the
          expected payload size from the SensorScheme
        """
        if sid not in self.parsers:
            return False
        if size <= 0 or size > remaining:
            return False

        parser = self.parsers[sid]
        # Only parsers that declare a fixed expected_size are size-checked.
        if hasattr(parser, "expected_size") and parser.expected_size is not None:
            if size != parser.expected_size:
                return False

        return True

    def _attempt_resync(
        self,
        data: bytes,
        packet_start: int,
        packet_idx: int,
        max_scan_bytes: int = 64,
    ) -> Optional[int]:
        """Try to recover from a corrupted header by scanning forward.

        Returns a new offset where a plausible header was found, or ``None``
        if no suitable header was located within ``max_scan_bytes``.
        """
        total_len = len(data)
        header_size = 10

        if self.verbose:
            print(
                f"Attempting resync after packet #{packet_idx} from offset {packet_start} "
                f"(scan up to {max_scan_bytes} bytes ahead)..."
            )

        # Slide one byte at a time looking for a header that passes the
        # plausibility heuristics above.
        for delta in range(1, max_scan_bytes + 1):
            candidate = packet_start + delta
            if candidate + header_size > total_len:
                break

            header = data[candidate : candidate + header_size]
            try:
                sid, size, time = self._parse_header(header)
            except struct.error:
                continue

            remaining = total_len - (candidate + header_size)
            if not self._is_plausible_header(sid, size, remaining):
                continue

            if self.verbose:
                timestamp_s = time / 1e6
                print(
                    f"Resynced at offset {candidate} (skipped {delta} bytes): "
                    f"SID={sid}, size={size}, time={timestamp_s:.6f}s"
                )

            return candidate

        if self.verbose:
            print(
                f"Resync failed within {max_scan_bytes} bytes after packet #{packet_idx}; "
                f"giving up on this buffer."
            )
        return None
329
+
330
+ # MARK: - MicParser
331
+
332
class MicPayloadParser(PayloadParser):
    """Payload parser for microphone packets (int16 PCM samples)."""

    def __init__(self, sample_count: int, verbose: bool = False):
        self.sample_count = sample_count
        # Two bytes per int16 sample.
        self.expected_size = sample_count * 2  # int16 samples
        self.verbose = verbose

    def parse(self, data: bytes, **kwargs) -> List[dict]:
        """Decode *data* as little-endian int16 samples.

        Size deviations are tolerated (warned about only in verbose mode);
        a trailing odd byte is ignored.
        """
        payload_len = len(data)
        if self.verbose:
            if payload_len != self.expected_size:
                print(
                    f"Mic payload size {payload_len} bytes does not match expected "
                    f"{self.expected_size} bytes (sample_count={self.sample_count})."
                )
            if payload_len % 2 != 0:
                print(
                    f"Mic payload has odd size {payload_len}; last byte will be ignored."
                )

        sample_total = payload_len // 2
        samples = struct.unpack_from(f"<{sample_total}h", data, 0)
        return [{"samples": samples}]

    def should_build_df(self) -> bool:
        # Mic samples are accumulated separately, never placed in DataFrames.
        return False
360
+
361
+ # MARK: - SchemePayloadParser
362
+
363
class SchemePayloadParser(PayloadParser):
    """Payload parser driven by a :class:`SensorScheme` description."""

    def __init__(self, sensor_scheme: SensorScheme):
        self.sensor_scheme = sensor_scheme

        # Byte width of each supported wire type (little-endian layout).
        widths = {
            ParseType.UINT8: 1,
            ParseType.INT8: 1,
            ParseType.UINT16: 2,
            ParseType.INT16: 2,
            ParseType.UINT32: 4,
            ParseType.INT32: 4,
            ParseType.FLOAT: 4,
            ParseType.DOUBLE: 8,
        }

        # Precompute expected payload size in bytes for a single packet.
        total = 0
        for group in sensor_scheme.groups:
            for component in group.components:
                if component.data_type not in widths:
                    raise ValueError(f"Unsupported data type in scheme: {component.data_type}")
                total += widths[component.data_type]
        self.expected_size = total

    def check_size(self, data: bytes) -> None:
        """Raise ValueError unless *data* is one packet or a buffered batch."""
        size = len(data)
        if size == self.expected_size:
            return
        if size > self.expected_size and (size - 2) % self.expected_size == 0:
            return
        raise ValueError(
            f"Payload size {size} bytes does not match expected size "
            f"{self.expected_size} bytes for sensor '{self.sensor_scheme.name}'"
        )

    def is_buffered(self, data: bytes) -> bool:
        """True when *data* holds several packets plus a trailing uint16 t_delta."""
        size = len(data)
        return size > self.expected_size and (size - 2) % self.expected_size == 0

    def parse(self, data: bytes, **kwargs) -> List[dict]:
        """Decode one packet, or a buffered run of packets sharing a t_delta."""
        self.check_size(data)
        if not self.is_buffered(data):
            return [self.parse_packet(data)]

        # Buffered batch: the trailing two bytes hold the per-packet time
        # delta (uint16) that applies to every packet in the batch.
        t_delta = struct.unpack_from("<H", data, len(data) - 2)[0]
        body = data[:-2]
        step = self.expected_size
        results = []
        for start in range(0, len(body), step):
            parsed_packet = self.parse_packet(body[start : start + step])
            # add t_delta to the parsed packet
            parsed_packet["t_delta"] = t_delta
            results.append(parsed_packet)
        return results

    def parse_packet(self, data: bytes) -> dict:
        """Decode a single packet into ``{group: {component: value}}``."""
        # struct format string and byte width per wire type.
        formats = {
            ParseType.UINT8: ("<B", 1),
            ParseType.UINT16: ("<H", 2),
            ParseType.UINT32: ("<I", 4),
            ParseType.INT8: ("<b", 1),
            ParseType.INT16: ("<h", 2),
            ParseType.INT32: ("<i", 4),
            ParseType.FLOAT: ("<f", 4),
            ParseType.DOUBLE: ("<d", 8),
        }

        parsed_data = {}
        offset = 0

        for group in self.sensor_scheme.groups:
            group_data = {}
            for component in group.components:
                if component.data_type not in formats:
                    raise ValueError(f"Unsupported data type: {component.data_type}")
                fmt, width = formats[component.data_type]
                group_data[component.name] = struct.unpack_from(fmt, data, offset)[0]
                offset += width
            parsed_data[group.name] = group_data

        return parsed_data
@@ -0,0 +1,40 @@
1
+ import enum
2
+
3
class ParseType(enum.Enum):
    """Wire types a sensor component can be encoded as.

    The enum value is the lowercase type name; ``SchemePayloadParser`` maps
    each member to a struct format and byte width when decoding payloads.
    """

    UINT8 = "uint8"
    UINT16 = "uint16"
    UINT32 = "uint32"
    INT8 = "int8"
    INT16 = "int16"
    INT32 = "int32"
    FLOAT = "float"
    DOUBLE = "double"
12
+
13
class SensorComponentScheme:
    """A single named channel of a sensor group (e.g. ``x`` stored as FLOAT)."""

    def __init__(self, name: str, data_type: ParseType):
        self.name = name
        self.data_type = data_type

    def __repr__(self):
        return "SensorComponentScheme(name={}, data_type={})".format(
            self.name, self.data_type
        )
20
+
21
class SensorComponentGroupScheme:
    """A named collection of components decoded together (e.g. ``acc`` x/y/z)."""

    def __init__(self, name: str, components: list[SensorComponentScheme]):
        self.name = name
        self.components = components

    def __repr__(self):
        return "SensorComponentGroupScheme(name={}, components={})".format(
            self.name, self.components
        )
28
+
29
class SensorScheme:
    """Schema of one sensor stream: its name, stream ID and component groups."""

    def __init__(self, name: str, sid: int, groups: list[SensorComponentGroupScheme]):
        self.name = name
        self.sid = sid
        self.groups = groups

    def __repr__(self):
        return "SensorScheme(name={}, sid={}, groups={})".format(
            self.name, self.sid, self.groups
        )
@@ -0,0 +1,128 @@
1
+ Metadata-Version: 2.4
2
+ Name: open-earable-python
3
+ Version: 0.0.1
4
+ Summary: Reader and utilities for multi-sensor OpenEarable recordings.
5
+ Author-email: "Karlsruhe Institute of Technology (KIT)" <open-earable@lists.kit.edu>
6
+ License-Expression: MIT
7
+ Project-URL: Source, https://github.com/OpenEarable/open-earable-python
8
+ Project-URL: Issues, https://github.com/OpenEarable/open-earable-python/issues
9
+ Keywords: openearable,earable,sensors,imu,ppg,audio,wearables,.oe
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.9
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: numpy
20
+ Requires-Dist: pandas
21
+ Requires-Dist: ipython
22
+ Requires-Dist: scipy
23
+ Dynamic: license-file
24
+
25
+ # Open Earable Python
26
+
27
+ A Python toolkit for parsing and analyzing multi-sensor recordings from an OpenEarable device. The library provides pandas-friendly accessors for IMU, barometer, PPG, bone accelerometer, optical temperature, and microphone data, along with audio utilities.
28
+
29
+ ## Features
30
+ - Load `.oe` recordings into a single time-aligned pandas DataFrame.
31
+ - Convenient attribute and key-based accessors for grouped sensors and individual channels.
32
+ - Play or export microphone audio directly from notebooks.
33
+ - Export combined sensor data to CSV for downstream analysis.
34
+
35
+ ## Installation
36
+ The package targets Python 3.9+.
37
+
38
+ Once published to PyPI:
39
+
40
+ ```bash
41
+ pip install open-earable-python
42
+ ```
43
+
44
+ From source (for development):
45
+
46
+ ```bash
47
+ git clone https://github.com/OpenEarable/open-earable-python.git
48
+ cd open-earable-python
49
+ python -m venv .venv
50
+ source .venv/bin/activate
51
+ pip install -e .
52
+ ```
53
+
54
+ ## Quickstart
55
+ Load a recording and explore the combined DataFrame:
56
+
57
+ ```python
58
+ from open_earable_python import SensorDataset
59
+
60
+ # Load a single .oe file
61
+ recording = SensorDataset("my_recording.oe")
62
+
63
+ # Time-indexed dataframe containing all available sensors
64
+ full_df = recording.get_dataframe()
65
+ print(full_df.head())
66
+
67
+ # Export to CSV
68
+ recording.save_csv("my_recording.csv")
69
+ ```
70
+
71
+ ### Sensor access patterns
72
+ Each sensor has an accessor exposing both grouped views and individual channels using attribute or key syntax. For IMU data:
73
+
74
+ ```python
75
+ imu = recording.imu
76
+
77
+ # Full IMU dataframe (original column names retained)
78
+ imu.df # or imu.to_dataframe()
79
+ imu["acc.x"] # Column-style access
80
+
81
+ # Accelerometer
82
+ imu.acc # Accelerometer dataframe
83
+ imu.acc["x"] # Accelerometer X channel
84
+ imu.acc["y"]
85
+ imu.acc["z"]
86
+
87
+ # Gyroscope
88
+ imu.gyro # Gyroscope dataframe
89
+ imu.gyro["x"]
90
+ imu.gyro["y"]
91
+ imu.gyro["z"]
92
+
93
+ # Magnetometer
94
+ imu.mag # Magnetometer dataframe
95
+ imu.mag["x"]
96
+ imu.mag["y"]
97
+ imu.mag["z"]
98
+ ```
99
+
100
+ PPG channels follow the same pattern:
101
+
102
+ ```python
103
+ ppg = recording.ppg
104
+ ppg.df # Full PPG dataframe
105
+ ppg["ppg.red"] # Column-style access
106
+ ppg["red"] # Channel shortcut
107
+ ppg.ir
108
+ ppg.green
109
+ ppg.ambient
110
+ ```
111
+
112
+ ### Working with multiple recordings
113
+ Load several files at once and iterate over them:
114
+
115
+ ```python
116
+ from open_earable_python.dataset import load_recordings
117
+
118
+ paths = ["session1.oe", "session2.oe"]
119
+ recordings = load_recordings(paths)
120
+
121
+ # Access a specific recording
122
+ first = recordings[0]
123
+ print(first.list_sensors())
124
+ ```
125
+
126
+ ### Audio utilities
127
+ - `play_audio(sampling_rate=48000)`: play stereo microphone data in a Jupyter environment.
128
+ - `save_audio(path, sampling_rate=48000)`: export microphone audio to WAV.
@@ -0,0 +1,9 @@
1
+ open_earable_python/__init__.py,sha256=Pk5FAkGZbz9lU_QuEwC506J5e0RkCuqZfZNDqJp7kII,124
2
+ open_earable_python/dataset.py,sha256=4HAPOzVXIZS7c43LkKT9xZsteJJqel5DYjtVcazNpwk,13793
3
+ open_earable_python/parser.py,sha256=eMFr6CkOrE6we_k_UdYF58SUEqgcrpHXz4-tqpqMrCY,17586
4
+ open_earable_python/scheme.py,sha256=I7W8Oc1fR0d1dHV9hFteePDnUkSa3TBqbe_-0bp96KE,1146
5
+ open_earable_python-0.0.1.dist-info/licenses/LICENSE,sha256=5LXwERaAaP6zyG5Y0M4C_Bj8QkyBXkdrs5XKqrwDl3Q,1068
6
+ open_earable_python-0.0.1.dist-info/METADATA,sha256=B6Luz1RnCNbdidlGaJDzjRPvyq-SwR4BO_4ay6PYet8,3548
7
+ open_earable_python-0.0.1.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
8
+ open_earable_python-0.0.1.dist-info/top_level.txt,sha256=AMtcGbjZ5ChIDQ86ElTwXlzLD3ruHTwTUunyHScOtT8,20
9
+ open_earable_python-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 OpenEarable
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ open_earable_python