open-earable-python 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ from .dataset import (
2
+ SensorDataset,
3
+ load_recordings,
4
+ )
5
+
6
# Public API of the package: star-imports expose exactly these names.
__all__ = [
    "SensorDataset",
    "load_recordings",
]
@@ -0,0 +1,362 @@
1
+ import os
2
+ import tempfile
3
+ from collections import defaultdict
4
+ from typing import Dict, List, Optional, Sequence
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from open_earable_python import parser
9
+ import open_earable_python.scheme as scheme
10
+ from IPython.display import Audio, display
11
+ from scipy.io.wavfile import write
12
+
13
# Column labels per sensor name. Two-level "group.field" labels are split by
# _SensorAccessor into per-group sub-DataFrames with short column names.
LABELS: Dict[str, List[str]] = {
    "imu": [
        "acc.x", "acc.y", "acc.z",
        "gyro.x", "gyro.y", "gyro.z",
        "mag.x", "mag.y", "mag.z",
    ],
    "barometer": ["barometer.temperature", "barometer.pressure"],
    "ppg": ["ppg.red", "ppg.ir", "ppg.green", "ppg.ambient"],
    "bone_acc": ["bone_acc.x", "bone_acc.y", "bone_acc.z"],
    "optical_temp": ["optical_temp"],
}

# Suggested plot colors per sensor, parallel to the LABELS channel order.
# NOTE(review): not referenced anywhere in this module — presumably consumed
# by plotting code elsewhere; confirm before removing.
COLORS: Dict[str, List[str]] = {
    "ppg": ["red", "darkred", "green", "gray"],
}
28
+
29
+
30
class _SensorAccessor:
    """Convenience wrapper around a pandas DataFrame to provide grouped access
    to sensor channels.

    For IMU data with columns:
    - acc.x, acc.y, acc.z
    - gyro.x, gyro.y, gyro.z
    - mag.x, mag.y, mag.z

    Access patterns:

    - accessor["imu"] or accessor.imu -> sub-DataFrame
    - accessor.acc["x"] or accessor.acc.x -> Series

    Unknown attribute lookups fall through to the wrapped DataFrame, so the
    accessor can also be used like a read-only DataFrame.
    """

    def __init__(self, df: pd.DataFrame, labels: Sequence[str]):
        """Wrap *df* and group its columns.

        Parameters
        ----------
        df:
            Source DataFrame; only columns listed in *labels* participate
            in grouping.
        labels:
            Expected column names. ``group.field`` labels are collected into
            per-group sub-DataFrames with short column names; single-level
            labels are exposed directly as Series.
        """
        self._df = df
        self._data: Dict[str, pd.DataFrame] = {}

        groups: Dict[str, List[str]] = defaultdict(list)

        for label in labels:
            parts = label.split(".")
            if len(parts) == 2:
                group, _field = parts
                if label in df:
                    groups[group].append(label)
            elif label in df:
                # Single-level column names are exposed directly.
                self._data[label] = df[label]

        for group, columns in groups.items():
            # Short names drop the "group." prefix (acc.x -> x).
            short_names = [label.split(".")[1] for label in columns]
            subdf = df[columns].copy()
            subdf.columns = short_names
            self._data[group] = subdf

        # Preserve the original column names to avoid collisions between groups
        # with identical short names (e.g., acc.x vs gyro.x).
        self._full_df = df.copy()

    @property
    def df(self) -> pd.DataFrame:
        """Return the underlying full DataFrame view."""
        return self._full_df

    def to_dataframe(self) -> pd.DataFrame:
        """Alias for :attr:`df` for convenience."""
        return self._full_df

    def __getitem__(self, key):
        """Return a sensor group sub-DataFrame or a full-name column Series."""
        if key in self._data:
            return self._data[key]

        if key in self._full_df.columns:
            return self._full_df[key]

        raise KeyError(f"{key!r} not found in available sensor groups or channels")

    def __getattr__(self, name):
        """Resolve sensor groups first, then delegate to the wrapped DataFrame."""
        if name in self._data:
            return self._data[name]

        if hasattr(self._full_df, name):
            return getattr(self._full_df, name)

        # Fix: report the real class name (the message previously hard-coded
        # 'SensorAccessor', which does not match this class).
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __repr__(self) -> str:
        return repr(self._full_df)
100
+
101
+
102
class SensorDataset:
    """High-level representation of an OpenEarable sensor recording file.

    The binary file is parsed eagerly on construction. Per-sensor,
    timestamp-indexed DataFrames are exposed through ``_SensorAccessor``
    attributes (``imu``, ``barometer``, ``ppg``, ``bone_acc``,
    ``optical_temp``); the combined DataFrame over all sensors is built
    lazily by :meth:`get_dataframe`.
    """

    # Sensor name -> stream ID (SID) as used in the binary packet headers.
    SENSOR_SID: Dict[str, int] = {
        "imu": 0,
        "barometer": 1,
        "microphone": 2,
        "ppg": 4,
        "optical_temp": 6,
        "bone_acc": 7,
    }

    # Inverse of SENSOR_SID for SID -> name lookups.
    SID_NAMES: Dict[int, str] = {
        0: "imu",
        1: "barometer",
        2: "microphone",
        4: "ppg",
        6: "optical_temp",
        7: "bone_acc",
    }

    # Little-endian struct layouts of one payload packet per SID.
    sensor_formats: Dict[int, str] = {
        SENSOR_SID["imu"]: "<9f",
        SENSOR_SID["barometer"]: "<2f",
        SENSOR_SID["ppg"]: "<4I",
        SENSOR_SID["optical_temp"]: "<f",
        SENSOR_SID["bone_acc"]: "<3h",
    }

    @staticmethod
    def _xyz_group(name: str, parse_type: "scheme.ParseType") -> "scheme.SensorComponentGroupScheme":
        """Build a 3-axis (x, y, z) component group with a uniform wire type."""
        return scheme.SensorComponentGroupScheme(
            name=name,
            components=[
                scheme.SensorComponentScheme(axis, parse_type)
                for axis in ("x", "y", "z")
            ],
        )

    def __init__(self, filename: str, verbose: bool = False):
        """Parse *filename* (binary .oe recording) into sensor DataFrames.

        Parameters
        ----------
        filename:
            Path to the recording file.
        verbose:
            Forwarded to the packet parser for per-packet logging.
        """
        self.filename = filename
        self.verbose = verbose
        self.parse_result: Dict[int, List] = defaultdict(list)
        # Per-SID dataframes built in _build_accessors
        self.sensor_dfs: Dict[int, pd.DataFrame] = {}
        self.audio_stereo: Optional[np.ndarray] = None
        # NOTE(review): bone_sound is never populated in this module —
        # presumably reserved for bone-conduction audio; confirm before use.
        self.bone_sound: Optional[np.ndarray] = None
        self.df: pd.DataFrame = pd.DataFrame()

        # Empty placeholder accessors; replaced with real data once parsing
        # has finished (see _build_accessors).
        self.imu = _SensorAccessor(pd.DataFrame(columns=LABELS["imu"]), LABELS["imu"])
        self.barometer = _SensorAccessor(pd.DataFrame(columns=LABELS["barometer"]), LABELS["barometer"])
        self.ppg = _SensorAccessor(pd.DataFrame(columns=LABELS["ppg"]), LABELS["ppg"])
        self.bone_acc = _SensorAccessor(pd.DataFrame(columns=LABELS["bone_acc"]), LABELS["bone_acc"])
        self.optical_temp = _SensorAccessor(pd.DataFrame(columns=LABELS["optical_temp"]), LABELS["optical_temp"])

        float_t = scheme.ParseType.FLOAT
        self.parser: parser.Parser = parser.Parser({
            self.SENSOR_SID["imu"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='imu',
                sid=self.SENSOR_SID["imu"],
                groups=[
                    self._xyz_group('acc', float_t),
                    self._xyz_group('gyro', float_t),
                    self._xyz_group('mag', float_t),
                ])),
            self.SENSOR_SID["barometer"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='barometer',
                sid=self.SENSOR_SID["barometer"],
                groups=[
                    scheme.SensorComponentGroupScheme(
                        name='barometer',
                        components=[
                            scheme.SensorComponentScheme('temperature', float_t),
                            scheme.SensorComponentScheme('pressure', float_t),
                        ]
                    ),
                ])),
            self.SENSOR_SID["ppg"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='ppg',
                sid=self.SENSOR_SID["ppg"],
                groups=[
                    scheme.SensorComponentGroupScheme(
                        name='ppg',
                        components=[
                            scheme.SensorComponentScheme(channel, scheme.ParseType.UINT32)
                            for channel in ('red', 'ir', 'green', 'ambient')
                        ]
                    ),
                ])),
            self.SENSOR_SID["optical_temp"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='optical_temp',
                sid=self.SENSOR_SID["optical_temp"],
                groups=[
                    scheme.SensorComponentGroupScheme(
                        name='optical_temp',
                        components=[
                            scheme.SensorComponentScheme('optical_temp', float_t),
                        ]
                    ),
                ])),
            self.SENSOR_SID["bone_acc"]: parser.SchemePayloadParser(scheme.SensorScheme(
                name='bone_acc',
                sid=self.SENSOR_SID["bone_acc"],
                groups=[
                    self._xyz_group('bone_acc', scheme.ParseType.INT16),
                ])),
            self.SENSOR_SID["microphone"]: parser.MicPayloadParser(
                sample_count=48000,
            ),
        }, verbose=verbose)

        self.parse()
        self._build_accessors()

    def parse(self) -> None:
        """Parse the binary recording file into structured sensor data."""
        with open(self.filename, "rb") as f:
            self.parse_result = self.parser.parse(f)

    def _build_accessors(self) -> None:
        """Construct per-sensor accessors and per-SID DataFrames.

        Each sensor's data is stored in its own DataFrame in ``self.sensor_dfs``.
        The combined DataFrame over all sensors is built lazily in
        :meth:`get_dataframe`.
        """
        data_dict = self.parse_result.sensor_dfs
        for name, sid in self.SENSOR_SID.items():
            # Sensors without declared labels (e.g. microphone) fall back to
            # an empty label list.
            labels = LABELS.get(name, [])
            if sid in data_dict and isinstance(data_dict[sid], pd.DataFrame):
                df = data_dict[sid]
                # Drop duplicated timestamps, keeping the first occurrence.
                df = df[~df.index.duplicated(keep="first")]
            else:
                df = pd.DataFrame(columns=labels)

            # Store per-SID dataframe
            self.sensor_dfs[sid] = df

            # Create/update SensorAccessor for this sensor name
            setattr(self, name, _SensorAccessor(df, labels))

        # Clear combined dataframe; it will be built lazily on demand
        self.df = pd.DataFrame()

        self.audio_stereo = self.parse_result.audio_stereo

    def list_sensors(self) -> List[str]:
        """Return a list of available (non-empty) sensor names in the dataset."""
        available_sensors = []
        for name, sid in self.SENSOR_SID.items():
            accessor = getattr(self, name, None)
            if isinstance(accessor, _SensorAccessor) and not accessor.df.empty:
                available_sensors.append(name)
        return available_sensors

    def get_sensor_dataframe(self, name: str) -> pd.DataFrame:
        """Return the DataFrame for a single sensor.

        Parameters
        ----------
        name:
            Sensor name, e.g. "imu", "barometer", "ppg", "bone_acc", "optical_temp".

        Returns
        -------
        pandas.DataFrame
            The time-indexed DataFrame for the requested sensor.

        Raises
        ------
        KeyError
            If *name* is not a known sensor.
        """
        if name not in self.SENSOR_SID:
            raise KeyError(f"Unknown sensor name: {name!r}. "
                           f"Known sensors: {sorted(self.SENSOR_SID.keys())}")

        accessor = getattr(self, name, None)
        if isinstance(accessor, _SensorAccessor):
            return accessor.to_dataframe()

        # Fallback: should not normally happen, but return an empty DataFrame
        # instead of crashing.
        return pd.DataFrame()

    def get_dataframe(self) -> pd.DataFrame:
        """Return the combined, time-indexed DataFrame of all sensors.

        The merged DataFrame is built lazily from the per-SID DataFrames in
        :attr:`sensor_dfs` and cached in :attr:`df`.
        """
        # If we've already built a non-empty combined DataFrame, reuse it
        if not self.df.empty:
            return self.df

        # If per-SID dataframes are not available, nothing to merge
        if not getattr(self, "sensor_dfs", None):
            return self.df

        # Collect all non-empty per-SID dataframes
        dfs = [df for df in self.sensor_dfs.values() if not df.empty]
        if not dfs:
            return self.df

        # Build a common time index over all sensors
        common_index = pd.Index([])
        for df in dfs:
            common_index = common_index.union(df.index)
        common_index = common_index.sort_values()

        # Reindex each DataFrame to the common index and concatenate;
        # sensors without a sample at a given timestamp get NaN.
        reindexed_dfs = [df.reindex(common_index) for df in dfs]
        self.df = pd.concat(reindexed_dfs, axis=1)

        return self.df

    def export_csv(self) -> None:
        """Write the combined DataFrame next to the recording as ``<name>.csv``."""
        base_filename, _ = os.path.splitext(self.filename)
        self.save_csv(base_filename + ".csv")

    def save_csv(self, path: str) -> None:
        """Write the combined sensor DataFrame to *path* as CSV.

        Fix: the combined DataFrame is now built on demand — previously this
        silently wrote nothing unless :meth:`get_dataframe` had already been
        called. Still a no-op when the recording holds no sensor data.
        """
        df = self.get_dataframe()
        if not df.empty:
            df.to_csv(path)

    def play_audio(self, sampling_rate: int = 48000) -> None:
        """Play the stereo microphone track in a Jupyter environment."""
        if self.audio_stereo is None:
            print("❌ No microphone data available.")
            return

        # delete=False so the player can re-open the file after this block
        # closes it (required on platforms that forbid concurrent opens).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            write(tmp.name, sampling_rate, self.audio_stereo)
            display(Audio(tmp.name))

    def save_audio(self, path: str, sampling_rate: int = 48000) -> None:
        """Export the stereo microphone track as a WAV file at *path*."""
        if self.audio_stereo is None:
            print("❌ No microphone data available to save.")
            return
        try:
            write(path, sampling_rate, self.audio_stereo)
            print(f"✅ Audio saved successfully to {path}")
        except Exception as e:
            print(f"❌ Error saving audio to {path}: {e}")
359
+
360
+
361
def load_recordings(file_paths: Sequence[str]) -> List[SensorDataset]:
    """Load every existing file in *file_paths* as a :class:`SensorDataset`.

    Paths that do not point to a regular file are silently skipped, so the
    returned list may be shorter than the input.
    """
    datasets: List[SensorDataset] = []
    for path in file_paths:
        if os.path.isfile(path):
            datasets.append(SensorDataset(path))
    return datasets
@@ -0,0 +1,451 @@
1
+ import struct
2
+ from open_earable_python.scheme import SensorScheme, ParseType
3
+ import pandas as pd
4
+ from typing import BinaryIO, Dict, List, Optional
5
+ from dataclasses import dataclass
6
+ import numpy as np
7
+
8
class PayloadParser:
    """Abstract base class for payload parsers.

    Concrete subclasses set ``expected_size`` (payload bytes of one packet)
    and implement :meth:`parse`.
    """

    # Payload size in bytes of a single packet for this parser.
    expected_size: int

    def parse(self, data: bytes, **kwargs) -> List[dict]:
        """Decode a raw payload into one or more sample dicts.

        Parameters
        ----------
        data:
            Raw payload bytes (without header).
        """
        raise NotImplementedError

    def should_build_df(self) -> bool:
        """Tell whether this parser's output belongs in the final DataFrame.

        All parsers are included unless a subclass opts out (microphone
        parsers, for example, accumulate samples separately).
        """
        return True
33
+
34
+
35
+ # MARK: - ParseResult dataclass
36
+
37
@dataclass
class ParseResult:
    """Result of parsing a stream.

    - ``sensor_dfs``: per-SID DataFrames (timestamp-indexed)
    - ``mic_samples``: interleaved int16 samples accumulated across mic packets
    - ``audio_stereo``: (N, 2) int16 array [inner, outer] if microphone data
      was present
    """

    sensor_dfs: Dict[int, pd.DataFrame]
    mic_samples: List[int]
    audio_stereo: Optional[np.ndarray] = None

    @staticmethod
    def mic_samples_to_stereo(mic_samples: List[int]) -> Optional[np.ndarray]:
        """Fold interleaved mic samples into an (N, 2) int16 stereo array.

        Returns ``None`` when no samples were collected. A trailing unpaired
        sample is dropped so the data splits cleanly into pairs.
        """
        if not mic_samples:
            return None
        interleaved = np.asarray(mic_samples, dtype=np.int16)
        # Keep only an even number of samples so each pair is complete.
        usable = len(interleaved) - (len(interleaved) % 2)
        interleaved = interleaved[:usable]
        # Original behavior: [inner, outer] = [odd, even] indices.
        return np.column_stack((interleaved[1::2], interleaved[0::2]))
60
+
61
class Parser:
    """Streaming packet parser.

    Reads a binary packet stream, parses fixed 10-byte headers, and
    dispatches each payload to the :class:`PayloadParser` registered for its
    SID. Corrupted regions are skipped via a forward-scan resync heuristic.
    """

    def __init__(self, parsers: dict[int, PayloadParser], verbose: bool = False):
        """Create a Parser from a mapping of SID -> PayloadParser."""
        self.parsers = parsers
        self.verbose = verbose

    @classmethod
    def from_sensor_schemes(
        cls,
        sensor_schemes: dict[int, SensorScheme],
        verbose: bool = False,
    ) -> "Parser":
        """Construct a Parser where each SID uses a SchemePayloadParser.

        This does **not** add a special microphone parser; callers can
        override or extend the parser mapping for microphone SIDs as needed.
        """
        parsers: dict[int, PayloadParser] = {
            sid: SchemePayloadParser(scheme) for sid, scheme in sensor_schemes.items()
        }
        return cls(parsers=parsers, verbose=verbose)

    def parse(
        self,
        data_stream: BinaryIO,
        *,
        chunk_size: int = 4096,
        max_resync_scan_bytes: int = 256,
    ) -> ParseResult:
        """Parse a binary byte stream into per-SID DataFrames.

        This function reads from `data_stream` incrementally in chunks and keeps an
        internal buffer so the entire stream does not need to be loaded into memory.

        Parameters
        ----------
        data_stream:
            A binary stream (file-like object) positioned at the beginning of packet data.
            Note: If this is an .oe file, the caller should have already consumed the
            file header before passing the stream here.
        chunk_size:
            Number of bytes to read per chunk.
        max_resync_scan_bytes:
            How many bytes ahead to scan when attempting to resynchronize after a corrupted
            header/payload.

        Returns
        -------
        ParseResult
            Contains per-SID DataFrames, microphone samples, and stereo PCM audio if present.
        """
        # Decoded rows per SID; converted to DataFrames at the end.
        rows_by_sid: dict[int, list[dict]] = {}

        # Fixed header layout "<BBQ": 1-byte SID + 1-byte size + 8-byte time.
        header_size = 10
        buffer = bytearray()
        packet_idx = 0
        # Interleaved int16 microphone samples accumulated across packets.
        mic_samples: List[int] = []

        def flush_to_dataframes() -> Dict[int, pd.DataFrame]:
            # Turn the accumulated row dicts into timestamp-indexed frames.
            result: Dict[int, pd.DataFrame] = {}
            for sid, rows in rows_by_sid.items():
                df = pd.DataFrame(rows)
                if not df.empty and "timestamp" in df.columns:
                    df.set_index("timestamp", inplace=True)
                result[sid] = df
            return result

        # Main read/parse loop
        while True:
            # Ensure we have enough data for at least a header; if not, read more
            if len(buffer) < header_size:
                chunk = data_stream.read(chunk_size)
                if not chunk:
                    # End of stream
                    if self.verbose and buffer:
                        print(
                            f"End of stream with {len(buffer)} leftover bytes (incomplete header/payload)."
                        )
                    break
                buffer.extend(chunk)
                continue

            # We have at least a header
            header = bytes(buffer[:header_size])
            sid, size, time = self._parse_header(header)

            # Header time divided by 1e6 to yield seconds (see `_s` suffix).
            timestamp_s = time / 1e6

            if self.verbose:
                print(
                    f"Packet #{packet_idx}: SID={sid}, size={size}, time={timestamp_s:.6f}s "
                    f"(buffer_len={len(buffer)})"
                )

            # Basic sanity checks
            if sid not in self.parsers:
                if self.verbose:
                    print(f"Warning: No parser registered for SID={sid}. Attempting resync...")
                # Scan forward for a plausible header; if none is found,
                # drop a single byte and retry on the next iteration.
                new_offset = self._attempt_resync(bytes(buffer), 0, packet_idx, max_scan_bytes=max_resync_scan_bytes)
                if new_offset is None:
                    del buffer[:1]
                else:
                    del buffer[:new_offset]
                continue

            if size <= 0:
                if self.verbose:
                    print(f"Invalid size={size} for SID={sid}. Attempting resync...")
                new_offset = self._attempt_resync(bytes(buffer), 0, packet_idx, max_scan_bytes=max_resync_scan_bytes)
                if new_offset is None:
                    del buffer[:1]
                else:
                    del buffer[:new_offset]
                continue

            parser = self.parsers[sid]

            # Wait for the full payload before decoding.
            needed = header_size + size
            if len(buffer) < needed:
                chunk = data_stream.read(chunk_size)
                if not chunk:
                    if self.verbose:
                        print(
                            f"Truncated payload at packet #{packet_idx}: need {needed} bytes, "
                            f"have {len(buffer)} bytes and stream ended."
                        )
                    break
                buffer.extend(chunk)
                continue

            payload = bytes(buffer[header_size:needed])
            try:
                values_list = parser.parse(payload)
                # Accumulate microphone samples in a single interleaved buffer
                if isinstance(parser, MicPayloadParser):
                    for item in values_list:
                        samples = item.get("samples")
                        if samples is None:
                            continue
                        # `samples` is a tuple of int16; extend global list
                        mic_samples.extend(list(samples))
                if self.verbose:
                    if isinstance(parser, MicPayloadParser):
                        print(
                            f"Parsed mic packet #{packet_idx} (SID={sid}) successfully: "
                            f"{len(values_list[0].get('samples', [])) if values_list else 0} samples"
                        )
                    else:
                        print(
                            f"Parsed packet #{packet_idx} (SID={sid}) successfully: {values_list}"
                        )
            except Exception as e:
                # NOTE: catches any parse failure, not just struct.error —
                # the message text below is legacy.
                if self.verbose:
                    print(
                        f"struct.error while parsing payload at packet #{packet_idx} "
                        f"(SID={sid}, size={size}): {e}. Attempting resync..."
                    )
                # Resync within the current buffer
                new_offset = self._attempt_resync(bytes(buffer), 0, packet_idx, max_scan_bytes=max_resync_scan_bytes)
                if new_offset is None:
                    del buffer[:1]
                else:
                    del buffer[:new_offset]
                continue

            if parser.should_build_df():
                for values in values_list:
                    # Flatten nested group structure (group.component -> value)
                    flat_values: dict[str, object] = {}
                    for key, val in values.items():
                        if key == "t_delta":
                            # Buffered batches: each packet advances the
                            # timestamp by t_delta/1e6 seconds (cumulative).
                            timestamp_s += val / 1e6
                            continue
                        if isinstance(val, dict):
                            for sub_key, sub_val in val.items():
                                flat_values[f"{key}.{sub_key}"] = sub_val
                        else:
                            flat_values[key] = val

                    row = {
                        "timestamp": timestamp_s,
                        **flat_values,
                    }
                    rows_by_sid.setdefault(sid, []).append(row)

            # Consume this packet from the buffer
            del buffer[:needed]
            packet_idx += 1

        sensor_dfs = flush_to_dataframes()
        audio_stereo = ParseResult.mic_samples_to_stereo(mic_samples)
        return ParseResult(sensor_dfs=sensor_dfs, mic_samples=mic_samples, audio_stereo=audio_stereo)

    def _parse_header(self, header: bytes) -> tuple[int, int, int]:
        """Parse a 10-byte packet header into (sid, size, time)."""
        # Little-endian: uint8 SID, uint8 payload size, uint64 timestamp.
        sid, size, time = struct.unpack("<BBQ", header)
        return sid, size, time

    def _is_plausible_header(self, sid: int, size: int, remaining: int) -> bool:
        """Heuristic check whether a (sid, size) looks like a valid header.

        - SID must have a registered PayloadParser
        - size must be positive, not exceed remaining bytes, and match the
          expected payload size from the SensorScheme
        """
        if sid not in self.parsers:
            return False
        if size <= 0 or size > remaining:
            return False

        parser = self.parsers[sid]
        # Only parsers that declare a fixed expected_size are size-checked.
        if hasattr(parser, "expected_size") and parser.expected_size is not None:
            if size != parser.expected_size:
                return False

        return True

    def _attempt_resync(
        self,
        data: bytes,
        packet_start: int,
        packet_idx: int,
        max_scan_bytes: int = 64,
    ) -> Optional[int]:
        """Try to recover from a corrupted header by scanning forward.

        Returns a new offset where a plausible header was found, or ``None``
        if no suitable header was located within ``max_scan_bytes``.
        """
        total_len = len(data)
        header_size = 10

        if self.verbose:
            print(
                f"Attempting resync after packet #{packet_idx} from offset {packet_start} "
                f"(scan up to {max_scan_bytes} bytes ahead)..."
            )

        # Slide one byte at a time looking for a header that passes the
        # plausibility heuristics above.
        for delta in range(1, max_scan_bytes + 1):
            candidate = packet_start + delta
            if candidate + header_size > total_len:
                break

            header = data[candidate : candidate + header_size]
            try:
                sid, size, time = self._parse_header(header)
            except struct.error:
                continue

            remaining = total_len - (candidate + header_size)
            if not self._is_plausible_header(sid, size, remaining):
                continue

            if self.verbose:
                timestamp_s = time / 1e6
                print(
                    f"Resynced at offset {candidate} (skipped {delta} bytes): "
                    f"SID={sid}, size={size}, time={timestamp_s:.6f}s"
                )

            return candidate

        if self.verbose:
            print(
                f"Resync failed within {max_scan_bytes} bytes after packet #{packet_idx}; "
                f"giving up on this buffer."
            )
        return None
329
+
330
+ # MARK: - MicParser
331
+
332
class MicPayloadParser(PayloadParser):
    """Payload parser for microphone packets (int16 PCM samples)."""

    def __init__(self, sample_count: int, verbose: bool = False):
        self.sample_count = sample_count
        # Two bytes per int16 sample.
        self.expected_size = sample_count * 2  # int16 samples
        self.verbose = verbose

    def parse(self, data: bytes, **kwargs) -> List[dict]:
        """Decode *data* as little-endian int16 samples.

        Size deviations are tolerated (warned about only in verbose mode);
        a trailing odd byte is ignored.
        """
        payload_len = len(data)
        if self.verbose:
            if payload_len != self.expected_size:
                print(
                    f"Mic payload size {payload_len} bytes does not match expected "
                    f"{self.expected_size} bytes (sample_count={self.sample_count})."
                )
            if payload_len % 2 != 0:
                print(
                    f"Mic payload has odd size {payload_len}; last byte will be ignored."
                )

        sample_total = payload_len // 2
        samples = struct.unpack_from(f"<{sample_total}h", data, 0)
        return [{"samples": samples}]

    def should_build_df(self) -> bool:
        # Mic samples are accumulated separately, never placed in DataFrames.
        return False
360
+
361
+ # MARK: - SchemePayloadParser
362
+
363
class SchemePayloadParser(PayloadParser):
    """Payload parser driven by a :class:`SensorScheme` description."""

    def __init__(self, sensor_scheme: SensorScheme):
        self.sensor_scheme = sensor_scheme

        # Byte width of each supported wire type (little-endian layout).
        widths = {
            ParseType.UINT8: 1,
            ParseType.INT8: 1,
            ParseType.UINT16: 2,
            ParseType.INT16: 2,
            ParseType.UINT32: 4,
            ParseType.INT32: 4,
            ParseType.FLOAT: 4,
            ParseType.DOUBLE: 8,
        }

        # Precompute expected payload size in bytes for a single packet.
        total = 0
        for group in sensor_scheme.groups:
            for component in group.components:
                if component.data_type not in widths:
                    raise ValueError(f"Unsupported data type in scheme: {component.data_type}")
                total += widths[component.data_type]
        self.expected_size = total

    def check_size(self, data: bytes) -> None:
        """Raise ValueError unless *data* is one packet or a buffered batch."""
        size = len(data)
        if size == self.expected_size:
            return
        if size > self.expected_size and (size - 2) % self.expected_size == 0:
            return
        raise ValueError(
            f"Payload size {size} bytes does not match expected size "
            f"{self.expected_size} bytes for sensor '{self.sensor_scheme.name}'"
        )

    def is_buffered(self, data: bytes) -> bool:
        """True when *data* holds several packets plus a trailing uint16 t_delta."""
        size = len(data)
        return size > self.expected_size and (size - 2) % self.expected_size == 0

    def parse(self, data: bytes, **kwargs) -> List[dict]:
        """Decode one packet, or a buffered run of packets sharing a t_delta."""
        self.check_size(data)
        if not self.is_buffered(data):
            return [self.parse_packet(data)]

        # Buffered batch: the trailing two bytes hold the per-packet time
        # delta (uint16) that applies to every packet in the batch.
        t_delta = struct.unpack_from("<H", data, len(data) - 2)[0]
        body = data[:-2]
        step = self.expected_size
        results = []
        for start in range(0, len(body), step):
            parsed_packet = self.parse_packet(body[start : start + step])
            # add t_delta to the parsed packet
            parsed_packet["t_delta"] = t_delta
            results.append(parsed_packet)
        return results

    def parse_packet(self, data: bytes) -> dict:
        """Decode a single packet into ``{group: {component: value}}``."""
        # struct format string and byte width per wire type.
        formats = {
            ParseType.UINT8: ("<B", 1),
            ParseType.UINT16: ("<H", 2),
            ParseType.UINT32: ("<I", 4),
            ParseType.INT8: ("<b", 1),
            ParseType.INT16: ("<h", 2),
            ParseType.INT32: ("<i", 4),
            ParseType.FLOAT: ("<f", 4),
            ParseType.DOUBLE: ("<d", 8),
        }

        parsed_data = {}
        offset = 0

        for group in self.sensor_scheme.groups:
            group_data = {}
            for component in group.components:
                if component.data_type not in formats:
                    raise ValueError(f"Unsupported data type: {component.data_type}")
                fmt, width = formats[component.data_type]
                group_data[component.name] = struct.unpack_from(fmt, data, offset)[0]
                offset += width
            parsed_data[group.name] = group_data

        return parsed_data
@@ -0,0 +1,40 @@
1
+ import enum
2
+
3
class ParseType(enum.Enum):
    """Wire types a sensor component can be encoded as.

    The enum value is the lowercase type name; ``SchemePayloadParser`` maps
    each member to a struct format and byte width when decoding payloads.
    """

    UINT8 = "uint8"
    UINT16 = "uint16"
    UINT32 = "uint32"
    INT8 = "int8"
    INT16 = "int16"
    INT32 = "int32"
    FLOAT = "float"
    DOUBLE = "double"
12
+
13
class SensorComponentScheme:
    """A single named channel of a sensor group (e.g. ``x`` stored as FLOAT)."""

    def __init__(self, name: str, data_type: ParseType):
        self.name = name
        self.data_type = data_type

    def __repr__(self):
        return "SensorComponentScheme(name={}, data_type={})".format(
            self.name, self.data_type
        )
20
+
21
class SensorComponentGroupScheme:
    """A named collection of components decoded together (e.g. ``acc`` x/y/z)."""

    def __init__(self, name: str, components: list[SensorComponentScheme]):
        self.name = name
        self.components = components

    def __repr__(self):
        return "SensorComponentGroupScheme(name={}, components={})".format(
            self.name, self.components
        )
28
+
29
class SensorScheme:
    """Schema of one sensor stream: its name, stream ID and component groups."""

    def __init__(self, name: str, sid: int, groups: list[SensorComponentGroupScheme]):
        self.name = name
        self.sid = sid
        self.groups = groups

    def __repr__(self):
        return "SensorScheme(name={}, sid={}, groups={})".format(
            self.name, self.sid, self.groups
        )
@@ -0,0 +1,128 @@
1
+ Metadata-Version: 2.4
2
+ Name: open-earable-python
3
+ Version: 0.0.1
4
+ Summary: Reader and utilities for multi-sensor OpenEarable recordings.
5
+ Author-email: "Karlsruhe Institute of Technology (KIT)" <open-earable@lists.kit.edu>
6
+ License-Expression: MIT
7
+ Project-URL: Source, https://github.com/OpenEarable/open-earable-python
8
+ Project-URL: Issues, https://github.com/OpenEarable/open-earable-python/issues
9
+ Keywords: openearable,earable,sensors,imu,ppg,audio,wearables,.oe
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.9
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: numpy
20
+ Requires-Dist: pandas
21
+ Requires-Dist: ipython
22
+ Requires-Dist: scipy
23
+ Dynamic: license-file
24
+
25
+ # Open Earable Python
26
+
27
+ A Python toolkit for parsing and analyzing multi-sensor recordings from an OpenEarable device. The library provides pandas-friendly accessors for IMU, barometer, PPG, bone accelerometer, optical temperature, and microphone data, along with audio utilities.
28
+
29
+ ## Features
30
+ - Load `.oe` recordings into a single time-aligned pandas DataFrame.
31
+ - Convenient attribute and key-based accessors for grouped sensors and individual channels.
32
+ - Play or export microphone audio directly from notebooks.
33
+ - Export combined sensor data to CSV for downstream analysis.
34
+
35
+ ## Installation
36
+ The package targets Python 3.9+.
37
+
38
+ Once published to PyPI:
39
+
40
+ ```bash
41
+ pip install open-earable-python
42
+ ```
43
+
44
+ From source (for development):
45
+
46
+ ```bash
47
+ git clone https://github.com/OpenEarable/open-earable-python.git
48
+ cd open-earable-python
49
+ python -m venv .venv
50
+ source .venv/bin/activate
51
+ pip install -e .
52
+ ```
53
+
54
+ ## Quickstart
55
+ Load a recording and explore the combined DataFrame:
56
+
57
+ ```python
58
+ from open_earable_python import SensorDataset
59
+
60
+ # Load a single .oe file
61
+ recording = SensorDataset("my_recording.oe")
62
+
63
+ # Time-indexed dataframe containing all available sensors
64
+ full_df = recording.get_dataframe()
65
+ print(full_df.head())
66
+
67
+ # Export to CSV
68
+ recording.save_csv("my_recording.csv")
69
+ ```
70
+
71
+ ### Sensor access patterns
72
+ Each sensor has an accessor exposing both grouped views and individual channels using attribute or key syntax. For IMU data:
73
+
74
+ ```python
75
+ imu = recording.imu
76
+
77
+ # Full IMU dataframe (original column names retained)
78
+ imu.df # or imu.to_dataframe()
79
+ imu["acc.x"] # Column-style access
80
+
81
+ # Accelerometer
82
+ imu.acc # Accelerometer dataframe
83
+ imu.acc["x"] # Accelerometer X channel
84
+ imu.acc["y"]
85
+ imu.acc["z"]
86
+
87
+ # Gyroscope
88
+ imu.gyro # Gyroscope dataframe
89
+ imu.gyro["x"]
90
+ imu.gyro["y"]
91
+ imu.gyro["z"]
92
+
93
+ # Magnetometer
94
+ imu.mag # Magnetometer dataframe
95
+ imu.mag["x"]
96
+ imu.mag["y"]
97
+ imu.mag["z"]
98
+ ```
99
+
100
+ PPG channels follow the same pattern:
101
+
102
+ ```python
103
+ ppg = recording.ppg
104
+ ppg.df # Full PPG dataframe
105
+ ppg["ppg.red"] # Column-style access
106
+ ppg["red"] # Channel shortcut
107
+ ppg.ir
108
+ ppg.green
109
+ ppg.ambient
110
+ ```
111
+
112
+ ### Working with multiple recordings
113
+ Load several files at once and iterate over them:
114
+
115
+ ```python
116
+ from open_earable_python.dataset import load_recordings
117
+
118
+ paths = ["session1.oe", "session2.oe"]
119
+ recordings = load_recordings(paths)
120
+
121
+ # Access a specific recording
122
+ first = recordings[0]
123
+ print(first.list_sensors())
124
+ ```
125
+
126
+ ### Audio utilities
127
+ - `play_audio(sampling_rate=48000)`: play stereo microphone data in a Jupyter environment.
128
+ - `save_audio(path, sampling_rate=48000)`: export microphone audio to WAV.
@@ -0,0 +1,9 @@
1
+ open_earable_python/__init__.py,sha256=Pk5FAkGZbz9lU_QuEwC506J5e0RkCuqZfZNDqJp7kII,124
2
+ open_earable_python/dataset.py,sha256=4HAPOzVXIZS7c43LkKT9xZsteJJqel5DYjtVcazNpwk,13793
3
+ open_earable_python/parser.py,sha256=eMFr6CkOrE6we_k_UdYF58SUEqgcrpHXz4-tqpqMrCY,17586
4
+ open_earable_python/scheme.py,sha256=I7W8Oc1fR0d1dHV9hFteePDnUkSa3TBqbe_-0bp96KE,1146
5
+ open_earable_python-0.0.1.dist-info/licenses/LICENSE,sha256=5LXwERaAaP6zyG5Y0M4C_Bj8QkyBXkdrs5XKqrwDl3Q,1068
6
+ open_earable_python-0.0.1.dist-info/METADATA,sha256=B6Luz1RnCNbdidlGaJDzjRPvyq-SwR4BO_4ay6PYet8,3548
7
+ open_earable_python-0.0.1.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
8
+ open_earable_python-0.0.1.dist-info/top_level.txt,sha256=AMtcGbjZ5ChIDQ86ElTwXlzLD3ruHTwTUunyHScOtT8,20
9
+ open_earable_python-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 OpenEarable
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ open_earable_python