open-earable-python 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. open_earable_python-0.0.2/CHANGELOG.md +10 -0
  2. open_earable_python-0.0.2/PKG-INFO +70 -0
  3. open_earable_python-0.0.2/README.md +46 -0
  4. open_earable_python-0.0.2/docs/README.md +16 -0
  5. open_earable_python-0.0.2/docs/api-reference.md +125 -0
  6. open_earable_python-0.0.2/docs/data-model.md +61 -0
  7. open_earable_python-0.0.2/docs/getting-started.md +96 -0
  8. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python/dataset.py +95 -95
  9. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python/parser.py +56 -10
  10. open_earable_python-0.0.2/src/open_earable_python/scheme.py +121 -0
  11. open_earable_python-0.0.2/src/open_earable_python.egg-info/PKG-INFO +70 -0
  12. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/SOURCES.txt +4 -0
  13. open_earable_python-0.0.1/CHANGELOG.md +0 -4
  14. open_earable_python-0.0.1/PKG-INFO +0 -128
  15. open_earable_python-0.0.1/README.md +0 -104
  16. open_earable_python-0.0.1/src/open_earable_python/scheme.py +0 -40
  17. open_earable_python-0.0.1/src/open_earable_python.egg-info/PKG-INFO +0 -128
  18. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/.github/workflows/publish-pypi.yml +0 -0
  19. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/.gitignore +0 -0
  20. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/LICENSE +0 -0
  21. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/pyproject.toml +0 -0
  22. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/setup.cfg +0 -0
  23. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python/__init__.py +0 -0
  24. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/dependency_links.txt +0 -0
  25. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/requires.txt +0 -0
  26. {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/top_level.txt +0 -0
@@ -0,0 +1,10 @@
1
+ ## 0.0.2
2
+
3
+ Added access to raw microphone dataframes
4
+ Added a dedicated `docs/` documentation set (getting started, data model, API reference)
5
+ Updated `README.md` as a concise entry point and linked full docs
6
+
7
+ ## 0.0.1
8
+
9
+ * Initial release of the Open Earable Python SDK.
10
+ Parse Open Earable data files
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.4
2
+ Name: open-earable-python
3
+ Version: 0.0.2
4
+ Summary: Reader and utilities for multi-sensor OpenEarable recordings.
5
+ Author-email: "Karlsruhe Institute of Technology (KIT)" <open-earable@lists.kit.edu>
6
+ License-Expression: MIT
7
+ Project-URL: Source, https://github.com/OpenEarable/open-earable-python
8
+ Project-URL: Issues, https://github.com/OpenEarable/open-earable-python/issues
9
+ Keywords: openearable,earable,sensors,imu,ppg,audio,wearables,.oe
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.9
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: numpy
20
+ Requires-Dist: pandas
21
+ Requires-Dist: ipython
22
+ Requires-Dist: scipy
23
+ Dynamic: license-file
24
+
25
+ # Open Earable Python
26
+
27
+ Python toolkit for parsing and analyzing multi-sensor OpenEarable recordings.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install open-earable-python
33
+ ```
34
+
35
+ For local development:
36
+
37
+ ```bash
38
+ git clone https://github.com/OpenEarable/open-earable-python.git
39
+ cd open-earable-python
40
+ python -m venv .venv
41
+ source .venv/bin/activate
42
+ pip install -e .
43
+ ```
44
+
45
+ ## Quick Example
46
+
47
+ ```python
48
+ from open_earable_python import SensorDataset
49
+
50
+ dataset = SensorDataset("recording.oe")
51
+
52
+ # Combined time-indexed DataFrame of all parsed sensors
53
+ df = dataset.get_dataframe()
54
+
55
+ # Per-sensor views
56
+ imu_df = dataset.imu.df
57
+ ppg_red = dataset.ppg["ppg.red"]
58
+ audio_df = dataset.get_audio_dataframe()
59
+ ```
60
+
61
+ ## Documentation
62
+
63
+ - [Documentation index](docs/README.md)
64
+ - [Getting started](docs/getting-started.md)
65
+ - [Data model and sensor channels](docs/data-model.md)
66
+ - [API reference](docs/api-reference.md)
67
+
68
+ ## License
69
+
70
+ MIT. See `LICENSE`.
@@ -0,0 +1,46 @@
1
+ # Open Earable Python
2
+
3
+ Python toolkit for parsing and analyzing multi-sensor OpenEarable recordings.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install open-earable-python
9
+ ```
10
+
11
+ For local development:
12
+
13
+ ```bash
14
+ git clone https://github.com/OpenEarable/open-earable-python.git
15
+ cd open-earable-python
16
+ python -m venv .venv
17
+ source .venv/bin/activate
18
+ pip install -e .
19
+ ```
20
+
21
+ ## Quick Example
22
+
23
+ ```python
24
+ from open_earable_python import SensorDataset
25
+
26
+ dataset = SensorDataset("recording.oe")
27
+
28
+ # Combined time-indexed DataFrame of all parsed sensors
29
+ df = dataset.get_dataframe()
30
+
31
+ # Per-sensor views
32
+ imu_df = dataset.imu.df
33
+ ppg_red = dataset.ppg["ppg.red"]
34
+ audio_df = dataset.get_audio_dataframe()
35
+ ```
36
+
37
+ ## Documentation
38
+
39
+ - [Documentation index](docs/README.md)
40
+ - [Getting started](docs/getting-started.md)
41
+ - [Data model and sensor channels](docs/data-model.md)
42
+ - [API reference](docs/api-reference.md)
43
+
44
+ ## License
45
+
46
+ MIT. See `LICENSE`.
@@ -0,0 +1,16 @@
1
+ # Open Earable Python Documentation
2
+
3
+ `open-earable-python` parses `.oe` recordings into pandas DataFrames and exposes convenient accessors for OpenEarable sensor streams.
4
+
5
+ ## Contents
6
+
7
+ - [Getting started](getting-started.md)
8
+ - [Data model and sensor channels](data-model.md)
9
+ - [API reference](api-reference.md)
10
+
11
+ ## Package Scope
12
+
13
+ - Parse binary OpenEarable streams into structured sensor samples.
14
+ - Build per-sensor and combined time-indexed DataFrames.
15
+ - Decode microphone PCM samples and export/play audio.
16
+ - Load one or multiple recordings with the same API.
@@ -0,0 +1,125 @@
1
+ # API Reference
2
+
3
+ ## Package Exports
4
+
5
+ ```python
6
+ from open_earable_python import SensorDataset, load_recordings
7
+ ```
8
+
9
+ ## `SensorDataset`
10
+
11
+ High-level API for loading and analyzing a single `.oe` recording.
12
+
13
+ ### Constructor
14
+
15
+ ```python
16
+ SensorDataset(filename: str, verbose: bool = False)
17
+ ```
18
+
19
+ - `filename`: path to `.oe` file.
20
+ - `verbose`: enables parser diagnostic output.
21
+
22
+ Parsing happens during initialization.
23
+
24
+ ### Attributes
25
+
26
+ - `filename: str` source file path.
27
+ - `verbose: bool` parser verbosity flag.
28
+ - `parse_result: parser.ParseResult` raw parse output.
29
+ - `sensor_dfs: Dict[int, pandas.DataFrame]` per-SID DataFrames.
30
+ - `df: pandas.DataFrame` lazily built combined DataFrame.
31
+ - `audio_stereo: Optional[numpy.ndarray]` stereo audio frames (`int16`, shape `(N, 2)`).
32
+ - `audio_df: pandas.DataFrame` cached audio DataFrame.
33
+
34
+ Sensor accessor attributes:
35
+
36
+ - `dataset.imu`
37
+ - `dataset.barometer`
38
+ - `dataset.microphone`
39
+ - `dataset.ppg`
40
+ - `dataset.optical_temp`
41
+ - `dataset.bone_acc`
42
+
43
+ Each accessor supports grouped and channel-level access (see data model docs).
44
+
45
+ ### Methods
46
+
47
+ #### `parse() -> None`
48
+
49
+ Re-parses the recording file and updates `parse_result`.
50
+
51
+ #### `list_sensors() -> List[str]`
52
+
53
+ Returns sensor names with non-empty DataFrames.
54
+
55
+ #### `get_sensor_dataframe(name: str) -> pandas.DataFrame`
56
+
57
+ Returns one sensor DataFrame by name.
58
+
59
+ - Valid names: `imu`, `barometer`, `microphone`, `ppg`, `optical_temp`, `bone_acc`
60
+ - Raises `KeyError` for unknown names.
61
+
62
+ #### `get_dataframe() -> pandas.DataFrame`
63
+
64
+ Builds and caches a merged DataFrame across all non-empty sensor streams.
65
+
66
+ #### `get_audio_dataframe(sampling_rate: int = 48000) -> pandas.DataFrame`
67
+
68
+ Returns timestamp-indexed audio DataFrame with columns:
69
+
70
+ - `mic.inner`
71
+ - `mic.outer`
72
+
73
+ Behavior:
74
+
75
+ - Raises `ValueError` if `sampling_rate <= 0`.
76
+ - Returns empty DataFrame with expected columns if no mic packets exist.
77
+ - Caches by sampling rate.
78
+
79
+ #### `export_csv() -> None`
80
+
81
+ Writes combined DataFrame to `<recording_basename>.csv` by delegating to `save_csv()`.
82
+
83
+ #### `save_csv(path: str) -> None`
84
+
85
+ Saves the combined DataFrame to CSV if `self.df` is non-empty.
86
+
87
+ Call `get_dataframe()` first to ensure `self.df` is populated.
88
+
89
+ #### `play_audio(sampling_rate: int = 48000) -> None`
90
+
91
+ Plays audio in IPython/Jupyter via `IPython.display.Audio`.
92
+
93
+ #### `save_audio(path: str, sampling_rate: int = 48000) -> None`
94
+
95
+ Writes WAV audio with `scipy.io.wavfile.write`.
96
+
97
+ ## `load_recordings`
98
+
99
+ ```python
100
+ load_recordings(file_paths: Sequence[str]) -> List[SensorDataset]
101
+ ```
102
+
103
+ Creates `SensorDataset` objects for existing files only.
104
+
105
+ ## Parser Module (`open_earable_python.parser`)
106
+
107
+ Core classes and helpers for decoding binary packets:
108
+
109
+ - `Parser`: stream parser over packetized binary data.
110
+ - `PayloadParser`: base parser interface.
111
+ - `SchemePayloadParser`: parser built from `SensorScheme`.
112
+ - `MicPayloadParser`: parser for microphone payloads.
113
+ - `ParseResult`: parse container with per-SID DataFrames and microphone artifacts.
114
+ - `interleaved_mic_to_stereo(samples)`: converts interleaved samples to stereo.
115
+ - `mic_packet_to_stereo_frames(packet, sampling_rate)`: timestamp + stereo frame conversion.
116
+
117
+ ## Scheme Module (`open_earable_python.scheme`)
118
+
119
+ Defines sensor schema primitives:
120
+
121
+ - `ParseType` enum
122
+ - `SensorComponentScheme`
123
+ - `SensorComponentGroupScheme`
124
+ - `SensorScheme`
125
+ - `build_default_sensor_schemes(sensor_sid)`
@@ -0,0 +1,61 @@
1
+ # Data Model and Sensor Channels
2
+
3
+ ## Time Index
4
+
5
+ All sensor DataFrames are indexed by `timestamp` in seconds (`float`), derived from packet timestamps in microseconds.
6
+
7
+ ## Sensor Streams
8
+
9
+ `SensorDataset` defines these sensor streams:
10
+
11
+ - `imu` (SID 0)
12
+ - `barometer` (SID 1)
13
+ - `microphone` (SID 2)
14
+ - `ppg` (SID 4)
15
+ - `optical_temp` (SID 6)
16
+ - `bone_acc` (SID 7)
17
+
18
+ ## Default Columns by Sensor
19
+
20
+ - `imu`: `acc.x`, `acc.y`, `acc.z`, `gyro.x`, `gyro.y`, `gyro.z`, `mag.x`, `mag.y`, `mag.z`
21
+ - `barometer`: `barometer.temperature`, `barometer.pressure`
22
+ - `ppg`: `ppg.red`, `ppg.ir`, `ppg.green`, `ppg.ambient`
23
+ - `bone_acc`: `bone_acc.x`, `bone_acc.y`, `bone_acc.z`
24
+ - `optical_temp`: `optical_temp`
25
+ - `microphone`: `mic.inner`, `mic.outer`
26
+
27
+ ## Accessor Semantics
28
+
29
+ Each sensor is exposed as a `_SensorAccessor` object:
30
+
31
+ - `sensor.df` or `sensor.to_dataframe()` returns the full sensor DataFrame with original column names.
32
+ - Group columns are available as sub-DataFrames:
33
+ - `dataset.imu.acc` -> columns `x`, `y`, `z`
34
+ - `dataset.imu.gyro` -> columns `x`, `y`, `z`
35
+ - `dataset.imu.mag` -> columns `x`, `y`, `z`
36
+ - `dataset.ppg.ppg` -> columns `red`, `ir`, `green`, `ambient`
37
+ - Original columns remain directly accessible:
38
+ - `dataset.imu["acc.x"]`
39
+ - `dataset.ppg["ppg.red"]`
40
+
41
+ ## Combined DataFrame
42
+
43
+ `get_dataframe()` merges all non-empty per-sensor DataFrames:
44
+
45
+ - Creates a union of all sensor timestamps.
46
+ - Reindexes each sensor DataFrame onto that common index.
47
+ - Concatenates columns into one DataFrame.
48
+
49
+ This preserves each stream while aligning them on time.
50
+
51
+ ## Microphone Data Details
52
+
53
+ Microphone payloads are parsed as interleaved `int16` samples and converted to stereo frames:
54
+
55
+ - Input interleaving: `[outer, inner, outer, inner, ...]`
56
+ - Output stereo columns/order: `[inner, outer]`
57
+
58
+ The audio DataFrame generated by `get_audio_dataframe()` uses:
59
+
60
+ - index: `timestamp` in seconds
61
+ - columns: `mic.inner`, `mic.outer`
@@ -0,0 +1,96 @@
1
+ # Getting Started
2
+
3
+ ## Requirements
4
+
5
+ - Python 3.9+
6
+ - `numpy`, `pandas`, `scipy`, `ipython` (installed automatically with this package)
7
+
8
+ ## Installation
9
+
10
+ ```bash
11
+ pip install open-earable-python
12
+ ```
13
+
14
+ From source:
15
+
16
+ ```bash
17
+ git clone https://github.com/OpenEarable/open-earable-python.git
18
+ cd open-earable-python
19
+ python -m venv .venv
20
+ source .venv/bin/activate
21
+ pip install -e .
22
+ ```
23
+
24
+ ## Load a Recording
25
+
26
+ ```python
27
+ from open_earable_python import SensorDataset
28
+
29
+ dataset = SensorDataset("my_recording.oe")
30
+ ```
31
+
32
+ `SensorDataset` parses the file immediately during initialization.
33
+
34
+ ## Work with Sensor Data
35
+
36
+ ```python
37
+ # Combined DataFrame (all available non-empty sensor streams)
38
+ df = dataset.get_dataframe()
39
+ print(df.head())
40
+
41
+ # List non-empty sensor streams
42
+ print(dataset.list_sensors())
43
+
44
+ # Access one sensor DataFrame directly
45
+ imu_df = dataset.get_sensor_dataframe("imu")
46
+ print(imu_df.columns)
47
+ ```
48
+
49
+ ## Access Channels via Accessors
50
+
51
+ ```python
52
+ # Full IMU DataFrame (columns: acc.x, acc.y, ...)
53
+ imu = dataset.imu.df
54
+
55
+ # Group-level access (columns renamed to x, y, z)
56
+ acc = dataset.imu.acc
57
+ gyro = dataset.imu.gyro
58
+
59
+ # Channel-level access
60
+ acc_x = dataset.imu.acc["x"]
61
+ mag_z = dataset.imu.mag.z
62
+ ```
63
+
64
+ ## Work with Audio
65
+
66
+ ```python
67
+ # Timestamp-indexed stereo audio DataFrame
68
+ audio_df = dataset.get_audio_dataframe() # default 48_000 Hz
69
+ print(audio_df.columns) # mic.inner, mic.outer
70
+
71
+ # Save WAV
72
+ dataset.save_audio("recording.wav")
73
+
74
+ # Play in Jupyter/IPython environments
75
+ dataset.play_audio()
76
+ ```
77
+
78
+ ## Export CSV
79
+
80
+ ```python
81
+ # Build combined DataFrame, then export it
82
+ dataset.get_dataframe()
83
+ dataset.save_csv("recording.csv")
84
+ ```
85
+
86
+ `save_csv()` writes only if the combined DataFrame is already populated (for example after calling `get_dataframe()`).
87
+
88
+ ## Load Multiple Files
89
+
90
+ ```python
91
+ from open_earable_python import load_recordings
92
+
93
+ recordings = load_recordings(["session1.oe", "session2.oe"])
94
+ for rec in recordings:
95
+ print(rec.filename, rec.list_sensors())
96
+ ```
@@ -20,6 +20,7 @@ LABELS: Dict[str, List[str]] = {
20
20
  "ppg": ["ppg.red", "ppg.ir", "ppg.green", "ppg.ambient"],
21
21
  "bone_acc": ["bone_acc.x", "bone_acc.y", "bone_acc.z"],
22
22
  "optical_temp": ["optical_temp"],
23
+ "microphone": ["mic.inner", "mic.outer"],
23
24
  }
24
25
 
25
26
  COLORS: Dict[str, List[str]] = {
@@ -131,107 +132,43 @@ class SensorDataset:
131
132
  def __init__(self, filename: str, verbose: bool = False):
132
133
  self.filename = filename
133
134
  self.verbose = verbose
134
- self.parse_result: Dict[int, List] = defaultdict(list)
135
+ self.parse_result: parser.ParseResult = parser.ParseResult(
136
+ sensor_dfs={},
137
+ mic_samples=[],
138
+ )
135
139
  # Per-SID dataframes built in _build_accessors
136
140
  self.sensor_dfs: Dict[int, pd.DataFrame] = {}
137
141
  self.audio_stereo: Optional[np.ndarray] = None
142
+ self.audio_df: pd.DataFrame = pd.DataFrame()
143
+ self._audio_df_sampling_rate: Optional[int] = None
138
144
  self.bone_sound: Optional[np.ndarray] = None
139
145
  self.df: pd.DataFrame = pd.DataFrame()
140
146
 
141
- self.imu = _SensorAccessor(pd.DataFrame(columns=LABELS["imu"]), LABELS["imu"])
142
- self.barometer = _SensorAccessor(pd.DataFrame(columns=LABELS["barometer"]), LABELS["barometer"])
143
- self.ppg = _SensorAccessor(pd.DataFrame(columns=LABELS["ppg"]), LABELS["ppg"])
144
- self.bone_acc = _SensorAccessor(pd.DataFrame(columns=LABELS["bone_acc"]), LABELS["bone_acc"])
145
- self.optical_temp = _SensorAccessor(pd.DataFrame(columns=LABELS["optical_temp"]), LABELS["optical_temp"])
146
-
147
- self.parser: parser.Parser = parser.Parser({
148
- self.SENSOR_SID["imu"]: parser.SchemePayloadParser(scheme.SensorScheme(
149
- name='imu',
150
- sid=self.SENSOR_SID["imu"],
151
- groups=[
152
- scheme.SensorComponentGroupScheme(
153
- name='acc',
154
- components=[
155
- scheme.SensorComponentScheme('x', scheme.ParseType.FLOAT),
156
- scheme.SensorComponentScheme('y', scheme.ParseType.FLOAT),
157
- scheme.SensorComponentScheme('z', scheme.ParseType.FLOAT),
158
- ]
159
- ),
160
- scheme.SensorComponentGroupScheme(
161
- name='gyro',
162
- components=[
163
- scheme.SensorComponentScheme('x', scheme.ParseType.FLOAT),
164
- scheme.SensorComponentScheme('y', scheme.ParseType.FLOAT),
165
- scheme.SensorComponentScheme('z', scheme.ParseType.FLOAT),
166
- ]
167
- ),
168
- scheme.SensorComponentGroupScheme(
169
- name='mag',
170
- components=[
171
- scheme.SensorComponentScheme('x', scheme.ParseType.FLOAT),
172
- scheme.SensorComponentScheme('y', scheme.ParseType.FLOAT),
173
- scheme.SensorComponentScheme('z', scheme.ParseType.FLOAT),
174
- ]
175
- ),
176
- ])),
177
- self.SENSOR_SID["barometer"]: parser.SchemePayloadParser(scheme.SensorScheme(
178
- name='barometer',
179
- sid=self.SENSOR_SID["barometer"],
180
- groups=[
181
- scheme.SensorComponentGroupScheme(
182
- name='barometer',
183
- components=[
184
- scheme.SensorComponentScheme('temperature', scheme.ParseType.FLOAT),
185
- scheme.SensorComponentScheme('pressure', scheme.ParseType.FLOAT),
186
- ]
187
- ),
188
- ])),
189
- self.SENSOR_SID["ppg"]: parser.SchemePayloadParser(scheme.SensorScheme(
190
- name='ppg',
191
- sid=self.SENSOR_SID["ppg"],
192
- groups=[
193
- scheme.SensorComponentGroupScheme(
194
- name='ppg',
195
- components=[
196
- scheme.SensorComponentScheme('red', scheme.ParseType.UINT32),
197
- scheme.SensorComponentScheme('ir', scheme.ParseType.UINT32),
198
- scheme.SensorComponentScheme('green', scheme.ParseType.UINT32),
199
- scheme.SensorComponentScheme('ambient', scheme.ParseType.UINT32),
200
- ]
201
- ),
202
- ])),
203
- self.SENSOR_SID["optical_temp"]: parser.SchemePayloadParser(scheme.SensorScheme(
204
- name='optical_temp',
205
- sid=self.SENSOR_SID["optical_temp"],
206
- groups=[
207
- scheme.SensorComponentGroupScheme(
208
- name='optical_temp',
209
- components=[
210
- scheme.SensorComponentScheme('optical_temp', scheme.ParseType.FLOAT),
211
- ]
212
- ),
213
- ])),
214
- self.SENSOR_SID["bone_acc"]: parser.SchemePayloadParser(scheme.SensorScheme(
215
- name='bone_acc',
216
- sid=self.SENSOR_SID["bone_acc"],
217
- groups=[
218
- scheme.SensorComponentGroupScheme(
219
- name='bone_acc',
220
- components=[
221
- scheme.SensorComponentScheme('x', scheme.ParseType.INT16),
222
- scheme.SensorComponentScheme('y', scheme.ParseType.INT16),
223
- scheme.SensorComponentScheme('z', scheme.ParseType.INT16),
224
- ]
225
- ),
226
- ])),
227
- self.SENSOR_SID["microphone"]: parser.MicPayloadParser(
228
- sample_count=48000,
229
- ),
230
- }, verbose=verbose)
147
+ for sensor_name, labels in LABELS.items():
148
+ setattr(
149
+ self,
150
+ sensor_name,
151
+ _SensorAccessor(pd.DataFrame(columns=labels), labels),
152
+ )
153
+
154
+ self.parser: parser.Parser = self._build_parser(verbose=verbose)
231
155
 
232
156
  self.parse()
233
157
  self._build_accessors()
234
158
 
159
+ @classmethod
160
+ def _build_parser(cls, verbose: bool = False) -> parser.Parser:
161
+ sensor_schemes = scheme.build_default_sensor_schemes(cls.SENSOR_SID)
162
+ dataset_parser = parser.Parser.from_sensor_schemes(
163
+ sensor_schemes=sensor_schemes,
164
+ verbose=verbose,
165
+ )
166
+ dataset_parser.parsers[cls.SENSOR_SID["microphone"]] = parser.MicPayloadParser(
167
+ sample_count=48000,
168
+ verbose=verbose,
169
+ )
170
+ return dataset_parser
171
+
235
172
  def parse(self) -> None:
236
173
  """Parse the binary recording file into structured sensor data."""
237
174
  with open(self.filename, "rb") as f:
@@ -245,10 +182,17 @@ class SensorDataset:
245
182
  The combined DataFrame over all sensors is built lazily in
246
183
  :meth:`get_dataframe`.
247
184
  """
185
+ self.audio_stereo = self.parse_result.audio_stereo
186
+ self.audio_df = pd.DataFrame()
187
+ self._audio_df_sampling_rate = None
188
+ self.sensor_dfs = {}
189
+
248
190
  data_dict = self.parse_result.sensor_dfs
249
191
  for name, sid in self.SENSOR_SID.items():
250
- labels = LABELS.get(name, [f"val{i}" for i in range(0)])
251
- if sid in data_dict and isinstance(data_dict[sid], pd.DataFrame):
192
+ labels = LABELS.get(name, [])
193
+ if name == "microphone":
194
+ df = self.get_audio_dataframe()
195
+ elif sid in data_dict and isinstance(data_dict[sid], pd.DataFrame):
252
196
  df = data_dict[sid]
253
197
  df = df[~df.index.duplicated(keep="first")]
254
198
  else:
@@ -263,8 +207,6 @@ class SensorDataset:
263
207
  # Clear combined dataframe; it will be built lazily on demand
264
208
  self.df = pd.DataFrame()
265
209
 
266
- self.audio_stereo = self.parse_result.audio_stereo
267
-
268
210
  def list_sensors(self) -> List[str]:
269
211
  """Return a list of available sensor names in the dataset."""
270
212
  available_sensors = []
@@ -330,6 +272,64 @@ class SensorDataset:
330
272
 
331
273
  return self.df
332
274
 
275
+ def get_audio_dataframe(self, sampling_rate: int = 48000) -> pd.DataFrame:
276
+ """Return microphone audio as a timestamp-indexed stereo DataFrame.
277
+
278
+ The returned DataFrame has:
279
+ - index: ``timestamp`` in seconds
280
+ - columns: ``mic.inner`` and ``mic.outer`` (int16 PCM)
281
+ """
282
+ if sampling_rate <= 0:
283
+ raise ValueError(f"sampling_rate must be > 0, got {sampling_rate}")
284
+
285
+ if self._audio_df_sampling_rate == sampling_rate:
286
+ return self.audio_df
287
+
288
+ mic_packets = getattr(self.parse_result, "mic_packets", [])
289
+ if not mic_packets:
290
+ self.audio_df = pd.DataFrame(columns=["mic.inner", "mic.outer"])
291
+ self.audio_df.index.name = "timestamp"
292
+ self._audio_df_sampling_rate = sampling_rate
293
+ return self.audio_df
294
+
295
+ timestamps: List[np.ndarray] = []
296
+ stereo_frames: List[np.ndarray] = []
297
+
298
+ for packet in mic_packets:
299
+ ts, stereo = parser.mic_packet_to_stereo_frames(
300
+ packet=packet,
301
+ sampling_rate=sampling_rate,
302
+ )
303
+ if stereo.size == 0:
304
+ continue
305
+ timestamps.append(ts)
306
+ stereo_frames.append(stereo)
307
+
308
+ if not timestamps:
309
+ self.audio_df = pd.DataFrame(columns=["mic.inner", "mic.outer"])
310
+ self.audio_df.index.name = "timestamp"
311
+ self._audio_df_sampling_rate = sampling_rate
312
+ return self.audio_df
313
+
314
+ all_ts = np.concatenate(timestamps)
315
+ all_stereo = np.vstack(stereo_frames)
316
+
317
+ self.audio_df = pd.DataFrame(
318
+ {
319
+ "mic.inner": all_stereo[:, 0],
320
+ "mic.outer": all_stereo[:, 1],
321
+ },
322
+ index=all_ts,
323
+ )
324
+ self.audio_df.index.name = "timestamp"
325
+ self.audio_df = self.audio_df[~self.audio_df.index.duplicated(keep="first")]
326
+ self._audio_df_sampling_rate = sampling_rate
327
+
328
+ if sampling_rate == 48000:
329
+ self.sensor_dfs[self.SENSOR_SID["microphone"]] = self.audio_df
330
+
331
+ return self.audio_df
332
+
333
333
  def export_csv(self) -> None:
334
334
  base_filename, _ = os.path.splitext(self.filename)
335
335
  self.save_csv(base_filename + ".csv")