open-earable-python 0.0.1__tar.gz → 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_earable_python-0.0.2/CHANGELOG.md +10 -0
- open_earable_python-0.0.2/PKG-INFO +70 -0
- open_earable_python-0.0.2/README.md +46 -0
- open_earable_python-0.0.2/docs/README.md +16 -0
- open_earable_python-0.0.2/docs/api-reference.md +125 -0
- open_earable_python-0.0.2/docs/data-model.md +61 -0
- open_earable_python-0.0.2/docs/getting-started.md +96 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python/dataset.py +95 -95
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python/parser.py +56 -10
- open_earable_python-0.0.2/src/open_earable_python/scheme.py +121 -0
- open_earable_python-0.0.2/src/open_earable_python.egg-info/PKG-INFO +70 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/SOURCES.txt +4 -0
- open_earable_python-0.0.1/CHANGELOG.md +0 -4
- open_earable_python-0.0.1/PKG-INFO +0 -128
- open_earable_python-0.0.1/README.md +0 -104
- open_earable_python-0.0.1/src/open_earable_python/scheme.py +0 -40
- open_earable_python-0.0.1/src/open_earable_python.egg-info/PKG-INFO +0 -128
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/.github/workflows/publish-pypi.yml +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/.gitignore +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/LICENSE +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/pyproject.toml +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/setup.cfg +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python/__init__.py +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/dependency_links.txt +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/requires.txt +0 -0
- {open_earable_python-0.0.1 → open_earable_python-0.0.2}/src/open_earable_python.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
## 0.0.2
|
|
2
|
+
|
|
3
|
+
* added access to raw microphone dataframes
|
|
4
|
+
* added a dedicated `docs/` documentation set (getting started, data model, API reference)
|
|
5
|
+
* rewrote `README.md` as a concise entry point that links to the full docs
|
|
6
|
+
|
|
7
|
+
## 0.0.1
|
|
8
|
+
|
|
9
|
+
* Initial release of the Open Earable Python SDK.
|
|
10
|
+
* parse Open Earable data files
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: open-earable-python
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Reader and utilities for multi-sensor OpenEarable recordings.
|
|
5
|
+
Author-email: "Karlsruhe Institut of Technology (KIT)" <open-earable@lists.kit.edu>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Source, https://github.com/OpenEarable/open-earable-python
|
|
8
|
+
Project-URL: Issues, https://github.com/OpenEarable/open-earable-python/issues
|
|
9
|
+
Keywords: openearable,earable,sensors,imu,ppg,audio,wearables,.oe
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Requires-Dist: ipython
|
|
22
|
+
Requires-Dist: scipy
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# Open Earable Python
|
|
26
|
+
|
|
27
|
+
Python toolkit for parsing and analyzing multi-sensor OpenEarable recordings.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install open-earable-python
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
For local development:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
git clone https://github.com/OpenEarable/open-earable-python.git
|
|
39
|
+
cd open-earable-python
|
|
40
|
+
python -m venv .venv
|
|
41
|
+
source .venv/bin/activate
|
|
42
|
+
pip install -e .
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Quick Example
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from open_earable_python import SensorDataset
|
|
49
|
+
|
|
50
|
+
dataset = SensorDataset("recording.oe")
|
|
51
|
+
|
|
52
|
+
# Combined time-indexed DataFrame of all parsed sensors
|
|
53
|
+
df = dataset.get_dataframe()
|
|
54
|
+
|
|
55
|
+
# Per-sensor views
|
|
56
|
+
imu_df = dataset.imu.df
|
|
57
|
+
ppg_red = dataset.ppg["ppg.red"]
|
|
58
|
+
audio_df = dataset.get_audio_dataframe()
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Documentation
|
|
62
|
+
|
|
63
|
+
- [Documentation index](docs/README.md)
|
|
64
|
+
- [Getting started](docs/getting-started.md)
|
|
65
|
+
- [Data model and sensor channels](docs/data-model.md)
|
|
66
|
+
- [API reference](docs/api-reference.md)
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
|
|
70
|
+
MIT. See `LICENSE`.
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Open Earable Python
|
|
2
|
+
|
|
3
|
+
Python toolkit for parsing and analyzing multi-sensor OpenEarable recordings.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install open-earable-python
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
For local development:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
git clone https://github.com/OpenEarable/open-earable-python.git
|
|
15
|
+
cd open-earable-python
|
|
16
|
+
python -m venv .venv
|
|
17
|
+
source .venv/bin/activate
|
|
18
|
+
pip install -e .
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Quick Example
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from open_earable_python import SensorDataset
|
|
25
|
+
|
|
26
|
+
dataset = SensorDataset("recording.oe")
|
|
27
|
+
|
|
28
|
+
# Combined time-indexed DataFrame of all parsed sensors
|
|
29
|
+
df = dataset.get_dataframe()
|
|
30
|
+
|
|
31
|
+
# Per-sensor views
|
|
32
|
+
imu_df = dataset.imu.df
|
|
33
|
+
ppg_red = dataset.ppg["ppg.red"]
|
|
34
|
+
audio_df = dataset.get_audio_dataframe()
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Documentation
|
|
38
|
+
|
|
39
|
+
- [Documentation index](docs/README.md)
|
|
40
|
+
- [Getting started](docs/getting-started.md)
|
|
41
|
+
- [Data model and sensor channels](docs/data-model.md)
|
|
42
|
+
- [API reference](docs/api-reference.md)
|
|
43
|
+
|
|
44
|
+
## License
|
|
45
|
+
|
|
46
|
+
MIT. See `LICENSE`.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Open Earable Python Documentation
|
|
2
|
+
|
|
3
|
+
`open-earable-python` parses `.oe` recordings into pandas DataFrames and exposes convenient accessors for OpenEarable sensor streams.
|
|
4
|
+
|
|
5
|
+
## Contents
|
|
6
|
+
|
|
7
|
+
- [Getting started](getting-started.md)
|
|
8
|
+
- [Data model and sensor channels](data-model.md)
|
|
9
|
+
- [API reference](api-reference.md)
|
|
10
|
+
|
|
11
|
+
## Package Scope
|
|
12
|
+
|
|
13
|
+
- Parse binary OpenEarable streams into structured sensor samples.
|
|
14
|
+
- Build per-sensor and combined time-indexed DataFrames.
|
|
15
|
+
- Decode microphone PCM samples and export/play audio.
|
|
16
|
+
- Load one or multiple recordings with the same API.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# API Reference
|
|
2
|
+
|
|
3
|
+
## Package Exports
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
from open_earable_python import SensorDataset, load_recordings
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## `SensorDataset`
|
|
10
|
+
|
|
11
|
+
High-level API for loading and analyzing a single `.oe` recording.
|
|
12
|
+
|
|
13
|
+
### Constructor
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
SensorDataset(filename: str, verbose: bool = False)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
- `filename`: path to `.oe` file.
|
|
20
|
+
- `verbose`: enables parser diagnostic output.
|
|
21
|
+
|
|
22
|
+
Parsing happens during initialization.
|
|
23
|
+
|
|
24
|
+
### Attributes
|
|
25
|
+
|
|
26
|
+
- `filename: str` source file path.
|
|
27
|
+
- `verbose: bool` parser verbosity flag.
|
|
28
|
+
- `parse_result: parser.ParseResult` raw parse output.
|
|
29
|
+
- `sensor_dfs: Dict[int, pandas.DataFrame]` per-SID DataFrames.
|
|
30
|
+
- `df: pandas.DataFrame` lazily built combined DataFrame.
|
|
31
|
+
- `audio_stereo: Optional[numpy.ndarray]` stereo audio frames (`int16`, shape `(N, 2)`).
|
|
32
|
+
- `audio_df: pandas.DataFrame` cached audio DataFrame.
|
|
33
|
+
|
|
34
|
+
Sensor accessor attributes:
|
|
35
|
+
|
|
36
|
+
- `dataset.imu`
|
|
37
|
+
- `dataset.barometer`
|
|
38
|
+
- `dataset.microphone`
|
|
39
|
+
- `dataset.ppg`
|
|
40
|
+
- `dataset.optical_temp`
|
|
41
|
+
- `dataset.bone_acc`
|
|
42
|
+
|
|
43
|
+
Each accessor supports grouped and channel-level access (see data model docs).
|
|
44
|
+
|
|
45
|
+
### Methods
|
|
46
|
+
|
|
47
|
+
#### `parse() -> None`
|
|
48
|
+
|
|
49
|
+
Re-parses the recording file and updates `parse_result`.
|
|
50
|
+
|
|
51
|
+
#### `list_sensors() -> List[str]`
|
|
52
|
+
|
|
53
|
+
Returns sensor names with non-empty DataFrames.
|
|
54
|
+
|
|
55
|
+
#### `get_sensor_dataframe(name: str) -> pandas.DataFrame`
|
|
56
|
+
|
|
57
|
+
Returns one sensor DataFrame by name.
|
|
58
|
+
|
|
59
|
+
- Valid names: `imu`, `barometer`, `microphone`, `ppg`, `optical_temp`, `bone_acc`
|
|
60
|
+
- Raises `KeyError` for unknown names.
|
|
61
|
+
|
|
62
|
+
#### `get_dataframe() -> pandas.DataFrame`
|
|
63
|
+
|
|
64
|
+
Builds and caches a merged DataFrame across all non-empty sensor streams.
|
|
65
|
+
|
|
66
|
+
#### `get_audio_dataframe(sampling_rate: int = 48000) -> pandas.DataFrame`
|
|
67
|
+
|
|
68
|
+
Returns timestamp-indexed audio DataFrame with columns:
|
|
69
|
+
|
|
70
|
+
- `mic.inner`
|
|
71
|
+
- `mic.outer`
|
|
72
|
+
|
|
73
|
+
Behavior:
|
|
74
|
+
|
|
75
|
+
- Raises `ValueError` if `sampling_rate <= 0`.
|
|
76
|
+
- Returns empty DataFrame with expected columns if no mic packets exist.
|
|
77
|
+
- Caches the result for the most recently requested sampling rate; requesting a different rate rebuilds the DataFrame.
|
|
78
|
+
|
|
79
|
+
#### `export_csv() -> None`
|
|
80
|
+
|
|
81
|
+
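Writes the combined DataFrame to `<recording_basename>.csv` (the recording path with its extension replaced by `.csv`) by delegating to `save_csv()`, so the same precondition applies: call `get_dataframe()` first.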
|
|
82
|
+
|
|
83
|
+
#### `save_csv(path: str) -> None`
|
|
84
|
+
|
|
85
|
+
Saves the combined DataFrame to CSV if `self.df` is non-empty.
|
|
86
|
+
|
|
87
|
+
Call `get_dataframe()` first to ensure `self.df` is populated.
|
|
88
|
+
|
|
89
|
+
#### `play_audio(sampling_rate: int = 48000) -> None`
|
|
90
|
+
|
|
91
|
+
Plays audio in IPython/Jupyter via `IPython.display.Audio`.
|
|
92
|
+
|
|
93
|
+
#### `save_audio(path: str, sampling_rate: int = 48000) -> None`
|
|
94
|
+
|
|
95
|
+
Writes WAV audio with `scipy.io.wavfile.write`.
|
|
96
|
+
|
|
97
|
+
## `load_recordings`
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
load_recordings(file_paths: Sequence[str]) -> List[SensorDataset]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Creates `SensorDataset` objects for existing files only.
|
|
104
|
+
|
|
105
|
+
## Parser Module (`open_earable_python.parser`)
|
|
106
|
+
|
|
107
|
+
Core classes and helpers for decoding binary packets:
|
|
108
|
+
|
|
109
|
+
- `Parser`: stream parser over packetized binary data.
|
|
110
|
+
- `PayloadParser`: base parser interface.
|
|
111
|
+
- `SchemePayloadParser`: parser built from `SensorScheme`.
|
|
112
|
+
- `MicPayloadParser`: parser for microphone payloads.
|
|
113
|
+
- `ParseResult`: parse container with per-SID DataFrames and microphone artifacts.
|
|
114
|
+
- `interleaved_mic_to_stereo(samples)`: converts interleaved samples to stereo.
|
|
115
|
+
- `mic_packet_to_stereo_frames(packet, sampling_rate)`: timestamp + stereo frame conversion.
|
|
116
|
+
|
|
117
|
+
## Scheme Module (`open_earable_python.scheme`)
|
|
118
|
+
|
|
119
|
+
Defines sensor schema primitives:
|
|
120
|
+
|
|
121
|
+
- `ParseType` enum
|
|
122
|
+
- `SensorComponentScheme`
|
|
123
|
+
- `SensorComponentGroupScheme`
|
|
124
|
+
- `SensorScheme`
|
|
125
|
+
- `build_default_sensor_schemes(sensor_sid)`
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Data Model and Sensor Channels
|
|
2
|
+
|
|
3
|
+
## Time Index
|
|
4
|
+
|
|
5
|
+
All sensor DataFrames are indexed by `timestamp` in seconds (`float`), derived from packet timestamps in microseconds.
|
|
6
|
+
|
|
7
|
+
## Sensor Streams
|
|
8
|
+
|
|
9
|
+
`SensorDataset` defines these sensor streams:
|
|
10
|
+
|
|
11
|
+
- `imu` (SID 0)
|
|
12
|
+
- `barometer` (SID 1)
|
|
13
|
+
- `microphone` (SID 2)
|
|
14
|
+
- `ppg` (SID 4)
|
|
15
|
+
- `optical_temp` (SID 6)
|
|
16
|
+
- `bone_acc` (SID 7)
|
|
17
|
+
|
|
18
|
+
## Default Columns by Sensor
|
|
19
|
+
|
|
20
|
+
- `imu`: `acc.x`, `acc.y`, `acc.z`, `gyro.x`, `gyro.y`, `gyro.z`, `mag.x`, `mag.y`, `mag.z`
|
|
21
|
+
- `barometer`: `barometer.temperature`, `barometer.pressure`
|
|
22
|
+
- `ppg`: `ppg.red`, `ppg.ir`, `ppg.green`, `ppg.ambient`
|
|
23
|
+
- `bone_acc`: `bone_acc.x`, `bone_acc.y`, `bone_acc.z`
|
|
24
|
+
- `optical_temp`: `optical_temp`
|
|
25
|
+
- `microphone`: `mic.inner`, `mic.outer`
|
|
26
|
+
|
|
27
|
+
## Accessor Semantics
|
|
28
|
+
|
|
29
|
+
Each sensor is exposed as a `_SensorAccessor` object:
|
|
30
|
+
|
|
31
|
+
- `sensor.df` or `sensor.to_dataframe()` returns the full sensor DataFrame with original column names.
|
|
32
|
+
- Group columns are available as sub-DataFrames:
|
|
33
|
+
- `dataset.imu.acc` -> columns `x`, `y`, `z`
|
|
34
|
+
- `dataset.imu.gyro` -> columns `x`, `y`, `z`
|
|
35
|
+
- `dataset.imu.mag` -> columns `x`, `y`, `z`
|
|
36
|
+
- `dataset.ppg.ppg` -> columns `red`, `ir`, `green`, `ambient`
|
|
37
|
+
- Original columns remain directly accessible:
|
|
38
|
+
- `dataset.imu["acc.x"]`
|
|
39
|
+
- `dataset.ppg["ppg.red"]`
|
|
40
|
+
|
|
41
|
+
## Combined DataFrame
|
|
42
|
+
|
|
43
|
+
`get_dataframe()` merges all non-empty per-sensor DataFrames:
|
|
44
|
+
|
|
45
|
+
- Creates a union of all sensor timestamps.
|
|
46
|
+
- Reindexes each sensor DataFrame onto that common index.
|
|
47
|
+
- Concatenates columns into one DataFrame.
|
|
48
|
+
|
|
49
|
+
This preserves each stream while aligning them on time.
|
|
50
|
+
|
|
51
|
+
## Microphone Data Details
|
|
52
|
+
|
|
53
|
+
Microphone payloads are parsed as interleaved `int16` samples and converted to stereo frames:
|
|
54
|
+
|
|
55
|
+
- Input interleaving: `[outer, inner, outer, inner, ...]`
|
|
56
|
+
- Output stereo columns/order: `[inner, outer]`
|
|
57
|
+
|
|
58
|
+
The audio DataFrame generated by `get_audio_dataframe()` uses:
|
|
59
|
+
|
|
60
|
+
- index: `timestamp` in seconds
|
|
61
|
+
- columns: `mic.inner`, `mic.outer`
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Getting Started
|
|
2
|
+
|
|
3
|
+
## Requirements
|
|
4
|
+
|
|
5
|
+
- Python 3.9+
|
|
6
|
+
- `numpy`, `pandas`, `scipy`, `ipython` (installed automatically with this package)
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install open-earable-python
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
From source:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
git clone https://github.com/OpenEarable/open-earable-python.git
|
|
18
|
+
cd open-earable-python
|
|
19
|
+
python -m venv .venv
|
|
20
|
+
source .venv/bin/activate
|
|
21
|
+
pip install -e .
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Load a Recording
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from open_earable_python import SensorDataset
|
|
28
|
+
|
|
29
|
+
dataset = SensorDataset("my_recording.oe")
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
`SensorDataset` parses the file immediately during initialization.
|
|
33
|
+
|
|
34
|
+
## Work with Sensor Data
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
# Combined DataFrame (all available non-empty sensor streams)
|
|
38
|
+
df = dataset.get_dataframe()
|
|
39
|
+
print(df.head())
|
|
40
|
+
|
|
41
|
+
# List non-empty sensor streams
|
|
42
|
+
print(dataset.list_sensors())
|
|
43
|
+
|
|
44
|
+
# Access one sensor DataFrame directly
|
|
45
|
+
imu_df = dataset.get_sensor_dataframe("imu")
|
|
46
|
+
print(imu_df.columns)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Access Channels via Accessors
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
# Full IMU DataFrame (columns: acc.x, acc.y, ...)
|
|
53
|
+
imu = dataset.imu.df
|
|
54
|
+
|
|
55
|
+
# Group-level access (columns renamed to x, y, z)
|
|
56
|
+
acc = dataset.imu.acc
|
|
57
|
+
gyro = dataset.imu.gyro
|
|
58
|
+
|
|
59
|
+
# Channel-level access
|
|
60
|
+
acc_x = dataset.imu.acc["x"]
|
|
61
|
+
mag_z = dataset.imu.mag.z
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Work with Audio
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
# Timestamp-indexed stereo audio DataFrame
|
|
68
|
+
audio_df = dataset.get_audio_dataframe() # default 48_000 Hz
|
|
69
|
+
print(audio_df.columns) # mic.inner, mic.outer
|
|
70
|
+
|
|
71
|
+
# Save WAV
|
|
72
|
+
dataset.save_audio("recording.wav")
|
|
73
|
+
|
|
74
|
+
# Play in Jupyter/IPython environments
|
|
75
|
+
dataset.play_audio()
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Export CSV
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# Build combined DataFrame, then export it
|
|
82
|
+
dataset.get_dataframe()
|
|
83
|
+
dataset.save_csv("recording.csv")
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
`save_csv()` writes only if the combined DataFrame is already populated (for example after calling `get_dataframe()`).
|
|
87
|
+
|
|
88
|
+
## Load Multiple Files
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from open_earable_python import load_recordings
|
|
92
|
+
|
|
93
|
+
recordings = load_recordings(["session1.oe", "session2.oe"])
|
|
94
|
+
for rec in recordings:
|
|
95
|
+
print(rec.filename, rec.list_sensors())
|
|
96
|
+
```
|
|
@@ -20,6 +20,7 @@ LABELS: Dict[str, List[str]] = {
|
|
|
20
20
|
"ppg": ["ppg.red", "ppg.ir", "ppg.green", "ppg.ambient"],
|
|
21
21
|
"bone_acc": ["bone_acc.x", "bone_acc.y", "bone_acc.z"],
|
|
22
22
|
"optical_temp": ["optical_temp"],
|
|
23
|
+
"microphone": ["mic.inner", "mic.outer"],
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
COLORS: Dict[str, List[str]] = {
|
|
@@ -131,107 +132,43 @@ class SensorDataset:
|
|
|
131
132
|
def __init__(self, filename: str, verbose: bool = False):
|
|
132
133
|
self.filename = filename
|
|
133
134
|
self.verbose = verbose
|
|
134
|
-
self.parse_result:
|
|
135
|
+
self.parse_result: parser.ParseResult = parser.ParseResult(
|
|
136
|
+
sensor_dfs={},
|
|
137
|
+
mic_samples=[],
|
|
138
|
+
)
|
|
135
139
|
# Per-SID dataframes built in _build_accessors
|
|
136
140
|
self.sensor_dfs: Dict[int, pd.DataFrame] = {}
|
|
137
141
|
self.audio_stereo: Optional[np.ndarray] = None
|
|
142
|
+
self.audio_df: pd.DataFrame = pd.DataFrame()
|
|
143
|
+
self._audio_df_sampling_rate: Optional[int] = None
|
|
138
144
|
self.bone_sound: Optional[np.ndarray] = None
|
|
139
145
|
self.df: pd.DataFrame = pd.DataFrame()
|
|
140
146
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
name='imu',
|
|
150
|
-
sid=self.SENSOR_SID["imu"],
|
|
151
|
-
groups=[
|
|
152
|
-
scheme.SensorComponentGroupScheme(
|
|
153
|
-
name='acc',
|
|
154
|
-
components=[
|
|
155
|
-
scheme.SensorComponentScheme('x', scheme.ParseType.FLOAT),
|
|
156
|
-
scheme.SensorComponentScheme('y', scheme.ParseType.FLOAT),
|
|
157
|
-
scheme.SensorComponentScheme('z', scheme.ParseType.FLOAT),
|
|
158
|
-
]
|
|
159
|
-
),
|
|
160
|
-
scheme.SensorComponentGroupScheme(
|
|
161
|
-
name='gyro',
|
|
162
|
-
components=[
|
|
163
|
-
scheme.SensorComponentScheme('x', scheme.ParseType.FLOAT),
|
|
164
|
-
scheme.SensorComponentScheme('y', scheme.ParseType.FLOAT),
|
|
165
|
-
scheme.SensorComponentScheme('z', scheme.ParseType.FLOAT),
|
|
166
|
-
]
|
|
167
|
-
),
|
|
168
|
-
scheme.SensorComponentGroupScheme(
|
|
169
|
-
name='mag',
|
|
170
|
-
components=[
|
|
171
|
-
scheme.SensorComponentScheme('x', scheme.ParseType.FLOAT),
|
|
172
|
-
scheme.SensorComponentScheme('y', scheme.ParseType.FLOAT),
|
|
173
|
-
scheme.SensorComponentScheme('z', scheme.ParseType.FLOAT),
|
|
174
|
-
]
|
|
175
|
-
),
|
|
176
|
-
])),
|
|
177
|
-
self.SENSOR_SID["barometer"]: parser.SchemePayloadParser(scheme.SensorScheme(
|
|
178
|
-
name='barometer',
|
|
179
|
-
sid=self.SENSOR_SID["barometer"],
|
|
180
|
-
groups=[
|
|
181
|
-
scheme.SensorComponentGroupScheme(
|
|
182
|
-
name='barometer',
|
|
183
|
-
components=[
|
|
184
|
-
scheme.SensorComponentScheme('temperature', scheme.ParseType.FLOAT),
|
|
185
|
-
scheme.SensorComponentScheme('pressure', scheme.ParseType.FLOAT),
|
|
186
|
-
]
|
|
187
|
-
),
|
|
188
|
-
])),
|
|
189
|
-
self.SENSOR_SID["ppg"]: parser.SchemePayloadParser(scheme.SensorScheme(
|
|
190
|
-
name='ppg',
|
|
191
|
-
sid=self.SENSOR_SID["ppg"],
|
|
192
|
-
groups=[
|
|
193
|
-
scheme.SensorComponentGroupScheme(
|
|
194
|
-
name='ppg',
|
|
195
|
-
components=[
|
|
196
|
-
scheme.SensorComponentScheme('red', scheme.ParseType.UINT32),
|
|
197
|
-
scheme.SensorComponentScheme('ir', scheme.ParseType.UINT32),
|
|
198
|
-
scheme.SensorComponentScheme('green', scheme.ParseType.UINT32),
|
|
199
|
-
scheme.SensorComponentScheme('ambient', scheme.ParseType.UINT32),
|
|
200
|
-
]
|
|
201
|
-
),
|
|
202
|
-
])),
|
|
203
|
-
self.SENSOR_SID["optical_temp"]: parser.SchemePayloadParser(scheme.SensorScheme(
|
|
204
|
-
name='optical_temp',
|
|
205
|
-
sid=self.SENSOR_SID["optical_temp"],
|
|
206
|
-
groups=[
|
|
207
|
-
scheme.SensorComponentGroupScheme(
|
|
208
|
-
name='optical_temp',
|
|
209
|
-
components=[
|
|
210
|
-
scheme.SensorComponentScheme('optical_temp', scheme.ParseType.FLOAT),
|
|
211
|
-
]
|
|
212
|
-
),
|
|
213
|
-
])),
|
|
214
|
-
self.SENSOR_SID["bone_acc"]: parser.SchemePayloadParser(scheme.SensorScheme(
|
|
215
|
-
name='bone_acc',
|
|
216
|
-
sid=self.SENSOR_SID["bone_acc"],
|
|
217
|
-
groups=[
|
|
218
|
-
scheme.SensorComponentGroupScheme(
|
|
219
|
-
name='bone_acc',
|
|
220
|
-
components=[
|
|
221
|
-
scheme.SensorComponentScheme('x', scheme.ParseType.INT16),
|
|
222
|
-
scheme.SensorComponentScheme('y', scheme.ParseType.INT16),
|
|
223
|
-
scheme.SensorComponentScheme('z', scheme.ParseType.INT16),
|
|
224
|
-
]
|
|
225
|
-
),
|
|
226
|
-
])),
|
|
227
|
-
self.SENSOR_SID["microphone"]: parser.MicPayloadParser(
|
|
228
|
-
sample_count=48000,
|
|
229
|
-
),
|
|
230
|
-
}, verbose=verbose)
|
|
147
|
+
for sensor_name, labels in LABELS.items():
|
|
148
|
+
setattr(
|
|
149
|
+
self,
|
|
150
|
+
sensor_name,
|
|
151
|
+
_SensorAccessor(pd.DataFrame(columns=labels), labels),
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
self.parser: parser.Parser = self._build_parser(verbose=verbose)
|
|
231
155
|
|
|
232
156
|
self.parse()
|
|
233
157
|
self._build_accessors()
|
|
234
158
|
|
|
159
|
+
@classmethod
def _build_parser(cls, verbose: bool = False) -> parser.Parser:
    """Create the packet parser used to decode a recording.

    Scheme-driven payload parsers are generated from the default sensor
    schemes for ``cls.SENSOR_SID``; the microphone SID is then assigned a
    dedicated ``MicPayloadParser`` (replacing whatever entry, if any, the
    scheme-based construction produced for that SID).

    Args:
        verbose: Forwarded to both the stream parser and the microphone
            payload parser.

    Returns:
        The configured ``parser.Parser`` instance.
    """
    built = parser.Parser.from_sensor_schemes(
        sensor_schemes=scheme.build_default_sensor_schemes(cls.SENSOR_SID),
        verbose=verbose,
    )
    # Microphone payloads are handled by their own parser class rather than
    # by a scheme-derived one.
    mic_parser = parser.MicPayloadParser(sample_count=48000, verbose=verbose)
    built.parsers[cls.SENSOR_SID["microphone"]] = mic_parser
    return built
|
|
171
|
+
|
|
235
172
|
def parse(self) -> None:
|
|
236
173
|
"""Parse the binary recording file into structured sensor data."""
|
|
237
174
|
with open(self.filename, "rb") as f:
|
|
@@ -245,10 +182,17 @@ class SensorDataset:
|
|
|
245
182
|
The combined DataFrame over all sensors is built lazily in
|
|
246
183
|
:meth:`get_dataframe`.
|
|
247
184
|
"""
|
|
185
|
+
self.audio_stereo = self.parse_result.audio_stereo
|
|
186
|
+
self.audio_df = pd.DataFrame()
|
|
187
|
+
self._audio_df_sampling_rate = None
|
|
188
|
+
self.sensor_dfs = {}
|
|
189
|
+
|
|
248
190
|
data_dict = self.parse_result.sensor_dfs
|
|
249
191
|
for name, sid in self.SENSOR_SID.items():
|
|
250
|
-
labels = LABELS.get(name, [
|
|
251
|
-
if
|
|
192
|
+
labels = LABELS.get(name, [])
|
|
193
|
+
if name == "microphone":
|
|
194
|
+
df = self.get_audio_dataframe()
|
|
195
|
+
elif sid in data_dict and isinstance(data_dict[sid], pd.DataFrame):
|
|
252
196
|
df = data_dict[sid]
|
|
253
197
|
df = df[~df.index.duplicated(keep="first")]
|
|
254
198
|
else:
|
|
@@ -263,8 +207,6 @@ class SensorDataset:
|
|
|
263
207
|
# Clear combined dataframe; it will be built lazily on demand
|
|
264
208
|
self.df = pd.DataFrame()
|
|
265
209
|
|
|
266
|
-
self.audio_stereo = self.parse_result.audio_stereo
|
|
267
|
-
|
|
268
210
|
def list_sensors(self) -> List[str]:
|
|
269
211
|
"""Return a list of available sensor names in the dataset."""
|
|
270
212
|
available_sensors = []
|
|
@@ -330,6 +272,64 @@ class SensorDataset:
|
|
|
330
272
|
|
|
331
273
|
return self.df
|
|
332
274
|
|
|
275
|
+
def get_audio_dataframe(self, sampling_rate: int = 48000) -> pd.DataFrame:
    """Return microphone audio as a timestamp-indexed stereo DataFrame.

    The returned DataFrame has:
    - index: ``timestamp`` in seconds
    - columns: ``mic.inner`` and ``mic.outer`` (int16 PCM)

    The result is cached on the instance for the most recently requested
    ``sampling_rate``; asking again with the same rate returns the cached
    object, while a different rate rebuilds it from the parsed microphone
    packets.

    Args:
        sampling_rate: Rate passed to ``parser.mic_packet_to_stereo_frames``
            when expanding each packet into per-frame timestamps.

    Returns:
        The audio DataFrame. It is empty (but still carries both columns and
        the ``timestamp`` index name) when the parse result has no microphone
        packets or none of them yields any frames.

    Raises:
        ValueError: If ``sampling_rate`` is not strictly positive.
    """
    if sampling_rate <= 0:
        raise ValueError(f"sampling_rate must be > 0, got {sampling_rate}")

    if self._audio_df_sampling_rate == sampling_rate:
        return self.audio_df

    timestamps: List[np.ndarray] = []
    stereo_frames: List[np.ndarray] = []

    # A parse result without ``mic_packets`` (or with it set to None) is
    # treated the same as one with no packets.
    for packet in getattr(self.parse_result, "mic_packets", None) or []:
        ts, stereo = parser.mic_packet_to_stereo_frames(
            packet=packet,
            sampling_rate=sampling_rate,
        )
        if stereo.size == 0:
            continue
        timestamps.append(ts)
        stereo_frames.append(stereo)

    has_audio = bool(timestamps)
    if has_audio:
        all_stereo = np.vstack(stereo_frames)
        audio_df = pd.DataFrame(
            {
                "mic.inner": all_stereo[:, 0],
                "mic.outer": all_stereo[:, 1],
            },
            index=np.concatenate(timestamps),
        )
        audio_df.index.name = "timestamp"
        # Keep only the first frame for any repeated timestamp.
        audio_df = audio_df[~audio_df.index.duplicated(keep="first")]
    else:
        # Single empty-result path: no packets at all, or only empty packets.
        audio_df = pd.DataFrame(columns=["mic.inner", "mic.outer"])
        audio_df.index.name = "timestamp"

    # Commit the cache in one place, after the frame was built successfully.
    self.audio_df = audio_df
    self._audio_df_sampling_rate = sampling_rate

    # Only the default-rate, non-empty result is published as the microphone
    # entry of the per-SID DataFrames.
    if has_audio and sampling_rate == 48000:
        self.sensor_dfs[self.SENSOR_SID["microphone"]] = audio_df

    return audio_df
|
|
332
|
+
|
|
333
333
|
def export_csv(self) -> None:
|
|
334
334
|
base_filename, _ = os.path.splitext(self.filename)
|
|
335
335
|
self.save_csv(base_filename + ".csv")
|