audex 1.0.7a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- audex/__init__.py +9 -0
- audex/__main__.py +7 -0
- audex/cli/__init__.py +189 -0
- audex/cli/apis/__init__.py +12 -0
- audex/cli/apis/init/__init__.py +34 -0
- audex/cli/apis/init/gencfg.py +130 -0
- audex/cli/apis/init/setup.py +330 -0
- audex/cli/apis/init/vprgroup.py +125 -0
- audex/cli/apis/serve.py +141 -0
- audex/cli/args.py +356 -0
- audex/cli/exceptions.py +44 -0
- audex/cli/helper/__init__.py +0 -0
- audex/cli/helper/ansi.py +193 -0
- audex/cli/helper/display.py +288 -0
- audex/config/__init__.py +64 -0
- audex/config/core/__init__.py +30 -0
- audex/config/core/app.py +29 -0
- audex/config/core/audio.py +45 -0
- audex/config/core/logging.py +163 -0
- audex/config/core/session.py +11 -0
- audex/config/helper/__init__.py +1 -0
- audex/config/helper/client/__init__.py +1 -0
- audex/config/helper/client/http.py +28 -0
- audex/config/helper/client/websocket.py +21 -0
- audex/config/helper/provider/__init__.py +1 -0
- audex/config/helper/provider/dashscope.py +13 -0
- audex/config/helper/provider/unisound.py +18 -0
- audex/config/helper/provider/xfyun.py +23 -0
- audex/config/infrastructure/__init__.py +31 -0
- audex/config/infrastructure/cache.py +51 -0
- audex/config/infrastructure/database.py +48 -0
- audex/config/infrastructure/recorder.py +32 -0
- audex/config/infrastructure/store.py +19 -0
- audex/config/provider/__init__.py +18 -0
- audex/config/provider/transcription.py +109 -0
- audex/config/provider/vpr.py +99 -0
- audex/container.py +40 -0
- audex/entity/__init__.py +468 -0
- audex/entity/doctor.py +109 -0
- audex/entity/doctor.pyi +51 -0
- audex/entity/fields.py +401 -0
- audex/entity/segment.py +115 -0
- audex/entity/segment.pyi +38 -0
- audex/entity/session.py +133 -0
- audex/entity/session.pyi +47 -0
- audex/entity/utterance.py +142 -0
- audex/entity/utterance.pyi +48 -0
- audex/entity/vp.py +68 -0
- audex/entity/vp.pyi +35 -0
- audex/exceptions.py +157 -0
- audex/filters/__init__.py +692 -0
- audex/filters/generated/__init__.py +21 -0
- audex/filters/generated/doctor.py +987 -0
- audex/filters/generated/segment.py +723 -0
- audex/filters/generated/session.py +978 -0
- audex/filters/generated/utterance.py +939 -0
- audex/filters/generated/vp.py +815 -0
- audex/helper/__init__.py +1 -0
- audex/helper/hash.py +33 -0
- audex/helper/mixin.py +65 -0
- audex/helper/net.py +19 -0
- audex/helper/settings/__init__.py +830 -0
- audex/helper/settings/fields.py +317 -0
- audex/helper/stream.py +153 -0
- audex/injectors/__init__.py +1 -0
- audex/injectors/config.py +12 -0
- audex/injectors/lifespan.py +7 -0
- audex/lib/__init__.py +1 -0
- audex/lib/cache/__init__.py +383 -0
- audex/lib/cache/inmemory.py +513 -0
- audex/lib/database/__init__.py +83 -0
- audex/lib/database/sqlite.py +406 -0
- audex/lib/exporter.py +189 -0
- audex/lib/injectors/__init__.py +1 -0
- audex/lib/injectors/cache.py +25 -0
- audex/lib/injectors/container.py +47 -0
- audex/lib/injectors/exporter.py +26 -0
- audex/lib/injectors/recorder.py +33 -0
- audex/lib/injectors/server.py +17 -0
- audex/lib/injectors/session.py +18 -0
- audex/lib/injectors/sqlite.py +24 -0
- audex/lib/injectors/store.py +13 -0
- audex/lib/injectors/transcription.py +42 -0
- audex/lib/injectors/usb.py +12 -0
- audex/lib/injectors/vpr.py +65 -0
- audex/lib/injectors/wifi.py +7 -0
- audex/lib/recorder.py +844 -0
- audex/lib/repos/__init__.py +149 -0
- audex/lib/repos/container.py +23 -0
- audex/lib/repos/database/__init__.py +1 -0
- audex/lib/repos/database/sqlite.py +672 -0
- audex/lib/repos/decorators.py +74 -0
- audex/lib/repos/doctor.py +286 -0
- audex/lib/repos/segment.py +302 -0
- audex/lib/repos/session.py +285 -0
- audex/lib/repos/tables/__init__.py +70 -0
- audex/lib/repos/tables/doctor.py +137 -0
- audex/lib/repos/tables/segment.py +113 -0
- audex/lib/repos/tables/session.py +140 -0
- audex/lib/repos/tables/utterance.py +131 -0
- audex/lib/repos/tables/vp.py +102 -0
- audex/lib/repos/utterance.py +288 -0
- audex/lib/repos/vp.py +286 -0
- audex/lib/restful.py +251 -0
- audex/lib/server/__init__.py +97 -0
- audex/lib/server/auth.py +98 -0
- audex/lib/server/handlers.py +248 -0
- audex/lib/server/templates/index.html.j2 +226 -0
- audex/lib/server/templates/login.html.j2 +111 -0
- audex/lib/server/templates/static/script.js +68 -0
- audex/lib/server/templates/static/style.css +579 -0
- audex/lib/server/types.py +123 -0
- audex/lib/session.py +503 -0
- audex/lib/store/__init__.py +238 -0
- audex/lib/store/localfile.py +411 -0
- audex/lib/transcription/__init__.py +33 -0
- audex/lib/transcription/dashscope.py +525 -0
- audex/lib/transcription/events.py +62 -0
- audex/lib/usb.py +554 -0
- audex/lib/vpr/__init__.py +38 -0
- audex/lib/vpr/unisound/__init__.py +185 -0
- audex/lib/vpr/unisound/types.py +469 -0
- audex/lib/vpr/xfyun/__init__.py +483 -0
- audex/lib/vpr/xfyun/types.py +679 -0
- audex/lib/websocket/__init__.py +8 -0
- audex/lib/websocket/connection.py +485 -0
- audex/lib/websocket/pool.py +991 -0
- audex/lib/wifi.py +1146 -0
- audex/lifespan.py +75 -0
- audex/service/__init__.py +27 -0
- audex/service/decorators.py +73 -0
- audex/service/doctor/__init__.py +652 -0
- audex/service/doctor/const.py +36 -0
- audex/service/doctor/exceptions.py +96 -0
- audex/service/doctor/types.py +54 -0
- audex/service/export/__init__.py +236 -0
- audex/service/export/const.py +17 -0
- audex/service/export/exceptions.py +34 -0
- audex/service/export/types.py +21 -0
- audex/service/injectors/__init__.py +1 -0
- audex/service/injectors/container.py +53 -0
- audex/service/injectors/doctor.py +34 -0
- audex/service/injectors/export.py +27 -0
- audex/service/injectors/session.py +49 -0
- audex/service/session/__init__.py +754 -0
- audex/service/session/const.py +34 -0
- audex/service/session/exceptions.py +67 -0
- audex/service/session/types.py +91 -0
- audex/types.py +39 -0
- audex/utils.py +287 -0
- audex/valueobj/__init__.py +81 -0
- audex/valueobj/common/__init__.py +1 -0
- audex/valueobj/common/auth.py +84 -0
- audex/valueobj/common/email.py +16 -0
- audex/valueobj/common/ops.py +22 -0
- audex/valueobj/common/phone.py +84 -0
- audex/valueobj/common/version.py +72 -0
- audex/valueobj/session.py +19 -0
- audex/valueobj/utterance.py +15 -0
- audex/view/__init__.py +51 -0
- audex/view/container.py +17 -0
- audex/view/decorators.py +303 -0
- audex/view/pages/__init__.py +1 -0
- audex/view/pages/dashboard/__init__.py +286 -0
- audex/view/pages/dashboard/wifi.py +407 -0
- audex/view/pages/login.py +110 -0
- audex/view/pages/recording.py +348 -0
- audex/view/pages/register.py +202 -0
- audex/view/pages/sessions/__init__.py +196 -0
- audex/view/pages/sessions/details.py +224 -0
- audex/view/pages/sessions/export.py +443 -0
- audex/view/pages/settings.py +374 -0
- audex/view/pages/voiceprint/__init__.py +1 -0
- audex/view/pages/voiceprint/enroll.py +195 -0
- audex/view/pages/voiceprint/update.py +195 -0
- audex/view/static/css/dashboard.css +452 -0
- audex/view/static/css/glass.css +22 -0
- audex/view/static/css/global.css +541 -0
- audex/view/static/css/login.css +386 -0
- audex/view/static/css/recording.css +439 -0
- audex/view/static/css/register.css +293 -0
- audex/view/static/css/sessions/styles.css +501 -0
- audex/view/static/css/settings.css +186 -0
- audex/view/static/css/voiceprint/enroll.css +43 -0
- audex/view/static/css/voiceprint/styles.css +209 -0
- audex/view/static/css/voiceprint/update.css +44 -0
- audex/view/static/images/logo.svg +95 -0
- audex/view/static/js/recording.js +42 -0
- audex-1.0.7a3.dist-info/METADATA +361 -0
- audex-1.0.7a3.dist-info/RECORD +192 -0
- audex-1.0.7a3.dist-info/WHEEL +4 -0
- audex-1.0.7a3.dist-info/entry_points.txt +3 -0
audex/lib/recorder.py
ADDED
|
@@ -0,0 +1,844 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import datetime
|
|
5
|
+
import enum
|
|
6
|
+
import io
|
|
7
|
+
import typing as t
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import numpy.typing as npt
|
|
11
|
+
import pyaudio
|
|
12
|
+
import pydub
|
|
13
|
+
|
|
14
|
+
from audex import utils
|
|
15
|
+
from audex.helper.mixin import AsyncContextMixin
|
|
16
|
+
from audex.helper.mixin import LoggingMixin
|
|
17
|
+
from audex.lib.store import Store
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AudioFormat(str, enum.Enum):
    """Encodings the recorder can emit.

    Members also subclass ``str``, so each one compares equal to its
    lowercase value (for example ``AudioFormat.WAV == "wav"``).
    """

    PCM = "pcm"  # raw samples, no container
    WAV = "wav"
    MP3 = "mp3"
    OPUS = "opus"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AudioConfig(t.NamedTuple):
    """Capture parameters for the microphone stream.

    Attributes:
        format: PyAudio sample-format constant; defaults to ``pyaudio.paInt16``.
        channels: Channel count (1 for mono, 2 for stereo).
        rate: Sampling rate in Hz, such as 16000, 44100 or 48000.
        chunk: Frames delivered per buffer.
        input_device_index: Input device to open; ``None`` selects the default.
    """

    format: int = pyaudio.paInt16
    channels: int = 1
    rate: int = 16000
    chunk: int = 1024
    input_device_index: int | None = None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class AudioFrame:
    """One captured buffer of audio together with its capture time.

    ``__slots__`` is declared so instances carry no per-object ``__dict__``.

    Attributes:
        timestamp: Moment the frame was captured.
        data: The frame's raw audio bytes.
    """

    __slots__ = ("data", "timestamp")

    def __init__(self, timestamp: datetime.datetime, data: bytes) -> None:
        """Store the capture time and the raw bytes."""
        self.timestamp = timestamp
        self.data = data
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AudioSegment(t.NamedTuple):
    """Immutable description of one finished recording.

    Attributes:
        key: Storage key under which the audio was saved.
        duration_ms: Length of the recording in milliseconds.
        started_at: When the recording began.
        ended_at: When the recording finished.
        frames: The recorded audio as raw bytes.
    """

    key: str
    duration_ms: int
    started_at: datetime.datetime
    ended_at: datetime.datetime
    frames: bytes
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class AudioRecorder(LoggingMixin, AsyncContextMixin):
|
|
83
|
+
"""High-performance audio recorder using PyAudio with real-time
|
|
84
|
+
streaming.
|
|
85
|
+
|
|
86
|
+
This recorder captures audio from a microphone and can start/stop
|
|
87
|
+
recording multiple times, creating separate audio segments for each
|
|
88
|
+
recording session. Audio data is automatically uploaded to the
|
|
89
|
+
configured Store.
|
|
90
|
+
|
|
91
|
+
Features:
|
|
92
|
+
- Real-time audio streaming with async generators
|
|
93
|
+
- Multiple output format support (PCM, WAV, MP3, OPUS)
|
|
94
|
+
- Efficient numpy-based audio processing
|
|
95
|
+
- Non-blocking streaming while recording
|
|
96
|
+
- Time-based segment extraction
|
|
97
|
+
- Dynamic dtype handling based on AudioConfig
|
|
98
|
+
|
|
99
|
+
Attributes:
|
|
100
|
+
store: Storage backend for uploading audio files.
|
|
101
|
+
config: Audio recording configuration.
|
|
102
|
+
|
|
103
|
+
Example:
|
|
104
|
+
```python
|
|
105
|
+
recorder = AudioRecorder(
|
|
106
|
+
store=local_store,
|
|
107
|
+
config=AudioConfig(
|
|
108
|
+
format=pyaudio.paInt16,
|
|
109
|
+
channels=1,
|
|
110
|
+
rate=16000,
|
|
111
|
+
chunk=1024,
|
|
112
|
+
),
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
await recorder.init()
|
|
116
|
+
await recorder.start("session-123", "segment")
|
|
117
|
+
|
|
118
|
+
# Stream audio chunks in real-time
|
|
119
|
+
async for chunk in recorder.stream(
|
|
120
|
+
chunk_size=16000, # 1 second chunks
|
|
121
|
+
format=AudioFormat.MP3,
|
|
122
|
+
):
|
|
123
|
+
await send_to_api(chunk)
|
|
124
|
+
|
|
125
|
+
segment = await recorder.stop()
|
|
126
|
+
await recorder.close()
|
|
127
|
+
```
|
|
128
|
+
"""
|
|
129
|
+
|
|
130
|
+
__logtag__ = "audex.lib.audio.recorder"
|
|
131
|
+
|
|
132
|
+
# Mapping PyAudio format to numpy dtype and sample width
|
|
133
|
+
_FORMAT_MAP: t.ClassVar[dict[object, tuple[npt.DTypeLike, int]]] = {
|
|
134
|
+
pyaudio.paInt8: (np.int8, 1),
|
|
135
|
+
pyaudio.paInt16: (np.int16, 2),
|
|
136
|
+
pyaudio.paInt24: (np.int32, 3), # Note: 24-bit stored in 32-bit container
|
|
137
|
+
pyaudio.paInt32: (np.int32, 4),
|
|
138
|
+
pyaudio.paFloat32: (np.float32, 4),
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
def __init__(self, store: Store, config: AudioConfig | None = None):
    """Set up recorder state without touching any audio hardware.

    Args:
        store: Storage backend kept on ``self.store``.
        config: Capture settings; a default ``AudioConfig()`` is used when
            this is ``None``.

    Raises:
        ValueError: If ``config.format`` has no entry in ``_FORMAT_MAP``.
    """
    super().__init__()
    self.store = store
    self.config = config or AudioConfig()

    # Resolve the numpy dtype and byte width that match the sample format.
    sample_format = self.config.format
    if sample_format not in self._FORMAT_MAP:
        raise ValueError(f"Unsupported audio format: {self.config.format}")
    self._numpy_dtype, self._sample_width = self._FORMAT_MAP[sample_format]

    # PyAudio handles stay unset until init() / start() create them.
    self._audio: pyaudio.PyAudio | None = None
    self._stream: pyaudio.Stream | None = None

    # Captured buffers and their capture times live in two parallel lists.
    self._frames_data: list[npt.NDArray[t.Any]] = []
    self._frames_timestamps: list[datetime.datetime] = []

    # State of the segment currently being recorded.
    self._is_recording = False
    self._current_key: str | None = None
    self._started_at: datetime.datetime | None = None

    # Streaming cursor (counted in samples) and the lock that guards it.
    self._stream_position: int = 0
    self._stream_lock = asyncio.Lock()

    self.logger.debug(
        f"Initialized with dtype={self._numpy_dtype}, sample_width={self._sample_width}"
    )
|
|
170
|
+
|
|
171
|
+
async def init(self) -> None:
    """Create the PyAudio instance and log every input-capable device.

    Raises:
        Exception: Whatever PyAudio raises if the audio system cannot be
            initialized or queried.
    """
    self._audio = pyaudio.PyAudio()
    self.logger.info("Audio system initialized")

    # Enumerate devices purely for diagnostics.
    device_count = self._audio.get_device_count()
    self.logger.debug(f"Found {device_count} audio devices")

    for i in range(device_count):
        device_info = self._audio.get_device_info_by_index(i)
        if device_info["maxInputChannels"] <= 0:
            # Output-only device: nothing to record from.
            continue
        self.logger.debug(
            f"Input device {i}: {device_info['name']} "
            f"(channels: {device_info['maxInputChannels']}, "
            f"rate: {device_info['defaultSampleRate']})"
        )
|
|
194
|
+
|
|
195
|
+
async def close(self) -> None:
    """Close the audio system and release resources.

    Stops any active recording, then closes the open stream and terminates
    the PyAudio instance. The stream and the PyAudio handle are released
    even when ``stop()`` (or closing the stream) raises; the exception is
    still propagated to the caller afterwards.

    Raises:
        Exception: Whatever ``stop()`` or the PyAudio teardown raises.
    """
    try:
        if self._is_recording:
            await self.stop()
    finally:
        # Teardown lives in ``finally`` so a failing stop() cannot leak the
        # native stream or the PyAudio instance.
        try:
            if self._stream is not None:
                try:
                    self._stream.stop_stream()
                    self._stream.close()
                finally:
                    self._stream = None
        finally:
            if self._audio is not None:
                try:
                    self._audio.terminate()
                finally:
                    self._audio = None

    self.logger.info("Audio system closed")
|
|
213
|
+
|
|
214
|
+
@property
def is_recording(self) -> bool:
    """Whether a recording is in progress right now."""
    return self._is_recording
|
|
218
|
+
|
|
219
|
+
@property
def current_segment_key(self) -> str | None:
    """Storage key of the segment being recorded, or ``None`` if unset."""
    return self._current_key
|
|
223
|
+
|
|
224
|
+
async def start(self, *prefixes: str) -> str:
    """Start a new recording segment.

    Builds a fresh storage key, clears the buffers left over from any
    previous segment, then opens and starts a callback-driven input stream.
    If opening or starting the stream fails, the recorder is rolled back to
    the "not recording" state and a stream that was already opened is
    closed before the error is re-raised.

    Args:
        *prefixes: Prefix parts for the storage key.

    Returns:
        The full storage key for this segment.

    Raises:
        RuntimeError: If already recording or audio system not initialized.
        Exception: Whatever PyAudio raises when the stream cannot be opened
            or started.
    """
    if self._is_recording:
        raise RuntimeError("Already recording")

    if self._audio is None:
        raise RuntimeError("Audio system not initialized. Call init() first.")

    # Generate unique key
    segment_id = utils.gen_id(prefix="")
    self._current_key = self.store.key_builder.build(*prefixes, f"{segment_id}.wav")
    self._frames_data.clear()
    self._frames_timestamps.clear()
    self._stream_position = 0
    self._started_at = utils.utcnow()

    stream = None
    try:
        # Open audio stream
        stream = self._audio.open(
            format=self.config.format,
            channels=self.config.channels,
            rate=self.config.rate,
            input=True,
            frames_per_buffer=self.config.chunk,
            input_device_index=self.config.input_device_index,
            stream_callback=self._audio_callback,
        )
        self._stream = stream

        # The callback drops frames while this flag is False, so raise it
        # before asking the stream to start.
        self._is_recording = True
        stream.start_stream()
    except Exception:
        # Roll back: without this a failed start leaves the recorder stuck
        # in "Already recording" and leaks the native stream.
        self._is_recording = False
        if stream is not None:
            if self._stream is stream:
                self._stream = None
            try:
                stream.close()
            except Exception as close_error:  # best-effort cleanup only
                self.logger.warning(f"Failed to close audio stream: {close_error}")
        raise

    self.logger.info(f"Started recording to {self._current_key}")
    return self._current_key
|
|
266
|
+
|
|
267
|
+
def _audio_callback(
    self,
    in_data: bytes | None,
    _frame_count: int,
    _time_info: t.Mapping[str, float],
    _status_flags: int,
) -> tuple[None, int]:
    """Receive one captured buffer from PyAudio and append it to the recording.

    The buffer is decoded into a numpy array using the dtype chosen from
    the config and stored together with its capture timestamp. Buffers
    that are empty, or that arrive while not recording, are ignored.

    Returns:
        ``(None, pyaudio.paContinue)`` in every case.
    """
    if not in_data or not self._is_recording:
        return None, pyaudio.paContinue

    captured_at = utils.utcnow()

    if self.config.format == pyaudio.paInt24:
        # Packed 3-byte samples have no numpy dtype; widen them to int32.
        samples = self._unpack_24bit(in_data)
    else:
        samples = np.frombuffer(in_data, dtype=self._numpy_dtype)

    self._frames_data.append(samples)  # type: ignore
    self._frames_timestamps.append(captured_at)
    return None, pyaudio.paContinue
|
|
292
|
+
|
|
293
|
+
def _unpack_24bit(self, data: bytes) -> npt.NDArray[np.int32]:
    """Unpack packed little-endian 24-bit samples into an int32 array.

    The conversion is vectorized because this runs once per buffer on the
    audio callback path. Trailing bytes that do not form a complete 3-byte
    sample are ignored.

    Args:
        data: 24-bit packed audio data (3 bytes per sample, little-endian).

    Returns:
        One sign-extended ``int32`` value per complete input sample.
    """
    num_samples = len(data) // 3
    if num_samples == 0:
        return np.zeros(0, dtype=np.int32)

    # View the input as rows of three bytes, widened so the shifts fit.
    triplets = (
        np.frombuffer(data, dtype=np.uint8, count=num_samples * 3)
        .reshape(num_samples, 3)
        .astype(np.int32)
    )
    values = triplets[:, 0] | (triplets[:, 1] << 8) | (triplets[:, 2] << 16)

    # Sign-extend from bit 23: flipping the sign bit and subtracting its
    # weight maps 0x800000..0xFFFFFF onto -8388608..-1.
    return (values ^ 0x800000) - 0x800000
|
|
322
|
+
|
|
323
|
+
def _pack_24bit(self, data: npt.ArrayLike) -> bytes:
    """Pack integer samples into little-endian 24-bit audio data.

    Values outside the signed 24-bit range are clipped first. The packing
    is vectorized: each sample is stored as a little-endian 32-bit integer
    and its most significant byte is dropped.

    Args:
        data: Samples to pack (for example an ``int32`` numpy array).

    Returns:
        24-bit packed audio data, 3 bytes per sample.
    """
    # Clip to 24-bit range
    clipped = np.clip(np.asarray(data), -8388608, 8388607)

    # "<i4" fixes the byte order, so the first three bytes of every sample
    # are its 24-bit two's-complement value on any platform.
    as_bytes = clipped.astype("<i4").ravel().view(np.uint8).reshape(-1, 4)
    return as_bytes[:, :3].tobytes()
|
|
343
|
+
|
|
344
|
+
def _find_frame_index(self, target_time: datetime.datetime) -> int:
    """Locate the frame whose timestamp is at or just before ``target_time``.

    Args:
        target_time: Timestamp to look up.

    Returns:
        ``0`` when no frames exist or the target is at/before the first
        timestamp; the last index when the target is at/after the last
        timestamp; otherwise the index of an exact match, or of the latest
        frame that precedes the target.
    """
    stamps = self._frames_timestamps
    if not stamps:
        return 0

    last = len(stamps) - 1

    # Targets outside the recorded range clamp to the nearest end.
    if target_time <= stamps[0]:
        return 0
    if target_time >= stamps[last]:
        return last

    # Bisect the (time-ordered) timestamps.
    lo, hi = 0, last
    while lo <= hi:
        pivot = (lo + hi) // 2
        probe = stamps[pivot]
        if probe == target_time:
            return pivot
        if probe < target_time:
            lo = pivot + 1
        else:
            hi = pivot - 1

    # ``hi`` now points at the last timestamp smaller than the target.
    return max(hi, 0)
|
|
377
|
+
|
|
378
|
+
def _resample_audio_numpy(
    self,
    audio_data: npt.NDArray[t.Any],
    src_rate: int,
    dst_rate: int,
    src_channels: int,
    dst_channels: int,
) -> npt.NDArray[t.Any]:
    """Convert channel count and sample rate with linear interpolation.

    Signed integer PCM (8/16/32-bit) is processed in float64 and converted
    back with rounding and clipping, so full-scale 32-bit samples survive
    the round trip. Floating-point input keeps its original dtype.

    Channel conversion supports any multi-channel layout to mono (channels
    are averaged) and mono to any channel count (the channel is
    duplicated). Other combinations are left unchanged.

    Args:
        audio_data: Interleaved input samples as a flat numpy array.
        src_rate: Source sample rate.
        dst_rate: Destination sample rate.
        src_channels: Source number of channels.
        dst_channels: Destination number of channels.

    Returns:
        Flat, interleaved array with the same dtype as the input for
        8/16/32-bit signed integer and floating-point input.
    """
    original_dtype = audio_data.dtype
    is_pcm_int = bool(
        np.issubdtype(original_dtype, np.signedinteger) and original_dtype.itemsize <= 4
    )
    is_float = bool(np.issubdtype(original_dtype, np.floating))

    full_scale = 1.0
    if is_pcm_int:
        limits = np.iinfo(original_dtype)
        # 128 / 32768 / 2**31. float64 is used because float32 cannot
        # represent every int32 value and overflows at full scale.
        full_scale = float(-int(limits.min))
        audio_data = audio_data.astype(np.float64) / full_scale

    # One row per frame, one column per channel.
    audio_data = audio_data.reshape(-1, max(src_channels, 1))

    # Channel conversion
    if src_channels != dst_channels:
        if dst_channels == 1 and src_channels > 1:
            # Downmix to mono: average channels
            audio_data = audio_data.mean(axis=1, keepdims=True)
        elif src_channels == 1 and dst_channels > 1:
            # Mono to multi-channel: duplicate the channel
            audio_data = np.repeat(audio_data, dst_channels, axis=1)

    # Sample rate conversion using numpy interpolation
    if src_rate != dst_rate:
        num_frames = audio_data.shape[0]
        # Integer arithmetic avoids off-by-one results from float division.
        new_num_frames = num_frames * dst_rate // src_rate

        positions = np.arange(new_num_frames) * (src_rate / dst_rate)
        lower = np.minimum(positions.astype(np.intp), num_frames - 1)
        upper = np.minimum(lower + 1, num_frames - 1)
        frac = (positions - lower).reshape(-1, 1)

        audio_data = audio_data[lower] * (1 - frac) + audio_data[upper] * frac

    # Convert back to original dtype
    if is_pcm_int:
        audio_data = (
            np.rint(audio_data * full_scale)
            .clip(limits.min, limits.max)
            .astype(original_dtype)
        )
    elif is_float:
        # Interpolation promotes to float64; restore e.g. float32 so later
        # stages see the dtype they were configured for.
        audio_data = audio_data.astype(original_dtype, copy=False)

    return audio_data.flatten()
|
|
454
|
+
|
|
455
|
+
def _to_pydub_segment(
    self,
    audio_data: npt.NDArray[t.Any],
    sample_rate: int,
    channels: int,
) -> pydub.AudioSegment:
    """Convert a numpy sample array to a pydub AudioSegment.

    The raw bytes handed to pydub always match the declared sample width:

    * 24-bit recordings (held in int32 containers) are clipped to the
      24-bit range and shifted into the upper bits of 32-bit samples.
      Passing 3-byte packed data while declaring 4-byte samples would
      misalign every sample.
    * Floating-point data is converted to 16-bit PCM.
    * int8 / int16 / int32 data is passed through unchanged.

    Args:
        audio_data: Audio data as numpy array.
        sample_rate: Sample rate in Hz.
        channels: Number of channels.

    Returns:
        pydub AudioSegment.

    Raises:
        ValueError: If the array dtype is not one of the supported types.
    """
    dtype = audio_data.dtype

    if self.config.format == pyaudio.paInt24:
        widened = np.clip(audio_data, -8388608, 8388607).astype(np.int32) << 8
        raw_data = widened.tobytes()
        sample_width = 4
    elif np.issubdtype(dtype, np.floating):
        # Convert float samples to int16 for pydub
        pcm16 = (audio_data * 32768.0).clip(-32768, 32767).astype(np.int16)
        raw_data = pcm16.tobytes()
        sample_width = 2
    elif dtype in (np.int8, np.int16, np.int32):
        raw_data = audio_data.tobytes()
        sample_width = dtype.itemsize
    else:
        raise ValueError(f"Unsupported numpy dtype: {audio_data.dtype}")

    # Create pydub AudioSegment
    return pydub.AudioSegment(
        data=raw_data,
        sample_width=sample_width,
        frame_rate=sample_rate,
        channels=channels,
    )
|
|
499
|
+
|
|
500
|
+
def _encode_audio(
    self,
    audio_data: npt.NDArray[t.Any],
    sample_rate: int,
    channels: int,
    output_format: AudioFormat,
) -> bytes:
    """Serialize samples into the requested output format.

    PCM is returned directly as raw bytes (re-packed to 3 bytes per sample
    for 24-bit recordings). Every other format is exported through pydub.

    Args:
        audio_data: Audio data as numpy array.
        sample_rate: Sample rate in Hz.
        channels: Number of channels.
        output_format: Target audio format.

    Returns:
        Encoded audio data.

    Raises:
        ValueError: If ``output_format`` is not a supported format.
    """
    if output_format == AudioFormat.PCM:
        if self.config.format == pyaudio.paInt24:
            return self._pack_24bit(audio_data)
        return audio_data.tobytes()

    pydub_audio = self._to_pydub_segment(audio_data, sample_rate, channels)

    def _export(**options: t.Any) -> bytes:
        # Run one pydub export into memory and hand back the bytes.
        sink = io.BytesIO()
        pydub_audio.export(sink, **options)
        return sink.getvalue()

    if output_format == AudioFormat.WAV:
        return _export(format="wav")

    if output_format == AudioFormat.MP3:
        encoded = _export(
            format="mp3",
            bitrate="128k",
            parameters=["-q:a", "2"],  # High quality
        )
        self.logger.debug(f"Encoded to MP3: {len(encoded)} bytes")
        return encoded

    if output_format == AudioFormat.OPUS:
        encoded = _export(
            format="opus",
            codec="libopus",
            parameters=["-b:a", "64k"],
        )
        self.logger.debug(f"Encoded to OPUS: {len(encoded)} bytes")
        return encoded

    raise ValueError(f"Unsupported format: {output_format}")
|
|
561
|
+
|
|
562
|
+
async def stream(
    self,
    chunk_size: int | None = None,
    format: AudioFormat = AudioFormat.PCM,
    channels: int | None = None,
    rate: int | None = None,
) -> t.AsyncGenerator[bytes, None]:
    """Stream audio chunks in real-time while recording.

    This does NOT affect the recording buffer. You can stream and
    record simultaneously. Each chunk is cut from the frames that cover
    it, so the cost of a chunk does not grow with the length of the
    recording. The generator ends when recording stops; samples that do
    not fill a whole chunk by then are not yielded.

    Args:
        chunk_size: Number of samples per chunk. None = config.chunk.
        format: Output audio format.
        channels: Target channels. None = config.channels.
        rate: Target sample rate. None = config.rate.

    Yields:
        Audio chunks in specified format.

    Raises:
        ValueError: If the effective chunk size is not positive.

    Example:
        ```python
        # Stream 1-second MP3 chunks
        async for chunk in recorder.stream(
            chunk_size=16000, format=AudioFormat.MP3
        ):
            await send_to_server(chunk)
        ```
    """
    if not self._is_recording:
        self.logger.warning("Cannot stream: not recording")
        return

    chunk_size = chunk_size or self.config.chunk
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    target_channels = channels or self.config.channels
    target_rate = rate or self.config.rate

    self.logger.info(
        f"Started streaming: chunk_size={chunk_size}, format={format.value}, "
        f"rate={target_rate}, channels={target_channels}"
    )

    # Remembers which frame holds the shared stream position so each poll
    # resumes from there instead of rescanning the whole recording.
    cursor: dict[str, t.Any] = {"epoch": self._started_at, "frame": 0, "base": 0}

    while self._is_recording:
        async with self._stream_lock:
            chunk_audio = self._take_stream_chunk(chunk_size, cursor)

        if chunk_audio is None:
            # Not enough data yet; wait outside the lock so other
            # consumers are not blocked while this one sleeps.
            await asyncio.sleep(0.01)  # 10ms
            continue

        # Process audio (outside lock for performance)
        if target_rate != self.config.rate or target_channels != self.config.channels:
            chunk_audio = self._resample_audio_numpy(
                chunk_audio,
                src_rate=self.config.rate,
                dst_rate=target_rate,
                src_channels=self.config.channels,
                dst_channels=target_channels,
            )

        # Encode to target format
        encoded_chunk = self._encode_audio(
            chunk_audio,
            sample_rate=target_rate,
            channels=target_channels,
            output_format=format,
        )

        yield encoded_chunk


def _take_stream_chunk(
    self,
    chunk_size: int,
    cursor: dict[str, t.Any],
) -> npt.NDArray[t.Any] | None:
    """Cut the next ``chunk_size`` samples at the stream position, if available.

    Args:
        chunk_size: Number of samples to return (must be positive).
        cursor: Per-consumer cache with keys ``epoch`` (the ``_started_at``
            value it was built for), ``frame`` (index of the frame that
            contains the stream position) and ``base`` (sample offset at
            which that frame starts). Updated in place.

    Returns:
        The next chunk, or ``None`` when fewer than ``chunk_size`` samples
        are buffered. On success ``_stream_position`` advances by
        ``chunk_size``.
    """
    frames = self._frames_data
    position = self._stream_position

    # start() assigns a new ``_started_at`` and resets the position, which
    # invalidates any cached frame offsets.
    if cursor["epoch"] is not self._started_at or position < cursor["base"]:
        cursor.update(epoch=self._started_at, frame=0, base=0)

    index, base = cursor["frame"], cursor["base"]
    frame_count = len(frames)

    # Skip frames that lie entirely before the stream position.
    while index < frame_count and base + len(frames[index]) <= position:
        base += len(frames[index])
        index += 1
    cursor["frame"], cursor["base"] = index, base

    # Collect slices from consecutive frames until the chunk is full.
    pieces = []
    gathered = 0
    offset = position - base
    while index < frame_count and gathered < chunk_size:
        piece = frames[index][offset : offset + chunk_size - gathered]
        pieces.append(piece)
        gathered += len(piece)
        offset = 0
        index += 1

    if gathered < chunk_size:
        return None

    self._stream_position = position + chunk_size
    return pieces[0] if len(pieces) == 1 else np.concatenate(pieces)
|
|
649
|
+
|
|
650
|
+
async def segment(
    self,
    started_at: datetime.datetime,
    ended_at: datetime.datetime,
    *,
    channels: int | None = None,
    rate: int | None = None,
    format: AudioFormat = AudioFormat.PCM,
) -> bytes:
    """Cut the buffered recording between two timestamps and encode it.

    Both timestamps are mapped to buffered frame indices with
    ``_find_frame_index``; every frame from the first index through the
    last one (inclusive) is joined, so the cut is made on whole frames.

    Args:
        started_at: Timestamp where the extracted audio begins.
        ended_at: Timestamp where the extracted audio ends.
        channels: Desired channel count; a falsy value falls back to
            ``config.channels``.
        rate: Desired sample rate; a falsy value falls back to
            ``config.rate``.
        format: Output format (PCM, WAV, MP3, OPUS).

    Returns:
        The selected audio encoded in ``format``.

    Raises:
        RuntimeError: If the audio system has not been initialized.
        ValueError: If ``ended_at`` precedes ``started_at`` or no frames
            have been buffered.
    """
    if self._audio is None:
        raise RuntimeError("Audio system not initialized")

    if ended_at < started_at:
        raise ValueError(
            f"End time ({ended_at.isoformat()}) must be after "
            f"start time ({started_at.isoformat()})"
        )

    if not self._frames_data:
        raise ValueError("No audio frames available")

    out_channels = channels if channels else self.config.channels
    out_rate = rate if rate else self.config.rate

    # Map both timestamps onto positions in the frame buffer.
    first = self._find_frame_index(started_at)
    last = self._find_frame_index(ended_at)

    # Both timestamps hit the same frame: widen the window by one frame,
    # clamped to the final buffered frame.
    if first == last:
        last = min(first + 1, len(self._frames_data) - 1)

    self.logger.debug(
        f"Extracting frames {first} to {last} (total: {last - first + 1} frames)"
    )

    # Join the chosen frames into a single numpy array.
    audio = np.concatenate(self._frames_data[first : last + 1])

    # Convert only when the request differs from the capture settings.
    if out_rate != self.config.rate or out_channels != self.config.channels:
        audio = self._resample_audio_numpy(
            audio,
            src_rate=self.config.rate,
            dst_rate=out_rate,
            src_channels=self.config.channels,
            dst_channels=out_channels,
        )
        self.logger.debug(
            f"Resampled: {self.config.rate}Hz {self.config.channels}ch -> "
            f"{out_rate}Hz {out_channels}ch"
        )

    payload = self._encode_audio(
        audio,
        sample_rate=out_rate,
        channels=out_channels,
        output_format=format,
    )

    self.logger.debug(
        f"Created {format.value.upper()} segment: "
        f"{len(payload)} bytes, {out_rate}Hz {out_channels}ch"
    )

    return payload
|
|
733
|
+
|
|
734
|
+
async def stop(self) -> AudioSegment:
    """End the current recording, upload it as WAV, and describe it.

    The input stream (if any) is stopped and closed, all buffered frames
    are joined, a WAV rendering is uploaded to ``self.store`` under the
    current key, and the key and start time are reset. The frame buffer
    itself is left untouched so segments can still be extracted later.

    Returns:
        AudioSegment carrying the storage key, duration in milliseconds,
        start/end timestamps, and the raw recorded bytes.

    Raises:
        RuntimeError: If no recording is in progress, or if there is no
            current segment key to upload under.
    """
    if not self._is_recording:
        raise RuntimeError("Not currently recording")

    self._is_recording = False

    # Shut the input stream down before touching the buffered data.
    if self._stream is not None:
        self._stream.stop_stream()
        self._stream.close()
        self._stream = None

    stopped_at = utils.utcnow()

    # Join every buffered frame into one numpy array.
    recorded = np.concatenate(self._frames_data)

    # Raw byte form: 24-bit samples go through the dedicated packer,
    # every other format is dumped directly.
    raw_bytes = (
        self._pack_24bit(recorded)
        if self.config.format == pyaudio.paInt24
        else recorded.tobytes()
    )

    frame_count = len(self._frames_data)

    # With no recorded start time the duration collapses to zero.
    if self._started_at is None:
        self._started_at = stopped_at

    elapsed = stopped_at - self._started_at
    duration_ms = int(elapsed.total_seconds() * 1000)

    # Render the recording as WAV through pydub.
    wav_buffer = io.BytesIO()
    self._to_pydub_segment(
        recorded,
        sample_rate=self.config.rate,
        channels=self.config.channels,
    ).export(wav_buffer, format="wav")
    wav_data = wav_buffer.getvalue()

    # Upload under the key assigned to this recording.
    key = self._current_key
    if key is None:
        raise RuntimeError("No current segment key")

    upload_metadata = {
        "content_type": "audio/wav",
        "duration_ms": duration_ms,
        "sample_rate": self.config.rate,
        "channels": self.config.channels,
        "started_at": self._started_at.isoformat(),
        "ended_at": stopped_at.isoformat(),
        "frame_count": frame_count,
    }
    await self.store.upload(data=wav_data, key=key, metadata=upload_metadata)

    self.logger.info(
        f"Stopped recording. Duration: {duration_ms}ms, "
        f"Frames: {frame_count}, Size: {len(wav_data)} bytes"
    )

    result = AudioSegment(
        key=key,
        duration_ms=duration_ms,
        started_at=self._started_at,
        ended_at=stopped_at,
        frames=raw_bytes,
    )

    # Reset per-recording state; buffered frames stay for later extraction.
    self._current_key = None
    self._started_at = None

    return result
|
|
820
|
+
|
|
821
|
+
def clear_frames(self) -> None:
    """Empty the frame and timestamp buffers and rewind the stream position.

    Both buffers are cleared in place, and ``_stream_position`` is set
    back to 0.
    """
    for buffer in (self._frames_data, self._frames_timestamps):
        buffer.clear()
    self._stream_position = 0
    self.logger.debug("Cleared all recorded frames from memory")
|
|
827
|
+
|
|
828
|
+
def list_input_devices(self) -> list[dict[str, t.Any]]:
    """Describe every audio device that reports at least one input channel.

    Returns:
        One dict per qualifying device, in device-index order, with the
        keys ``index``, ``name``, ``channels`` and ``default_rate``.

    Raises:
        RuntimeError: If the audio system has not been initialized.
    """
    audio = self._audio
    if audio is None:
        raise RuntimeError("Audio system not initialized")

    # Lazily pair each device index with its info mapping.
    indexed_infos = (
        (index, audio.get_device_info_by_index(index))
        for index in range(audio.get_device_count())
    )

    # Keep only devices that expose input channels.
    return [
        {
            "index": index,
            "name": info["name"],
            "channels": info["maxInputChannels"],
            "default_rate": info["defaultSampleRate"],
        }
        for index, info in indexed_infos
        if info["maxInputChannels"] > 0
    ]
|