audex 1.0.7a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. audex/__init__.py +9 -0
  2. audex/__main__.py +7 -0
  3. audex/cli/__init__.py +189 -0
  4. audex/cli/apis/__init__.py +12 -0
  5. audex/cli/apis/init/__init__.py +34 -0
  6. audex/cli/apis/init/gencfg.py +130 -0
  7. audex/cli/apis/init/setup.py +330 -0
  8. audex/cli/apis/init/vprgroup.py +125 -0
  9. audex/cli/apis/serve.py +141 -0
  10. audex/cli/args.py +356 -0
  11. audex/cli/exceptions.py +44 -0
  12. audex/cli/helper/__init__.py +0 -0
  13. audex/cli/helper/ansi.py +193 -0
  14. audex/cli/helper/display.py +288 -0
  15. audex/config/__init__.py +64 -0
  16. audex/config/core/__init__.py +30 -0
  17. audex/config/core/app.py +29 -0
  18. audex/config/core/audio.py +45 -0
  19. audex/config/core/logging.py +163 -0
  20. audex/config/core/session.py +11 -0
  21. audex/config/helper/__init__.py +1 -0
  22. audex/config/helper/client/__init__.py +1 -0
  23. audex/config/helper/client/http.py +28 -0
  24. audex/config/helper/client/websocket.py +21 -0
  25. audex/config/helper/provider/__init__.py +1 -0
  26. audex/config/helper/provider/dashscope.py +13 -0
  27. audex/config/helper/provider/unisound.py +18 -0
  28. audex/config/helper/provider/xfyun.py +23 -0
  29. audex/config/infrastructure/__init__.py +31 -0
  30. audex/config/infrastructure/cache.py +51 -0
  31. audex/config/infrastructure/database.py +48 -0
  32. audex/config/infrastructure/recorder.py +32 -0
  33. audex/config/infrastructure/store.py +19 -0
  34. audex/config/provider/__init__.py +18 -0
  35. audex/config/provider/transcription.py +109 -0
  36. audex/config/provider/vpr.py +99 -0
  37. audex/container.py +40 -0
  38. audex/entity/__init__.py +468 -0
  39. audex/entity/doctor.py +109 -0
  40. audex/entity/doctor.pyi +51 -0
  41. audex/entity/fields.py +401 -0
  42. audex/entity/segment.py +115 -0
  43. audex/entity/segment.pyi +38 -0
  44. audex/entity/session.py +133 -0
  45. audex/entity/session.pyi +47 -0
  46. audex/entity/utterance.py +142 -0
  47. audex/entity/utterance.pyi +48 -0
  48. audex/entity/vp.py +68 -0
  49. audex/entity/vp.pyi +35 -0
  50. audex/exceptions.py +157 -0
  51. audex/filters/__init__.py +692 -0
  52. audex/filters/generated/__init__.py +21 -0
  53. audex/filters/generated/doctor.py +987 -0
  54. audex/filters/generated/segment.py +723 -0
  55. audex/filters/generated/session.py +978 -0
  56. audex/filters/generated/utterance.py +939 -0
  57. audex/filters/generated/vp.py +815 -0
  58. audex/helper/__init__.py +1 -0
  59. audex/helper/hash.py +33 -0
  60. audex/helper/mixin.py +65 -0
  61. audex/helper/net.py +19 -0
  62. audex/helper/settings/__init__.py +830 -0
  63. audex/helper/settings/fields.py +317 -0
  64. audex/helper/stream.py +153 -0
  65. audex/injectors/__init__.py +1 -0
  66. audex/injectors/config.py +12 -0
  67. audex/injectors/lifespan.py +7 -0
  68. audex/lib/__init__.py +1 -0
  69. audex/lib/cache/__init__.py +383 -0
  70. audex/lib/cache/inmemory.py +513 -0
  71. audex/lib/database/__init__.py +83 -0
  72. audex/lib/database/sqlite.py +406 -0
  73. audex/lib/exporter.py +189 -0
  74. audex/lib/injectors/__init__.py +1 -0
  75. audex/lib/injectors/cache.py +25 -0
  76. audex/lib/injectors/container.py +47 -0
  77. audex/lib/injectors/exporter.py +26 -0
  78. audex/lib/injectors/recorder.py +33 -0
  79. audex/lib/injectors/server.py +17 -0
  80. audex/lib/injectors/session.py +18 -0
  81. audex/lib/injectors/sqlite.py +24 -0
  82. audex/lib/injectors/store.py +13 -0
  83. audex/lib/injectors/transcription.py +42 -0
  84. audex/lib/injectors/usb.py +12 -0
  85. audex/lib/injectors/vpr.py +65 -0
  86. audex/lib/injectors/wifi.py +7 -0
  87. audex/lib/recorder.py +844 -0
  88. audex/lib/repos/__init__.py +149 -0
  89. audex/lib/repos/container.py +23 -0
  90. audex/lib/repos/database/__init__.py +1 -0
  91. audex/lib/repos/database/sqlite.py +672 -0
  92. audex/lib/repos/decorators.py +74 -0
  93. audex/lib/repos/doctor.py +286 -0
  94. audex/lib/repos/segment.py +302 -0
  95. audex/lib/repos/session.py +285 -0
  96. audex/lib/repos/tables/__init__.py +70 -0
  97. audex/lib/repos/tables/doctor.py +137 -0
  98. audex/lib/repos/tables/segment.py +113 -0
  99. audex/lib/repos/tables/session.py +140 -0
  100. audex/lib/repos/tables/utterance.py +131 -0
  101. audex/lib/repos/tables/vp.py +102 -0
  102. audex/lib/repos/utterance.py +288 -0
  103. audex/lib/repos/vp.py +286 -0
  104. audex/lib/restful.py +251 -0
  105. audex/lib/server/__init__.py +97 -0
  106. audex/lib/server/auth.py +98 -0
  107. audex/lib/server/handlers.py +248 -0
  108. audex/lib/server/templates/index.html.j2 +226 -0
  109. audex/lib/server/templates/login.html.j2 +111 -0
  110. audex/lib/server/templates/static/script.js +68 -0
  111. audex/lib/server/templates/static/style.css +579 -0
  112. audex/lib/server/types.py +123 -0
  113. audex/lib/session.py +503 -0
  114. audex/lib/store/__init__.py +238 -0
  115. audex/lib/store/localfile.py +411 -0
  116. audex/lib/transcription/__init__.py +33 -0
  117. audex/lib/transcription/dashscope.py +525 -0
  118. audex/lib/transcription/events.py +62 -0
  119. audex/lib/usb.py +554 -0
  120. audex/lib/vpr/__init__.py +38 -0
  121. audex/lib/vpr/unisound/__init__.py +185 -0
  122. audex/lib/vpr/unisound/types.py +469 -0
  123. audex/lib/vpr/xfyun/__init__.py +483 -0
  124. audex/lib/vpr/xfyun/types.py +679 -0
  125. audex/lib/websocket/__init__.py +8 -0
  126. audex/lib/websocket/connection.py +485 -0
  127. audex/lib/websocket/pool.py +991 -0
  128. audex/lib/wifi.py +1146 -0
  129. audex/lifespan.py +75 -0
  130. audex/service/__init__.py +27 -0
  131. audex/service/decorators.py +73 -0
  132. audex/service/doctor/__init__.py +652 -0
  133. audex/service/doctor/const.py +36 -0
  134. audex/service/doctor/exceptions.py +96 -0
  135. audex/service/doctor/types.py +54 -0
  136. audex/service/export/__init__.py +236 -0
  137. audex/service/export/const.py +17 -0
  138. audex/service/export/exceptions.py +34 -0
  139. audex/service/export/types.py +21 -0
  140. audex/service/injectors/__init__.py +1 -0
  141. audex/service/injectors/container.py +53 -0
  142. audex/service/injectors/doctor.py +34 -0
  143. audex/service/injectors/export.py +27 -0
  144. audex/service/injectors/session.py +49 -0
  145. audex/service/session/__init__.py +754 -0
  146. audex/service/session/const.py +34 -0
  147. audex/service/session/exceptions.py +67 -0
  148. audex/service/session/types.py +91 -0
  149. audex/types.py +39 -0
  150. audex/utils.py +287 -0
  151. audex/valueobj/__init__.py +81 -0
  152. audex/valueobj/common/__init__.py +1 -0
  153. audex/valueobj/common/auth.py +84 -0
  154. audex/valueobj/common/email.py +16 -0
  155. audex/valueobj/common/ops.py +22 -0
  156. audex/valueobj/common/phone.py +84 -0
  157. audex/valueobj/common/version.py +72 -0
  158. audex/valueobj/session.py +19 -0
  159. audex/valueobj/utterance.py +15 -0
  160. audex/view/__init__.py +51 -0
  161. audex/view/container.py +17 -0
  162. audex/view/decorators.py +303 -0
  163. audex/view/pages/__init__.py +1 -0
  164. audex/view/pages/dashboard/__init__.py +286 -0
  165. audex/view/pages/dashboard/wifi.py +407 -0
  166. audex/view/pages/login.py +110 -0
  167. audex/view/pages/recording.py +348 -0
  168. audex/view/pages/register.py +202 -0
  169. audex/view/pages/sessions/__init__.py +196 -0
  170. audex/view/pages/sessions/details.py +224 -0
  171. audex/view/pages/sessions/export.py +443 -0
  172. audex/view/pages/settings.py +374 -0
  173. audex/view/pages/voiceprint/__init__.py +1 -0
  174. audex/view/pages/voiceprint/enroll.py +195 -0
  175. audex/view/pages/voiceprint/update.py +195 -0
  176. audex/view/static/css/dashboard.css +452 -0
  177. audex/view/static/css/glass.css +22 -0
  178. audex/view/static/css/global.css +541 -0
  179. audex/view/static/css/login.css +386 -0
  180. audex/view/static/css/recording.css +439 -0
  181. audex/view/static/css/register.css +293 -0
  182. audex/view/static/css/sessions/styles.css +501 -0
  183. audex/view/static/css/settings.css +186 -0
  184. audex/view/static/css/voiceprint/enroll.css +43 -0
  185. audex/view/static/css/voiceprint/styles.css +209 -0
  186. audex/view/static/css/voiceprint/update.css +44 -0
  187. audex/view/static/images/logo.svg +95 -0
  188. audex/view/static/js/recording.js +42 -0
  189. audex-1.0.7a3.dist-info/METADATA +361 -0
  190. audex-1.0.7a3.dist-info/RECORD +192 -0
  191. audex-1.0.7a3.dist-info/WHEEL +4 -0
  192. audex-1.0.7a3.dist-info/entry_points.txt +3 -0
audex/lib/recorder.py ADDED
@@ -0,0 +1,844 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import datetime
5
+ import enum
6
+ import io
7
+ import typing as t
8
+
9
+ import numpy as np
10
+ import numpy.typing as npt
11
+ import pyaudio
12
+ import pydub
13
+
14
+ from audex import utils
15
+ from audex.helper.mixin import AsyncContextMixin
16
+ from audex.helper.mixin import LoggingMixin
17
+ from audex.lib.store import Store
18
+
19
+
20
class AudioFormat(str, enum.Enum):
    """Supported audio output formats.

    Each member is also a ``str`` whose value is the lowercase format
    name, so a member compares equal to that plain string.
    """

    # Raw sample bytes with no container.
    PCM = "pcm"
    # Container / compressed formats.
    WAV = "wav"
    MP3 = "mp3"
    OPUS = "opus"
27
+
28
+
29
class AudioConfig(t.NamedTuple):
    """Audio recording configuration.

    An immutable tuple of capture parameters. Every field has a default,
    so ``AudioConfig()`` describes 16-bit mono capture at 16 kHz from the
    default input device.

    Attributes:
        format: Audio sample format (pyaudio constant).
        channels: Number of audio channels (1=mono, 2=stereo).
        rate: Sample rate in Hz (e.g., 16000, 44100, 48000).
        chunk: Number of frames per buffer.
        input_device_index: Index of input device, None for default.
    """

    # Sample format constant from pyaudio (16-bit signed integers by default).
    format: int = pyaudio.paInt16
    channels: int = 1
    rate: int = 16000
    chunk: int = 1024
    # None lets the audio backend pick its default input device.
    input_device_index: int | None = None
45
+
46
+
47
class AudioFrame:
    """One captured buffer of audio paired with its capture time.

    ``__slots__`` is declared so instances carry no per-object ``__dict__``.

    Attributes:
        timestamp: When this frame was captured.
        data: Raw audio bytes of the frame.
    """

    __slots__ = ("data", "timestamp")

    def __init__(self, timestamp: datetime.datetime, data: bytes) -> None:
        """Store the capture time and the raw bytes unchanged."""
        self.timestamp, self.data = timestamp, data
62
+
63
+
64
class AudioSegment(t.NamedTuple):
    """Represents a recorded audio segment.

    Immutable record describing one finished recording.

    Attributes:
        key: Storage key where the audio is saved.
        duration_ms: Duration of the segment in milliseconds.
        started_at: Timestamp when recording started.
        ended_at: Timestamp when recording ended.
        frames: Raw audio frames (bytes).
    """

    key: str
    duration_ms: int
    started_at: datetime.datetime
    ended_at: datetime.datetime
    # Raw sample bytes of the whole recording (no container header).
    frames: bytes
80
+
81
+
82
class AudioRecorder(LoggingMixin, AsyncContextMixin):
    """High-performance audio recorder using PyAudio with real-time
    streaming.

    This recorder captures audio from a microphone and can start/stop
    recording multiple times, creating separate audio segments for each
    recording session. Audio data is automatically uploaded to the
    configured Store when a recording is stopped.

    Features:
    - Real-time audio streaming with async generators
    - Multiple output format support (PCM, WAV, MP3, OPUS)
    - Efficient numpy-based audio processing
    - Non-blocking streaming while recording
    - Time-based segment extraction
    - Dynamic dtype handling based on AudioConfig

    Attributes:
        store: Storage backend for uploading audio files.
        config: Audio recording configuration.

    Example:
        ```python
        recorder = AudioRecorder(
            store=local_store,
            config=AudioConfig(
                format=pyaudio.paInt16,
                channels=1,
                rate=16000,
                chunk=1024,
            ),
        )

        await recorder.init()
        await recorder.start("session-123", "segment")

        # Stream audio chunks in real-time
        async for chunk in recorder.stream(
            chunk_size=16000,  # 1 second chunks
            format=AudioFormat.MP3,
        ):
            await send_to_api(chunk)

        segment = await recorder.stop()
        await recorder.close()
        ```
    """

    # Tag used for this class's log output.
    # NOTE(review): presumably consumed by LoggingMixin — confirm there.
    __logtag__ = "audex.lib.audio.recorder"

    # Mapping PyAudio format to (numpy dtype, sample width in bytes).
    # The width is the on-the-wire size of one sample; for 24-bit audio it
    # is 3 even though samples are held in int32 arrays in memory.
    _FORMAT_MAP: t.ClassVar[dict[object, tuple[npt.DTypeLike, int]]] = {
        pyaudio.paInt8: (np.int8, 1),
        pyaudio.paInt16: (np.int16, 2),
        pyaudio.paInt24: (np.int32, 3),  # Note: 24-bit stored in 32-bit container
        pyaudio.paInt32: (np.int32, 4),
        pyaudio.paFloat32: (np.float32, 4),
    }
140
+
141
def __init__(self, store: Store, config: AudioConfig | None = None):
    """Prepare recorder state without touching any audio device.

    Args:
        store: Storage backend that finished recordings are uploaded to.
        config: Recording parameters; a default ``AudioConfig()`` is used
            when omitted.

    Raises:
        ValueError: If ``config.format`` has no entry in ``_FORMAT_MAP``.
    """
    super().__init__()
    self.store = store
    self.config = config or AudioConfig()

    # Resolve the numpy dtype and byte width that match the PyAudio format.
    format_spec = self._FORMAT_MAP.get(self.config.format)
    if format_spec is None:
        raise ValueError(f"Unsupported audio format: {self.config.format}")
    self._numpy_dtype, self._sample_width = format_spec

    # PyAudio handles; created in init() / start().
    self._audio: pyaudio.PyAudio | None = None
    self._stream: pyaudio.Stream | None = None

    # Captured buffers and their capture times, kept as parallel lists.
    self._frames_data: list[npt.NDArray[t.Any]] = []
    self._frames_timestamps: list[datetime.datetime] = []

    # State of the recording in progress, if any.
    self._is_recording = False
    self._current_key: str | None = None
    self._started_at: datetime.datetime | None = None

    # Streaming cursor (in samples) and the lock guarding it.
    self._stream_position: int = 0
    self._stream_lock = asyncio.Lock()

    self.logger.debug(
        f"Initialized with dtype={self._numpy_dtype}, sample_width={self._sample_width}"
    )
170
+
171
async def init(self) -> None:
    """Create the PyAudio instance and log the available input devices.

    Devices reporting at least one input channel are listed at debug
    level with their name, channel count and default sample rate.

    Raises:
        Exception: Whatever PyAudio raises if initialization or device
            enumeration fails.
    """
    audio = pyaudio.PyAudio()
    self._audio = audio
    self.logger.info("Audio system initialized")

    device_count = audio.get_device_count()
    self.logger.debug(f"Found {device_count} audio devices")

    for i in range(device_count):
        device_info = audio.get_device_info_by_index(i)
        if not device_info["maxInputChannels"] > 0:
            # Output-only device: nothing to record from.
            continue
        self.logger.debug(
            f"Input device {i}: {device_info['name']} "
            f"(channels: {device_info['maxInputChannels']}, "
            f"rate: {device_info['defaultSampleRate']})"
        )
194
+
195
async def close(self) -> None:
    """Close the audio system and release resources.

    Stops any active recording, then closes the open stream (if any) and
    terminates PyAudio. Cleanup runs even when stopping the recording
    fails, so the audio device is not leaked; the original error is then
    re-raised to the caller.
    """
    try:
        if self._is_recording:
            await self.stop()
    finally:
        # Detach the handles first so a failing cleanup call cannot leave
        # a half-closed object referenced on the instance.
        stream, self._stream = self._stream, None
        audio, self._audio = self._audio, None
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            if audio is not None:
                audio.terminate()

    self.logger.info("Audio system closed")
213
+
214
@property
def is_recording(self) -> bool:
    """Check if recording is currently active.

    Returns the internal ``_is_recording`` flag unchanged.
    """
    return self._is_recording
218
+
219
@property
def current_segment_key(self) -> str | None:
    """Get the key of the current recording segment.

    Returns the internal ``_current_key`` value, which may be ``None``.
    """
    return self._current_key
223
+
224
async def start(self, *prefixes: str) -> str:
    """Start a new recording segment.

    Clears previously buffered frames, builds a fresh storage key and
    opens a callback-driven input stream.

    Args:
        *prefixes: Prefix parts for the storage key.

    Returns:
        The full storage key for this segment.

    Raises:
        RuntimeError: If already recording or audio system not initialized.
        Exception: Whatever PyAudio raises if the stream cannot be opened
            or started; recorder state is rolled back first.
    """
    if self._is_recording:
        raise RuntimeError("Already recording")

    if self._audio is None:
        raise RuntimeError("Audio system not initialized. Call init() first.")

    # Generate unique key
    segment_id = utils.gen_id(prefix="")
    self._current_key = self.store.key_builder.build(*prefixes, f"{segment_id}.wav")
    self._frames_data.clear()
    self._frames_timestamps.clear()
    self._stream_position = 0
    self._started_at = utils.utcnow()

    stream = None
    try:
        # start=False: PyAudio would otherwise start the stream inside
        # open(), and callbacks arriving before _is_recording is set
        # would be discarded by _audio_callback.
        stream = self._audio.open(
            format=self.config.format,
            channels=self.config.channels,
            rate=self.config.rate,
            input=True,
            frames_per_buffer=self.config.chunk,
            input_device_index=self.config.input_device_index,
            start=False,
            stream_callback=self._audio_callback,
        )
        self._stream = stream
        self._is_recording = True
        stream.start_stream()
    except Exception:
        # Roll back so the recorder does not claim to be recording with
        # a missing or dead stream.
        self._is_recording = False
        self._stream = None
        self._current_key = None
        self._started_at = None
        if stream is not None:
            try:
                stream.close()
            except Exception:
                self.logger.debug("Failed to close audio stream after start error")
        raise

    self.logger.info(f"Started recording to {self._current_key}")
    return self._current_key
266
+
267
def _audio_callback(
    self,
    in_data: bytes | None,
    _frame_count: int,
    _time_info: t.Mapping[str, float],
    _status_flags: int,
) -> tuple[None, int]:
    """Stream callback that buffers each captured block of samples.

    A buffer is stored only when it is non-empty and a recording is
    active. The bytes are converted to a numpy array of the configured
    dtype and appended together with the current time.

    Returns:
        ``(None, pyaudio.paContinue)`` in every case.
    """
    if not in_data or not self._is_recording:
        return None, pyaudio.paContinue

    captured_at = utils.utcnow()

    if self.config.format == pyaudio.paInt24:
        # 24-bit samples arrive packed (3 bytes each) and are widened to int32.
        samples = self._unpack_24bit(in_data)
    else:
        samples = np.frombuffer(in_data, dtype=self._numpy_dtype)

    self._frames_data.append(samples)  # type: ignore
    self._frames_timestamps.append(captured_at)
    return None, pyaudio.paContinue
292
+
293
+ def _unpack_24bit(self, data: bytes) -> npt.ArrayLike:
294
+ """Unpack 24-bit audio data to 32-bit numpy array.
295
+
296
+ Args:
297
+ data: 24-bit packed audio data.
298
+
299
+ Returns:
300
+ 32-bit numpy array.
301
+ """
302
+ num_samples = len(data) // 3
303
+ samples = np.zeros(num_samples, dtype=np.int32)
304
+
305
+ for i in range(num_samples):
306
+ # Read 3 bytes (little-endian)
307
+ b0 = data[i * 3]
308
+ b1 = data[i * 3 + 1]
309
+ b2 = data[i * 3 + 2]
310
+
311
+ # Combine into 24-bit value
312
+ value = b0 | (b1 << 8) | (b2 << 16)
313
+
314
+ # Sign extension: if bit 23 is set, extend with 1s
315
+ if value & 0x800000: # Negative number
316
+ value |= 0xFF000000 # Set upper 8 bits
317
+
318
+ # Convert to signed int32
319
+ samples[i] = np.int32(value if value < 0x80000000 else value - 0x100000000)
320
+
321
+ return samples
322
+
323
+ def _pack_24bit(self, data: npt.ArrayLike) -> bytes:
324
+ """Pack 32-bit numpy array to 24-bit audio data.
325
+
326
+ Args:
327
+ data: 32-bit numpy array.
328
+
329
+ Returns:
330
+ 24-bit packed audio data.
331
+ """
332
+ # Clip to 24-bit range
333
+ data = np.clip(data, -8388608, 8388607)
334
+
335
+ packed = bytearray(len(data) * 3)
336
+ for i, sample in enumerate(data):
337
+ # Extract 3 bytes (little-endian)
338
+ packed[i * 3] = sample & 0xFF # type: ignore
339
+ packed[i * 3 + 1] = (sample >> 8) & 0xFF # type: ignore
340
+ packed[i * 3 + 2] = (sample >> 16) & 0xFF # type: ignore
341
+
342
+ return bytes(packed)
343
+
344
+ def _find_frame_index(self, target_time: datetime.datetime) -> int:
345
+ """Binary search to find frame index closest to target time.
346
+
347
+ Args:
348
+ target_time: Target timestamp to search for.
349
+
350
+ Returns:
351
+ Index of the frame closest to target time (rounded down).
352
+ """
353
+ if not self._frames_timestamps:
354
+ return 0
355
+
356
+ left, right = 0, len(self._frames_timestamps) - 1
357
+
358
+ # Handle boundary cases
359
+ if target_time <= self._frames_timestamps[0]:
360
+ return 0
361
+ if target_time >= self._frames_timestamps[-1]:
362
+ return len(self._frames_timestamps) - 1
363
+
364
+ # Binary search
365
+ while left <= right:
366
+ mid = (left + right) // 2
367
+ mid_time = self._frames_timestamps[mid]
368
+
369
+ if mid_time == target_time:
370
+ return mid
371
+ if mid_time < target_time:
372
+ left = mid + 1
373
+ else:
374
+ right = mid - 1
375
+
376
+ return right if right >= 0 else 0
377
+
378
+ def _resample_audio_numpy(
379
+ self,
380
+ audio_data: npt.NDArray[t.Any],
381
+ src_rate: int,
382
+ dst_rate: int,
383
+ src_channels: int,
384
+ dst_channels: int,
385
+ ) -> npt.NDArray[t.Any]:
386
+ """Resample audio using numpy (fast linear interpolation).
387
+
388
+ Args:
389
+ audio_data: Input audio as numpy array.
390
+ src_rate: Source sample rate.
391
+ dst_rate: Destination sample rate.
392
+ src_channels: Source number of channels.
393
+ dst_channels: Destination number of channels.
394
+
395
+ Returns:
396
+ Resampled audio as numpy array.
397
+ """
398
+ original_dtype = audio_data.dtype
399
+
400
+ # Convert to float for processing if integer type
401
+ if np.issubdtype(original_dtype, np.integer):
402
+ # Normalize to [-1.0, 1.0]
403
+ if original_dtype == np.int8:
404
+ audio_data = audio_data.astype(np.float32) / 128.0
405
+ elif original_dtype == np.int16:
406
+ audio_data = audio_data.astype(np.float32) / 32768.0
407
+ elif original_dtype == np.int32:
408
+ audio_data = audio_data.astype(np.float32) / 2147483648.0
409
+
410
+ # Reshape for multi-channel processing
411
+ if src_channels > 1:
412
+ audio_data = audio_data.reshape(-1, src_channels)
413
+ else:
414
+ audio_data = audio_data.reshape(-1, 1)
415
+
416
+ # Channel conversion
417
+ if src_channels != dst_channels:
418
+ if dst_channels == 1 and src_channels == 2:
419
+ # Stereo to mono: average channels
420
+ audio_data = audio_data.mean(axis=1, keepdims=True)
421
+ elif dst_channels == 2 and src_channels == 1:
422
+ # Mono to stereo: duplicate channel
423
+ audio_data = np.repeat(audio_data, 2, axis=1)
424
+
425
+ # Sample rate conversion using numpy interpolation
426
+ if src_rate != dst_rate:
427
+ num_frames = audio_data.shape[0]
428
+ ratio = src_rate / dst_rate
429
+ new_num_frames = int(num_frames / ratio)
430
+
431
+ # Create interpolation indices
432
+ src_indices = np.arange(new_num_frames) * ratio
433
+ src_indices_low = src_indices.astype(np.int32)
434
+ src_indices_high = np.minimum(src_indices_low + 1, num_frames - 1)
435
+ frac = (src_indices - src_indices_low).reshape(-1, 1)
436
+
437
+ # Linear interpolation (vectorized!)
438
+ audio_low = audio_data[src_indices_low]
439
+ audio_high = audio_data[src_indices_high]
440
+ audio_data = audio_low * (1 - frac) + audio_high * frac
441
+
442
+ # Convert back to original dtype
443
+ if np.issubdtype(original_dtype, np.integer):
444
+ if original_dtype == np.int8:
445
+ audio_data = (audio_data * 128.0).clip(-128, 127).astype(np.int8)
446
+ elif original_dtype == np.int16:
447
+ audio_data = (audio_data * 32768.0).clip(-32768, 32767).astype(np.int16)
448
+ elif original_dtype == np.int32:
449
+ audio_data = (
450
+ (audio_data * 2147483648.0).clip(-2147483648, 2147483647).astype(np.int32)
451
+ )
452
+
453
+ return audio_data.flatten()
454
+
455
def _to_pydub_segment(
    self,
    audio_data: npt.NDArray[t.Any],
    sample_rate: int,
    channels: int,
) -> pydub.AudioSegment:
    """Convert numpy array to pydub AudioSegment.

    float32 audio is converted to 16-bit integers. 24-bit audio (held in
    int32 containers) is scaled up to 32-bit samples, so the byte layout
    always agrees with the sample width passed to pydub. Previously the
    24-bit path passed 3-byte packed data with a width of 4.

    Args:
        audio_data: Audio data as numpy array.
        sample_rate: Sample rate in Hz.
        channels: Number of channels.

    Returns:
        pydub AudioSegment.

    Raises:
        ValueError: If the array dtype is not int8, int16, int32 or float32.
    """
    dtype = audio_data.dtype

    if dtype == np.float32:
        # Convert float32 to int16 for pydub
        pcm = (audio_data * 32768.0).clip(-32768, 32767).astype(np.int16)
        sample_width = 2
    elif dtype == np.int32:
        if self.config.format == pyaudio.paInt24:
            # Values occupy the low 24 bits; clip to that range and shift
            # them to full 32-bit scale.
            pcm = np.clip(audio_data, -8388608, 8388607).astype(np.int32) << 8
        else:
            pcm = audio_data
        sample_width = 4
    elif dtype == np.int16:
        pcm = audio_data
        sample_width = 2
    elif dtype == np.int8:
        pcm = audio_data
        sample_width = 1
    else:
        raise ValueError(f"Unsupported numpy dtype: {audio_data.dtype}")

    # Create pydub AudioSegment
    return pydub.AudioSegment(
        data=pcm.tobytes(),
        sample_width=sample_width,
        frame_rate=sample_rate,
        channels=channels,
    )
499
+
500
def _encode_audio(
    self,
    audio_data: npt.NDArray[t.Any],
    sample_rate: int,
    channels: int,
    output_format: AudioFormat,
) -> bytes:
    """Encode samples into the requested output format.

    PCM is returned directly as raw bytes (re-packed to 3 bytes per
    sample when the recorder is configured for 24-bit audio). Every other
    format is produced by exporting a pydub segment into memory.

    Args:
        audio_data: Audio data as numpy array.
        sample_rate: Sample rate in Hz.
        channels: Number of channels.
        output_format: Target audio format.

    Returns:
        Encoded audio data.

    Raises:
        ValueError: If ``output_format`` is not a supported format.
    """
    if output_format == AudioFormat.PCM:
        is_24bit = self.config.format == pyaudio.paInt24
        return self._pack_24bit(audio_data) if is_24bit else audio_data.tobytes()

    pydub_audio = self._to_pydub_segment(audio_data, sample_rate, channels)

    def export_to_bytes(**options: t.Any) -> bytes:
        # Run one pydub export into an in-memory buffer.
        sink = io.BytesIO()
        pydub_audio.export(sink, **options)
        return sink.getvalue()

    if output_format == AudioFormat.WAV:
        return export_to_bytes(format="wav")

    if output_format == AudioFormat.MP3:
        encoded = export_to_bytes(
            format="mp3",
            bitrate="128k",
            parameters=["-q:a", "2"],  # High quality
        )
        self.logger.debug(f"Encoded to MP3: {len(encoded)} bytes")
        return encoded

    if output_format == AudioFormat.OPUS:
        encoded = export_to_bytes(
            format="opus",
            codec="libopus",
            parameters=["-b:a", "64k"],
        )
        self.logger.debug(f"Encoded to OPUS: {len(encoded)} bytes")
        return encoded

    raise ValueError(f"Unsupported format: {output_format}")
561
+
562
async def stream(
    self,
    chunk_size: int | None = None,
    format: AudioFormat = AudioFormat.PCM,
    channels: int | None = None,
    rate: int | None = None,
) -> t.AsyncGenerator[bytes, None]:
    """Stream audio chunks in real-time while recording.

    This does NOT affect the recording buffer. You can stream and
    record simultaneously. The generator ends when recording stops;
    samples that never filled a complete chunk are not yielded.

    Only the buffered frames overlapping the next chunk are concatenated,
    so the cost per chunk does not grow with the recording length, and the
    lock is released before waiting for more data.

    Args:
        chunk_size: Number of samples per chunk. None = config.chunk.
        format: Output audio format.
        channels: Target channels. None = config.channels.
        rate: Target sample rate. None = config.rate.

    Yields:
        Audio chunks in specified format.

    Example:
        ```python
        # Stream 1-second MP3 chunks
        async for chunk in recorder.stream(
            chunk_size=16000, format=AudioFormat.MP3
        ):
            await send_to_server(chunk)
        ```
    """
    if not self._is_recording:
        self.logger.warning("Cannot stream: not recording")
        return

    chunk_size = chunk_size or self.config.chunk
    target_channels = channels or self.config.channels
    target_rate = rate or self.config.rate

    self.logger.info(
        f"Started streaming: chunk_size={chunk_size}, format={format.value}, "
        f"rate={target_rate}, channels={target_channels}"
    )

    # Cursor into the frame list: index of the first frame that is not yet
    # fully streamed, and the absolute sample offset where it begins.
    frame_idx = 0
    frame_start = 0

    while self._is_recording:
        chunk_audio = None

        async with self._stream_lock:
            frames = self._frames_data
            position = self._stream_position

            # The buffer was reset (new recording or clear_frames()):
            # restart the cursor from the beginning.
            if position < frame_start or frame_idx > len(frames):
                frame_idx, frame_start = 0, 0

            # Skip frames that lie entirely before the stream position.
            while (
                frame_idx < len(frames)
                and frame_start + len(frames[frame_idx]) <= position
            ):
                frame_start += len(frames[frame_idx])
                frame_idx += 1

            # Gather just enough frames to cover the next chunk.
            offset = position - frame_start
            needed = offset + chunk_size
            pieces = []
            gathered = 0
            idx = frame_idx
            while idx < len(frames) and gathered < needed:
                pieces.append(frames[idx])
                gathered += len(frames[idx])
                idx += 1

            if gathered >= needed:
                window = pieces[0] if len(pieces) == 1 else np.concatenate(pieces)
                chunk_audio = window[offset : offset + chunk_size]
                self._stream_position = position + chunk_size

        if chunk_audio is None:
            # Not enough data yet; wait without holding the lock.
            await asyncio.sleep(0.01)  # 10ms
            continue

        # Process audio (outside lock for performance)
        if target_rate != self.config.rate or target_channels != self.config.channels:
            chunk_audio = self._resample_audio_numpy(
                chunk_audio,
                src_rate=self.config.rate,
                dst_rate=target_rate,
                src_channels=self.config.channels,
                dst_channels=target_channels,
            )

        # Encode to target format
        encoded_chunk = self._encode_audio(
            chunk_audio,
            sample_rate=target_rate,
            channels=target_channels,
            output_format=format,
        )

        yield encoded_chunk
649
+
650
async def segment(
    self,
    started_at: datetime.datetime,
    ended_at: datetime.datetime,
    *,
    channels: int | None = None,
    rate: int | None = None,
    format: AudioFormat = AudioFormat.PCM,
) -> bytes:
    """Cut the buffered audio between two timestamps and encode it.

    The frames located for both timestamps are included. When both
    timestamps map to the same frame, the following frame is included as
    well if one exists.

    Args:
        started_at: Start timestamp.
        ended_at: End timestamp.
        channels: Target channels. None = config.channels.
        rate: Target sample rate. None = config.rate.
        format: Output format (PCM, WAV, MP3, OPUS).

    Returns:
        Audio segment in specified format.

    Raises:
        RuntimeError: If audio system not initialized.
        ValueError: If the end precedes the start or no frames are buffered.
    """
    if self._audio is None:
        raise RuntimeError("Audio system not initialized")

    if ended_at < started_at:
        raise ValueError(
            f"End time ({ended_at.isoformat()}) must be after "
            f"start time ({started_at.isoformat()})"
        )

    buffered = self._frames_data
    if not buffered:
        raise ValueError("No audio frames available")

    source_rate = self.config.rate
    source_channels = self.config.channels
    target_channels = channels or source_channels
    target_rate = rate or source_rate

    # Map both timestamps to frame indices.
    start_idx = self._find_frame_index(started_at)
    end_idx = self._find_frame_index(ended_at)
    if start_idx == end_idx:
        end_idx = min(start_idx + 1, len(self._frames_data) - 1)

    self.logger.debug(
        f"Extracting frames {start_idx} to {end_idx} (total: {end_idx - start_idx + 1} frames)"
    )

    combined_audio = np.concatenate(self._frames_data[start_idx : end_idx + 1])

    needs_conversion = target_rate != source_rate or target_channels != source_channels
    if needs_conversion:
        combined_audio = self._resample_audio_numpy(
            combined_audio,
            src_rate=self.config.rate,
            dst_rate=target_rate,
            src_channels=self.config.channels,
            dst_channels=target_channels,
        )
        self.logger.debug(
            f"Resampled: {self.config.rate}Hz {self.config.channels}ch -> "
            f"{target_rate}Hz {target_channels}ch"
        )

    encoded_data = self._encode_audio(
        combined_audio,
        sample_rate=target_rate,
        channels=target_channels,
        output_format=format,
    )

    self.logger.debug(
        f"Created {format.value.upper()} segment: "
        f"{len(encoded_data)} bytes, {target_rate}Hz {target_channels}ch"
    )

    return encoded_data
733
+
734
async def stop(self) -> AudioSegment:
    """Stop recording and save to storage.

    Closes the input stream, encodes everything captured since start()
    as WAV and uploads it under the current segment key. A recording
    that captured no frames is saved as an empty WAV instead of failing.
    Buffered frames are kept afterwards so that segment() can still
    extract from them; the segment key and start time are cleared whether
    or not the upload succeeds.

    Returns:
        AudioSegment containing recording information.

    Raises:
        RuntimeError: If not currently recording, or no segment key is set.
    """
    if not self._is_recording:
        raise RuntimeError("Not currently recording")

    self._is_recording = False

    # Stop stream
    if self._stream is not None:
        self._stream.stop_stream()
        self._stream.close()
        self._stream = None

    ended_at = utils.utcnow()
    started_at = self._started_at if self._started_at is not None else ended_at
    key = self._current_key

    try:
        if key is None:
            raise RuntimeError("No current segment key")

        frame_count = len(self._frames_data)

        # np.concatenate rejects an empty list, so an immediate
        # start()/stop() needs an explicit empty array.
        if frame_count:
            all_audio = np.concatenate(self._frames_data)
        else:
            all_audio = np.zeros(0, dtype=self._numpy_dtype)

        # Convert to bytes based on format
        if self.config.format == pyaudio.paInt24:
            frames = self._pack_24bit(all_audio)
        else:
            frames = all_audio.tobytes()

        duration_ms = int((ended_at - started_at).total_seconds() * 1000)

        # Create WAV file using pydub
        pydub_audio = self._to_pydub_segment(
            all_audio,
            sample_rate=self.config.rate,
            channels=self.config.channels,
        )
        wav_buffer = io.BytesIO()
        pydub_audio.export(wav_buffer, format="wav")
        wav_data = wav_buffer.getvalue()

        # Upload to store
        await self.store.upload(
            data=wav_data,
            key=key,
            metadata={
                "content_type": "audio/wav",
                "duration_ms": duration_ms,
                "sample_rate": self.config.rate,
                "channels": self.config.channels,
                "started_at": started_at.isoformat(),
                "ended_at": ended_at.isoformat(),
                "frame_count": frame_count,
            },
        )

        self.logger.info(
            f"Stopped recording. Duration: {duration_ms}ms, "
            f"Frames: {frame_count}, Size: {len(wav_data)} bytes"
        )

        segment = AudioSegment(
            key=key,
            duration_ms=duration_ms,
            started_at=started_at,
            ended_at=ended_at,
            frames=frames,
        )
    finally:
        # Reset state but keep frames for potential extraction
        self._current_key = None
        self._started_at = None

    return segment
820
+
821
def clear_frames(self) -> None:
    """Drop every buffered frame and reset the streaming position.

    Both buffers are emptied in place, so existing references to the
    lists observe the change.
    """
    for buffer in (self._frames_data, self._frames_timestamps):
        buffer.clear()
    self._stream_position = 0
    self.logger.debug("Cleared all recorded frames from memory")
827
+
828
def list_input_devices(self) -> list[dict[str, t.Any]]:
    """Describe every audio device that offers at least one input channel.

    Returns:
        One dict per input device with the keys ``index``, ``name``,
        ``channels`` and ``default_rate``, in device-index order.

    Raises:
        RuntimeError: If the audio system has not been initialized.
    """
    audio = self._audio
    if audio is None:
        raise RuntimeError("Audio system not initialized")

    indexed_info = (
        (index, audio.get_device_info_by_index(index))
        for index in range(audio.get_device_count())
    )
    return [
        {
            "index": index,
            "name": info["name"],
            "channels": info["maxInputChannels"],
            "default_rate": info["defaultSampleRate"],
        }
        for index, info in indexed_info
        if info["maxInputChannels"] > 0
    ]