atom-audio-engine 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {atom_audio_engine-0.1.1.dist-info → atom_audio_engine-0.1.2.dist-info}/METADATA +1 -1
  2. atom_audio_engine-0.1.2.dist-info/RECORD +57 -0
  3. audio_engine/asr/__init__.py +45 -0
  4. audio_engine/asr/base.py +89 -0
  5. audio_engine/asr/cartesia.py +356 -0
  6. audio_engine/asr/deepgram.py +196 -0
  7. audio_engine/core/__init__.py +13 -0
  8. audio_engine/core/config.py +162 -0
  9. audio_engine/core/pipeline.py +282 -0
  10. audio_engine/core/types.py +87 -0
  11. audio_engine/examples/__init__.py +1 -0
  12. audio_engine/examples/basic_stt_llm_tts.py +200 -0
  13. audio_engine/examples/geneface_animation.py +99 -0
  14. audio_engine/examples/personaplex_pipeline.py +116 -0
  15. audio_engine/examples/websocket_server.py +86 -0
  16. audio_engine/integrations/__init__.py +5 -0
  17. audio_engine/integrations/geneface.py +297 -0
  18. audio_engine/llm/__init__.py +38 -0
  19. audio_engine/llm/base.py +108 -0
  20. audio_engine/llm/groq.py +210 -0
  21. audio_engine/pipelines/__init__.py +1 -0
  22. audio_engine/pipelines/personaplex/__init__.py +41 -0
  23. audio_engine/pipelines/personaplex/client.py +259 -0
  24. audio_engine/pipelines/personaplex/config.py +69 -0
  25. audio_engine/pipelines/personaplex/pipeline.py +301 -0
  26. audio_engine/pipelines/personaplex/types.py +173 -0
  27. audio_engine/pipelines/personaplex/utils.py +192 -0
  28. audio_engine/scripts/debug_pipeline.py +79 -0
  29. audio_engine/scripts/debug_tts.py +162 -0
  30. audio_engine/scripts/test_cartesia_connect.py +57 -0
  31. audio_engine/streaming/__init__.py +5 -0
  32. audio_engine/streaming/websocket_server.py +341 -0
  33. audio_engine/tests/__init__.py +1 -0
  34. audio_engine/tests/test_personaplex/__init__.py +1 -0
  35. audio_engine/tests/test_personaplex/test_personaplex.py +10 -0
  36. audio_engine/tests/test_personaplex/test_personaplex_client.py +259 -0
  37. audio_engine/tests/test_personaplex/test_personaplex_config.py +71 -0
  38. audio_engine/tests/test_personaplex/test_personaplex_message.py +80 -0
  39. audio_engine/tests/test_personaplex/test_personaplex_pipeline.py +226 -0
  40. audio_engine/tests/test_personaplex/test_personaplex_session.py +184 -0
  41. audio_engine/tests/test_personaplex/test_personaplex_transcript.py +184 -0
  42. audio_engine/tests/test_traditional_pipeline/__init__.py +1 -0
  43. audio_engine/tests/test_traditional_pipeline/test_cartesia_asr.py +474 -0
  44. audio_engine/tests/test_traditional_pipeline/test_config_env.py +97 -0
  45. audio_engine/tests/test_traditional_pipeline/test_conversation_context.py +115 -0
  46. audio_engine/tests/test_traditional_pipeline/test_pipeline_creation.py +64 -0
  47. audio_engine/tests/test_traditional_pipeline/test_pipeline_with_mocks.py +173 -0
  48. audio_engine/tests/test_traditional_pipeline/test_provider_factories.py +61 -0
  49. audio_engine/tests/test_traditional_pipeline/test_websocket_server.py +58 -0
  50. audio_engine/tts/__init__.py +37 -0
  51. audio_engine/tts/base.py +155 -0
  52. audio_engine/tts/cartesia.py +392 -0
  53. audio_engine/utils/__init__.py +15 -0
  54. audio_engine/utils/audio.py +220 -0
  55. atom_audio_engine-0.1.1.dist-info/RECORD +0 -5
  56. {atom_audio_engine-0.1.1.dist-info → atom_audio_engine-0.1.2.dist-info}/WHEEL +0 -0
  57. {atom_audio_engine-0.1.1.dist-info → atom_audio_engine-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,392 @@
1
+ """Cartesia API implementation for TTS (Text-to-Speech)."""
2
+
3
+ import asyncio
4
+ import base64
5
+ import json
6
+ import logging
7
+ from typing import AsyncIterator, Optional
8
+
9
+ import websockets
10
+
11
+
12
+ from core.types import AudioChunk, AudioFormat
13
+ from .base import BaseTTS
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class CartesiaTTS(BaseTTS):
    """
    Cartesia API client for text-to-speech synthesis.

    Supports streaming synthesis with per-chunk latency < 200ms.
    Uses WebSocket connections for real-time streaming with continuations.
    Outputs 16kHz PCM by default (can be configured).

    Example:
        tts = CartesiaTTS(api_key="...", voice_id="sonic")

        # Streaming text input (from LLM)
        async for chunk in tts.synthesize_stream_text(llm_text_stream):
            play_audio(chunk)
    """

    CARTESIA_VERSION = "2025-04-16"
    DEFAULT_VOICE_ID = "c8605446-247c-4d39-acd4-8f4c28aa363c"  # Edith voice
    WS_URL = "wss://api.cartesia.ai/tts/websocket"

    def __init__(
        self,
        api_key: Optional[str] = None,
        voice_id: Optional[str] = None,
        model: Optional[str] = "sonic-3",
        speed: float = 1.0,
        output_format: AudioFormat = AudioFormat.PCM_16K,
        sample_rate: int = 16000,
        max_buffer_delay_ms: int = 1500,
        **kwargs,
    ):
        """
        Initialize Cartesia TTS provider.

        Args:
            api_key: Cartesia API key (or None to use CARTESIA_API_KEY env var)
            voice_id: Voice identifier (UUID or default Edith)
            model: Model to use (default: sonic-3)
            speed: Speech speed multiplier (1.0 = normal)
            output_format: Desired audio output format (default 16kHz PCM)
            sample_rate: Output sample rate in Hz (default: 16000)
            max_buffer_delay_ms: Buffering delay for streaming (0-5000ms)
            **kwargs: Additional config forwarded to BaseTTS
        """
        # Fallback to environment variable if not provided.
        if not api_key:
            import os

            api_key = os.getenv("CARTESIA_API_KEY")

        super().__init__(
            api_key=api_key,
            voice_id=voice_id or self.DEFAULT_VOICE_ID,
            model=model,
            speed=speed,
            output_format=output_format,
            **kwargs,
        )
        self._sample_rate = sample_rate
        self.max_buffer_delay_ms = max_buffer_delay_ms

    @property
    def name(self) -> str:
        """Return provider name."""
        return "cartesia"

    @property
    def sample_rate(self) -> int:
        """Return the sample rate for this provider's output."""
        return self._sample_rate

    async def connect(self):
        """Cartesia uses per-call WebSocket connections - no persistent client needed."""
        pass

    async def disconnect(self):
        """Cartesia uses per-call WebSocket connections - no persistent client needed."""
        pass

    async def synthesize(self, text: str) -> bytes:
        """
        Synthesize complete audio from text (non-streaming).

        Args:
            text: Text to convert to speech

        Returns:
            Complete audio as bytes (PCM)
        """
        audio_data = bytearray()
        async for chunk in self.synthesize_stream_text(self._text_to_async_iter(text)):
            # The final chunk is an empty end-of-stream marker; skip it.
            if chunk.data and not chunk.is_final:
                audio_data.extend(chunk.data)
        return bytes(audio_data)

    async def synthesize_stream(self, text: str) -> AsyncIterator[AudioChunk]:
        """
        Synthesize streaming audio from text.

        Args:
            text: Text to convert to speech

        Yields:
            AudioChunk objects with audio data
        """
        async for chunk in self.synthesize_stream_text(self._text_to_async_iter(text)):
            yield chunk

    def _build_request(self, transcript: str, context_id: str, *, cont: bool) -> dict:
        """
        Build one Cartesia WebSocket generation request payload.

        Args:
            transcript: Text to synthesize ("" signals end of input)
            context_id: Continuation context shared across requests
            cont: True while more text will follow, False for the final request

        Returns:
            JSON-serializable request dict.
        """
        return {
            "model_id": self.model,
            "transcript": transcript,
            "context_id": context_id,
            "continue": cont,
            "max_buffer_delay_ms": self.max_buffer_delay_ms,
            "voice": {
                "mode": "id",
                "id": self.voice_id,
            },
            "output_format": {
                "container": "raw",
                "encoding": "pcm_s16le",
                "sample_rate": self.sample_rate,
            },
        }

    async def synthesize_stream_text(
        self, text_stream: AsyncIterator[str]
    ) -> AsyncIterator[AudioChunk]:
        """
        Synthesize streaming audio from streaming text input via WebSocket.

        Uses continuations to maintain natural prosody across streamed text chunks.

        Args:
            text_stream: Async iterator yielding text tokens

        Yields:
            AudioChunk objects with audio data; a final empty chunk with
            is_final=True marks end of stream.

        Raises:
            ValueError: If no API key is configured.
            RuntimeError: If the Cartesia API reports an error.
        """
        if not self.api_key:
            raise ValueError("api_key required for Cartesia TTS")

        # Use unique context ID for this synthesis session.
        import uuid

        context_id = str(uuid.uuid4())

        # NOTE(review): the API key rides in the URL query string, where it may
        # end up in proxy/server logs — confirm whether a header is supported.
        ws_url = (
            f"{self.WS_URL}"
            f"?api_key={self.api_key}"
            f"&cartesia_version={self.CARTESIA_VERSION}"
        )

        try:
            async with websockets.connect(ws_url) as websocket:
                logger.debug(
                    f"Cartesia TTS WebSocket connected | Context: {context_id}"
                )

                async def receive_audio():
                    """Yield decoded audio chunks from the TTS WebSocket."""
                    logger.debug("Cartesia: receive_audio started")
                    try:
                        async for message in websocket:
                            if isinstance(message, str):
                                try:
                                    response = json.loads(message)
                                    logger.debug(
                                        f"Cartesia: received response type={response.get('type')}"
                                    )
                                    # Audio chunk: base64-encoded PCM in "data".
                                    if response.get("type") == "chunk" and response.get(
                                        "data"
                                    ):
                                        audio_bytes = base64.b64decode(response["data"])
                                        yield audio_bytes
                                        logger.debug(
                                            f"Cartesia: received audio chunk {len(audio_bytes)} bytes"
                                        )
                                    elif response.get("type") == "flush_done":
                                        logger.debug("Cartesia: buffer flushed")
                                    elif response.get("type") == "done":
                                        logger.info("Cartesia: TTS generation complete")
                                        break
                                    elif response.get("type") == "error":
                                        # Error payload field name varies; try each.
                                        error_msg = (
                                            response.get("error")
                                            or response.get("error_message")
                                            or response.get("message")
                                            or str(response)
                                        )
                                        logger.error(f"Cartesia TTS error: {error_msg}")
                                        raise RuntimeError(
                                            f"Cartesia API error: {error_msg}"
                                        )
                                    else:
                                        logger.debug(
                                            f"Cartesia: response type {response.get('type')}"
                                        )
                                except json.JSONDecodeError:
                                    logger.warning(
                                        f"Failed to parse Cartesia response: {message}"
                                    )
                    except Exception as e:
                        logger.error(f"Cartesia receive error: {e}", exc_info=True)
                        raise

                async def send_text():
                    """Buffer text tokens and send them to the TTS WebSocket."""
                    logger.debug("Cartesia: send_text started")
                    accumulated_text = ""
                    first_token_timeout = 30.0
                    subsequent_token_timeout = 2.0
                    first_token_received = False

                    try:
                        while True:
                            try:
                                # Wait for token with appropriate timeout: long
                                # for the very first token (LLM warm-up), short
                                # afterwards so stalled streams flush promptly.
                                timeout = (
                                    first_token_timeout
                                    if not first_token_received
                                    else subsequent_token_timeout
                                )
                                token = await asyncio.wait_for(
                                    self._get_next_token(text_stream),
                                    timeout=timeout,
                                )
                                first_token_received = True
                            except asyncio.TimeoutError:
                                logger.debug(
                                    f"Cartesia: token timeout (first_token={first_token_received})"
                                )
                                # Flush accumulated text so audio keeps flowing.
                                if accumulated_text.strip():
                                    await websocket.send(
                                        json.dumps(
                                            self._build_request(
                                                accumulated_text,
                                                context_id,
                                                cont=True,
                                            )
                                        )
                                    )
                                    logger.debug(
                                        "Cartesia: sent text on timeout (continue=true)"
                                    )
                                    accumulated_text = ""
                                continue

                            # None signals end of text stream.
                            if token is None:
                                if accumulated_text.strip():
                                    # Send remaining text with continue=false.
                                    await websocket.send(
                                        json.dumps(
                                            self._build_request(
                                                accumulated_text,
                                                context_id,
                                                cont=False,
                                            )
                                        )
                                    )
                                    logger.debug(
                                        "Cartesia: sent final text (continue=false)"
                                    )
                                else:
                                    # Empty transcript tells Cartesia to finish
                                    # the context.
                                    await websocket.send(
                                        json.dumps(
                                            self._build_request(
                                                "", context_id, cont=False
                                            )
                                        )
                                    )
                                    logger.debug(
                                        "Cartesia: sent empty transcript to signal end"
                                    )
                                logger.info("Cartesia: all text sent")
                                break

                            # Accumulate token.
                            accumulated_text += token
                            logger.debug(
                                f"Cartesia: buffered token {len(accumulated_text)} chars total"
                            )

                            # Send when buffer is large enough or ends with punctuation.
                            if len(accumulated_text) > 30 or token.endswith(
                                (".", "!", "?")
                            ):
                                await websocket.send(
                                    json.dumps(
                                        self._build_request(
                                            accumulated_text, context_id, cont=True
                                        )
                                    )
                                )
                                logger.debug(
                                    "Cartesia: sent buffered text (continue=true)"
                                )
                                accumulated_text = ""

                    except Exception as e:
                        # Re-raise so the failure surfaces via `await send_task`
                        # instead of being silently swallowed.
                        logger.error(f"Cartesia send error: {e}")
                        raise

                # Run send and receive concurrently.
                send_task = asyncio.create_task(send_text())
                try:
                    async for audio_bytes in receive_audio():
                        yield AudioChunk(
                            data=audio_bytes,
                            sample_rate=self.sample_rate,
                            channels=1,
                            format=self.output_format,
                            is_final=False,
                        )

                    # Wait for send task to complete (propagates send errors).
                    await send_task
                finally:
                    # Never leave the sender dangling if the receive loop or
                    # the consumer of this generator bails out early.
                    if not send_task.done():
                        send_task.cancel()
                        try:
                            await send_task
                        except asyncio.CancelledError:
                            pass

                # Yield final marker.
                yield AudioChunk(
                    data=b"",
                    sample_rate=self.sample_rate,
                    channels=1,
                    format=self.output_format,
                    is_final=True,
                )

                logger.info("Cartesia: stream complete")

        except Exception as e:
            logger.error(f"Cartesia streaming text error: {e}")
            raise

    async def _get_next_token(self, text_stream: AsyncIterator[str]) -> Optional[str]:
        """Get next token from async iterator, or None when exhausted."""
        try:
            return await text_stream.__anext__()
        except StopAsyncIteration:
            return None

    async def _text_to_async_iter(self, text: str) -> AsyncIterator[str]:
        """Convert plain text to a single-item async iterator."""
        yield text
@@ -0,0 +1,15 @@
1
"""Utility functions for the audio engine."""

from utils.audio import (
    resample_audio,
    pcm_to_wav,
    wav_to_pcm,
    get_audio_duration,
    normalize_audio,
)

# Public re-exports of the audio helpers; keep in sync with utils/audio.py.
# normalize_audio is defined there too and was previously not re-exported.
__all__ = [
    "resample_audio",
    "pcm_to_wav",
    "wav_to_pcm",
    "get_audio_duration",
    "normalize_audio",
]
@@ -0,0 +1,220 @@
1
+ """Audio utility functions."""
2
+
3
+ import struct
4
+ from typing import Optional
5
+
6
+
7
+ def resample_audio(
8
+ audio: bytes,
9
+ from_rate: int,
10
+ to_rate: int,
11
+ channels: int = 1,
12
+ sample_width: int = 2,
13
+ ) -> bytes:
14
+ """
15
+ Resample audio to a different sample rate.
16
+
17
+ Uses linear interpolation for simple resampling.
18
+ For higher quality, consider using librosa or scipy.
19
+
20
+ Args:
21
+ audio: Input audio bytes (PCM format)
22
+ from_rate: Original sample rate
23
+ to_rate: Target sample rate
24
+ channels: Number of audio channels
25
+ sample_width: Bytes per sample (2 for 16-bit)
26
+
27
+ Returns:
28
+ Resampled audio bytes
29
+ """
30
+ if from_rate == to_rate:
31
+ return audio
32
+
33
+ try:
34
+ import numpy as np
35
+ from scipy import signal
36
+
37
+ # Convert bytes to numpy array
38
+ dtype = np.int16 if sample_width == 2 else np.int32
39
+ samples = np.frombuffer(audio, dtype=dtype)
40
+
41
+ # Resample using scipy
42
+ num_samples = int(len(samples) * to_rate / from_rate)
43
+ resampled = signal.resample(samples, num_samples)
44
+
45
+ return resampled.astype(dtype).tobytes()
46
+
47
+ except ImportError:
48
+ # Fallback to simple linear interpolation
49
+ return _simple_resample(audio, from_rate, to_rate, sample_width)
50
+
51
+
52
+ def _simple_resample(
53
+ audio: bytes,
54
+ from_rate: int,
55
+ to_rate: int,
56
+ sample_width: int = 2,
57
+ ) -> bytes:
58
+ """Simple linear interpolation resampling."""
59
+ if sample_width == 2:
60
+ fmt = "<h"
61
+ samples = [
62
+ struct.unpack(fmt, audio[i : i + 2])[0] for i in range(0, len(audio), 2)
63
+ ]
64
+ else:
65
+ raise ValueError(f"Unsupported sample width: {sample_width}")
66
+
67
+ ratio = from_rate / to_rate
68
+ new_length = int(len(samples) / ratio)
69
+ resampled = []
70
+
71
+ for i in range(new_length):
72
+ pos = i * ratio
73
+ idx = int(pos)
74
+ frac = pos - idx
75
+
76
+ if idx + 1 < len(samples):
77
+ sample = int(samples[idx] * (1 - frac) + samples[idx + 1] * frac)
78
+ else:
79
+ sample = samples[idx]
80
+
81
+ resampled.append(sample)
82
+
83
+ return struct.pack(f"<{len(resampled)}h", *resampled)
84
+
85
+
86
def pcm_to_wav(
    pcm_data: bytes,
    sample_rate: int = 16000,
    channels: int = 1,
    bits_per_sample: int = 16,
) -> bytes:
    """
    Wrap raw PCM data in a canonical 44-byte WAV (RIFF) header.

    Args:
        pcm_data: Raw PCM audio bytes
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        bits_per_sample: Bits per sample (typically 16)

    Returns:
        WAV file as bytes
    """
    block_align = channels * bits_per_sample // 8
    byte_rate = sample_rate * channels * bits_per_sample // 8
    data_size = len(pcm_data)

    # Assemble the header chunk by chunk: RIFF container, fmt chunk, data chunk.
    riff = struct.pack("<4sI4s", b"RIFF", 36 + data_size, b"WAVE")
    fmt_chunk = struct.pack(
        "<4sIHHIIHH",
        b"fmt ",
        16,  # fmt chunk size
        1,  # audio format (PCM)
        channels,
        sample_rate,
        byte_rate,
        block_align,
        bits_per_sample,
    )
    data_header = struct.pack("<4sI", b"data", data_size)

    return riff + fmt_chunk + data_header + pcm_data
126
+
127
+
128
def wav_to_pcm(wav_data: bytes) -> tuple[bytes, int, int, int]:
    """
    Extract raw PCM data from WAV format.

    Walks the RIFF chunk list, reading the ``fmt `` chunk for the audio
    parameters and returning the payload of the ``data`` chunk.

    Args:
        wav_data: WAV file as bytes

    Returns:
        Tuple of (pcm_data, sample_rate, channels, bits_per_sample)

    Raises:
        ValueError: If the RIFF/WAVE header is malformed or no data chunk
            is found.
    """
    # Parse RIFF header.
    if wav_data[:4] != b"RIFF" or wav_data[8:12] != b"WAVE":
        raise ValueError("Invalid WAV file")

    pos = 12
    sample_rate = 0
    channels = 0
    bits_per_sample = 0

    # Each chunk is an 8-byte header (id + size) followed by its payload;
    # require a full header to avoid struct errors on truncated files.
    while pos + 8 <= len(wav_data):
        chunk_id = wav_data[pos : pos + 4]
        chunk_size = struct.unpack("<I", wav_data[pos + 4 : pos + 8])[0]

        if chunk_id == b"fmt ":
            _, channels, sample_rate, _, _, bits_per_sample = struct.unpack(
                "<HHIIHH", wav_data[pos + 8 : pos + 24]
            )
        elif chunk_id == b"data":
            pcm_data = wav_data[pos + 8 : pos + 8 + chunk_size]
            return pcm_data, sample_rate, channels, bits_per_sample

        # RIFF chunks are word-aligned: an odd-sized chunk is followed by
        # one pad byte that is not counted in chunk_size.
        pos += 8 + chunk_size + (chunk_size & 1)

    raise ValueError("No data chunk found in WAV file")
163
+
164
+
165
def get_audio_duration(
    audio: bytes,
    sample_rate: int,
    channels: int = 1,
    bits_per_sample: int = 16,
) -> float:
    """
    Calculate duration of PCM audio in seconds.

    Args:
        audio: PCM audio bytes
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        bits_per_sample: Bits per sample

    Returns:
        Duration in seconds
    """
    # One frame = one sample per channel.
    frame_bytes = (bits_per_sample // 8) * channels
    frame_count = len(audio) // frame_bytes
    return frame_count / sample_rate
186
+
187
+
188
def normalize_audio(audio: bytes, target_db: float = -20.0) -> bytes:
    """
    Normalize 16-bit PCM audio to a target RMS level in dB.

    Args:
        audio: PCM audio bytes (16-bit)
        target_db: Target dB level relative to full scale

    Returns:
        Normalized audio bytes; the input is returned unchanged when it is
        silent or when numpy is unavailable.
    """
    try:
        import numpy as np
    except ImportError:
        # Without numpy, skip normalization and pass the audio through.
        return audio

    samples = np.frombuffer(audio, dtype=np.int16).astype(np.float32)

    # Measure the current RMS level; silence has nothing to scale.
    current_rms = np.sqrt(np.mean(samples**2))
    if current_rms == 0:
        return audio

    # Desired RMS for the requested dB level (full scale = 32768).
    desired_rms = 32768 * (10 ** (target_db / 20))

    # Apply the gain and clamp back into the int16 range.
    gain = desired_rms / current_rms
    scaled = np.clip(samples * gain, -32768, 32767).astype(np.int16)
    return scaled.tobytes()
@@ -1,5 +0,0 @@
1
- audio_engine/__init__.py,sha256=AQ0uto-Jn3cNqW35MMtSyX5mhXJMFv9AQhjcAkqZ7L4,1499
2
- atom_audio_engine-0.1.1.dist-info/METADATA,sha256=Apv8YTxoTYqqptLeY6ofsjyk82X9LRxtqEJDiAV14Bg,6690
3
- atom_audio_engine-0.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
4
- atom_audio_engine-0.1.1.dist-info/top_level.txt,sha256=IyumwgFrsDL7nlZlBijX-0shiSVhhBCFPUNBRNKzWP4,13
5
- atom_audio_engine-0.1.1.dist-info/RECORD,,