atom-audio-engine: atom_audio_engine-0.1.2-py3-none-any.whl → atom_audio_engine-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {atom_audio_engine-0.1.2.dist-info → atom_audio_engine-0.1.5.dist-info}/METADATA +1 -1
  2. atom_audio_engine-0.1.5.dist-info/RECORD +32 -0
  3. audio_engine/__init__.py +1 -1
  4. audio_engine/asr/__init__.py +2 -3
  5. audio_engine/asr/base.py +1 -1
  6. audio_engine/asr/cartesia.py +4 -10
  7. audio_engine/asr/deepgram.py +1 -1
  8. audio_engine/core/__init__.py +3 -3
  9. audio_engine/core/config.py +4 -4
  10. audio_engine/core/pipeline.py +6 -10
  11. audio_engine/integrations/__init__.py +1 -1
  12. audio_engine/integrations/geneface.py +1 -1
  13. audio_engine/llm/__init__.py +2 -4
  14. audio_engine/llm/base.py +3 -5
  15. audio_engine/llm/groq.py +2 -4
  16. audio_engine/streaming/__init__.py +1 -1
  17. audio_engine/streaming/websocket_server.py +7 -15
  18. audio_engine/tts/__init__.py +2 -4
  19. audio_engine/tts/base.py +3 -5
  20. audio_engine/tts/cartesia.py +12 -34
  21. audio_engine/utils/__init__.py +1 -1
  22. audio_engine/utils/audio.py +1 -3
  23. atom_audio_engine-0.1.2.dist-info/RECORD +0 -57
  24. audio_engine/examples/__init__.py +0 -1
  25. audio_engine/examples/basic_stt_llm_tts.py +0 -200
  26. audio_engine/examples/geneface_animation.py +0 -99
  27. audio_engine/examples/personaplex_pipeline.py +0 -116
  28. audio_engine/examples/websocket_server.py +0 -86
  29. audio_engine/scripts/debug_pipeline.py +0 -79
  30. audio_engine/scripts/debug_tts.py +0 -162
  31. audio_engine/scripts/test_cartesia_connect.py +0 -57
  32. audio_engine/tests/__init__.py +0 -1
  33. audio_engine/tests/test_personaplex/__init__.py +0 -1
  34. audio_engine/tests/test_personaplex/test_personaplex.py +0 -10
  35. audio_engine/tests/test_personaplex/test_personaplex_client.py +0 -259
  36. audio_engine/tests/test_personaplex/test_personaplex_config.py +0 -71
  37. audio_engine/tests/test_personaplex/test_personaplex_message.py +0 -80
  38. audio_engine/tests/test_personaplex/test_personaplex_pipeline.py +0 -226
  39. audio_engine/tests/test_personaplex/test_personaplex_session.py +0 -184
  40. audio_engine/tests/test_personaplex/test_personaplex_transcript.py +0 -184
  41. audio_engine/tests/test_traditional_pipeline/__init__.py +0 -1
  42. audio_engine/tests/test_traditional_pipeline/test_cartesia_asr.py +0 -474
  43. audio_engine/tests/test_traditional_pipeline/test_config_env.py +0 -97
  44. audio_engine/tests/test_traditional_pipeline/test_conversation_context.py +0 -115
  45. audio_engine/tests/test_traditional_pipeline/test_pipeline_creation.py +0 -64
  46. audio_engine/tests/test_traditional_pipeline/test_pipeline_with_mocks.py +0 -173
  47. audio_engine/tests/test_traditional_pipeline/test_provider_factories.py +0 -61
  48. audio_engine/tests/test_traditional_pipeline/test_websocket_server.py +0 -58
  49. {atom_audio_engine-0.1.2.dist-info → atom_audio_engine-0.1.5.dist-info}/WHEEL +0 -0
  50. {atom_audio_engine-0.1.2.dist-info → atom_audio_engine-0.1.5.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atom-audio-engine
3
- Version: 0.1.2
3
+ Version: 0.1.5
4
4
  Summary: A pluggable, async-first Python framework for real-time audio-to-audio conversational AI
5
5
  Author-email: ATOM Group <info@atomgroup.ng>
6
6
  License-Expression: MIT
@@ -0,0 +1,32 @@
1
+ audio_engine/__init__.py,sha256=6AuBeSWgRhAUEMIuIpPkJaATDjB5pttdUDujWtvC-8Q,1499
2
+ audio_engine/asr/__init__.py,sha256=VlQ09fZf__4Dn2EGV3RnsOvwrKMPBemDRKNTHHnYFHY,1193
3
+ audio_engine/asr/base.py,sha256=dC6cvAlxoOzUXNz9do-ueRObmNXqg4rtsY9fh6k67vA,2380
4
+ audio_engine/asr/cartesia.py,sha256=jLoetv6jiAbcfmfGkrwKRGo5sbMZ-WJTwIVvWno1Vko,13185
5
+ audio_engine/asr/deepgram.py,sha256=RwtG0e66Y_8HKehoHFzjgJ5JlQs8JEpu_0kpWGmsvBc,6431
6
+ audio_engine/core/__init__.py,sha256=aqN1HeFZGX0xAnVyfURdExXXxxmwwZEq_KFj6MqXZwo,289
7
+ audio_engine/core/config.py,sha256=Jmp-rr0MXXejF3qtx1-CPCEwQOorggHIH5cV3t8G-I4,5205
8
+ audio_engine/core/pipeline.py,sha256=rMZOlllT32xruz3nkeoYGRfnq94zgs-dzAbTahSITtU,8808
9
+ audio_engine/core/types.py,sha256=iFQPajgeS1YgMWXJvubA8sWbxLI1Z8nF-z1uucrgNm4,2295
10
+ audio_engine/integrations/__init__.py,sha256=69Hna1pfmB929WbM7GpAHlrk4xPOleKTnoaBBksFo9k,114
11
+ audio_engine/integrations/geneface.py,sha256=JgxGYfqDk9n-p4e1VNczoEJdMPzzfF5QGsyxxinrWr8,8790
12
+ audio_engine/llm/__init__.py,sha256=kJpCYylJMsbO_oA5XFcLKBI2N-ZCOVdwUduU_jbh3po,977
13
+ audio_engine/llm/base.py,sha256=vsKi2UYuhMr_nubMsoyU6hzSV6gr3DZ1sPvqkSvap3c,2862
14
+ audio_engine/llm/groq.py,sha256=zX4z_ZPyB5_FxhMwg-MnK5Ga6vpqRJLaRTrknSYJZXU,6682
15
+ audio_engine/pipelines/__init__.py,sha256=Q1iZjX38TigrZPBaFgv_5AXw21wBN1Z-4nfXPjV-xDI,49
16
+ audio_engine/pipelines/personaplex/__init__.py,sha256=nX37MS93pYUPKiYwY2aa9G-PEI4x2yKjdLqGeab7wWI,916
17
+ audio_engine/pipelines/personaplex/client.py,sha256=NAiG6V9nTWh8ozrb5jT-6h8fesTuJZDgh-l7DlHQm6M,8667
18
+ audio_engine/pipelines/personaplex/config.py,sha256=6fBteI-HjJJl3ZcK5QZCCa9kcKVNDgPptLIkJNZc9kg,2935
19
+ audio_engine/pipelines/personaplex/pipeline.py,sha256=WUkFalPQ9sxICeFpF-58HJxzfQ30vfZ4WAs-E5aI60s,10411
20
+ audio_engine/pipelines/personaplex/types.py,sha256=6MvU2hBukBflJxat3MtC6bGQY1b33jaOIiOi2tZJRnU,4727
21
+ audio_engine/pipelines/personaplex/utils.py,sha256=um_7nGRFH0QaLIIfLwPnBXgFW0fVGU7gkjF8Gm-Hq4U,5000
22
+ audio_engine/streaming/__init__.py,sha256=0FOorloUtads4ZeJKLdlTcqaL0l2G7Byq4ijQG3W1Fk,127
23
+ audio_engine/streaming/websocket_server.py,sha256=p9_ugvXfUW0TeuCkTCTUoHCmQ0vBTGq2J2Ubys1HdeY,10896
24
+ audio_engine/tts/__init__.py,sha256=sJIN_PgwO1_GVtr6NBR8oegqzLa9oR_qqD6ixbkDVro,967
25
+ audio_engine/tts/base.py,sha256=YbvdVF9XbJyv7NWf_5W7bawyquz8z83BcNcXOdA6iaY,4424
26
+ audio_engine/tts/cartesia.py,sha256=VipeNLgOac_hlsi2hasZe_ALYZZ7hvQ72eFeY17J600,16313
27
+ audio_engine/utils/__init__.py,sha256=J-XxXjgjAmvsM39W3pKI2we-C6S7rd49zfaEf9omwN8,245
28
+ audio_engine/utils/audio.py,sha256=RHp-FRjyCMPDaSQCOnxp7m_KO2z3Enu3iX7J5BVRD-0,5507
29
+ atom_audio_engine-0.1.5.dist-info/METADATA,sha256=4326-a-wzhsbhZW-O4KRUetl6Y_Js8GcH1Mu4_7iXi4,6690
30
+ atom_audio_engine-0.1.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
31
+ atom_audio_engine-0.1.5.dist-info/top_level.txt,sha256=IyumwgFrsDL7nlZlBijX-0shiSVhhBCFPUNBRNKzWP4,13
32
+ atom_audio_engine-0.1.5.dist-info/RECORD,,
audio_engine/__init__.py CHANGED
@@ -4,7 +4,7 @@ Audio Engine - Pluggable audio-to-audio conversational AI framework.
4
4
  Orchestrates ASR → LLM → TTS pipeline with real-time streaming support.
5
5
  """
6
6
 
7
- __version__ = "0.1.0"
7
+ __version__ = "0.1.5"
8
8
 
9
9
  # Core exports
10
10
  from .core.pipeline import Pipeline
@@ -1,6 +1,6 @@
1
1
  """ASR (Speech-to-Text) providers."""
2
2
 
3
- from core.config import ASRConfig
3
+ from ..core.config import ASRConfig
4
4
 
5
5
  from .base import BaseASR
6
6
  from .deepgram import DeepgramASR
@@ -40,6 +40,5 @@ def get_asr_from_config(config: ASRConfig) -> BaseASR:
40
40
  )
41
41
  else:
42
42
  raise ValueError(
43
- f"Unknown ASR provider: {config.provider}. "
44
- f"Supported: deepgram, cartesia"
43
+ f"Unknown ASR provider: {config.provider}. " f"Supported: deepgram, cartesia"
45
44
  )
audio_engine/asr/base.py CHANGED
@@ -3,7 +3,7 @@
3
3
  from abc import ABC, abstractmethod
4
4
  from typing import AsyncIterator, Optional
5
5
 
6
- from core.types import AudioChunk, TranscriptChunk
6
+ from ..core.types import AudioChunk, TranscriptChunk
7
7
 
8
8
 
9
9
  class BaseASR(ABC):
@@ -8,7 +8,7 @@ from urllib.parse import quote
8
8
 
9
9
  import websockets
10
10
 
11
- from core.types import AudioChunk, TranscriptChunk
11
+ from ..core.types import AudioChunk, TranscriptChunk
12
12
  from .base import BaseASR
13
13
 
14
14
  logger = logging.getLogger(__name__)
@@ -135,9 +135,7 @@ class CartesiaASR(BaseASR):
135
135
  logger.debug("Cartesia WebSocket connected")
136
136
  except asyncio.TimeoutError:
137
137
  logger.error(f"WebSocket connection timeout to {url}")
138
- raise TimeoutError(
139
- "Failed to connect to Cartesia WebSocket within 30s timeout"
140
- )
138
+ raise TimeoutError("Failed to connect to Cartesia WebSocket within 30s timeout")
141
139
 
142
140
  # Start background receive task
143
141
  self._receive_task = asyncio.create_task(self._receive_loop())
@@ -232,9 +230,7 @@ class CartesiaASR(BaseASR):
232
230
  transcript_parts = []
233
231
  while True:
234
232
  try:
235
- response = await asyncio.wait_for(
236
- self._response_queue.get(), timeout=10.0
237
- )
233
+ response = await asyncio.wait_for(self._response_queue.get(), timeout=10.0)
238
234
 
239
235
  if response.get("type") == "transcript":
240
236
  text = response.get("text", "")
@@ -320,9 +316,7 @@ class CartesiaASR(BaseASR):
320
316
  if done_sent:
321
317
  while True:
322
318
  try:
323
- response = await asyncio.wait_for(
324
- self._response_queue.get(), timeout=5.0
325
- )
319
+ response = await asyncio.wait_for(self._response_queue.get(), timeout=5.0)
326
320
 
327
321
  if response.get("type") == "transcript":
328
322
  text = response.get("text", "")
@@ -5,7 +5,7 @@ from typing import AsyncIterator, Optional
5
5
 
6
6
  from deepgram import DeepgramClient
7
7
 
8
- from core.types import AudioChunk, TranscriptChunk
8
+ from ..core.types import AudioChunk, TranscriptChunk
9
9
  from .base import BaseASR
10
10
 
11
11
  logger = logging.getLogger(__name__)
@@ -1,8 +1,8 @@
1
1
  """Core pipeline and configuration."""
2
2
 
3
- from core.pipeline import Pipeline
4
- from core.config import AudioEngineConfig
5
- from core.types import AudioChunk, TranscriptChunk, ResponseChunk
3
+ from .pipeline import Pipeline
4
+ from .config import AudioEngineConfig
5
+ from .types import AudioChunk, TranscriptChunk, ResponseChunk
6
6
 
7
7
  __all__ = [
8
8
  "Pipeline",
@@ -144,10 +144,10 @@ class AudioEngineConfig:
144
144
  Raises:
145
145
  ValueError: If provider initialization fails
146
146
  """
147
- from asr import get_asr_from_config
148
- from llm import get_llm_from_config
149
- from tts import get_tts_from_config
150
- from core.pipeline import Pipeline
147
+ from ..asr import get_asr_from_config
148
+ from ..llm import get_llm_from_config
149
+ from ..tts import get_tts_from_config
150
+ from .pipeline import Pipeline
151
151
 
152
152
  asr = get_asr_from_config(self.asr)
153
153
  llm = get_llm_from_config(self.llm)
@@ -5,10 +5,10 @@ import logging
5
5
  import time
6
6
  from typing import AsyncIterator, Optional, Callable, Any
7
7
 
8
- from asr.base import BaseASR
9
- from llm.base import BaseLLM
10
- from tts.base import BaseTTS
11
- from core.types import (
8
+ from ..asr.base import BaseASR
9
+ from ..llm.base import BaseLLM
10
+ from ..tts.base import BaseTTS
11
+ from .types import (
12
12
  AudioChunk,
13
13
  TranscriptChunk,
14
14
  ResponseChunk,
@@ -165,9 +165,7 @@ class Pipeline:
165
165
 
166
166
  return response_audio
167
167
 
168
- async def stream(
169
- self, audio_stream: AsyncIterator[AudioChunk]
170
- ) -> AsyncIterator[AudioChunk]:
168
+ async def stream(self, audio_stream: AsyncIterator[AudioChunk]) -> AsyncIterator[AudioChunk]:
171
169
  """
172
170
  Process streaming audio input and yield streaming audio output.
173
171
 
@@ -208,9 +206,7 @@ class Pipeline:
208
206
 
209
207
  async def llm_text_stream() -> AsyncIterator[str]:
210
208
  nonlocal response_buffer
211
- async for chunk in self.llm.generate_stream(
212
- transcript_buffer, self.context
213
- ):
209
+ async for chunk in self.llm.generate_stream(transcript_buffer, self.context):
214
210
  response_buffer += chunk.text
215
211
  yield chunk.text
216
212
  if chunk.is_final:
@@ -1,5 +1,5 @@
1
1
  """External system integrations."""
2
2
 
3
- from integrations.geneface import GeneFaceIntegration
3
+ from .geneface import GeneFaceIntegration
4
4
 
5
5
  __all__ = ["GeneFaceIntegration"]
@@ -8,7 +8,7 @@ from pathlib import Path
8
8
  from typing import Optional, AsyncIterator
9
9
  from dataclasses import dataclass
10
10
 
11
- from core.types import AudioChunk
11
+ from ..core.types import AudioChunk
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -1,6 +1,6 @@
1
1
  """LLM (Large Language Model) providers."""
2
2
 
3
- from core.config import LLMConfig
3
+ from ..core.config import LLMConfig
4
4
 
5
5
  from .base import BaseLLM
6
6
  from .groq import GroqLLM
@@ -33,6 +33,4 @@ def get_llm_from_config(config: LLMConfig) -> BaseLLM:
33
33
  **config.extra,
34
34
  )
35
35
  else:
36
- raise ValueError(
37
- f"Unknown LLM provider: {config.provider}. " f"Supported: groq"
38
- )
36
+ raise ValueError(f"Unknown LLM provider: {config.provider}. " f"Supported: groq")
audio_engine/llm/base.py CHANGED
@@ -3,7 +3,7 @@
3
3
  from abc import ABC, abstractmethod
4
4
  from typing import AsyncIterator, Optional
5
5
 
6
- from core.types import ResponseChunk, ConversationContext
6
+ from ..core.types import ResponseChunk, ConversationContext
7
7
 
8
8
 
9
9
  class BaseLLM(ABC):
@@ -21,7 +21,7 @@ class BaseLLM(ABC):
21
21
  temperature: float = 0.7,
22
22
  max_tokens: int = 1024,
23
23
  system_prompt: Optional[str] = None,
24
- **kwargs
24
+ **kwargs,
25
25
  ):
26
26
  """
27
27
  Initialize the LLM provider.
@@ -42,9 +42,7 @@ class BaseLLM(ABC):
42
42
  self.config = kwargs
43
43
 
44
44
  @abstractmethod
45
- async def generate(
46
- self, prompt: str, context: Optional[ConversationContext] = None
47
- ) -> str:
45
+ async def generate(self, prompt: str, context: Optional[ConversationContext] = None) -> str:
48
46
  """
49
47
  Generate a complete response to a prompt.
50
48
 
audio_engine/llm/groq.py CHANGED
@@ -5,7 +5,7 @@ from typing import AsyncIterator, Optional
5
5
 
6
6
  from groq import Groq
7
7
 
8
- from core.types import ResponseChunk, ConversationContext
8
+ from ..core.types import ResponseChunk, ConversationContext
9
9
  from .base import BaseLLM
10
10
 
11
11
  logger = logging.getLogger(__name__)
@@ -85,9 +85,7 @@ class GroqLLM(BaseLLM):
85
85
  except Exception as e:
86
86
  logger.error(f"Error disconnecting Groq: {e}")
87
87
 
88
- async def generate(
89
- self, prompt: str, context: Optional[ConversationContext] = None
90
- ) -> str:
88
+ async def generate(self, prompt: str, context: Optional[ConversationContext] = None) -> str:
91
89
  """
92
90
  Generate a complete response to a prompt.
93
91
 
@@ -1,5 +1,5 @@
1
1
  """Streaming and WebSocket server components."""
2
2
 
3
- from streaming.websocket_server import WebSocketServer
3
+ from .websocket_server import WebSocketServer
4
4
 
5
5
  __all__ = ["WebSocketServer"]
@@ -7,9 +7,9 @@ from typing import Optional, Callable, Any
7
7
 
8
8
  import websockets
9
9
 
10
- from core.pipeline import Pipeline
11
- from core.types import AudioChunk, AudioFormat
12
- from core.config import AudioEngineConfig
10
+ from ..core.pipeline import Pipeline
11
+ from ..core.types import AudioChunk, AudioFormat
12
+ from ..core.config import AudioEngineConfig
13
13
 
14
14
  logger = logging.getLogger(__name__)
15
15
 
@@ -62,9 +62,7 @@ class WebSocketServer:
62
62
  on_disconnect: Callback when client disconnects
63
63
  """
64
64
  if websockets is None:
65
- raise ImportError(
66
- "websockets package required. Install with: pip install websockets"
67
- )
65
+ raise ImportError("websockets package required. Install with: pip install websockets")
68
66
 
69
67
  self.pipeline = pipeline
70
68
  self.host = host
@@ -137,9 +135,7 @@ class WebSocketServer:
137
135
  if self.on_disconnect:
138
136
  self.on_disconnect(client_id)
139
137
 
140
- async def _process_client_stream(
141
- self, websocket: WebSocketServerProtocol, client_id: str
142
- ):
138
+ async def _process_client_stream(self, websocket: WebSocketServerProtocol, client_id: str):
143
139
  """Process streaming audio from a client."""
144
140
  audio_queue: asyncio.Queue[AudioChunk] = asyncio.Queue()
145
141
  end_of_speech = asyncio.Event()
@@ -231,12 +227,8 @@ class WebSocketServer:
231
227
  original_on_llm_response(text)
232
228
 
233
229
  # Temporarily override callbacks
234
- self.pipeline.on_transcript = lambda t: asyncio.create_task(
235
- send_transcript(t)
236
- )
237
- self.pipeline.on_llm_response = lambda t: asyncio.create_task(
238
- send_llm_response(t)
239
- )
230
+ self.pipeline.on_transcript = lambda t: asyncio.create_task(send_transcript(t))
231
+ self.pipeline.on_llm_response = lambda t: asyncio.create_task(send_llm_response(t))
240
232
 
241
233
  try:
242
234
  # Wait for some audio to arrive
@@ -1,6 +1,6 @@
1
1
  """TTS (Text-to-Speech) providers."""
2
2
 
3
- from core.config import TTSConfig
3
+ from ..core.config import TTSConfig
4
4
 
5
5
  from .base import BaseTTS
6
6
  from .cartesia import CartesiaTTS
@@ -32,6 +32,4 @@ def get_tts_from_config(config: TTSConfig) -> BaseTTS:
32
32
  **config.extra,
33
33
  )
34
34
  else:
35
- raise ValueError(
36
- f"Unknown TTS provider: {config.provider}. " f"Supported: cartesia"
37
- )
35
+ raise ValueError(f"Unknown TTS provider: {config.provider}. " f"Supported: cartesia")
audio_engine/tts/base.py CHANGED
@@ -3,7 +3,7 @@
3
3
  from abc import ABC, abstractmethod
4
4
  from typing import AsyncIterator, Optional
5
5
 
6
- from core.types import AudioChunk, AudioFormat
6
+ from ..core.types import AudioChunk, AudioFormat
7
7
 
8
8
 
9
9
  class BaseTTS(ABC):
@@ -21,7 +21,7 @@ class BaseTTS(ABC):
21
21
  model: Optional[str] = None,
22
22
  speed: float = 1.0,
23
23
  output_format: AudioFormat = AudioFormat.PCM_24K,
24
- **kwargs
24
+ **kwargs,
25
25
  ):
26
26
  """
27
27
  Initialize the TTS provider.
@@ -97,9 +97,7 @@ class BaseTTS(ABC):
97
97
  sentence = parts[0] + ender
98
98
 
99
99
  if sentence.strip():
100
- async for audio_chunk in self.synthesize_stream(
101
- sentence.strip()
102
- ):
100
+ async for audio_chunk in self.synthesize_stream(sentence.strip()):
103
101
  yield audio_chunk
104
102
 
105
103
  buffer = parts[1] if len(parts) > 1 else ""
@@ -9,7 +9,7 @@ from typing import AsyncIterator, Optional
9
9
  import websockets
10
10
 
11
11
 
12
- from core.types import AudioChunk, AudioFormat
12
+ from ..core.types import AudioChunk, AudioFormat
13
13
  from .base import BaseTTS
14
14
 
15
15
  logger = logging.getLogger(__name__)
@@ -138,9 +138,7 @@ class CartesiaTTS(BaseTTS):
138
138
  AudioChunk objects with audio data
139
139
  """
140
140
  if websockets is None:
141
- raise ImportError(
142
- "websockets package required. Install: pip install websockets"
143
- )
141
+ raise ImportError("websockets package required. Install: pip install websockets")
144
142
 
145
143
  if not self.api_key:
146
144
  raise ValueError("api_key required for Cartesia TTS")
@@ -151,16 +149,12 @@ class CartesiaTTS(BaseTTS):
151
149
  context_id = str(uuid.uuid4())
152
150
 
153
151
  ws_url = (
154
- f"{self.WS_URL}"
155
- f"?api_key={self.api_key}"
156
- f"&cartesia_version={self.CARTESIA_VERSION}"
152
+ f"{self.WS_URL}" f"?api_key={self.api_key}" f"&cartesia_version={self.CARTESIA_VERSION}"
157
153
  )
158
154
 
159
155
  try:
160
156
  async with websockets.connect(ws_url) as websocket:
161
- logger.debug(
162
- f"Cartesia TTS WebSocket connected | Context: {context_id}"
163
- )
157
+ logger.debug(f"Cartesia TTS WebSocket connected | Context: {context_id}")
164
158
 
165
159
  # Task to receive audio from WebSocket
166
160
  async def receive_audio():
@@ -175,9 +169,7 @@ class CartesiaTTS(BaseTTS):
175
169
  f"Cartesia: received response type={response.get('type')}"
176
170
  )
177
171
  # Handle audio chunk (base64 in "data" field)
178
- if response.get("type") == "chunk" and response.get(
179
- "data"
180
- ):
172
+ if response.get("type") == "chunk" and response.get("data"):
181
173
  audio_bytes = base64.b64decode(response["data"])
182
174
  yield audio_bytes
183
175
  logger.debug(
@@ -199,17 +191,13 @@ class CartesiaTTS(BaseTTS):
199
191
  or str(response)
200
192
  )
201
193
  logger.error(f"Cartesia TTS error: {error_msg}")
202
- raise RuntimeError(
203
- f"Cartesia API error: {error_msg}"
204
- )
194
+ raise RuntimeError(f"Cartesia API error: {error_msg}")
205
195
  else:
206
196
  logger.debug(
207
197
  f"Cartesia: response type {response.get('type')}"
208
198
  )
209
199
  except json.JSONDecodeError:
210
- logger.warning(
211
- f"Failed to parse Cartesia response: {message}"
212
- )
200
+ logger.warning(f"Failed to parse Cartesia response: {message}")
213
201
  except Exception as e:
214
202
  logger.error(f"Cartesia receive error: {e}", exc_info=True)
215
203
  raise
@@ -260,9 +248,7 @@ class CartesiaTTS(BaseTTS):
260
248
  },
261
249
  }
262
250
  await websocket.send(json.dumps(request))
263
- logger.debug(
264
- f"Cartesia: sent text on timeout (continue=true)"
265
- )
251
+ logger.debug(f"Cartesia: sent text on timeout (continue=true)")
266
252
  accumulated_text = ""
267
253
  continue
268
254
 
@@ -287,9 +273,7 @@ class CartesiaTTS(BaseTTS):
287
273
  },
288
274
  }
289
275
  await websocket.send(json.dumps(request))
290
- logger.debug(
291
- f"Cartesia: sent final text (continue=false)"
292
- )
276
+ logger.debug(f"Cartesia: sent final text (continue=false)")
293
277
  else:
294
278
  # Send empty transcript to signal end
295
279
  request = {
@@ -309,9 +293,7 @@ class CartesiaTTS(BaseTTS):
309
293
  },
310
294
  }
311
295
  await websocket.send(json.dumps(request))
312
- logger.debug(
313
- "Cartesia: sent empty transcript to signal end"
314
- )
296
+ logger.debug("Cartesia: sent empty transcript to signal end")
315
297
  logger.info("Cartesia: all text sent")
316
298
  break
317
299
 
@@ -322,9 +304,7 @@ class CartesiaTTS(BaseTTS):
322
304
  )
323
305
 
324
306
  # Send when buffer is large enough or ends with punctuation
325
- if len(accumulated_text) > 30 or token.endswith(
326
- (".", "!", "?")
327
- ):
307
+ if len(accumulated_text) > 30 or token.endswith((".", "!", "?")):
328
308
  request = {
329
309
  "model_id": self.model,
330
310
  "transcript": accumulated_text,
@@ -342,9 +322,7 @@ class CartesiaTTS(BaseTTS):
342
322
  },
343
323
  }
344
324
  await websocket.send(json.dumps(request))
345
- logger.debug(
346
- f"Cartesia: sent buffered text (continue=true)"
347
- )
325
+ logger.debug(f"Cartesia: sent buffered text (continue=true)")
348
326
  accumulated_text = ""
349
327
 
350
328
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  """Utility functions for the audio engine."""
2
2
 
3
- from utils.audio import (
3
+ from .audio import (
4
4
  resample_audio,
5
5
  pcm_to_wav,
6
6
  wav_to_pcm,
@@ -58,9 +58,7 @@ def _simple_resample(
58
58
  """Simple linear interpolation resampling."""
59
59
  if sample_width == 2:
60
60
  fmt = "<h"
61
- samples = [
62
- struct.unpack(fmt, audio[i : i + 2])[0] for i in range(0, len(audio), 2)
63
- ]
61
+ samples = [struct.unpack(fmt, audio[i : i + 2])[0] for i in range(0, len(audio), 2)]
64
62
  else:
65
63
  raise ValueError(f"Unsupported sample width: {sample_width}")
66
64
 
@@ -1,57 +0,0 @@
1
- audio_engine/__init__.py,sha256=AQ0uto-Jn3cNqW35MMtSyX5mhXJMFv9AQhjcAkqZ7L4,1499
2
- audio_engine/asr/__init__.py,sha256=w0t2ahxgApZbZjSc748tN3tmKDeXzasfBh51ZjPF9uc,1203
3
- audio_engine/asr/base.py,sha256=MFC_7HmyEDnhDwUn62CWZsiF9_-mBVVsUK-Yppiq4Vk,2378
4
- audio_engine/asr/cartesia.py,sha256=BXnvscO9VaR3LsfEGn7lJ66udzUjz44JzZTmSizZqIg,13321
5
- audio_engine/asr/deepgram.py,sha256=M59lgrVFMS6-3YQcYaUY7cUdt2-MBptt_VExdfnSXr0,6429
6
- audio_engine/core/__init__.py,sha256=7naTEkqDjrPsejviXk662OR86xVCyckU7eMKVpjwYys,301
7
- audio_engine/core/config.py,sha256=EF98O2Gt8q29FX3T6UeDwWNIbm77bni99SThiJKl5Tk,5203
8
- audio_engine/core/pipeline.py,sha256=jX9jAlIfwU6V8GjqjivyK8Y7P41S-QS8xKYv5c9_qG0,8850
9
- audio_engine/core/types.py,sha256=iFQPajgeS1YgMWXJvubA8sWbxLI1Z8nF-z1uucrgNm4,2295
10
- audio_engine/examples/__init__.py,sha256=4oFCZaD-vg0o48hnj03ZsktG2JrtwJ7HXUYOwEYSNCY,44
11
- audio_engine/examples/basic_stt_llm_tts.py,sha256=tw8IIAL0WSG2M9U5SuLri75AOb7YM-twvAVAspaYVQM,6354
12
- audio_engine/examples/geneface_animation.py,sha256=ogjQAqPHT5EW6X3R8hn0tJwj-_QBbPiBFDZDl_olTGo,2945
13
- audio_engine/examples/personaplex_pipeline.py,sha256=OcpN8i5qoAS3Nmuc62tESzpRwPxsjxTGTrY_qICLETo,3641
14
- audio_engine/examples/websocket_server.py,sha256=HhTlAFnJQXJyOs_prwFJASuh6h-0FKEh2JGeJSChf_c,2398
15
- audio_engine/integrations/__init__.py,sha256=1y4CTaqybOwmfk_xxkWANYkc-A7PgH0JFMZCTq33fe4,126
16
- audio_engine/integrations/geneface.py,sha256=2oeVZazp2R9gN-YmQhzzrZb87CBpEiAyKA8hHUxUZJk,8788
17
- audio_engine/llm/__init__.py,sha256=mwr0C1E1Wf5589fVt7emOFMA2fHoXxQ5t-3dOxkXQEI,997
18
- audio_engine/llm/base.py,sha256=C-ZNOab0Ca-vlxWgnPzB8uZXFNYbPgAYfQLNvaal2KU,2873
19
- audio_engine/llm/groq.py,sha256=oGSjJBW0TiCmOzzl1HTE8zUhPC78I3ywhAYFq7Te2IA,6694
20
- audio_engine/pipelines/__init__.py,sha256=Q1iZjX38TigrZPBaFgv_5AXw21wBN1Z-4nfXPjV-xDI,49
21
- audio_engine/pipelines/personaplex/__init__.py,sha256=nX37MS93pYUPKiYwY2aa9G-PEI4x2yKjdLqGeab7wWI,916
22
- audio_engine/pipelines/personaplex/client.py,sha256=NAiG6V9nTWh8ozrb5jT-6h8fesTuJZDgh-l7DlHQm6M,8667
23
- audio_engine/pipelines/personaplex/config.py,sha256=6fBteI-HjJJl3ZcK5QZCCa9kcKVNDgPptLIkJNZc9kg,2935
24
- audio_engine/pipelines/personaplex/pipeline.py,sha256=WUkFalPQ9sxICeFpF-58HJxzfQ30vfZ4WAs-E5aI60s,10411
25
- audio_engine/pipelines/personaplex/types.py,sha256=6MvU2hBukBflJxat3MtC6bGQY1b33jaOIiOi2tZJRnU,4727
26
- audio_engine/pipelines/personaplex/utils.py,sha256=um_7nGRFH0QaLIIfLwPnBXgFW0fVGU7gkjF8Gm-Hq4U,5000
27
- audio_engine/scripts/debug_pipeline.py,sha256=HkrrVzimrmFsbltbEPKoAuJ_5yzBWBCWyrEH0_ZHOQM,2276
28
- audio_engine/scripts/debug_tts.py,sha256=Aj-vW8kmcR7lDa2FdTn1_6wrFw1vpP8Kjnh1rLwQ_ag,4479
29
- audio_engine/scripts/test_cartesia_connect.py,sha256=KoaBWxmfzdMBqpnDXwT2fFzAJsJlKg3hMsUYvAeU-L8,1529
30
- audio_engine/streaming/__init__.py,sha256=Pd_ICcYeW75DXMsFpMrJnn9N-RU5s1_Wb3WZ3YbOTC4,136
31
- audio_engine/streaming/websocket_server.py,sha256=miqHoVkUjznpmpQQrgkyaURR6DsDJLzkP_OGrBFOBYk,10994
32
- audio_engine/tests/__init__.py,sha256=1JoGYWcW0zfdTZAgxs7NZaK4Zo0zlvq79dXzVwKMP3I,34
33
- audio_engine/tests/test_personaplex/__init__.py,sha256=1JoGYWcW0zfdTZAgxs7NZaK4Zo0zlvq79dXzVwKMP3I,34
34
- audio_engine/tests/test_personaplex/test_personaplex.py,sha256=BrYWbWmWqlzdK3H5YZtpLr4DxtK5UeLpbdwUabuUTnE,457
35
- audio_engine/tests/test_personaplex/test_personaplex_client.py,sha256=RlGNHa-IcKC7CCiTQJDhUYN9HNMun7Q45AsFSu5swZ8,8377
36
- audio_engine/tests/test_personaplex/test_personaplex_config.py,sha256=c-86tJ81NSfPOk8tIV_JfDn3IcJnFrgCHVqJGyw14lM,2487
37
- audio_engine/tests/test_personaplex/test_personaplex_message.py,sha256=6gAbQUk954x4-PXkFdNb0GadxuJIJ49tRixPteFCiw4,2636
38
- audio_engine/tests/test_personaplex/test_personaplex_pipeline.py,sha256=GCvNRgUN72d81RK0klc3z5ecBhBMgf4rJXgq5auXv6M,7424
39
- audio_engine/tests/test_personaplex/test_personaplex_session.py,sha256=pF2s649MAh0TlRs4ooQBCExN-VSuc_DntknyfLw8Pxw,5780
40
- audio_engine/tests/test_personaplex/test_personaplex_transcript.py,sha256=XdNAghb1Gjg68BBcj6BPt-1K-6rzS9gD3tufnp8vVPo,6400
41
- audio_engine/tests/test_traditional_pipeline/__init__.py,sha256=1JoGYWcW0zfdTZAgxs7NZaK4Zo0zlvq79dXzVwKMP3I,34
42
- audio_engine/tests/test_traditional_pipeline/test_cartesia_asr.py,sha256=rLM_7s-UQJEJGL98A8ewXrgckruog6ei-lFtpPetIkk,15353
43
- audio_engine/tests/test_traditional_pipeline/test_config_env.py,sha256=pZd0doTKzZg7e_ZwEKLe3pfmZTBdXIlrO1-CUU1lPmc,3192
44
- audio_engine/tests/test_traditional_pipeline/test_conversation_context.py,sha256=t6lk_5QwGE1CfU1RIAGVIB6d6flfoqVLNgPYs-aE1PA,4049
45
- audio_engine/tests/test_traditional_pipeline/test_pipeline_creation.py,sha256=U8s4vc36JU79YTFVyv7HQlFN3Hj2KRfh-gWQKhsjiSA,2278
46
- audio_engine/tests/test_traditional_pipeline/test_pipeline_with_mocks.py,sha256=N5ajn2QevssnP0xEBeR87FumT0w1j7BdVAiMmBLqL2A,5583
47
- audio_engine/tests/test_traditional_pipeline/test_provider_factories.py,sha256=a9Da5wjhXV6-E_Q7E8AquKxbcTKAhjd2eVKUGBj3zpo,2240
48
- audio_engine/tests/test_traditional_pipeline/test_websocket_server.py,sha256=InR8GCRiRW09zJk9Htx6YQE_--_KJhpEJCuCs_lJjKE,1936
49
- audio_engine/tts/__init__.py,sha256=85XrpIkxFrRvOn19mWphkeBjTaEcsrFECYK_ZoGv1dQ,987
50
- audio_engine/tts/base.py,sha256=vo0MSiep9QJQtpdCmDJWN-okK-ERYRA6Sk_g6IXCYZk,4475
51
- audio_engine/tts/cartesia.py,sha256=bxhkNbWpQmlPTZ8RWcVCQzG_Q2mYr3t1aAd9OonSSWQ,17011
52
- audio_engine/utils/__init__.py,sha256=WIeVykg3MqyOoCYEWsuzGyVniP8SIl9FE881ieR7WuE,250
53
- audio_engine/utils/audio.py,sha256=Z7avyNqhzZ2fnBxZ_d0qUglOCCvHSffBveg5CQWTCM0,5529
54
- atom_audio_engine-0.1.2.dist-info/METADATA,sha256=l8ztaq4vAmVNT4qg1mHhJW7R2sjTHs1BJsjTPpM108w,6690
55
- atom_audio_engine-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
56
- atom_audio_engine-0.1.2.dist-info/top_level.txt,sha256=IyumwgFrsDL7nlZlBijX-0shiSVhhBCFPUNBRNKzWP4,13
57
- atom_audio_engine-0.1.2.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- """Example scripts for the audio engine."""