atom-audio-engine 0.1.2__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/PKG-INFO +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/atom_audio_engine.egg-info/PKG-INFO +1 -1
- atom_audio_engine-0.1.5/atom_audio_engine.egg-info/SOURCES.txt +36 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/__init__.py +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/asr/__init__.py +2 -3
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/asr/base.py +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/asr/cartesia.py +4 -10
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/asr/deepgram.py +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/core/__init__.py +3 -3
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/core/config.py +4 -4
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/core/pipeline.py +6 -10
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/integrations/__init__.py +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/integrations/geneface.py +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/llm/__init__.py +2 -4
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/llm/base.py +3 -5
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/llm/groq.py +2 -4
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/streaming/__init__.py +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/streaming/websocket_server.py +7 -15
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/tts/__init__.py +2 -4
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/tts/base.py +3 -5
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/tts/cartesia.py +12 -34
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/utils/__init__.py +1 -1
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/utils/audio.py +1 -3
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/pyproject.toml +1 -6
- atom_audio_engine-0.1.2/atom_audio_engine.egg-info/SOURCES.txt +0 -61
- atom_audio_engine-0.1.2/audio_engine/examples/__init__.py +0 -1
- atom_audio_engine-0.1.2/audio_engine/examples/basic_stt_llm_tts.py +0 -200
- atom_audio_engine-0.1.2/audio_engine/examples/geneface_animation.py +0 -99
- atom_audio_engine-0.1.2/audio_engine/examples/personaplex_pipeline.py +0 -116
- atom_audio_engine-0.1.2/audio_engine/examples/websocket_server.py +0 -86
- atom_audio_engine-0.1.2/audio_engine/scripts/debug_pipeline.py +0 -79
- atom_audio_engine-0.1.2/audio_engine/scripts/debug_tts.py +0 -162
- atom_audio_engine-0.1.2/audio_engine/scripts/test_cartesia_connect.py +0 -57
- atom_audio_engine-0.1.2/audio_engine/tests/__init__.py +0 -1
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/__init__.py +0 -1
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/test_personaplex.py +0 -10
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/test_personaplex_client.py +0 -259
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/test_personaplex_config.py +0 -71
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/test_personaplex_message.py +0 -80
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/test_personaplex_pipeline.py +0 -226
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/test_personaplex_session.py +0 -184
- atom_audio_engine-0.1.2/audio_engine/tests/test_personaplex/test_personaplex_transcript.py +0 -184
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/__init__.py +0 -1
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/test_cartesia_asr.py +0 -474
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/test_config_env.py +0 -97
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/test_conversation_context.py +0 -115
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/test_pipeline_creation.py +0 -64
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/test_pipeline_with_mocks.py +0 -173
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/test_provider_factories.py +0 -61
- atom_audio_engine-0.1.2/audio_engine/tests/test_traditional_pipeline/test_websocket_server.py +0 -58
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/README.md +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/atom_audio_engine.egg-info/dependency_links.txt +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/atom_audio_engine.egg-info/requires.txt +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/atom_audio_engine.egg-info/top_level.txt +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/core/types.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/pipelines/__init__.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/pipelines/personaplex/__init__.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/pipelines/personaplex/client.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/pipelines/personaplex/config.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/pipelines/personaplex/pipeline.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/pipelines/personaplex/types.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/pipelines/personaplex/utils.py +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/setup.cfg +0 -0
- {atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/setup.py +0 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
setup.py
|
|
4
|
+
atom_audio_engine.egg-info/PKG-INFO
|
|
5
|
+
atom_audio_engine.egg-info/SOURCES.txt
|
|
6
|
+
atom_audio_engine.egg-info/dependency_links.txt
|
|
7
|
+
atom_audio_engine.egg-info/requires.txt
|
|
8
|
+
atom_audio_engine.egg-info/top_level.txt
|
|
9
|
+
audio_engine/__init__.py
|
|
10
|
+
audio_engine/asr/__init__.py
|
|
11
|
+
audio_engine/asr/base.py
|
|
12
|
+
audio_engine/asr/cartesia.py
|
|
13
|
+
audio_engine/asr/deepgram.py
|
|
14
|
+
audio_engine/core/__init__.py
|
|
15
|
+
audio_engine/core/config.py
|
|
16
|
+
audio_engine/core/pipeline.py
|
|
17
|
+
audio_engine/core/types.py
|
|
18
|
+
audio_engine/integrations/__init__.py
|
|
19
|
+
audio_engine/integrations/geneface.py
|
|
20
|
+
audio_engine/llm/__init__.py
|
|
21
|
+
audio_engine/llm/base.py
|
|
22
|
+
audio_engine/llm/groq.py
|
|
23
|
+
audio_engine/pipelines/__init__.py
|
|
24
|
+
audio_engine/pipelines/personaplex/__init__.py
|
|
25
|
+
audio_engine/pipelines/personaplex/client.py
|
|
26
|
+
audio_engine/pipelines/personaplex/config.py
|
|
27
|
+
audio_engine/pipelines/personaplex/pipeline.py
|
|
28
|
+
audio_engine/pipelines/personaplex/types.py
|
|
29
|
+
audio_engine/pipelines/personaplex/utils.py
|
|
30
|
+
audio_engine/streaming/__init__.py
|
|
31
|
+
audio_engine/streaming/websocket_server.py
|
|
32
|
+
audio_engine/tts/__init__.py
|
|
33
|
+
audio_engine/tts/base.py
|
|
34
|
+
audio_engine/tts/cartesia.py
|
|
35
|
+
audio_engine/utils/__init__.py
|
|
36
|
+
audio_engine/utils/audio.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""ASR (Speech-to-Text) providers."""
|
|
2
2
|
|
|
3
|
-
from core.config import ASRConfig
|
|
3
|
+
from ..core.config import ASRConfig
|
|
4
4
|
|
|
5
5
|
from .base import BaseASR
|
|
6
6
|
from .deepgram import DeepgramASR
|
|
@@ -40,6 +40,5 @@ def get_asr_from_config(config: ASRConfig) -> BaseASR:
|
|
|
40
40
|
)
|
|
41
41
|
else:
|
|
42
42
|
raise ValueError(
|
|
43
|
-
f"Unknown ASR provider: {config.provider}. "
|
|
44
|
-
f"Supported: deepgram, cartesia"
|
|
43
|
+
f"Unknown ASR provider: {config.provider}. " f"Supported: deepgram, cartesia"
|
|
45
44
|
)
|
|
@@ -8,7 +8,7 @@ from urllib.parse import quote
|
|
|
8
8
|
|
|
9
9
|
import websockets
|
|
10
10
|
|
|
11
|
-
from core.types import AudioChunk, TranscriptChunk
|
|
11
|
+
from ..core.types import AudioChunk, TranscriptChunk
|
|
12
12
|
from .base import BaseASR
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
@@ -135,9 +135,7 @@ class CartesiaASR(BaseASR):
|
|
|
135
135
|
logger.debug("Cartesia WebSocket connected")
|
|
136
136
|
except asyncio.TimeoutError:
|
|
137
137
|
logger.error(f"WebSocket connection timeout to {url}")
|
|
138
|
-
raise TimeoutError(
|
|
139
|
-
"Failed to connect to Cartesia WebSocket within 30s timeout"
|
|
140
|
-
)
|
|
138
|
+
raise TimeoutError("Failed to connect to Cartesia WebSocket within 30s timeout")
|
|
141
139
|
|
|
142
140
|
# Start background receive task
|
|
143
141
|
self._receive_task = asyncio.create_task(self._receive_loop())
|
|
@@ -232,9 +230,7 @@ class CartesiaASR(BaseASR):
|
|
|
232
230
|
transcript_parts = []
|
|
233
231
|
while True:
|
|
234
232
|
try:
|
|
235
|
-
response = await asyncio.wait_for(
|
|
236
|
-
self._response_queue.get(), timeout=10.0
|
|
237
|
-
)
|
|
233
|
+
response = await asyncio.wait_for(self._response_queue.get(), timeout=10.0)
|
|
238
234
|
|
|
239
235
|
if response.get("type") == "transcript":
|
|
240
236
|
text = response.get("text", "")
|
|
@@ -320,9 +316,7 @@ class CartesiaASR(BaseASR):
|
|
|
320
316
|
if done_sent:
|
|
321
317
|
while True:
|
|
322
318
|
try:
|
|
323
|
-
response = await asyncio.wait_for(
|
|
324
|
-
self._response_queue.get(), timeout=5.0
|
|
325
|
-
)
|
|
319
|
+
response = await asyncio.wait_for(self._response_queue.get(), timeout=5.0)
|
|
326
320
|
|
|
327
321
|
if response.get("type") == "transcript":
|
|
328
322
|
text = response.get("text", "")
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"""Core pipeline and configuration."""
|
|
2
2
|
|
|
3
|
-
from core.pipeline import Pipeline
|
|
4
|
-
from core.config import AudioEngineConfig
|
|
5
|
-
from core.types import AudioChunk, TranscriptChunk, ResponseChunk
|
|
3
|
+
from .pipeline import Pipeline
|
|
4
|
+
from .config import AudioEngineConfig
|
|
5
|
+
from .types import AudioChunk, TranscriptChunk, ResponseChunk
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
8
|
"Pipeline",
|
|
@@ -144,10 +144,10 @@ class AudioEngineConfig:
|
|
|
144
144
|
Raises:
|
|
145
145
|
ValueError: If provider initialization fails
|
|
146
146
|
"""
|
|
147
|
-
from asr import get_asr_from_config
|
|
148
|
-
from llm import get_llm_from_config
|
|
149
|
-
from tts import get_tts_from_config
|
|
150
|
-
from core.pipeline import Pipeline
|
|
147
|
+
from ..asr import get_asr_from_config
|
|
148
|
+
from ..llm import get_llm_from_config
|
|
149
|
+
from ..tts import get_tts_from_config
|
|
150
|
+
from .pipeline import Pipeline
|
|
151
151
|
|
|
152
152
|
asr = get_asr_from_config(self.asr)
|
|
153
153
|
llm = get_llm_from_config(self.llm)
|
|
@@ -5,10 +5,10 @@ import logging
|
|
|
5
5
|
import time
|
|
6
6
|
from typing import AsyncIterator, Optional, Callable, Any
|
|
7
7
|
|
|
8
|
-
from asr.base import BaseASR
|
|
9
|
-
from llm.base import BaseLLM
|
|
10
|
-
from tts.base import BaseTTS
|
|
11
|
-
from core.types import (
|
|
8
|
+
from ..asr.base import BaseASR
|
|
9
|
+
from ..llm.base import BaseLLM
|
|
10
|
+
from ..tts.base import BaseTTS
|
|
11
|
+
from .types import (
|
|
12
12
|
AudioChunk,
|
|
13
13
|
TranscriptChunk,
|
|
14
14
|
ResponseChunk,
|
|
@@ -165,9 +165,7 @@ class Pipeline:
|
|
|
165
165
|
|
|
166
166
|
return response_audio
|
|
167
167
|
|
|
168
|
-
async def stream(
|
|
169
|
-
self, audio_stream: AsyncIterator[AudioChunk]
|
|
170
|
-
) -> AsyncIterator[AudioChunk]:
|
|
168
|
+
async def stream(self, audio_stream: AsyncIterator[AudioChunk]) -> AsyncIterator[AudioChunk]:
|
|
171
169
|
"""
|
|
172
170
|
Process streaming audio input and yield streaming audio output.
|
|
173
171
|
|
|
@@ -208,9 +206,7 @@ class Pipeline:
|
|
|
208
206
|
|
|
209
207
|
async def llm_text_stream() -> AsyncIterator[str]:
|
|
210
208
|
nonlocal response_buffer
|
|
211
|
-
async for chunk in self.llm.generate_stream(
|
|
212
|
-
transcript_buffer, self.context
|
|
213
|
-
):
|
|
209
|
+
async for chunk in self.llm.generate_stream(transcript_buffer, self.context):
|
|
214
210
|
response_buffer += chunk.text
|
|
215
211
|
yield chunk.text
|
|
216
212
|
if chunk.is_final:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""LLM (Large Language Model) providers."""
|
|
2
2
|
|
|
3
|
-
from core.config import LLMConfig
|
|
3
|
+
from ..core.config import LLMConfig
|
|
4
4
|
|
|
5
5
|
from .base import BaseLLM
|
|
6
6
|
from .groq import GroqLLM
|
|
@@ -33,6 +33,4 @@ def get_llm_from_config(config: LLMConfig) -> BaseLLM:
|
|
|
33
33
|
**config.extra,
|
|
34
34
|
)
|
|
35
35
|
else:
|
|
36
|
-
raise ValueError(
|
|
37
|
-
f"Unknown LLM provider: {config.provider}. " f"Supported: groq"
|
|
38
|
-
)
|
|
36
|
+
raise ValueError(f"Unknown LLM provider: {config.provider}. " f"Supported: groq")
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from typing import AsyncIterator, Optional
|
|
5
5
|
|
|
6
|
-
from core.types import ResponseChunk, ConversationContext
|
|
6
|
+
from ..core.types import ResponseChunk, ConversationContext
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class BaseLLM(ABC):
|
|
@@ -21,7 +21,7 @@ class BaseLLM(ABC):
|
|
|
21
21
|
temperature: float = 0.7,
|
|
22
22
|
max_tokens: int = 1024,
|
|
23
23
|
system_prompt: Optional[str] = None,
|
|
24
|
-
**kwargs
|
|
24
|
+
**kwargs,
|
|
25
25
|
):
|
|
26
26
|
"""
|
|
27
27
|
Initialize the LLM provider.
|
|
@@ -42,9 +42,7 @@ class BaseLLM(ABC):
|
|
|
42
42
|
self.config = kwargs
|
|
43
43
|
|
|
44
44
|
@abstractmethod
|
|
45
|
-
async def generate(
|
|
46
|
-
self, prompt: str, context: Optional[ConversationContext] = None
|
|
47
|
-
) -> str:
|
|
45
|
+
async def generate(self, prompt: str, context: Optional[ConversationContext] = None) -> str:
|
|
48
46
|
"""
|
|
49
47
|
Generate a complete response to a prompt.
|
|
50
48
|
|
|
@@ -5,7 +5,7 @@ from typing import AsyncIterator, Optional
|
|
|
5
5
|
|
|
6
6
|
from groq import Groq
|
|
7
7
|
|
|
8
|
-
from core.types import ResponseChunk, ConversationContext
|
|
8
|
+
from ..core.types import ResponseChunk, ConversationContext
|
|
9
9
|
from .base import BaseLLM
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
@@ -85,9 +85,7 @@ class GroqLLM(BaseLLM):
|
|
|
85
85
|
except Exception as e:
|
|
86
86
|
logger.error(f"Error disconnecting Groq: {e}")
|
|
87
87
|
|
|
88
|
-
async def generate(
|
|
89
|
-
self, prompt: str, context: Optional[ConversationContext] = None
|
|
90
|
-
) -> str:
|
|
88
|
+
async def generate(self, prompt: str, context: Optional[ConversationContext] = None) -> str:
|
|
91
89
|
"""
|
|
92
90
|
Generate a complete response to a prompt.
|
|
93
91
|
|
{atom_audio_engine-0.1.2 → atom_audio_engine-0.1.5}/audio_engine/streaming/websocket_server.py
RENAMED
|
@@ -7,9 +7,9 @@ from typing import Optional, Callable, Any
|
|
|
7
7
|
|
|
8
8
|
import websockets
|
|
9
9
|
|
|
10
|
-
from core.pipeline import Pipeline
|
|
11
|
-
from core.types import AudioChunk, AudioFormat
|
|
12
|
-
from core.config import AudioEngineConfig
|
|
10
|
+
from ..core.pipeline import Pipeline
|
|
11
|
+
from ..core.types import AudioChunk, AudioFormat
|
|
12
|
+
from ..core.config import AudioEngineConfig
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -62,9 +62,7 @@ class WebSocketServer:
|
|
|
62
62
|
on_disconnect: Callback when client disconnects
|
|
63
63
|
"""
|
|
64
64
|
if websockets is None:
|
|
65
|
-
raise ImportError(
|
|
66
|
-
"websockets package required. Install with: pip install websockets"
|
|
67
|
-
)
|
|
65
|
+
raise ImportError("websockets package required. Install with: pip install websockets")
|
|
68
66
|
|
|
69
67
|
self.pipeline = pipeline
|
|
70
68
|
self.host = host
|
|
@@ -137,9 +135,7 @@ class WebSocketServer:
|
|
|
137
135
|
if self.on_disconnect:
|
|
138
136
|
self.on_disconnect(client_id)
|
|
139
137
|
|
|
140
|
-
async def _process_client_stream(
|
|
141
|
-
self, websocket: WebSocketServerProtocol, client_id: str
|
|
142
|
-
):
|
|
138
|
+
async def _process_client_stream(self, websocket: WebSocketServerProtocol, client_id: str):
|
|
143
139
|
"""Process streaming audio from a client."""
|
|
144
140
|
audio_queue: asyncio.Queue[AudioChunk] = asyncio.Queue()
|
|
145
141
|
end_of_speech = asyncio.Event()
|
|
@@ -231,12 +227,8 @@ class WebSocketServer:
|
|
|
231
227
|
original_on_llm_response(text)
|
|
232
228
|
|
|
233
229
|
# Temporarily override callbacks
|
|
234
|
-
self.pipeline.on_transcript = lambda t: asyncio.create_task(
|
|
235
|
-
send_transcript(t)
|
|
236
|
-
)
|
|
237
|
-
self.pipeline.on_llm_response = lambda t: asyncio.create_task(
|
|
238
|
-
send_llm_response(t)
|
|
239
|
-
)
|
|
230
|
+
self.pipeline.on_transcript = lambda t: asyncio.create_task(send_transcript(t))
|
|
231
|
+
self.pipeline.on_llm_response = lambda t: asyncio.create_task(send_llm_response(t))
|
|
240
232
|
|
|
241
233
|
try:
|
|
242
234
|
# Wait for some audio to arrive
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""TTS (Text-to-Speech) providers."""
|
|
2
2
|
|
|
3
|
-
from core.config import TTSConfig
|
|
3
|
+
from ..core.config import TTSConfig
|
|
4
4
|
|
|
5
5
|
from .base import BaseTTS
|
|
6
6
|
from .cartesia import CartesiaTTS
|
|
@@ -32,6 +32,4 @@ def get_tts_from_config(config: TTSConfig) -> BaseTTS:
|
|
|
32
32
|
**config.extra,
|
|
33
33
|
)
|
|
34
34
|
else:
|
|
35
|
-
raise ValueError(
|
|
36
|
-
f"Unknown TTS provider: {config.provider}. " f"Supported: cartesia"
|
|
37
|
-
)
|
|
35
|
+
raise ValueError(f"Unknown TTS provider: {config.provider}. " f"Supported: cartesia")
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from typing import AsyncIterator, Optional
|
|
5
5
|
|
|
6
|
-
from core.types import AudioChunk, AudioFormat
|
|
6
|
+
from ..core.types import AudioChunk, AudioFormat
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class BaseTTS(ABC):
|
|
@@ -21,7 +21,7 @@ class BaseTTS(ABC):
|
|
|
21
21
|
model: Optional[str] = None,
|
|
22
22
|
speed: float = 1.0,
|
|
23
23
|
output_format: AudioFormat = AudioFormat.PCM_24K,
|
|
24
|
-
**kwargs
|
|
24
|
+
**kwargs,
|
|
25
25
|
):
|
|
26
26
|
"""
|
|
27
27
|
Initialize the TTS provider.
|
|
@@ -97,9 +97,7 @@ class BaseTTS(ABC):
|
|
|
97
97
|
sentence = parts[0] + ender
|
|
98
98
|
|
|
99
99
|
if sentence.strip():
|
|
100
|
-
async for audio_chunk in self.synthesize_stream(
|
|
101
|
-
sentence.strip()
|
|
102
|
-
):
|
|
100
|
+
async for audio_chunk in self.synthesize_stream(sentence.strip()):
|
|
103
101
|
yield audio_chunk
|
|
104
102
|
|
|
105
103
|
buffer = parts[1] if len(parts) > 1 else ""
|
|
@@ -9,7 +9,7 @@ from typing import AsyncIterator, Optional
|
|
|
9
9
|
import websockets
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
from core.types import AudioChunk, AudioFormat
|
|
12
|
+
from ..core.types import AudioChunk, AudioFormat
|
|
13
13
|
from .base import BaseTTS
|
|
14
14
|
|
|
15
15
|
logger = logging.getLogger(__name__)
|
|
@@ -138,9 +138,7 @@ class CartesiaTTS(BaseTTS):
|
|
|
138
138
|
AudioChunk objects with audio data
|
|
139
139
|
"""
|
|
140
140
|
if websockets is None:
|
|
141
|
-
raise ImportError(
|
|
142
|
-
"websockets package required. Install: pip install websockets"
|
|
143
|
-
)
|
|
141
|
+
raise ImportError("websockets package required. Install: pip install websockets")
|
|
144
142
|
|
|
145
143
|
if not self.api_key:
|
|
146
144
|
raise ValueError("api_key required for Cartesia TTS")
|
|
@@ -151,16 +149,12 @@ class CartesiaTTS(BaseTTS):
|
|
|
151
149
|
context_id = str(uuid.uuid4())
|
|
152
150
|
|
|
153
151
|
ws_url = (
|
|
154
|
-
f"{self.WS_URL}"
|
|
155
|
-
f"?api_key={self.api_key}"
|
|
156
|
-
f"&cartesia_version={self.CARTESIA_VERSION}"
|
|
152
|
+
f"{self.WS_URL}" f"?api_key={self.api_key}" f"&cartesia_version={self.CARTESIA_VERSION}"
|
|
157
153
|
)
|
|
158
154
|
|
|
159
155
|
try:
|
|
160
156
|
async with websockets.connect(ws_url) as websocket:
|
|
161
|
-
logger.debug(
|
|
162
|
-
f"Cartesia TTS WebSocket connected | Context: {context_id}"
|
|
163
|
-
)
|
|
157
|
+
logger.debug(f"Cartesia TTS WebSocket connected | Context: {context_id}")
|
|
164
158
|
|
|
165
159
|
# Task to receive audio from WebSocket
|
|
166
160
|
async def receive_audio():
|
|
@@ -175,9 +169,7 @@ class CartesiaTTS(BaseTTS):
|
|
|
175
169
|
f"Cartesia: received response type={response.get('type')}"
|
|
176
170
|
)
|
|
177
171
|
# Handle audio chunk (base64 in "data" field)
|
|
178
|
-
if response.get("type") == "chunk" and response.get(
|
|
179
|
-
"data"
|
|
180
|
-
):
|
|
172
|
+
if response.get("type") == "chunk" and response.get("data"):
|
|
181
173
|
audio_bytes = base64.b64decode(response["data"])
|
|
182
174
|
yield audio_bytes
|
|
183
175
|
logger.debug(
|
|
@@ -199,17 +191,13 @@ class CartesiaTTS(BaseTTS):
|
|
|
199
191
|
or str(response)
|
|
200
192
|
)
|
|
201
193
|
logger.error(f"Cartesia TTS error: {error_msg}")
|
|
202
|
-
raise RuntimeError(
|
|
203
|
-
f"Cartesia API error: {error_msg}"
|
|
204
|
-
)
|
|
194
|
+
raise RuntimeError(f"Cartesia API error: {error_msg}")
|
|
205
195
|
else:
|
|
206
196
|
logger.debug(
|
|
207
197
|
f"Cartesia: response type {response.get('type')}"
|
|
208
198
|
)
|
|
209
199
|
except json.JSONDecodeError:
|
|
210
|
-
logger.warning(
|
|
211
|
-
f"Failed to parse Cartesia response: {message}"
|
|
212
|
-
)
|
|
200
|
+
logger.warning(f"Failed to parse Cartesia response: {message}")
|
|
213
201
|
except Exception as e:
|
|
214
202
|
logger.error(f"Cartesia receive error: {e}", exc_info=True)
|
|
215
203
|
raise
|
|
@@ -260,9 +248,7 @@ class CartesiaTTS(BaseTTS):
|
|
|
260
248
|
},
|
|
261
249
|
}
|
|
262
250
|
await websocket.send(json.dumps(request))
|
|
263
|
-
logger.debug(
|
|
264
|
-
f"Cartesia: sent text on timeout (continue=true)"
|
|
265
|
-
)
|
|
251
|
+
logger.debug(f"Cartesia: sent text on timeout (continue=true)")
|
|
266
252
|
accumulated_text = ""
|
|
267
253
|
continue
|
|
268
254
|
|
|
@@ -287,9 +273,7 @@ class CartesiaTTS(BaseTTS):
|
|
|
287
273
|
},
|
|
288
274
|
}
|
|
289
275
|
await websocket.send(json.dumps(request))
|
|
290
|
-
logger.debug(
|
|
291
|
-
f"Cartesia: sent final text (continue=false)"
|
|
292
|
-
)
|
|
276
|
+
logger.debug(f"Cartesia: sent final text (continue=false)")
|
|
293
277
|
else:
|
|
294
278
|
# Send empty transcript to signal end
|
|
295
279
|
request = {
|
|
@@ -309,9 +293,7 @@ class CartesiaTTS(BaseTTS):
|
|
|
309
293
|
},
|
|
310
294
|
}
|
|
311
295
|
await websocket.send(json.dumps(request))
|
|
312
|
-
logger.debug(
|
|
313
|
-
"Cartesia: sent empty transcript to signal end"
|
|
314
|
-
)
|
|
296
|
+
logger.debug("Cartesia: sent empty transcript to signal end")
|
|
315
297
|
logger.info("Cartesia: all text sent")
|
|
316
298
|
break
|
|
317
299
|
|
|
@@ -322,9 +304,7 @@ class CartesiaTTS(BaseTTS):
|
|
|
322
304
|
)
|
|
323
305
|
|
|
324
306
|
# Send when buffer is large enough or ends with punctuation
|
|
325
|
-
if len(accumulated_text) > 30 or token.endswith(
|
|
326
|
-
(".", "!", "?")
|
|
327
|
-
):
|
|
307
|
+
if len(accumulated_text) > 30 or token.endswith((".", "!", "?")):
|
|
328
308
|
request = {
|
|
329
309
|
"model_id": self.model,
|
|
330
310
|
"transcript": accumulated_text,
|
|
@@ -342,9 +322,7 @@ class CartesiaTTS(BaseTTS):
|
|
|
342
322
|
},
|
|
343
323
|
}
|
|
344
324
|
await websocket.send(json.dumps(request))
|
|
345
|
-
logger.debug(
|
|
346
|
-
f"Cartesia: sent buffered text (continue=true)"
|
|
347
|
-
)
|
|
325
|
+
logger.debug(f"Cartesia: sent buffered text (continue=true)")
|
|
348
326
|
accumulated_text = ""
|
|
349
327
|
|
|
350
328
|
except Exception as e:
|
|
@@ -58,9 +58,7 @@ def _simple_resample(
|
|
|
58
58
|
"""Simple linear interpolation resampling."""
|
|
59
59
|
if sample_width == 2:
|
|
60
60
|
fmt = "<h"
|
|
61
|
-
samples = [
|
|
62
|
-
struct.unpack(fmt, audio[i : i + 2])[0] for i in range(0, len(audio), 2)
|
|
63
|
-
]
|
|
61
|
+
samples = [struct.unpack(fmt, audio[i : i + 2])[0] for i in range(0, len(audio), 2)]
|
|
64
62
|
else:
|
|
65
63
|
raise ValueError(f"Unsupported sample width: {sample_width}")
|
|
66
64
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "atom-audio-engine"
|
|
7
|
-
version = "0.1.2"
|
|
7
|
+
version = "0.1.5"
|
|
8
8
|
description = "A pluggable, async-first Python framework for real-time audio-to-audio conversational AI"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -90,11 +90,6 @@ packages = [
|
|
|
90
90
|
"audio_engine.utils",
|
|
91
91
|
"audio_engine.pipelines",
|
|
92
92
|
"audio_engine.pipelines.personaplex",
|
|
93
|
-
"audio_engine.examples",
|
|
94
|
-
"audio_engine.scripts",
|
|
95
|
-
"audio_engine.tests",
|
|
96
|
-
"audio_engine.tests.test_personaplex",
|
|
97
|
-
"audio_engine.tests.test_traditional_pipeline",
|
|
98
93
|
]
|
|
99
94
|
|
|
100
95
|
[tool.setuptools.package-data]
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
README.md
|
|
2
|
-
pyproject.toml
|
|
3
|
-
setup.py
|
|
4
|
-
atom_audio_engine.egg-info/PKG-INFO
|
|
5
|
-
atom_audio_engine.egg-info/SOURCES.txt
|
|
6
|
-
atom_audio_engine.egg-info/dependency_links.txt
|
|
7
|
-
atom_audio_engine.egg-info/requires.txt
|
|
8
|
-
atom_audio_engine.egg-info/top_level.txt
|
|
9
|
-
audio_engine/__init__.py
|
|
10
|
-
audio_engine/asr/__init__.py
|
|
11
|
-
audio_engine/asr/base.py
|
|
12
|
-
audio_engine/asr/cartesia.py
|
|
13
|
-
audio_engine/asr/deepgram.py
|
|
14
|
-
audio_engine/core/__init__.py
|
|
15
|
-
audio_engine/core/config.py
|
|
16
|
-
audio_engine/core/pipeline.py
|
|
17
|
-
audio_engine/core/types.py
|
|
18
|
-
audio_engine/examples/__init__.py
|
|
19
|
-
audio_engine/examples/basic_stt_llm_tts.py
|
|
20
|
-
audio_engine/examples/geneface_animation.py
|
|
21
|
-
audio_engine/examples/personaplex_pipeline.py
|
|
22
|
-
audio_engine/examples/websocket_server.py
|
|
23
|
-
audio_engine/integrations/__init__.py
|
|
24
|
-
audio_engine/integrations/geneface.py
|
|
25
|
-
audio_engine/llm/__init__.py
|
|
26
|
-
audio_engine/llm/base.py
|
|
27
|
-
audio_engine/llm/groq.py
|
|
28
|
-
audio_engine/pipelines/__init__.py
|
|
29
|
-
audio_engine/pipelines/personaplex/__init__.py
|
|
30
|
-
audio_engine/pipelines/personaplex/client.py
|
|
31
|
-
audio_engine/pipelines/personaplex/config.py
|
|
32
|
-
audio_engine/pipelines/personaplex/pipeline.py
|
|
33
|
-
audio_engine/pipelines/personaplex/types.py
|
|
34
|
-
audio_engine/pipelines/personaplex/utils.py
|
|
35
|
-
audio_engine/scripts/debug_pipeline.py
|
|
36
|
-
audio_engine/scripts/debug_tts.py
|
|
37
|
-
audio_engine/scripts/test_cartesia_connect.py
|
|
38
|
-
audio_engine/streaming/__init__.py
|
|
39
|
-
audio_engine/streaming/websocket_server.py
|
|
40
|
-
audio_engine/tests/__init__.py
|
|
41
|
-
audio_engine/tests/test_personaplex/__init__.py
|
|
42
|
-
audio_engine/tests/test_personaplex/test_personaplex.py
|
|
43
|
-
audio_engine/tests/test_personaplex/test_personaplex_client.py
|
|
44
|
-
audio_engine/tests/test_personaplex/test_personaplex_config.py
|
|
45
|
-
audio_engine/tests/test_personaplex/test_personaplex_message.py
|
|
46
|
-
audio_engine/tests/test_personaplex/test_personaplex_pipeline.py
|
|
47
|
-
audio_engine/tests/test_personaplex/test_personaplex_session.py
|
|
48
|
-
audio_engine/tests/test_personaplex/test_personaplex_transcript.py
|
|
49
|
-
audio_engine/tests/test_traditional_pipeline/__init__.py
|
|
50
|
-
audio_engine/tests/test_traditional_pipeline/test_cartesia_asr.py
|
|
51
|
-
audio_engine/tests/test_traditional_pipeline/test_config_env.py
|
|
52
|
-
audio_engine/tests/test_traditional_pipeline/test_conversation_context.py
|
|
53
|
-
audio_engine/tests/test_traditional_pipeline/test_pipeline_creation.py
|
|
54
|
-
audio_engine/tests/test_traditional_pipeline/test_pipeline_with_mocks.py
|
|
55
|
-
audio_engine/tests/test_traditional_pipeline/test_provider_factories.py
|
|
56
|
-
audio_engine/tests/test_traditional_pipeline/test_websocket_server.py
|
|
57
|
-
audio_engine/tts/__init__.py
|
|
58
|
-
audio_engine/tts/base.py
|
|
59
|
-
audio_engine/tts/cartesia.py
|
|
60
|
-
audio_engine/utils/__init__.py
|
|
61
|
-
audio_engine/utils/audio.py
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Example scripts for the audio engine."""
|