dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Async text-to-speech service implementations."""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import base64
|
|
11
|
+
import json
|
|
12
|
+
from typing import AsyncGenerator, Optional
|
|
13
|
+
|
|
14
|
+
import aiohttp
|
|
15
|
+
from loguru import logger
|
|
16
|
+
from pydantic import BaseModel
|
|
17
|
+
|
|
18
|
+
from pipecat.frames.frames import (
|
|
19
|
+
CancelFrame,
|
|
20
|
+
EndFrame,
|
|
21
|
+
ErrorFrame,
|
|
22
|
+
Frame,
|
|
23
|
+
StartFrame,
|
|
24
|
+
StartInterruptionFrame,
|
|
25
|
+
TTSAudioRawFrame,
|
|
26
|
+
TTSStartedFrame,
|
|
27
|
+
TTSStoppedFrame,
|
|
28
|
+
)
|
|
29
|
+
from pipecat.processors.frame_processor import FrameDirection
|
|
30
|
+
from pipecat.services.tts_service import InterruptibleTTSService, TTSService
|
|
31
|
+
from pipecat.transcriptions.language import Language
|
|
32
|
+
from pipecat.utils.tracing.service_decorators import traced_tts
|
|
33
|
+
|
|
34
|
+
try:
|
|
35
|
+
import websockets
|
|
36
|
+
from websockets.asyncio.client import connect as websocket_connect
|
|
37
|
+
from websockets.protocol import State
|
|
38
|
+
except ModuleNotFoundError as e:
|
|
39
|
+
logger.error(f"Exception: {e}")
|
|
40
|
+
logger.error("In order to use Async, you need to `pip install pipecat-ai[asyncai]`.")
|
|
41
|
+
raise Exception(f"Missing module: {e}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def language_to_async_language(language: Language) -> Optional[str]:
    """Convert a Language enum to Async language code.

    Args:
        language: The Language enum value to convert.

    Returns:
        The corresponding Async language code, or None if not supported.
    """
    supported = {
        Language.EN: "en",
    }

    code = supported.get(language)
    if code:
        return code

    # Not an exact match: fall back to the base language of a regional
    # variant (e.g. "en-US" -> "en") and accept it only if supported.
    base = str(language.value).split("-")[0].lower()
    if base in supported.values():
        return base
    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class AsyncAITTSService(InterruptibleTTSService):
    """Async TTS service with WebSocket streaming.

    Provides text-to-speech using Async's streaming WebSocket API.
    """

    class InputParams(BaseModel):
        """Input parameters for Async TTS configuration.

        Parameters:
            language: Language to use for synthesis.
        """

        language: Optional[Language] = Language.EN

    def __init__(
        self,
        *,
        api_key: str,
        voice_id: str,
        version: str = "v1",
        url: str = "wss://api.async.ai/text_to_speech/websocket/ws",
        model: str = "asyncflow_v2.0",
        sample_rate: Optional[int] = None,
        encoding: str = "pcm_s16le",
        container: str = "raw",
        params: Optional[InputParams] = None,
        aggregate_sentences: Optional[bool] = True,
        **kwargs,
    ):
        """Initialize the Async TTS service.

        Args:
            api_key: Async API key.
            voice_id: UUID of the voice to use for synthesis. See docs for a full list:
                https://docs.async.ai/list-voices-16699698e0
            version: Async API version.
            url: WebSocket URL for Async TTS API.
            model: TTS model to use (e.g., "asyncflow_v2.0").
            sample_rate: Audio sample rate.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.
            aggregate_sentences: Whether to aggregate sentences within the TTSService.
            **kwargs: Additional arguments passed to the parent service.
        """
        super().__init__(
            aggregate_sentences=aggregate_sentences,
            push_text_frames=False,
            pause_frame_processing=True,
            push_stop_frames=True,
            sample_rate=sample_rate,
            **kwargs,
        )

        params = params or AsyncAITTSService.InputParams()

        self._api_key = api_key
        self._api_version = version
        self._url = url
        self._settings = {
            "output_format": {
                "container": container,
                "encoding": encoding,
                # Placeholder; replaced with the negotiated rate in start().
                "sample_rate": 0,
            },
            "language": self.language_to_service_language(params.language)
            if params.language
            else "en",
        }

        self.set_model_name(model)
        self.set_voice(voice_id)

        # BUG FIX: _websocket must exist before _connect_websocket() reads it.
        # Without this, the first `if self._websocket ...` check raised
        # AttributeError, which the broad except swallowed and reported as a
        # spurious connection error on every first connect.
        self._websocket = None
        self._receive_task = None
        self._keepalive_task = None
        # True while a TTS generation is in progress (TTSStartedFrame pushed
        # and no TTSStoppedFrame/StartInterruptionFrame seen yet).
        self._started = False

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate processing metrics.

        Returns:
            True, as Async service supports metrics generation.
        """
        return True

    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Async language format.

        Args:
            language: The language to convert.

        Returns:
            The Async-specific language code, or None if not supported.
        """
        return language_to_async_language(language)

    def _build_msg(self, text: str = "", force: bool = False) -> str:
        # Build the JSON text message the Async websocket protocol expects.
        msg = {"transcript": text, "force": force}
        return json.dumps(msg)

    async def start(self, frame: StartFrame):
        """Start the Async TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)
        # The pipeline sample rate is only known once the StartFrame arrives.
        self._settings["output_format"]["sample_rate"] = self.sample_rate
        await self._connect()

    async def stop(self, frame: EndFrame):
        """Stop the Async TTS service.

        Args:
            frame: The end frame.
        """
        await super().stop(frame)
        await self._disconnect()

    async def cancel(self, frame: CancelFrame):
        """Cancel the Async TTS service.

        Args:
            frame: The cancel frame.
        """
        await super().cancel(frame)
        await self._disconnect()

    async def _connect(self):
        # Open the websocket, then spin up the receiver and keepalive tasks
        # (only if the connection actually came up).
        await self._connect_websocket()

        if self._websocket and not self._receive_task:
            self._receive_task = self.create_task(self._receive_task_handler(self._report_error))

        if self._websocket and not self._keepalive_task:
            self._keepalive_task = self.create_task(self._keepalive_task_handler())

    async def _disconnect(self):
        # Cancel background tasks before tearing down the socket they use.
        if self._receive_task:
            await self.cancel_task(self._receive_task)
            self._receive_task = None

        if self._keepalive_task:
            await self.cancel_task(self._keepalive_task)
            self._keepalive_task = None

        await self._disconnect_websocket()

    async def _connect_websocket(self):
        # Connect and send the session init message. On any failure the
        # socket is dropped and the "on_connection_error" handler is invoked.
        try:
            if self._websocket and self._websocket.state is State.OPEN:
                return
            logger.debug("Connecting to Async")
            self._websocket = await websocket_connect(
                f"{self._url}?api_key={self._api_key}&version={self._api_version}"
            )
            init_msg = {
                "model_id": self._model_name,
                "voice": {"mode": "id", "id": self._voice_id},
                "output_format": self._settings["output_format"],
                "language": self._settings["language"],
            }

            await self._get_websocket().send(json.dumps(init_msg))
        except Exception as e:
            logger.error(f"{self} initialization error: {e}")
            self._websocket = None
            await self._call_event_handler("on_connection_error", f"{e}")

    async def _disconnect_websocket(self):
        try:
            await self.stop_all_metrics()

            if self._websocket:
                logger.debug("Disconnecting from Async")
                await self._websocket.close()
        except Exception as e:
            logger.error(f"{self} error closing websocket: {e}")
        finally:
            # Always clear state, even if close() failed.
            self._websocket = None
            self._started = False

    def _get_websocket(self):
        # Return the live websocket or fail loudly instead of sending into None.
        if self._websocket:
            return self._websocket
        raise Exception("Websocket not connected")

    async def flush_audio(self):
        """Flush any pending audio."""
        if not self._websocket:
            return
        logger.trace(f"{self}: flushing audio")
        # A forced single-space transcript flushes Async's synthesis buffer.
        msg = self._build_msg(text=" ", force=True)
        await self._websocket.send(msg)

    async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
        """Push a frame downstream with special handling for stop conditions.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        """
        await super().push_frame(frame, direction)
        if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
            # Generation ended (or was interrupted); the next run_tts() call
            # must emit a fresh TTSStartedFrame.
            self._started = False

    async def _receive_messages(self):
        # Receive loop: decode server messages and turn audio payloads into
        # TTSAudioRawFrame, surfacing protocol errors as ErrorFrame.
        async for message in self._get_websocket():
            msg = json.loads(message)
            if not msg:
                continue

            elif msg.get("audio"):
                await self.stop_ttfb_metrics()
                frame = TTSAudioRawFrame(
                    audio=base64.b64decode(msg["audio"]),
                    sample_rate=self.sample_rate,
                    num_channels=1,
                )
                await self.push_frame(frame)
            elif msg.get("error_code"):
                logger.error(f"{self} error: {msg}")
                await self.push_frame(TTSStoppedFrame())
                await self.stop_all_metrics()
                await self.push_error(ErrorFrame(f"{self} error: {msg['message']}"))
            else:
                logger.error(f"{self} error, unknown message type: {msg}")

    async def _keepalive_task_handler(self):
        """Send periodic keepalive messages to maintain WebSocket connection."""
        KEEPALIVE_SLEEP = 3
        while True:
            await asyncio.sleep(KEEPALIVE_SLEEP)
            try:
                if self._websocket and self._websocket.state is State.OPEN:
                    # An empty-ish transcript keeps the session alive without
                    # triggering synthesis.
                    keepalive_message = {"transcript": " "}
                    logger.trace("Sending keepalive message")
                    await self._websocket.send(json.dumps(keepalive_message))
            except websockets.ConnectionClosed as e:
                logger.warning(f"{self} keepalive error: {e}")
                break

    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Async API websocket endpoint.

        Args:
            text: The text to synthesize into speech.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        """
        logger.debug(f"{self}: Generating TTS [{text}]")

        try:
            # Reconnect lazily if the socket was never opened or has closed.
            if not self._websocket or self._websocket.state is State.CLOSED:
                await self._connect()

            if not self._started:
                await self.start_ttfb_metrics()
                yield TTSStartedFrame()
                self._started = True

            msg = self._build_msg(text=text, force=True)

            try:
                await self._get_websocket().send(msg)
                await self.start_tts_usage_metrics(text)
            except Exception as e:
                # Send failed: signal stop and cycle the connection so the
                # next generation starts from a clean socket.
                logger.error(f"{self} error sending message: {e}")
                yield TTSStoppedFrame()
                await self._disconnect()
                await self._connect()
                return
            # Audio arrives asynchronously via _receive_messages(); nothing
            # more to yield from here.
            yield None
        except Exception as e:
            logger.error(f"{self} exception: {e}")
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
class AsyncAIHttpTTSService(TTSService):
    """HTTP-based Async TTS service.

    Provides text-to-speech using Async's HTTP streaming API for simpler,
    non-WebSocket integration. Suitable for use cases where streaming WebSocket
    connection is not required or desired.
    """

    class InputParams(BaseModel):
        """Input parameters for Async API.

        Parameters:
            language: Language to use for synthesis.
        """

        language: Optional[Language] = Language.EN

    def __init__(
        self,
        *,
        api_key: str,
        voice_id: str,
        aiohttp_session: aiohttp.ClientSession,
        model: str = "asyncflow_v2.0",
        url: str = "https://api.async.ai",
        version: str = "v1",
        sample_rate: Optional[int] = None,
        encoding: str = "pcm_s16le",
        container: str = "raw",
        params: Optional[InputParams] = None,
        **kwargs,
    ):
        """Initialize the Async TTS service.

        Args:
            api_key: Async API key.
            voice_id: ID of the voice to use for synthesis.
            aiohttp_session: An aiohttp session for making HTTP requests.
            model: TTS model to use (e.g., "asyncflow_v2.0").
            url: Base URL for Async API.
            version: API version string for Async API.
            sample_rate: Audio sample rate.
            encoding: Audio encoding format.
            container: Audio container format.
            params: Additional input parameters for voice customization.
            **kwargs: Additional arguments passed to the parent TTSService.
        """
        super().__init__(sample_rate=sample_rate, **kwargs)

        if params is None:
            params = AsyncAIHttpTTSService.InputParams()

        # Resolve the service language up front; default to English when no
        # language was requested.
        service_language = "en"
        if params.language:
            service_language = self.language_to_service_language(params.language)

        self._api_key = api_key
        self._base_url = url
        self._api_version = version
        self._settings = {
            "output_format": {
                "container": container,
                "encoding": encoding,
                "sample_rate": 0,
            },
            "language": service_language,
        }
        self.set_voice(voice_id)
        self.set_model_name(model)

        self._session = aiohttp_session

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate processing metrics.

        Returns:
            True, as Async HTTP service supports metrics generation.
        """
        return True

    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert a Language enum to Async language format.

        Args:
            language: The language to convert.

        Returns:
            The Async-specific language code, or None if not supported.
        """
        return language_to_async_language(language)

    async def start(self, frame: StartFrame):
        """Start the Async HTTP TTS service.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)
        # Sample rate is only known once the pipeline starts.
        self._settings["output_format"]["sample_rate"] = self.sample_rate

    @traced_tts
    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        """Generate speech from text using Async's HTTP streaming API.

        Args:
            text: The text to synthesize into speech.

        Yields:
            Frame: Audio frames containing the synthesized speech.
        """
        logger.debug(f"{self}: Generating TTS [{text}]")

        try:
            await self.start_ttfb_metrics()

            request_body = {
                "model_id": self._model_name,
                "transcript": text,
                "voice": {"mode": "id", "id": self._voice_id},
                "output_format": self._settings["output_format"],
                "language": self._settings["language"],
            }

            yield TTSStartedFrame()

            request_headers = {
                "version": self._api_version,
                "x-api-key": self._api_key,
                "Content-Type": "application/json",
            }
            endpoint = f"{self._base_url}/text_to_speech/streaming"

            async with self._session.post(
                endpoint, json=request_body, headers=request_headers
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Async API error: {error_text}")
                    await self.push_error(ErrorFrame(f"Async API error: {error_text}"))
                    raise Exception(f"Async API returned status {response.status}: {error_text}")

                audio_data = await response.read()

                await self.start_tts_usage_metrics(text)

                yield TTSAudioRawFrame(
                    audio=audio_data,
                    sample_rate=self.sample_rate,
                    num_channels=1,
                )

        except Exception as e:
            logger.error(f"{self} exception: {e}")
            await self.push_error(ErrorFrame(f"Error generating TTS: {e}"))
        finally:
            # Always close out TTFB metrics and signal the end of generation.
            await self.stop_ttfb_metrics()
            yield TTSStoppedFrame()
|