dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/tavus/video.py
CHANGED
|
@@ -4,7 +4,11 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
-
"""
|
|
7
|
+
"""Tavus video service implementation for avatar-based video generation.
|
|
8
|
+
|
|
9
|
+
This module implements Tavus as a sink transport layer, providing video
|
|
10
|
+
avatar functionality through Tavus's streaming API.
|
|
11
|
+
"""
|
|
8
12
|
|
|
9
13
|
import asyncio
|
|
10
14
|
from typing import Optional
|
|
@@ -13,41 +17,37 @@ import aiohttp
|
|
|
13
17
|
from daily.daily import AudioData, VideoFrame
|
|
14
18
|
from loguru import logger
|
|
15
19
|
|
|
16
|
-
from pipecat.audio.utils import
|
|
20
|
+
from pipecat.audio.utils import create_stream_resampler
|
|
17
21
|
from pipecat.frames.frames import (
|
|
22
|
+
BotStartedSpeakingFrame,
|
|
18
23
|
CancelFrame,
|
|
19
24
|
EndFrame,
|
|
20
25
|
Frame,
|
|
21
26
|
OutputAudioRawFrame,
|
|
22
27
|
OutputImageRawFrame,
|
|
28
|
+
OutputTransportReadyFrame,
|
|
29
|
+
SpeechOutputAudioRawFrame,
|
|
23
30
|
StartFrame,
|
|
24
31
|
StartInterruptionFrame,
|
|
25
32
|
TTSAudioRawFrame,
|
|
33
|
+
TTSStartedFrame,
|
|
26
34
|
)
|
|
27
35
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
|
|
28
36
|
from pipecat.services.ai_service import AIService
|
|
29
37
|
from pipecat.transports.services.tavus import TavusCallbacks, TavusParams, TavusTransportClient
|
|
30
|
-
from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
|
|
31
38
|
|
|
32
39
|
|
|
33
40
|
class TavusVideoService(AIService):
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
Args:
|
|
46
|
-
api_key (str): Tavus API key used for authentication.
|
|
47
|
-
replica_id (str): ID of the Tavus voice replica to use for speech synthesis.
|
|
48
|
-
persona_id (str): ID of the Tavus persona. Defaults to "pipecat-stream" to use the Pipecat TTS voice.
|
|
49
|
-
session (aiohttp.ClientSession): Async HTTP session used for communication with Tavus.
|
|
50
|
-
**kwargs: Additional arguments passed to the parent `AIService` class.
|
|
41
|
+
"""Service that proxies audio to Tavus and receives audio and video in return.
|
|
42
|
+
|
|
43
|
+
Uses the TavusTransportClient to manage sessions and handle communication.
|
|
44
|
+
When audio is sent, Tavus responds with both audio and video streams, which
|
|
45
|
+
are routed through Pipecat's media pipeline.
|
|
46
|
+
|
|
47
|
+
In use cases with DailyTransport, this creates two distinct virtual rooms:
|
|
48
|
+
|
|
49
|
+
- Tavus room: Contains the Tavus Avatar and the Pipecat Bot
|
|
50
|
+
- User room: Contains the Pipecat Bot and the user
|
|
51
51
|
"""
|
|
52
52
|
|
|
53
53
|
def __init__(
|
|
@@ -59,6 +59,15 @@ class TavusVideoService(AIService):
|
|
|
59
59
|
session: aiohttp.ClientSession,
|
|
60
60
|
**kwargs,
|
|
61
61
|
) -> None:
|
|
62
|
+
"""Initialize the Tavus video service.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
api_key: Tavus API key used for authentication.
|
|
66
|
+
replica_id: ID of the Tavus voice replica to use for speech synthesis.
|
|
67
|
+
persona_id: ID of the Tavus persona. Defaults to "pipecat-stream" for Pipecat TTS voice.
|
|
68
|
+
session: Async HTTP session used for communication with Tavus.
|
|
69
|
+
**kwargs: Additional arguments passed to the parent AIService class.
|
|
70
|
+
"""
|
|
62
71
|
super().__init__(**kwargs)
|
|
63
72
|
self._api_key = api_key
|
|
64
73
|
self._session = session
|
|
@@ -69,14 +78,20 @@ class TavusVideoService(AIService):
|
|
|
69
78
|
self._client: Optional[TavusTransportClient] = None
|
|
70
79
|
|
|
71
80
|
self._conversation_id: str
|
|
72
|
-
self._resampler =
|
|
81
|
+
self._resampler = create_stream_resampler()
|
|
73
82
|
|
|
74
83
|
self._audio_buffer = bytearray()
|
|
75
84
|
self._send_task: Optional[asyncio.Task] = None
|
|
76
85
|
# This is the custom track destination expected by Tavus
|
|
77
86
|
self._transport_destination: Optional[str] = "stream"
|
|
87
|
+
self._transport_ready = False
|
|
78
88
|
|
|
79
89
|
async def setup(self, setup: FrameProcessorSetup):
|
|
90
|
+
"""Set up the Tavus video service.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
setup: Frame processor setup configuration.
|
|
94
|
+
"""
|
|
80
95
|
await super().setup(setup)
|
|
81
96
|
callbacks = TavusCallbacks(
|
|
82
97
|
on_participant_joined=self._on_participant_joined,
|
|
@@ -99,15 +114,18 @@ class TavusVideoService(AIService):
|
|
|
99
114
|
await self._client.setup(setup)
|
|
100
115
|
|
|
101
116
|
async def cleanup(self):
|
|
117
|
+
"""Clean up the service and release resources."""
|
|
102
118
|
await super().cleanup()
|
|
103
119
|
await self._client.cleanup()
|
|
104
120
|
self._client = None
|
|
105
121
|
|
|
106
122
|
async def _on_participant_left(self, participant, reason):
|
|
123
|
+
"""Handle participant leaving the session."""
|
|
107
124
|
participant_id = participant["id"]
|
|
108
125
|
logger.info(f"Participant left {participant_id}, reason: {reason}")
|
|
109
126
|
|
|
110
127
|
async def _on_participant_joined(self, participant):
|
|
128
|
+
"""Handle participant joining the session."""
|
|
111
129
|
participant_id = participant["id"]
|
|
112
130
|
logger.info(f"Participant joined {participant_id}")
|
|
113
131
|
if not self._other_participant_has_joined:
|
|
@@ -124,32 +142,51 @@ class TavusVideoService(AIService):
|
|
|
124
142
|
async def _on_participant_video_frame(
|
|
125
143
|
self, participant_id: str, video_frame: VideoFrame, video_source: str
|
|
126
144
|
):
|
|
145
|
+
"""Handle incoming video frames from participants."""
|
|
127
146
|
frame = OutputImageRawFrame(
|
|
128
147
|
image=video_frame.buffer,
|
|
129
148
|
size=(video_frame.width, video_frame.height),
|
|
130
149
|
format=video_frame.color_format,
|
|
131
150
|
)
|
|
132
151
|
frame.transport_source = video_source
|
|
133
|
-
|
|
152
|
+
if self._transport_ready:
|
|
153
|
+
await self.push_frame(frame)
|
|
134
154
|
|
|
135
155
|
async def _on_participant_audio_data(
|
|
136
156
|
self, participant_id: str, audio: AudioData, audio_source: str
|
|
137
157
|
):
|
|
138
|
-
|
|
158
|
+
"""Handle incoming audio data from participants."""
|
|
159
|
+
frame = SpeechOutputAudioRawFrame(
|
|
139
160
|
audio=audio.audio_frames,
|
|
140
161
|
sample_rate=audio.sample_rate,
|
|
141
162
|
num_channels=audio.num_channels,
|
|
142
163
|
)
|
|
143
164
|
frame.transport_source = audio_source
|
|
144
|
-
|
|
165
|
+
if self._transport_ready:
|
|
166
|
+
await self.push_frame(frame)
|
|
145
167
|
|
|
146
168
|
def can_generate_metrics(self) -> bool:
|
|
169
|
+
"""Check if this service can generate processing metrics.
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
True, as Tavus service supports metrics generation.
|
|
173
|
+
"""
|
|
147
174
|
return True
|
|
148
175
|
|
|
149
176
|
async def get_persona_name(self) -> str:
|
|
177
|
+
"""Get the name of the current persona.
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
The persona name from the Tavus client.
|
|
181
|
+
"""
|
|
150
182
|
return await self._client.get_persona_name()
|
|
151
183
|
|
|
152
184
|
async def start(self, frame: StartFrame):
|
|
185
|
+
"""Start the Tavus video service.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
frame: The start frame containing initialization parameters.
|
|
189
|
+
"""
|
|
153
190
|
await super().start(frame)
|
|
154
191
|
await self._client.start(frame)
|
|
155
192
|
if self._transport_destination:
|
|
@@ -157,16 +194,32 @@ class TavusVideoService(AIService):
|
|
|
157
194
|
await self._create_send_task()
|
|
158
195
|
|
|
159
196
|
async def stop(self, frame: EndFrame):
|
|
197
|
+
"""Stop the Tavus video service.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
frame: The end frame.
|
|
201
|
+
"""
|
|
160
202
|
await super().stop(frame)
|
|
161
203
|
await self._end_conversation()
|
|
162
204
|
await self._cancel_send_task()
|
|
163
205
|
|
|
164
206
|
async def cancel(self, frame: CancelFrame):
|
|
207
|
+
"""Cancel the Tavus video service.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
frame: The cancel frame.
|
|
211
|
+
"""
|
|
165
212
|
await super().cancel(frame)
|
|
166
213
|
await self._end_conversation()
|
|
167
214
|
await self._cancel_send_task()
|
|
168
215
|
|
|
169
216
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
217
|
+
"""Process frames through the service.
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
frame: The frame to process.
|
|
221
|
+
direction: The direction of frame processing.
|
|
222
|
+
"""
|
|
170
223
|
await super().process_frame(frame, direction)
|
|
171
224
|
|
|
172
225
|
if isinstance(frame, StartInterruptionFrame):
|
|
@@ -174,29 +227,44 @@ class TavusVideoService(AIService):
|
|
|
174
227
|
await self.push_frame(frame, direction)
|
|
175
228
|
elif isinstance(frame, TTSAudioRawFrame):
|
|
176
229
|
await self._handle_audio_frame(frame)
|
|
230
|
+
elif isinstance(frame, OutputTransportReadyFrame):
|
|
231
|
+
self._transport_ready = True
|
|
232
|
+
await self.push_frame(frame, direction)
|
|
233
|
+
elif isinstance(frame, TTSStartedFrame):
|
|
234
|
+
await self.start_ttfb_metrics()
|
|
235
|
+
elif isinstance(frame, BotStartedSpeakingFrame):
|
|
236
|
+
# We constantly receive audio through WebRTC, but most of the time it is silence.
|
|
237
|
+
# As soon as we receive actual audio, the base output transport will create a
|
|
238
|
+
# BotStartedSpeakingFrame, which we can use as a signal for the TTFB metrics.
|
|
239
|
+
await self.stop_ttfb_metrics()
|
|
177
240
|
else:
|
|
178
241
|
await self.push_frame(frame, direction)
|
|
179
242
|
|
|
180
243
|
async def _handle_interruptions(self):
|
|
244
|
+
"""Handle interruption events by resetting send tasks and notifying client."""
|
|
181
245
|
await self._cancel_send_task()
|
|
182
246
|
await self._create_send_task()
|
|
183
247
|
await self._client.send_interrupt_message()
|
|
184
248
|
|
|
185
249
|
async def _end_conversation(self):
|
|
250
|
+
"""End the current conversation and reset state."""
|
|
186
251
|
await self._client.stop()
|
|
187
252
|
self._other_participant_has_joined = False
|
|
188
253
|
|
|
189
254
|
async def _create_send_task(self):
|
|
255
|
+
"""Create the audio sending task if it doesn't exist."""
|
|
190
256
|
if not self._send_task:
|
|
191
|
-
self._queue =
|
|
257
|
+
self._queue = asyncio.Queue()
|
|
192
258
|
self._send_task = self.create_task(self._send_task_handler())
|
|
193
259
|
|
|
194
260
|
async def _cancel_send_task(self):
|
|
261
|
+
"""Cancel the audio sending task if it exists."""
|
|
195
262
|
if self._send_task:
|
|
196
263
|
await self.cancel_task(self._send_task)
|
|
197
264
|
self._send_task = None
|
|
198
265
|
|
|
199
266
|
async def _handle_audio_frame(self, frame: OutputAudioRawFrame):
|
|
267
|
+
"""Process audio frames for sending to Tavus."""
|
|
200
268
|
sample_rate = self._client.out_sample_rate
|
|
201
269
|
# 40 ms of audio
|
|
202
270
|
chunk_size = int((sample_rate * 2) / 25)
|
|
@@ -215,6 +283,7 @@ class TavusVideoService(AIService):
|
|
|
215
283
|
self._audio_buffer = self._audio_buffer[chunk_size:]
|
|
216
284
|
|
|
217
285
|
async def _send_task_handler(self):
|
|
286
|
+
"""Handle sending audio frames to the Tavus client."""
|
|
218
287
|
while True:
|
|
219
288
|
frame = await self._queue.get()
|
|
220
289
|
if isinstance(frame, OutputAudioRawFrame) and self._client:
|
pipecat/services/together/llm.py
CHANGED
|
@@ -16,12 +16,6 @@ class TogetherLLMService(OpenAILLMService):
|
|
|
16
16
|
|
|
17
17
|
This service extends OpenAILLMService to connect to Together.ai's API endpoint while
|
|
18
18
|
maintaining full compatibility with OpenAI's interface and functionality.
|
|
19
|
-
|
|
20
|
-
Args:
|
|
21
|
-
api_key: The API key for accessing Together.ai's API.
|
|
22
|
-
base_url: The base URL for Together.ai API. Defaults to "https://api.together.xyz/v1".
|
|
23
|
-
model: The model identifier to use. Defaults to "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo".
|
|
24
|
-
**kwargs: Additional keyword arguments passed to OpenAILLMService.
|
|
25
19
|
"""
|
|
26
20
|
|
|
27
21
|
def __init__(
|
|
@@ -32,6 +26,14 @@ class TogetherLLMService(OpenAILLMService):
|
|
|
32
26
|
model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
|
33
27
|
**kwargs,
|
|
34
28
|
):
|
|
29
|
+
"""Initialize Together.ai LLM service.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
api_key: The API key for accessing Together.ai's API.
|
|
33
|
+
base_url: The base URL for Together.ai API. Defaults to "https://api.together.xyz/v1".
|
|
34
|
+
model: The model identifier to use. Defaults to "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo".
|
|
35
|
+
**kwargs: Additional keyword arguments passed to OpenAILLMService.
|
|
36
|
+
"""
|
|
35
37
|
super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
|
|
36
38
|
|
|
37
39
|
def create_client(self, api_key=None, base_url=None, **kwargs):
|
pipecat/services/tts_service.py
CHANGED
|
@@ -37,7 +37,6 @@ from pipecat.processors.frame_processor import FrameDirection
|
|
|
37
37
|
from pipecat.services.ai_service import AIService
|
|
38
38
|
from pipecat.services.websocket_service import WebsocketService
|
|
39
39
|
from pipecat.transcriptions.language import Language
|
|
40
|
-
from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
|
|
41
40
|
from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
|
|
42
41
|
from pipecat.utils.text.base_text_filter import BaseTextFilter
|
|
43
42
|
from pipecat.utils.text.simple_text_aggregator import SimpleTextAggregator
|
|
@@ -50,21 +49,6 @@ class TTSService(AIService):
|
|
|
50
49
|
Provides common functionality for TTS services including text aggregation,
|
|
51
50
|
filtering, audio generation, and frame management. Supports configurable
|
|
52
51
|
sentence aggregation, silence insertion, and frame processing control.
|
|
53
|
-
|
|
54
|
-
Args:
|
|
55
|
-
aggregate_sentences: Whether to aggregate text into sentences before synthesis.
|
|
56
|
-
push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames.
|
|
57
|
-
push_stop_frames: Whether to automatically push TTSStoppedFrames.
|
|
58
|
-
stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True.
|
|
59
|
-
push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
|
|
60
|
-
silence_time_s: Duration of silence to push when push_silence_after_stop is True.
|
|
61
|
-
pause_frame_processing: Whether to pause frame processing during audio generation.
|
|
62
|
-
sample_rate: Output sample rate for generated audio.
|
|
63
|
-
text_aggregator: Custom text aggregator for processing incoming text.
|
|
64
|
-
text_filters: Sequence of text filters to apply after aggregation.
|
|
65
|
-
text_filter: Single text filter (deprecated, use text_filters).
|
|
66
|
-
transport_destination: Destination for generated audio frames.
|
|
67
|
-
**kwargs: Additional arguments passed to the parent AIService.
|
|
68
52
|
"""
|
|
69
53
|
|
|
70
54
|
def __init__(
|
|
@@ -97,6 +81,27 @@ class TTSService(AIService):
|
|
|
97
81
|
transport_destination: Optional[str] = None,
|
|
98
82
|
**kwargs,
|
|
99
83
|
):
|
|
84
|
+
"""Initialize the TTS service.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
aggregate_sentences: Whether to aggregate text into sentences before synthesis.
|
|
88
|
+
push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames.
|
|
89
|
+
push_stop_frames: Whether to automatically push TTSStoppedFrames.
|
|
90
|
+
stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True.
|
|
91
|
+
push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
|
|
92
|
+
silence_time_s: Duration of silence to push when push_silence_after_stop is True.
|
|
93
|
+
pause_frame_processing: Whether to pause frame processing during audio generation.
|
|
94
|
+
sample_rate: Output sample rate for generated audio.
|
|
95
|
+
text_aggregator: Custom text aggregator for processing incoming text.
|
|
96
|
+
text_filters: Sequence of text filters to apply after aggregation.
|
|
97
|
+
text_filter: Single text filter (deprecated, use text_filters).
|
|
98
|
+
|
|
99
|
+
.. deprecated:: 0.0.59
|
|
100
|
+
Use `text_filters` instead, which allows multiple filters.
|
|
101
|
+
|
|
102
|
+
transport_destination: Destination for generated audio frames.
|
|
103
|
+
**kwargs: Additional arguments passed to the parent AIService.
|
|
104
|
+
"""
|
|
100
105
|
super().__init__(**kwargs)
|
|
101
106
|
self._aggregate_sentences: bool = aggregate_sentences
|
|
102
107
|
self._push_text_frames: bool = push_text_frames
|
|
@@ -112,9 +117,10 @@ class TTSService(AIService):
|
|
|
112
117
|
self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator()
|
|
113
118
|
self._text_filters: Sequence[BaseTextFilter] = text_filters or []
|
|
114
119
|
self._transport_destination: Optional[str] = transport_destination
|
|
115
|
-
|
|
116
120
|
self._tracing_enabled: bool = False
|
|
117
|
-
|
|
121
|
+
self._voice_config: Dict[str, Any] = {}
|
|
122
|
+
self._voice = None
|
|
123
|
+
self._voice_clone_params = None
|
|
118
124
|
|
|
119
125
|
if text_filter:
|
|
120
126
|
import warnings
|
|
@@ -225,6 +231,7 @@ class TTSService(AIService):
|
|
|
225
231
|
self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate
|
|
226
232
|
if self._push_stop_frames and not self._stop_frame_task:
|
|
227
233
|
self._stop_frame_task = self.create_task(self._stop_frame_handler())
|
|
234
|
+
self._tracing_enabled = frame.enable_tracing
|
|
228
235
|
|
|
229
236
|
async def stop(self, frame: EndFrame):
|
|
230
237
|
"""Stop the TTS service.
|
|
@@ -257,7 +264,7 @@ class TTSService(AIService):
|
|
|
257
264
|
self._settings[key] = self.language_to_service_language(value)
|
|
258
265
|
elif key == "model":
|
|
259
266
|
self.set_model_name(value)
|
|
260
|
-
elif key == "voice":
|
|
267
|
+
elif key == "voice" or key == "voice_id":
|
|
261
268
|
self.set_voice(value)
|
|
262
269
|
elif key == "text_filter":
|
|
263
270
|
for filter in self._text_filters:
|
|
@@ -268,9 +275,20 @@ class TTSService(AIService):
|
|
|
268
275
|
async def say(self, text: str):
|
|
269
276
|
"""Immediately speak the provided text.
|
|
270
277
|
|
|
278
|
+
.. deprecated:: 0.0.79
|
|
279
|
+
Push a `TTSSpeakFrame` instead to ensure frame ordering is maintained.
|
|
280
|
+
|
|
271
281
|
Args:
|
|
272
282
|
text: The text to speak.
|
|
273
283
|
"""
|
|
284
|
+
import warnings
|
|
285
|
+
|
|
286
|
+
warnings.warn(
|
|
287
|
+
"`TTSService.say()` is deprecated. Push a `TTSSpeakFrame` instead.",
|
|
288
|
+
DeprecationWarning,
|
|
289
|
+
stacklevel=2,
|
|
290
|
+
)
|
|
291
|
+
|
|
274
292
|
await self.queue_frame(TTSSpeakFrame(text))
|
|
275
293
|
|
|
276
294
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
@@ -433,7 +451,7 @@ class TTSService(AIService):
|
|
|
433
451
|
while True:
|
|
434
452
|
try:
|
|
435
453
|
frame = await asyncio.wait_for(
|
|
436
|
-
self._stop_frame_queue.get(), self._stop_frame_timeout_s
|
|
454
|
+
self._stop_frame_queue.get(), timeout=self._stop_frame_timeout_s
|
|
437
455
|
)
|
|
438
456
|
if isinstance(frame, TTSStartedFrame):
|
|
439
457
|
has_started = True
|
|
@@ -443,8 +461,6 @@ class TTSService(AIService):
|
|
|
443
461
|
if has_started:
|
|
444
462
|
await self.push_frame(TTSStoppedFrame())
|
|
445
463
|
has_started = False
|
|
446
|
-
finally:
|
|
447
|
-
self.reset_watchdog()
|
|
448
464
|
|
|
449
465
|
|
|
450
466
|
class WordTTSService(TTSService):
|
|
@@ -452,12 +468,14 @@ class WordTTSService(TTSService):
|
|
|
452
468
|
|
|
453
469
|
Word timestamps are useful to synchronize audio with text of the spoken
|
|
454
470
|
words. This way only the spoken words are added to the conversation context.
|
|
455
|
-
|
|
456
|
-
Args:
|
|
457
|
-
**kwargs: Additional arguments passed to the parent TTSService.
|
|
458
471
|
"""
|
|
459
472
|
|
|
460
473
|
def __init__(self, **kwargs):
|
|
474
|
+
"""Initialize the Word TTS service.
|
|
475
|
+
|
|
476
|
+
Args:
|
|
477
|
+
**kwargs: Additional arguments passed to the parent TTSService.
|
|
478
|
+
"""
|
|
461
479
|
super().__init__(**kwargs)
|
|
462
480
|
self._initial_word_timestamp = -1
|
|
463
481
|
self._words_task = None
|
|
@@ -529,7 +547,7 @@ class WordTTSService(TTSService):
|
|
|
529
547
|
|
|
530
548
|
def _create_words_task(self):
|
|
531
549
|
if not self._words_task:
|
|
532
|
-
self._words_queue = WatchdogQueue(self.task_manager)
|
|
550
|
+
self._words_queue = asyncio.Queue()
|
|
533
551
|
self._words_task = self.create_task(self._words_task_handler())
|
|
534
552
|
|
|
535
553
|
async def _stop_words_task(self):
|
|
@@ -566,22 +584,23 @@ class WebsocketTTSService(TTSService, WebsocketService):
|
|
|
566
584
|
Combines TTS functionality with websocket connectivity, providing automatic
|
|
567
585
|
error handling and reconnection capabilities.
|
|
568
586
|
|
|
569
|
-
Args:
|
|
570
|
-
reconnect_on_error: Whether to automatically reconnect on websocket errors.
|
|
571
|
-
**kwargs: Additional arguments passed to parent classes.
|
|
572
|
-
|
|
573
587
|
Event handlers:
|
|
574
588
|
on_connection_error: Called when a websocket connection error occurs.
|
|
575
589
|
|
|
576
|
-
Example
|
|
577
|
-
|
|
590
|
+
Example::
|
|
591
|
+
|
|
578
592
|
@tts.event_handler("on_connection_error")
|
|
579
593
|
async def on_connection_error(tts: TTSService, error: str):
|
|
580
594
|
logger.error(f"TTS connection error: {error}")
|
|
581
|
-
```
|
|
582
595
|
"""
|
|
583
596
|
|
|
584
597
|
def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
|
|
598
|
+
"""Initialize the Websocket TTS service.
|
|
599
|
+
|
|
600
|
+
Args:
|
|
601
|
+
reconnect_on_error: Whether to automatically reconnect on websocket errors.
|
|
602
|
+
**kwargs: Additional arguments passed to parent classes.
|
|
603
|
+
"""
|
|
585
604
|
TTSService.__init__(self, **kwargs)
|
|
586
605
|
WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
|
|
587
606
|
self._register_event_handler("on_connection_error")
|
|
@@ -596,12 +615,14 @@ class InterruptibleTTSService(WebsocketTTSService):
|
|
|
596
615
|
|
|
597
616
|
Designed for TTS services that don't support word timestamps. Handles interruptions
|
|
598
617
|
by reconnecting the websocket when the bot is speaking and gets interrupted.
|
|
599
|
-
|
|
600
|
-
Args:
|
|
601
|
-
**kwargs: Additional arguments passed to the parent WebsocketTTSService.
|
|
602
618
|
"""
|
|
603
619
|
|
|
604
620
|
def __init__(self, **kwargs):
|
|
621
|
+
"""Initialize the Interruptible TTS service.
|
|
622
|
+
|
|
623
|
+
Args:
|
|
624
|
+
**kwargs: Additional arguments passed to the parent WebsocketTTSService.
|
|
625
|
+
"""
|
|
605
626
|
super().__init__(**kwargs)
|
|
606
627
|
|
|
607
628
|
# Indicates if the bot is speaking. If the bot is not speaking we don't
|
|
@@ -635,22 +656,23 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService):
|
|
|
635
656
|
|
|
636
657
|
Combines word timestamp functionality with websocket connectivity.
|
|
637
658
|
|
|
638
|
-
Args:
|
|
639
|
-
reconnect_on_error: Whether to automatically reconnect on websocket errors.
|
|
640
|
-
**kwargs: Additional arguments passed to parent classes.
|
|
641
|
-
|
|
642
659
|
Event handlers:
|
|
643
660
|
on_connection_error: Called when a websocket connection error occurs.
|
|
644
661
|
|
|
645
|
-
Example
|
|
646
|
-
|
|
662
|
+
Example::
|
|
663
|
+
|
|
647
664
|
@tts.event_handler("on_connection_error")
|
|
648
665
|
async def on_connection_error(tts: TTSService, error: str):
|
|
649
666
|
logger.error(f"TTS connection error: {error}")
|
|
650
|
-
```
|
|
651
667
|
"""
|
|
652
668
|
|
|
653
669
|
def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
|
|
670
|
+
"""Initialize the Websocket Word TTS service.
|
|
671
|
+
|
|
672
|
+
Args:
|
|
673
|
+
reconnect_on_error: Whether to automatically reconnect on websocket errors.
|
|
674
|
+
**kwargs: Additional arguments passed to parent classes.
|
|
675
|
+
"""
|
|
654
676
|
WordTTSService.__init__(self, **kwargs)
|
|
655
677
|
WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
|
|
656
678
|
self._register_event_handler("on_connection_error")
|
|
@@ -665,12 +687,14 @@ class InterruptibleWordTTSService(WebsocketWordTTSService):
|
|
|
665
687
|
|
|
666
688
|
For TTS services that support word timestamps but can't correlate generated
|
|
667
689
|
audio with requested text. Handles interruptions by reconnecting when needed.
|
|
668
|
-
|
|
669
|
-
Args:
|
|
670
|
-
**kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
|
|
671
690
|
"""
|
|
672
691
|
|
|
673
692
|
def __init__(self, **kwargs):
|
|
693
|
+
"""Initialize the Interruptible Word TTS service.
|
|
694
|
+
|
|
695
|
+
Args:
|
|
696
|
+
**kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
|
|
697
|
+
"""
|
|
674
698
|
super().__init__(**kwargs)
|
|
675
699
|
|
|
676
700
|
# Indicates if the bot is speaking. If the bot is not speaking we don't
|
|
@@ -713,12 +737,14 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
|
|
|
713
737
|
The audio received from the TTS will be played in context order. That is, if
|
|
714
738
|
we requested audio for a context "A" and then audio for context "B", the
|
|
715
739
|
audio from context ID "A" will be played first.
|
|
716
|
-
|
|
717
|
-
Args:
|
|
718
|
-
**kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
|
|
719
740
|
"""
|
|
720
741
|
|
|
721
742
|
def __init__(self, **kwargs):
|
|
743
|
+
"""Initialize the Audio Context Word TTS service.
|
|
744
|
+
|
|
745
|
+
Args:
|
|
746
|
+
**kwargs: Additional arguments passed to the parent WebsocketWordTTSService.
|
|
747
|
+
"""
|
|
722
748
|
super().__init__(**kwargs)
|
|
723
749
|
self._contexts: Dict[str, asyncio.Queue] = {}
|
|
724
750
|
self._audio_context_task = None
|
|
@@ -792,7 +818,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
|
|
|
792
818
|
# Indicate no more audio contexts are available. this will end the
|
|
793
819
|
# task cleanly after all contexts have been processed.
|
|
794
820
|
await self._contexts_queue.put(None)
|
|
795
|
-
await self.wait_for_task(self._audio_context_task)
|
|
821
|
+
await self._audio_context_task
|
|
796
822
|
self._audio_context_task = None
|
|
797
823
|
|
|
798
824
|
async def cancel(self, frame: CancelFrame):
|
|
@@ -811,7 +837,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
|
|
|
811
837
|
|
|
812
838
|
def _create_audio_context_task(self):
|
|
813
839
|
if not self._audio_context_task:
|
|
814
|
-
self._contexts_queue = WatchdogQueue(self.task_manager)
|
|
840
|
+
self._contexts_queue = asyncio.Queue()
|
|
815
841
|
self._contexts: Dict[str, asyncio.Queue] = {}
|
|
816
842
|
self._audio_context_task = self.create_task(self._audio_context_task_handler())
|
|
817
843
|
|
|
@@ -853,12 +879,10 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
|
|
|
853
879
|
while running:
|
|
854
880
|
try:
|
|
855
881
|
frame = await asyncio.wait_for(queue.get(), timeout=AUDIO_CONTEXT_TIMEOUT)
|
|
856
|
-
self.reset_watchdog()
|
|
857
882
|
if frame:
|
|
858
883
|
await self.push_frame(frame)
|
|
859
884
|
running = frame is not None
|
|
860
885
|
except asyncio.TimeoutError:
|
|
861
|
-
self.reset_watchdog()
|
|
862
886
|
# We didn't get audio, so let's consider this context finished.
|
|
863
887
|
logger.trace(f"{self} time out on audio context {context_id}")
|
|
864
888
|
break
|