dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""HeyGen implementation for Pipecat.
|
|
8
|
+
|
|
9
|
+
This module provides integration with the HeyGen platform for creating conversational
|
|
10
|
+
AI applications with avatars. It manages conversation sessions and provides real-time
|
|
11
|
+
audio/video streaming capabilities through the HeyGen API.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
import aiohttp
|
|
18
|
+
from loguru import logger
|
|
19
|
+
|
|
20
|
+
from pipecat.audio.utils import create_stream_resampler
|
|
21
|
+
from pipecat.frames.frames import (
|
|
22
|
+
AudioRawFrame,
|
|
23
|
+
BotStartedSpeakingFrame,
|
|
24
|
+
CancelFrame,
|
|
25
|
+
EndFrame,
|
|
26
|
+
Frame,
|
|
27
|
+
ImageRawFrame,
|
|
28
|
+
OutputAudioRawFrame,
|
|
29
|
+
OutputImageRawFrame,
|
|
30
|
+
OutputTransportReadyFrame,
|
|
31
|
+
SpeechOutputAudioRawFrame,
|
|
32
|
+
StartFrame,
|
|
33
|
+
TTSAudioRawFrame,
|
|
34
|
+
TTSStartedFrame,
|
|
35
|
+
UserStartedSpeakingFrame,
|
|
36
|
+
UserStoppedSpeakingFrame,
|
|
37
|
+
)
|
|
38
|
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
|
|
39
|
+
from pipecat.services.ai_service import AIService
|
|
40
|
+
from pipecat.services.heygen.api import NewSessionRequest
|
|
41
|
+
from pipecat.services.heygen.client import HEY_GEN_SAMPLE_RATE, HeyGenCallbacks, HeyGenClient
|
|
42
|
+
from pipecat.transports.base_transport import TransportParams
|
|
43
|
+
|
|
44
|
+
# Using the same values that we do in the BaseOutputTransport
# Seconds of queue silence after which the send loop decides the bot has
# stopped speaking and flushes the current utterance (see _send_task_handler).
AVATAR_VAD_STOP_SECS = 0.35
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class HeyGenVideoService(AIService):
    """A service that integrates HeyGen's interactive avatar capabilities into the pipeline.

    This service manages the lifecycle of a HeyGen avatar session by handling bidirectional
    audio/video streaming, avatar animations, and user interactions. It processes various frame types
    to coordinate the avatar's behavior and maintains synchronization between audio and video streams.

    The service supports:

    - Real-time avatar animation based on audio input
    - Voice activity detection for natural interactions
    - Interrupt handling for more natural conversations
    - Audio resampling for optimal quality
    - Automatic session management

    Args:
        api_key (str): HeyGen API key for authentication
        session (aiohttp.ClientSession): HTTP client session for API requests
        session_request (NewSessionRequest, optional): Configuration for the HeyGen session.
            Defaults to using the "Shawn_Therapist_public" avatar.
    """

    def __init__(
        self,
        *,
        api_key: str,
        session: aiohttp.ClientSession,
        session_request: Optional[NewSessionRequest] = None,
        **kwargs,
    ) -> None:
        """Initialize the HeyGen video service.

        Args:
            api_key: HeyGen API key for authentication
            session: HTTP client session for API requests
            session_request: Configuration for the HeyGen session. When None, a fresh
                request using the "Shawn_Therapist_public" avatar is created.
            **kwargs: Additional arguments passed to parent AIService
        """
        super().__init__(**kwargs)
        self._api_key = api_key
        self._session = session
        self._client: Optional[HeyGenClient] = None
        self._send_task: Optional[asyncio.Task] = None
        self._resampler = create_stream_resampler()
        self._is_interrupting = False
        # Build the default request here rather than in the signature: a default in
        # the signature is evaluated once at class-definition time and the same
        # (mutable) object would be shared by every instance of this service.
        if session_request is None:
            session_request = NewSessionRequest(avatar_id="Shawn_Therapist_public")
        self._session_request = session_request
        self._other_participant_has_joined = False
        self._event_id = None
        self._audio_chunk_size = 0

    async def setup(self, setup: FrameProcessorSetup):
        """Set up the HeyGen video service with necessary configuration.

        Initializes the HeyGen client, establishes connections, and prepares the service
        for audio/video processing. This includes setting up audio/video streams,
        configuring callbacks, and initializing the resampler.

        Args:
            setup: Configuration parameters for the frame processor.
        """
        await super().setup(setup)
        self._client = HeyGenClient(
            api_key=self._api_key,
            session=self._session,
            params=TransportParams(
                audio_in_enabled=True,
                video_in_enabled=True,
                audio_out_enabled=True,
                audio_out_sample_rate=HEY_GEN_SAMPLE_RATE,
            ),
            session_request=self._session_request,
            callbacks=HeyGenCallbacks(
                on_participant_connected=self._on_participant_connected,
                on_participant_disconnected=self._on_participant_disconnected,
            ),
        )
        await self._client.setup(setup)

    async def cleanup(self):
        """Clean up the service and release resources.

        Terminates the HeyGen client session and cleans up associated resources.
        Safe to call when setup() never ran or cleanup() already ran.
        """
        await super().cleanup()
        # Guard: _client is None before setup() and after a previous cleanup();
        # calling this unguarded would raise AttributeError.
        if self._client:
            await self._client.cleanup()
            self._client = None

    async def _on_participant_connected(self, participant_id: str):
        """Handle participant connected events."""
        logger.info(f"Participant connected {participant_id}")
        # Only capture media for the first remote participant that joins.
        if not self._other_participant_has_joined:
            self._other_participant_has_joined = True
            await self._client.capture_participant_video(
                participant_id, self._on_participant_video_frame
            )
            await self._client.capture_participant_audio(
                participant_id, self._on_participant_audio_data
            )

    async def _on_participant_disconnected(self, participant_id: str):
        """Handle participant disconnected events."""
        logger.info(f"Participant disconnected {participant_id}")

    async def _on_participant_video_frame(self, video_frame: ImageRawFrame):
        """Handle incoming video frames from participants."""
        frame = OutputImageRawFrame(
            image=video_frame.image,
            size=video_frame.size,
            format=video_frame.format,
        )
        await self.push_frame(frame)

    async def _on_participant_audio_data(self, audio_frame: AudioRawFrame):
        """Handle incoming audio data from participants."""
        frame = SpeechOutputAudioRawFrame(
            audio=audio_frame.audio,
            sample_rate=audio_frame.sample_rate,
            num_channels=audio_frame.num_channels,
        )
        await self.push_frame(frame)

    async def start(self, frame: StartFrame):
        """Start the HeyGen video service and initialize the avatar session.

        Creates necessary tasks for audio/video processing and establishes
        the connection with the HeyGen service.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)
        # 40 ms of audio, match the default behavior from the output transport
        # (2 bytes/sample * sample_rate / 25 chunks per second).
        self._audio_chunk_size = int((HEY_GEN_SAMPLE_RATE * 2) / 25)
        await self._client.start(frame, self._audio_chunk_size)
        await self._create_send_task()

    async def stop(self, frame: EndFrame):
        """Stop the HeyGen video service gracefully.

        Performs cleanup by ending the conversation and cancelling ongoing tasks
        in a controlled manner.

        Args:
            frame: The end frame.
        """
        await super().stop(frame)
        await self._end_conversation()
        await self._cancel_send_task()

    async def cancel(self, frame: CancelFrame):
        """Cancel the HeyGen video service.

        Performs an immediate termination of the service, cleaning up resources
        without waiting for ongoing operations to complete.

        Args:
            frame: The cancel frame.
        """
        await super().cancel(frame)
        await self._end_conversation()
        await self._cancel_send_task()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames and coordinate avatar behavior.

        Handles different types of frames to manage avatar interactions:
        - UserStartedSpeakingFrame: Activates avatar's listening animation
        - UserStoppedSpeakingFrame: Deactivates avatar's listening state
        - TTSAudioRawFrame: Processes audio for avatar speech
        - Other frames: Forwards them through the pipeline

        Args:
            frame: The frame to be processed.
            direction: The direction of frame processing (input/output).
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, UserStartedSpeakingFrame):
            await self._handle_user_started_speaking()
            await self.push_frame(frame, direction)
        elif isinstance(frame, UserStoppedSpeakingFrame):
            await self._client.stop_agent_listening()
            await self.push_frame(frame, direction)
        elif isinstance(frame, OutputTransportReadyFrame):
            self._client.transport_ready()
            await self.push_frame(frame, direction)
        elif isinstance(frame, TTSAudioRawFrame):
            # Consumed here (queued for the HeyGen avatar), not pushed downstream.
            await self._handle_audio_frame(frame)
        elif isinstance(frame, TTSStartedFrame):
            await self.start_ttfb_metrics()
        elif isinstance(frame, BotStartedSpeakingFrame):
            # We constantly receive audio through WebRTC, but most of the time it is silence.
            # As soon as we receive actual audio, the base output transport will create a
            # BotStartedSpeakingFrame, which we can use as a signal for the TTFB metrics.
            await self.stop_ttfb_metrics()
        else:
            await self.push_frame(frame, direction)

    def can_generate_metrics(self) -> bool:
        """Check if the service can generate metrics.

        Returns:
            True if metrics generation is supported.
        """
        return True

    async def _handle_user_started_speaking(self):
        """Handle the event when a user starts speaking.

        Manages the interruption flow by:
        1. Setting the interruption flag
        2. Signaling the client to interrupt current speech
        3. Cancelling ongoing audio sending tasks
        4. Creating a new send task
        5. Activating the avatar's listening animation
        """
        self._is_interrupting = True
        await self._client.interrupt(self._event_id)
        await self._cancel_send_task()
        self._is_interrupting = False
        await self._create_send_task()
        await self._client.start_agent_listening()

    async def _end_conversation(self):
        """End the current conversation and reset state.

        Stops the HeyGen client and cleans up conversation-specific resources.
        """
        self._other_participant_has_joined = False
        await self._client.stop()

    async def _create_send_task(self):
        """Create the audio sending task if it doesn't exist."""
        if not self._send_task:
            self._queue = asyncio.Queue()
            self._send_task = self.create_task(self._send_task_handler())

    async def _cancel_send_task(self):
        """Cancel the audio sending task if it exists."""
        if self._send_task:
            await self.cancel_task(self._send_task)
            self._send_task = None

    async def _handle_audio_frame(self, frame: OutputAudioRawFrame):
        """Queue an audio frame for processing.

        Places the audio frame in the processing queue for synchronized
        delivery to the HeyGen service.

        Args:
            frame: The audio frame to process.
        """
        await self._queue.put(frame)

    async def _send_task_handler(self):
        """Handle sending audio frames to the HeyGen client.

        Continuously processes audio frames from the queue and sends them to the
        HeyGen client. Handles timeouts and silence detection for proper audio
        streaming management.
        """
        sample_rate = self._client.out_sample_rate
        audio_buffer = bytearray()
        self._event_id = None

        while True:
            try:
                frame = await asyncio.wait_for(self._queue.get(), timeout=AVATAR_VAD_STOP_SECS)
                if self._is_interrupting:
                    break
                if isinstance(frame, TTSAudioRawFrame):
                    # starting the new inference
                    if self._event_id is None:
                        self._event_id = str(frame.id)

                    audio = await self._resampler.resample(
                        frame.audio, frame.sample_rate, sample_rate
                    )
                    audio_buffer.extend(audio)
                    # Forward fixed-size chunks; any remainder stays buffered for
                    # the next frame (or is dropped when the utterance ends).
                    while len(audio_buffer) >= self._audio_chunk_size:
                        chunk = audio_buffer[: self._audio_chunk_size]
                        audio_buffer = audio_buffer[self._audio_chunk_size :]

                        await self._client.agent_speak(bytes(chunk), self._event_id)
                self._queue.task_done()
            except asyncio.TimeoutError:
                # Bot has stopped speaking
                if self._event_id is not None:
                    await self._client.agent_speak_end(self._event_id)
                    self._event_id = None
                    audio_buffer.clear()
|
@@ -24,12 +24,14 @@ class ImageGenService(AIService):
|
|
|
24
24
|
Processes TextFrames by using their content as prompts for image generation.
|
|
25
25
|
Subclasses must implement the run_image_gen method to provide actual image
|
|
26
26
|
generation functionality using their specific AI service.
|
|
27
|
-
|
|
28
|
-
Args:
|
|
29
|
-
**kwargs: Additional arguments passed to the parent AIService.
|
|
30
27
|
"""
|
|
31
28
|
|
|
32
29
|
    def __init__(self, **kwargs):
        """Initialize the image generation service.

        Delegates all configuration to the parent AIService; this class adds no
        state of its own.

        Args:
            **kwargs: Additional arguments passed to the parent AIService.
        """
        super().__init__(**kwargs)
|
|
34
36
|
|
|
35
37
|
# Renders the image. Returns an Image object.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|