dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""HeyGen implementation for Pipecat.
|
|
8
|
+
|
|
9
|
+
This module provides integration with the HeyGen platform for creating conversational
|
|
10
|
+
AI applications with avatars. It manages conversation sessions and provides real-time
|
|
11
|
+
audio/video streaming capabilities through the HeyGen API.
|
|
12
|
+
|
|
13
|
+
The module consists of three main components:
|
|
14
|
+
- HeyGenInputTransport: Handles incoming audio and events from HeyGen conversations
|
|
15
|
+
- HeyGenOutputTransport: Manages outgoing audio and events to HeyGen conversations
|
|
16
|
+
- HeyGenTransport: Main transport implementation that coordinates input/output transports
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from typing import Any, Optional
|
|
20
|
+
|
|
21
|
+
import aiohttp
|
|
22
|
+
from loguru import logger
|
|
23
|
+
|
|
24
|
+
from pipecat.frames.frames import (
|
|
25
|
+
AudioRawFrame,
|
|
26
|
+
BotStartedSpeakingFrame,
|
|
27
|
+
BotStoppedSpeakingFrame,
|
|
28
|
+
CancelFrame,
|
|
29
|
+
EndFrame,
|
|
30
|
+
Frame,
|
|
31
|
+
InputAudioRawFrame,
|
|
32
|
+
InterruptionFrame,
|
|
33
|
+
OutputAudioRawFrame,
|
|
34
|
+
StartFrame,
|
|
35
|
+
UserStartedSpeakingFrame,
|
|
36
|
+
UserStoppedSpeakingFrame,
|
|
37
|
+
)
|
|
38
|
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup
|
|
39
|
+
from pipecat.services.heygen.api import NewSessionRequest
|
|
40
|
+
from pipecat.services.heygen.client import HeyGenCallbacks, HeyGenClient
|
|
41
|
+
from pipecat.transports.base_input import BaseInputTransport
|
|
42
|
+
from pipecat.transports.base_output import BaseOutputTransport
|
|
43
|
+
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class HeyGenInputTransport(BaseInputTransport):
    """Input transport for receiving audio and events from HeyGen conversations.

    Handles incoming audio streams from participants and manages audio capture
    from the Daily room connected to the HeyGen conversation.
    """

    def __init__(
        self,
        client: HeyGenClient,
        params: TransportParams,
        **kwargs,
    ):
        """Initialize the HeyGen input transport.

        Args:
            client: The HeyGen transport client instance.
            params: Transport configuration parameters.
            **kwargs: Additional arguments passed to parent class.
        """
        super().__init__(params, **kwargs)
        self._client = client
        self._params = params
        # Whether we have seen a StartFrame already.
        self._initialized = False

    async def setup(self, setup: FrameProcessorSetup):
        """Setup the input transport.

        Args:
            setup: The frame processor setup configuration.
        """
        await super().setup(setup)
        # NOTE(review): the client is shared with the output transport, so its
        # setup() may be invoked twice — presumably idempotent; confirm in
        # HeyGenClient.
        await self._client.setup(setup)

    async def cleanup(self):
        """Cleanup input transport resources."""
        await super().cleanup()
        await self._client.cleanup()

    async def start(self, frame: StartFrame):
        """Start the input transport.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)

        # Guard so repeated StartFrames only trigger initialization once.
        if self._initialized:
            return

        self._initialized = True

        await self.set_transport_ready(frame)

    async def stop(self, frame: EndFrame):
        """Stop the input transport.

        Args:
            frame: The end frame signaling transport shutdown.
        """
        await super().stop(frame)
        await self._client.stop()

    async def cancel(self, frame: CancelFrame):
        """Cancel the input transport.

        Args:
            frame: The cancel frame signaling immediate cancellation.
        """
        await super().cancel(frame)
        await self._client.stop()

    async def start_capturing_audio(self, participant_id: str):
        """Start capturing audio from a participant.

        No-op when audio input is disabled in the transport params.

        Args:
            participant_id: The participant to capture audio from.
        """
        if self._params.audio_in_enabled:
            logger.info(f"HeyGenTransport start capturing audio for participant {participant_id}")
            await self._client.capture_participant_audio(
                participant_id, self._on_participant_audio_data
            )

    async def _on_participant_audio_data(self, audio_frame: AudioRawFrame):
        """Handle received participant audio data."""
        # Re-wrap the raw audio as an InputAudioRawFrame so it enters the
        # pipeline as transport input.
        frame = InputAudioRawFrame(
            audio=audio_frame.audio,
            sample_rate=audio_frame.sample_rate,
            num_channels=audio_frame.num_channels,
        )
        await self.push_audio_frame(frame)
class HeyGenOutputTransport(BaseOutputTransport):
    """Output transport for sending audio and events to HeyGen conversations.

    Handles outgoing audio streams to participants and manages the custom
    audio track expected by the HeyGen platform.
    """

    def __init__(
        self,
        client: HeyGenClient,
        params: TransportParams,
        **kwargs,
    ):
        """Initialize the HeyGen output transport.

        Args:
            client: The HeyGen transport client instance.
            params: Transport configuration parameters.
            **kwargs: Additional arguments passed to parent class.
        """
        super().__init__(params, **kwargs)
        self._client = client
        self._params = params

        # Whether we have seen a StartFrame already.
        self._initialized = False
        # Id of the bot utterance currently in progress (set on
        # BotStartedSpeakingFrame, cleared on BotStoppedSpeakingFrame); passed
        # to the client so speak/interrupt calls can be correlated.
        self._event_id = None

    async def setup(self, setup: FrameProcessorSetup):
        """Setup the output transport.

        Args:
            setup: The frame processor setup configuration.
        """
        await super().setup(setup)
        await self._client.setup(setup)

    async def cleanup(self):
        """Cleanup output transport resources."""
        await super().cleanup()
        await self._client.cleanup()

    async def start(self, frame: StartFrame):
        """Start the output transport.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)

        # Guard so repeated StartFrames only trigger initialization once.
        if self._initialized:
            return

        self._initialized = True
        await self._client.start(frame, self.audio_chunk_size)
        await self.set_transport_ready(frame)
        self._client.transport_ready()

    async def stop(self, frame: EndFrame):
        """Stop the output transport.

        Args:
            frame: The end frame signaling transport shutdown.
        """
        await super().stop(frame)
        await self._client.stop()

    async def cancel(self, frame: CancelFrame):
        """Cancel the output transport.

        Args:
            frame: The cancel frame signaling immediate cancellation.
        """
        await super().cancel(frame)
        await self._client.stop()

    async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
        """Push a frame to the next processor in the pipeline.

        Args:
            frame: The frame to push.
            direction: The direction to push the frame.
        """
        # The BotStartedSpeakingFrame and BotStoppedSpeakingFrame are created inside BaseOutputTransport
        # This is a workaround, so we can more reliably be aware when the bot has started or stopped speaking
        if direction == FrameDirection.DOWNSTREAM:
            if isinstance(frame, BotStartedSpeakingFrame):
                # A previous utterance never received a stop event; log but
                # keep going with the newer id.
                if self._event_id is not None:
                    logger.warning("self._event_id is already defined!")
                self._event_id = str(frame.id)
            elif isinstance(frame, BotStoppedSpeakingFrame):
                # NOTE(review): if a stop arrives without a preceding start,
                # agent_speak_end() is called with None — confirm HeyGenClient
                # tolerates that.
                await self._client.agent_speak_end(self._event_id)
                self._event_id = None
        await super().push_frame(frame, direction)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames and handle interruptions.

        Handles various types of frames including interruption events and user speaking states.
        Updates the HeyGen client state based on the received frames.

        Args:
            frame: The frame to process
            direction: The direction of frame flow in the pipeline

        Note:
            Special handling is implemented for:
            - InterruptionFrame: Triggers interruption of current speech
            - UserStartedSpeakingFrame: Initiates agent listening mode
            - UserStoppedSpeakingFrame: Stops agent listening mode
        """
        await super().process_frame(frame, direction)
        if isinstance(frame, InterruptionFrame):
            # Notify HeyGen before forwarding so the avatar stops speaking as
            # early as possible.
            await self._client.interrupt(self._event_id)
            await self.push_frame(frame, direction)
        if isinstance(frame, UserStartedSpeakingFrame):
            await self._client.start_agent_listening()
            await self.push_frame(frame, direction)
        elif isinstance(frame, UserStoppedSpeakingFrame):
            await self._client.stop_agent_listening()
            await self.push_frame(frame, direction)

    async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
        """Write an audio frame to the HeyGen transport.

        Args:
            frame: The audio frame to write.
        """
        await self._client.agent_speak(bytes(frame.audio), self._event_id)
        return True
+
|
|
273
|
+
class HeyGenParams(TransportParams):
    """HeyGen-specific transport configuration.

    Parameters:
        audio_in_enabled: Whether to enable audio input from participants.
        audio_out_enabled: Whether to enable audio output to participants.
    """

    # Both audio directions default to on: in a typical HeyGen session the
    # avatar must both hear the user and speak back.
    audio_in_enabled: bool = True
    audio_out_enabled: bool = True
|
285
|
+
class HeyGenTransport(BaseTransport):
    """Transport implementation for HeyGen video calls.

    When used, the Pipecat bot joins the same virtual room as the HeyGen Avatar
    and the user. This is achieved by using `HeyGenTransport`, which initiates
    the conversation via `HeyGenApi` and obtains a room URL that all
    participants connect to.
    """

    def __init__(
        self,
        session: aiohttp.ClientSession,
        api_key: str,
        params: Optional[HeyGenParams] = None,
        input_name: Optional[str] = None,
        output_name: Optional[str] = None,
        session_request: Optional[NewSessionRequest] = None,
    ):
        """Initialize the HeyGen transport.

        Sets up a new HeyGen transport instance with the specified configuration
        for handling video calls between the Pipecat bot and HeyGen Avatar.

        Args:
            session: aiohttp session for making async HTTP requests.
            api_key: HeyGen API key for authentication.
            params: HeyGen-specific configuration parameters. Defaults to a
                fresh ``HeyGenParams()`` when not provided.
            input_name: Optional custom name for the input transport.
            output_name: Optional custom name for the output transport.
            session_request: Configuration for the HeyGen session. Defaults to
                the "Shawn_Therapist_public" avatar on API version "v2".

        Note:
            The transport will automatically join the same virtual room as the
            HeyGen Avatar and user through the HeyGenClient, which handles
            session initialization via HeyGenApi.
        """
        super().__init__(input_name=input_name, output_name=output_name)
        # Build the defaults per-instance instead of in the signature so that a
        # single default object is never shared between transports
        # (mutable-default-argument pitfall: defaults are evaluated once at
        # function definition time).
        self._params = params if params is not None else HeyGenParams()
        if session_request is None:
            session_request = NewSessionRequest(
                avatar_id="Shawn_Therapist_public",
                version="v2",
            )
        self._client = HeyGenClient(
            api_key=api_key,
            session=session,
            params=self._params,
            session_request=session_request,
            callbacks=HeyGenCallbacks(
                on_participant_connected=self._on_participant_connected,
                on_participant_disconnected=self._on_participant_disconnected,
            ),
        )
        self._input: Optional[HeyGenInputTransport] = None
        self._output: Optional[HeyGenOutputTransport] = None
        # Renamed from `_HeyGen_participant_id` for PEP 8 compliance; currently
        # never read in this module — kept for future participant tracking.
        self._heygen_participant_id = None

        # Register supported handlers. The user will only be able to register
        # these handlers.
        self._register_event_handler("on_client_connected")
        self._register_event_handler("on_client_disconnected")

    async def _on_participant_disconnected(self, participant_id: str):
        """Forward non-avatar participant disconnects to the public handler."""
        logger.debug(f"HeyGen participant {participant_id} disconnected")
        # The avatar joins as participant "heygen"; only surface events for
        # real clients.
        if participant_id != "heygen":
            await self._on_client_disconnected(participant_id)

    async def _on_participant_connected(self, participant_id: str):
        """Forward non-avatar participant connects and start audio capture."""
        logger.debug(f"HeyGen participant {participant_id} connected")
        if participant_id != "heygen":
            await self._on_client_connected(participant_id)
            # Only capture audio if the input transport was already created
            # via input().
            if self._input:
                await self._input.start_capturing_audio(participant_id)

    def input(self) -> FrameProcessor:
        """Get the input transport for receiving media and events.

        Returns:
            The HeyGen input transport instance (created lazily).
        """
        if not self._input:
            self._input = HeyGenInputTransport(client=self._client, params=self._params)
        return self._input

    def output(self) -> FrameProcessor:
        """Get the output transport for sending media and events.

        Returns:
            The HeyGen output transport instance (created lazily).
        """
        if not self._output:
            self._output = HeyGenOutputTransport(client=self._client, params=self._params)
        return self._output

    async def _on_client_connected(self, participant: Any):
        """Handle client connected events."""
        await self._call_event_handler("on_client_connected", participant)

    async def _on_client_disconnected(self, participant: Any):
        """Handle client disconnected events."""
        await self._call_event_handler("on_client_disconnected", participant)
|