dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -0,0 +1,338 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """HeyGen implementation for Pipecat.
8
+
9
+ This module provides integration with the HeyGen platform for creating conversational
10
+ AI applications with avatars. It manages conversation sessions and provides real-time
11
+ audio/video streaming capabilities through the HeyGen API.
12
+ """
13
+
14
+ import asyncio
15
+ from typing import Optional
16
+
17
+ import aiohttp
18
+ from loguru import logger
19
+
20
+ from pipecat.audio.utils import create_stream_resampler
21
+ from pipecat.frames.frames import (
22
+ AudioRawFrame,
23
+ BotStartedSpeakingFrame,
24
+ CancelFrame,
25
+ EndFrame,
26
+ Frame,
27
+ ImageRawFrame,
28
+ OutputAudioRawFrame,
29
+ OutputImageRawFrame,
30
+ OutputTransportReadyFrame,
31
+ SpeechOutputAudioRawFrame,
32
+ StartFrame,
33
+ TTSAudioRawFrame,
34
+ TTSStartedFrame,
35
+ UserStartedSpeakingFrame,
36
+ UserStoppedSpeakingFrame,
37
+ )
38
+ from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
39
+ from pipecat.services.ai_service import AIService
40
+ from pipecat.services.heygen.api import NewSessionRequest
41
+ from pipecat.services.heygen.client import HEY_GEN_SAMPLE_RATE, HeyGenCallbacks, HeyGenClient
42
+ from pipecat.transports.base_transport import TransportParams
43
+
44
+ # Using the same values that we do in the BaseOutputTransport
45
+ AVATAR_VAD_STOP_SECS = 0.35
46
+
47
+
48
class HeyGenVideoService(AIService):
    """A service that integrates HeyGen's interactive avatar capabilities into the pipeline.

    This service manages the lifecycle of a HeyGen avatar session by handling bidirectional
    audio/video streaming, avatar animations, and user interactions. It processes various frame
    types to coordinate the avatar's behavior.

    The service supports:

    - Real-time avatar animation based on audio input
    - Interrupt handling when the user starts speaking
    - Audio resampling to the HeyGen client's output sample rate
    - Automatic session management
    """

    def __init__(
        self,
        *,
        api_key: str,
        session: aiohttp.ClientSession,
        session_request: Optional[NewSessionRequest] = None,
        **kwargs,
    ) -> None:
        """Initialize the HeyGen video service.

        Args:
            api_key: HeyGen API key for authentication
            session: HTTP client session for API requests
            session_request: Configuration for the HeyGen session. When omitted (or None),
                a new request using the "Shawn_Therapist_public" avatar is created.
            **kwargs: Additional arguments passed to parent AIService
        """
        super().__init__(**kwargs)
        self._api_key = api_key
        self._session = session
        self._client: Optional[HeyGenClient] = None
        self._send_task: Optional[asyncio.Task] = None
        # Created here so that queuing audio never fails on a missing attribute;
        # it is replaced with a fresh queue every time a send task is created.
        self._queue: asyncio.Queue = asyncio.Queue()
        self._resampler = create_stream_resampler()
        self._is_interrupting = False
        # Build the default per instance instead of sharing one object that was
        # created at function-definition time across every service.
        self._session_request = (
            session_request
            if session_request is not None
            else NewSessionRequest(avatar_id="Shawn_Therapist_public")
        )
        self._other_participant_has_joined = False
        self._event_id = None
        self._audio_chunk_size = 0

    async def setup(self, setup: FrameProcessorSetup):
        """Set up the HeyGen video service with necessary configuration.

        Creates the HeyGen client with audio/video input and audio output enabled,
        registers the participant callbacks and runs the client's own setup.

        Args:
            setup: Configuration parameters for the frame processor.
        """
        await super().setup(setup)
        self._client = HeyGenClient(
            api_key=self._api_key,
            session=self._session,
            params=TransportParams(
                audio_in_enabled=True,
                video_in_enabled=True,
                audio_out_enabled=True,
                audio_out_sample_rate=HEY_GEN_SAMPLE_RATE,
            ),
            session_request=self._session_request,
            callbacks=HeyGenCallbacks(
                on_participant_connected=self._on_participant_connected,
                on_participant_disconnected=self._on_participant_disconnected,
            ),
        )
        await self._client.setup(setup)

    async def cleanup(self):
        """Clean up the service and release resources.

        Cleans up the HeyGen client, if one was created, and drops the reference to it.
        Safe to call when setup() never ran or when cleanup() was already called.
        """
        await super().cleanup()
        if self._client is not None:
            await self._client.cleanup()
            self._client = None

    async def _on_participant_connected(self, participant_id: str):
        """Handle participant connected events.

        Only the first participant that joins is captured (video and audio).
        """
        logger.info(f"Participant connected {participant_id}")
        if not self._other_participant_has_joined:
            self._other_participant_has_joined = True
            await self._client.capture_participant_video(
                participant_id, self._on_participant_video_frame
            )
            await self._client.capture_participant_audio(
                participant_id, self._on_participant_audio_data
            )

    async def _on_participant_disconnected(self, participant_id: str):
        """Handle participant disconnected events."""
        logger.info(f"Participant disconnected {participant_id}")

    async def _on_participant_video_frame(self, video_frame: ImageRawFrame):
        """Re-emit an incoming participant video frame as an output image frame."""
        frame = OutputImageRawFrame(
            image=video_frame.image,
            size=video_frame.size,
            format=video_frame.format,
        )
        await self.push_frame(frame)

    async def _on_participant_audio_data(self, audio_frame: AudioRawFrame):
        """Re-emit incoming participant audio as a speech output audio frame."""
        frame = SpeechOutputAudioRawFrame(
            audio=audio_frame.audio,
            sample_rate=audio_frame.sample_rate,
            num_channels=audio_frame.num_channels,
        )
        await self.push_frame(frame)

    async def start(self, frame: StartFrame):
        """Start the HeyGen video service and initialize the avatar session.

        Computes the audio chunk size, starts the HeyGen client and creates
        the audio sending task.

        Args:
            frame: The start frame containing initialization parameters.
        """
        await super().start(frame)
        # 40 ms of audio, match the default behavior from the output transport
        self._audio_chunk_size = int((HEY_GEN_SAMPLE_RATE * 2) / 25)
        await self._client.start(frame, self._audio_chunk_size)
        await self._create_send_task()

    async def stop(self, frame: EndFrame):
        """Stop the HeyGen video service gracefully.

        Ends the conversation and cancels the audio sending task.

        Args:
            frame: The end frame.
        """
        await super().stop(frame)
        await self._end_conversation()
        await self._cancel_send_task()

    async def cancel(self, frame: CancelFrame):
        """Cancel the HeyGen video service.

        Ends the conversation and cancels the audio sending task.

        Args:
            frame: The cancel frame.
        """
        await super().cancel(frame)
        await self._end_conversation()
        await self._cancel_send_task()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames and coordinate avatar behavior.

        Handles different types of frames to manage avatar interactions:

        - UserStartedSpeakingFrame: Interrupts the avatar and starts listening, then forwards
        - UserStoppedSpeakingFrame: Stops the avatar's listening state, then forwards
        - OutputTransportReadyFrame: Notifies the client the transport is ready, then forwards
        - TTSAudioRawFrame: Queued for delivery to HeyGen; not forwarded
        - TTSStartedFrame: Starts TTFB metrics; not forwarded
        - BotStartedSpeakingFrame: Stops TTFB metrics; not forwarded
        - Other frames: Forwarded through the pipeline unchanged

        Args:
            frame: The frame to be processed.
            direction: The direction of frame processing (input/output).
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, UserStartedSpeakingFrame):
            await self._handle_user_started_speaking()
            await self.push_frame(frame, direction)
        elif isinstance(frame, UserStoppedSpeakingFrame):
            await self._client.stop_agent_listening()
            await self.push_frame(frame, direction)
        elif isinstance(frame, OutputTransportReadyFrame):
            self._client.transport_ready()
            await self.push_frame(frame, direction)
        elif isinstance(frame, TTSAudioRawFrame):
            await self._handle_audio_frame(frame)
        elif isinstance(frame, TTSStartedFrame):
            await self.start_ttfb_metrics()
        elif isinstance(frame, BotStartedSpeakingFrame):
            # We constantly receive audio through WebRTC, but most of the time it is silence.
            # As soon as we receive actual audio, the base output transport will create a
            # BotStartedSpeakingFrame, which we can use as a signal for the TTFB metrics.
            await self.stop_ttfb_metrics()
        else:
            await self.push_frame(frame, direction)

    def can_generate_metrics(self) -> bool:
        """Check if the service can generate metrics.

        Returns:
            True if metrics generation is supported.
        """
        return True

    async def _handle_user_started_speaking(self):
        """Handle the event when a user starts speaking.

        Manages the interruption flow by:

        1. Setting the interruption flag
        2. Signaling the client to interrupt current speech
        3. Cancelling the ongoing audio sending task
        4. Clearing the flag and creating a new send task (with a fresh queue)
        5. Activating the avatar's listening animation
        """
        self._is_interrupting = True
        await self._client.interrupt(self._event_id)
        await self._cancel_send_task()
        self._is_interrupting = False
        await self._create_send_task()
        await self._client.start_agent_listening()

    async def _end_conversation(self):
        """End the current conversation and reset state.

        Resets the participant-joined flag and stops the HeyGen client if one exists.
        """
        self._other_participant_has_joined = False
        # The client may be missing if setup() failed or cleanup() already ran.
        if self._client is not None:
            await self._client.stop()

    async def _create_send_task(self):
        """Create the audio sending task (and a fresh queue) if it doesn't exist."""
        if not self._send_task:
            self._queue = asyncio.Queue()
            self._send_task = self.create_task(self._send_task_handler())

    async def _cancel_send_task(self):
        """Cancel the audio sending task if it exists."""
        if self._send_task:
            await self.cancel_task(self._send_task)
            self._send_task = None

    async def _handle_audio_frame(self, frame: OutputAudioRawFrame):
        """Queue an audio frame for delivery to the HeyGen service.

        Args:
            frame: The audio frame to process.
        """
        await self._queue.put(frame)

    async def _send_task_handler(self):
        """Handle sending audio frames to the HeyGen client.

        Continuously takes audio frames from the queue, resamples them to the client's
        output sample rate and sends them in fixed-size chunks. When no frame arrives
        within AVATAR_VAD_STOP_SECS, the current utterance is considered finished and
        the client is told that the agent stopped speaking.
        """
        sample_rate = self._client.out_sample_rate
        audio_buffer = bytearray()
        self._event_id = None

        while True:
            try:
                frame = await asyncio.wait_for(self._queue.get(), timeout=AVATAR_VAD_STOP_SECS)
                if self._is_interrupting:
                    break
                if isinstance(frame, TTSAudioRawFrame):
                    # starting the new inference
                    if self._event_id is None:
                        self._event_id = str(frame.id)

                    audio = await self._resampler.resample(
                        frame.audio, frame.sample_rate, sample_rate
                    )
                    audio_buffer.extend(audio)
                    while len(audio_buffer) >= self._audio_chunk_size:
                        chunk = bytes(audio_buffer[: self._audio_chunk_size])
                        # Drop the sent prefix in place instead of copying the remainder.
                        del audio_buffer[: self._audio_chunk_size]

                        await self._client.agent_speak(chunk, self._event_id)
                self._queue.task_done()
            except asyncio.TimeoutError:
                # Bot has stopped speaking
                if self._event_id is not None:
                    await self._client.agent_speak_end(self._event_id)
                    self._event_id = None
                    audio_buffer.clear()
@@ -24,12 +24,14 @@ class ImageGenService(AIService):
24
24
  Processes TextFrames by using their content as prompts for image generation.
25
25
  Subclasses must implement the run_image_gen method to provide actual image
26
26
  generation functionality using their specific AI service.
27
-
28
- Args:
29
- **kwargs: Additional arguments passed to the parent AIService.
30
27
  """
31
28
 
32
29
  def __init__(self, **kwargs):
30
+ """Initialize the image generation service.
31
+
32
+ Args:
33
+ **kwargs: Additional arguments passed to the parent AIService.
34
+ """
33
35
  super().__init__(**kwargs)
34
36
 
35
37
  # Renders the image. Returns an Image object.
@@ -0,0 +1 @@
1
+