dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -0,0 +1,9 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ from .stt import HamsaSTTService
8
+
9
+ __all__ = ["HamsaSTTService"]
@@ -0,0 +1,241 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Hamsa Speech-to-Text service implementation.
8
+
9
+ This module implements speech-to-text transcription using the Hamsa API.
10
+ Hamsa supports Arabic and English languages via HTTP POST requests.
11
+ """
12
+
13
+ import asyncio
14
+ import base64
15
+ import json
16
+ from typing import AsyncGenerator, Optional
17
+
18
+ import aiohttp
19
+ from loguru import logger
20
+ from pydantic import BaseModel, Field
21
+ from typing_extensions import override
22
+
23
+ from pipecat.frames.frames import (
24
+ ErrorFrame,
25
+ Frame,
26
+ TranscriptionFrame,
27
+ )
28
+ from pipecat.services.stt_service import SegmentedSTTService
29
+ from pipecat.transcriptions.language import Language
30
+ from pipecat.utils.time import time_now_iso8601
31
+ from pipecat.utils.tracing.service_decorators import traced_stt
32
+
33
+
34
def language_to_hamsa_language(language: Language) -> Optional[str]:
    """Translate a ``Language`` value into the code sent to Hamsa.

    All listed Arabic locales collapse to ``"ar"`` and all listed English
    locales collapse to ``"en"``.

    Args:
        language: The Language enum value to translate.

    Returns:
        ``"ar"`` or ``"en"``, or None when the language is not in either group.
    """
    arabic_variants = (
        Language.AR,
        Language.AR_AE,
        Language.AR_BH,
        Language.AR_DZ,
        Language.AR_EG,
        Language.AR_IQ,
        Language.AR_JO,
        Language.AR_KW,
        Language.AR_LB,
        Language.AR_LY,
        Language.AR_MA,
        Language.AR_OM,
        Language.AR_QA,
        Language.AR_SA,
        Language.AR_SY,
        Language.AR_TN,
        Language.AR_YE,
    )
    english_variants = (
        Language.EN,
        Language.EN_AU,
        Language.EN_CA,
        Language.EN_GB,
        Language.EN_HK,
        Language.EN_IE,
        Language.EN_IN,
        Language.EN_KE,
        Language.EN_NG,
        Language.EN_NZ,
        Language.EN_PH,
        Language.EN_SG,
        Language.EN_TZ,
        Language.EN_US,
        Language.EN_ZA,
    )

    # Build the lookup table group by group instead of spelling out each pair.
    code_by_language = dict.fromkeys(arabic_variants, "ar")
    code_by_language.update(dict.fromkeys(english_variants, "en"))
    return code_by_language.get(language)
81
+
82
+
83
class HamsaSTTService(SegmentedSTTService):
    """Hamsa Speech-to-Text service implementation.

    This service uses the Hamsa API for speech-to-text transcription.
    It inherits from SegmentedSTTService to handle audio buffering and
    processes complete audio segments when the user stops speaking.

    Features:
        - Supports Arabic and English languages
        - Uses HTTP POST requests (not streaming)
        - Configurable End of Speech (EOS) detection
        - Base64 audio encoding
    """

    class InputParams(BaseModel):
        """Configuration parameters for Hamsa STT.

        Parameters:
            language: Language code ('ar' or 'en').
            eos_threshold: End of speech threshold.
        """

        language: str = Field(default="ar", description="Language code ('ar' or 'en')")
        eos_threshold: float = Field(default=0.3, description="End of speech threshold")

    def __init__(
        self,
        *,
        api_key: str,
        language: Language = Language.AR,
        eos_threshold: float = 0.3,
        base_url: str = "https://api.tryhamsa.com",
        aiohttp_session: Optional[aiohttp.ClientSession] = None,
        **kwargs,
    ):
        """Initialize the Hamsa STT service.

        Args:
            api_key: Hamsa API key, sent as ``Authorization: Token <key>``.
            language: Language for transcription. Values Hamsa does not
                support fall back to Arabic ("ar").
            eos_threshold: End of speech threshold forwarded with each request.
            base_url: Hamsa API base URL; a trailing slash is stripped.
            aiohttp_session: Optional aiohttp session to reuse. When omitted,
                a temporary session is opened and closed for every request.
            **kwargs: Additional arguments passed to SegmentedSTTService.
        """
        super().__init__(**kwargs)

        self._api_key = api_key
        self._base_url = base_url.rstrip("/")
        self._language = language_to_hamsa_language(language) or "ar"
        self._eos_threshold = eos_threshold
        self._aiohttp_session = aiohttp_session
        self._endpoint = f"{self._base_url}/v1/realtime/stt"

        # Store current settings
        self._settings = {
            "language": self._language,
            "eos_threshold": self._eos_threshold,
        }

    async def set_language(self, language: Language):
        """Set the language for speech recognition.

        Unsupported languages are logged and leave the current language
        unchanged.

        Args:
            language: The language to use for speech recognition
        """
        hamsa_language = language_to_hamsa_language(language)
        if hamsa_language:
            self._language = hamsa_language
            self._settings["language"] = hamsa_language
            logger.info(f"Updated Hamsa STT language to: {hamsa_language}")
        else:
            logger.warning(f"Language {language} not supported by Hamsa STT")

    @traced_stt
    async def _handle_transcription(
        self, transcript: str, is_final: bool, language: Optional[str] = None
    ):
        """Tracing hook invoked once per transcription result.

        ``traced_stt`` is attached here, to a coroutine taking
        ``(transcript, is_final, language)``, instead of to the ``run_stt``
        async generator, which takes raw audio and has a different shape.

        Args:
            transcript: The transcribed text.
            is_final: Whether the transcription is final.
            language: The language code used for the request.
        """
        pass

    @override
    async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
        """Run speech-to-text transcription on the provided audio.

        The audio is base64-encoded and POSTed as JSON to the Hamsa endpoint
        with a 30 second total timeout.

        Args:
            audio: Raw audio bytes (WAV format) to transcribe

        Yields:
            Frame: TranscriptionFrame with transcription results or ErrorFrame on failure
        """
        try:
            # Convert audio bytes to base64
            audio_b64 = base64.b64encode(audio).decode("utf-8")

            # Prepare request payload
            payload = {
                "audioBase64": audio_b64,
                "language": self._language,
                "eos_threshold": self._eos_threshold,
            }

            headers = {
                "Authorization": f"Token {self._api_key}",
                "Content-Type": "application/json",
            }

            # Use provided session or create a new one. Only a session created
            # here is closed here; a caller-supplied session stays open.
            session = self._aiohttp_session
            should_close_session = False

            if not session:
                session = aiohttp.ClientSession()
                should_close_session = True

            try:
                # Make the HTTP POST request
                async with session.post(
                    self._endpoint,
                    json=payload,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=30),
                ) as response:
                    if response.status == 200:
                        result = await response.json()

                        # Extract transcribed text from response. A missing or
                        # null "text" (or a non-object body) counts as an empty
                        # transcription rather than an unexpected error.
                        raw_text = result.get("text") if isinstance(result, dict) else None
                        transcription = raw_text.strip() if isinstance(raw_text, str) else ""

                        if transcription:
                            logger.debug(f"Hamsa STT transcription: {transcription}")
                            await self._handle_transcription(transcription, True, self._language)
                            yield TranscriptionFrame(
                                text=transcription,
                                user_id="user",
                                timestamp=time_now_iso8601(),
                            )
                        else:
                            logger.debug("Hamsa STT returned empty transcription")

                    elif response.status == 401:
                        error_msg = "Hamsa STT authentication failed - check API key"
                        logger.error(error_msg)
                        yield ErrorFrame(error=error_msg)

                    elif response.status == 400:
                        error_text = await response.text()
                        error_msg = f"Hamsa STT bad request: {error_text}"
                        logger.error(error_msg)
                        yield ErrorFrame(error=error_msg)

                    else:
                        error_text = await response.text()
                        error_msg = f"Hamsa STT request failed: {response.status} - {error_text}"
                        logger.error(error_msg)
                        yield ErrorFrame(error=error_msg)

            finally:
                if should_close_session and session:
                    await session.close()

        except asyncio.TimeoutError:
            error_msg = "Hamsa STT request timed out"
            logger.error(error_msg)
            yield ErrorFrame(error=error_msg)

        except aiohttp.ClientError as e:
            error_msg = f"Hamsa STT client error: {str(e)}"
            logger.error(error_msg)
            yield ErrorFrame(error=error_msg)

        except Exception as e:
            # Last-resort boundary: surface the failure as an ErrorFrame
            # instead of letting it escape the generator.
            error_msg = f"Hamsa STT unexpected error: {str(e)}"
            logger.error(error_msg)
            yield ErrorFrame(error=error_msg)
@@ -0,0 +1,5 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
@@ -0,0 +1,281 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """HeyGen API.
8
+
9
+ API to communicate with HeyGen Streaming API.
10
+ """
11
+
12
+ from enum import Enum
13
+ from typing import Any, Dict, Literal, Optional
14
+
15
+ import aiohttp
16
+ from loguru import logger
17
+ from pydantic import BaseModel, Field
18
+
19
+
20
class AvatarQuality(str, Enum):
    """Avatar quality levels; each member's value is its own lowercase name."""

    # Members are plain strings, so they compare equal to their values.
    low = "low"
    medium = "medium"
    high = "high"
26
+
27
+
28
class VideoEncoding(str, Enum):
    """Video encodings that can be named in a session request."""

    # Members are plain strings, so they compare equal to their values.
    H264 = "H264"
    VP8 = "VP8"
33
+
34
+
35
class VoiceEmotion(str, Enum):
    """Voice emotion types; each member's value is the lowercase form of its name."""

    # Members are plain strings, so they compare equal to their values.
    EXCITED = "excited"
    SERIOUS = "serious"
    FRIENDLY = "friendly"
    SOOTHING = "soothing"
    BROADCASTER = "broadcaster"
43
+
44
+
45
class ElevenLabsSettings(BaseModel):
    """ElevenLabs voice configuration; every field is optional and defaults to None.

    Parameters:
        stability (Optional[float]): Stability of the voice synthesis.
        similarity_boost (Optional[float]): Adjustment for similarity in voice performance.
        model_id (Optional[str]): Identifier for the ElevenLabs model to use.
        style (Optional[int]): Style metric to apply for the voice.
        use_speaker_boost (Optional[bool]): Flag to enable speaker boost.
    """

    # Numeric tuning knobs.
    stability: Optional[float] = None
    similarity_boost: Optional[float] = None
    # Model selection and remaining options.
    model_id: Optional[str] = None
    style: Optional[int] = None
    use_speaker_boost: Optional[bool] = None
61
+
62
+
63
class VoiceSettings(BaseModel):
    """Voice configuration settings.

    ``voice_id`` and ``elevenlabs_settings`` carry camelCase aliases
    (``voiceId`` / ``elevenlabsSettings``). ``populate_by_name`` lets callers
    use either the alias or the field name when constructing the model, so
    the names documented below are accepted instead of being silently ignored.

    Parameters:
        voice_id (Optional[str]): ID of the voice to be used.
        rate (Optional[float]): Speaking rate for the voice.
        emotion (Optional[VoiceEmotion]): Emotion tone for the voice.
        elevenlabs_settings (Optional[ElevenLabsSettings]): Details for ElevenLabs configuration.
    """

    # Accept both `voice_id=` and `voiceId=` (likewise for elevenlabs settings).
    model_config = {"populate_by_name": True}

    voice_id: Optional[str] = Field(None, alias="voiceId")
    rate: Optional[float] = None
    emotion: Optional[VoiceEmotion] = None
    elevenlabs_settings: Optional[ElevenLabsSettings] = Field(None, alias="elevenlabsSettings")
77
+
78
+
79
class NewSessionRequest(BaseModel):
    """Request body for creating a new HeyGen streaming session.

    Every field except ``version`` is optional and defaults to None;
    ``version`` is pinned to ``"v2"``.

    Parameters:
        quality (Optional[AvatarQuality]): Desired quality of the avatar.
        avatar_id (Optional[str]): Unique identifier for the avatar.
        voice (Optional[VoiceSettings]): Voice configurations for the session.
        video_encoding (Optional[VideoEncoding]): Desired encoding for the video stream.
        knowledge_id (Optional[str]): Identifier for the knowledge base (if applicable).
        knowledge_base (Optional[str]): Details of any external knowledge base.
        version (Literal["v2"]): API version to use.
        disable_idle_timeout (Optional[bool]): Flag to disable automatic idle timeout.
        activity_idle_timeout (Optional[int]): Timeout in seconds for activity-based idle detection.
    """

    # Avatar, voice and video selection.
    quality: Optional[AvatarQuality] = None
    avatar_id: Optional[str] = None
    voice: Optional[VoiceSettings] = None
    video_encoding: Optional[VideoEncoding] = None

    # Knowledge base selection.
    knowledge_id: Optional[str] = None
    knowledge_base: Optional[str] = None

    # Only "v2" is accepted by the type.
    version: Literal["v2"] = "v2"

    # Idle-timeout controls.
    disable_idle_timeout: Optional[bool] = None
    activity_idle_timeout: Optional[int] = None
103
+
104
+
105
class HeyGenSession(BaseModel):
    """Session details returned by HeyGen; all four fields are required strings.

    Parameters:
        session_id (str): Unique identifier for the streaming session.
        access_token (str): Token for accessing the session securely.
        realtime_endpoint (str): Real-time communication endpoint URL.
        url (str): Direct URL for the session.
    """

    # Identity and credential of the session.
    session_id: str
    access_token: str
    # Where to connect.
    realtime_endpoint: str
    url: str
119
+
120
+
121
class HeygenApiError(Exception):
    """Error raised for failed HeyGen API calls.

    Besides the message, the instance keeps the HTTP status code in
    ``status`` and the raw response body in ``response_text``.
    """

    def __init__(self, message: str, status: int, response_text: str) -> None:
        """Build the error from a message and the failing response's details.

        Args:
            message: Error message
            status: HTTP status code
            response_text: Raw response text from the API
        """
        self.status, self.response_text = status, response_text
        # Only the message is passed up, so str(error) shows the message alone.
        super().__init__(message)
135
+
136
+
137
class HeyGenApi:
    """HeyGen Streaming API client.

    Every call is a JSON POST to ``BASE_URL`` + path, authenticated with the
    ``x-api-key`` header and sent through the aiohttp session given to the
    constructor.
    """

    BASE_URL = "https://api.heygen.com/v1"

    def __init__(self, api_key: str, session: aiohttp.ClientSession) -> None:
        """Initialize the HeyGen API.

        Args:
            api_key: HeyGen API key
            session: aiohttp client session used for all requests. It is
                required, and this class never closes it.
        """
        self.api_key = api_key
        self.session = session

    async def _request(self, path: str, params: Dict[str, Any], expect_data: bool = True) -> Any:
        """Make a POST request to the HeyGen API.

        Args:
            path: API endpoint path.
            params: JSON-serializable parameters.
            expect_data: Whether to parse the body as JSON and extract its 'data'
                field (default: True). When False the raw response text is returned.

        Returns:
            The 'data' field of the JSON response (None when the field is absent),
            or the raw response text when ``expect_data`` is False.

        Raises:
            HeygenApiError: If the API responds with a non-success status.
            aiohttp.ClientError: For network-related errors.
        """
        url = f"{self.BASE_URL}{path}"
        headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json",
        }

        logger.debug(f"HeyGen API request: {url}")

        try:
            async with self.session.post(url, json=params, headers=headers) as response:
                if not response.ok:
                    response_text = await response.text()
                    logger.error(f"HeyGen API error: {response_text}")
                    raise HeygenApiError(
                        f"API request failed with status {response.status}",
                        response.status,
                        response_text,
                    )
                if expect_data:
                    json_data = await response.json()
                    data = json_data.get("data")
                    return data
                return await response.text()
        except aiohttp.ClientError as e:
            logger.error(f"Network error while calling HeyGen API: {str(e)}")
            raise

    async def new_session(self, request_data: NewSessionRequest) -> HeyGenSession:
        """Create a new streaming session.

        https://docs.heygen.com/reference/new-session

        Args:
            request_data: Session configuration parameters.

        Returns:
            Session information, including ID and access token.
        """
        voice = request_data.voice
        # Model attributes are reached by field name; "voiceId" and
        # "elevenlabsSettings" are only aliases and are not attributes.
        elevenlabs_settings = voice.elevenlabs_settings if voice else None
        params = {
            "quality": request_data.quality,
            "avatar_id": request_data.avatar_id,
            "voice": {
                "voice_id": voice.voice_id if voice else None,
                "rate": voice.rate if voice else None,
                "emotion": voice.emotion if voice else None,
                # Convert the nested model to a plain dict so the payload can
                # be JSON-encoded; unset options are left out.
                "elevenlabs_settings": (
                    elevenlabs_settings.model_dump(exclude_none=True)
                    if elevenlabs_settings is not None
                    else None
                ),
            },
            "knowledge_id": request_data.knowledge_id,
            "knowledge_base": request_data.knowledge_base,
            "version": request_data.version,
            "video_encoding": request_data.video_encoding,
            "disable_idle_timeout": request_data.disable_idle_timeout,
            "activity_idle_timeout": request_data.activity_idle_timeout,
        }
        session_info = await self._request("/streaming.new", params)
        heygen_session = HeyGenSession.model_validate(session_info)
        # Log only the session id: the payload also carries the access token,
        # which must not end up on stdout or in logs.
        logger.debug(f"HeyGen session created: {heygen_session.session_id}")

        return heygen_session

    async def start_session(self, session_id: str) -> Any:
        """Start the streaming session.

        https://docs.heygen.com/reference/start-session

        Args:
            session_id: ID of the session to start.

        Returns:
            Response data from the start session API call.

        Raises:
            ValueError: If session ID is not set.
        """
        if not session_id:
            raise ValueError("Session ID is not set. Call new_session first.")

        params = {
            "session_id": session_id,
        }
        return await self._request("/streaming.start", params)

    async def close_session(self, session_id: str) -> Any:
        """Terminate an active streaming session.

        https://docs.heygen.com/reference/close-session

        Args:
            session_id: ID of the session to stop.

        Returns:
            Raw response text from the stop session API call.

        Raises:
            ValueError: If session ID is not set.
        """
        if not session_id:
            raise ValueError("Session ID is not set. Call new_session first.")

        params = {
            "session_id": session_id,
        }
        return await self._request("/streaming.stop", params, expect_data=False)

    async def create_token(self) -> str:
        """Create a streaming token.

        https://docs.heygen.com/reference/streaming-token

        Returns:
            str: The generated access token for the streaming session
        """
        token_info = await self._request("/streaming.create_token", {})
        return token_info["token"]