dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -14,6 +14,7 @@ from typing import AsyncGenerator, Dict, Literal, Optional
14
14
 
15
15
  from loguru import logger
16
16
  from openai import AsyncOpenAI, BadRequestError
17
+ from pydantic import BaseModel
17
18
 
18
19
  from pipecat.frames.frames import (
19
20
  ErrorFrame,
@@ -55,6 +56,17 @@ class OpenAITTSService(TTSService):
55
56
 
56
57
  OPENAI_SAMPLE_RATE = 24000 # OpenAI TTS always outputs at 24kHz
57
58
 
59
+ class InputParams(BaseModel):
60
+ """Input parameters for OpenAI TTS configuration.
61
+
62
+ Parameters:
63
+ instructions: Instructions to guide voice synthesis behavior.
64
+ speed: Voice speed control (0.25 to 4.0, default 1.0).
65
+ """
66
+
67
+ instructions: Optional[str] = None
68
+ speed: Optional[float] = None
69
+
58
70
  def __init__(
59
71
  self,
60
72
  *,
@@ -64,6 +76,8 @@ class OpenAITTSService(TTSService):
64
76
  model: str = "gpt-4o-mini-tts",
65
77
  sample_rate: Optional[int] = None,
66
78
  instructions: Optional[str] = None,
79
+ speed: Optional[float] = None,
80
+ params: Optional[InputParams] = None,
67
81
  **kwargs,
68
82
  ):
69
83
  """Initialize OpenAI TTS service.
@@ -75,7 +89,12 @@ class OpenAITTSService(TTSService):
75
89
  model: TTS model to use. Defaults to "gpt-4o-mini-tts".
76
90
  sample_rate: Output audio sample rate in Hz. If None, uses OpenAI's default 24kHz.
77
91
  instructions: Optional instructions to guide voice synthesis behavior.
92
+ speed: Voice speed control (0.25 to 4.0, default 1.0).
93
+ params: Optional synthesis controls (acting instructions, speed, ...).
78
94
  **kwargs: Additional keyword arguments passed to TTSService.
95
+
96
+ .. deprecated:: 0.0.91
97
+ The `instructions` and `speed` parameters are deprecated, use `InputParams` instead.
79
98
  """
80
99
  if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
81
100
  logger.warning(
@@ -86,9 +105,24 @@ class OpenAITTSService(TTSService):
86
105
 
87
106
  self.set_model_name(model)
88
107
  self.set_voice(voice)
89
- self._instructions = instructions
90
108
  self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)
91
109
 
110
+ if instructions or speed:
111
+ import warnings
112
+
113
+ with warnings.catch_warnings():
114
+ warnings.simplefilter("always")
115
+ warnings.warn(
116
+ "The `instructions` and `speed` parameters are deprecated, use `InputParams` instead.",
117
+ DeprecationWarning,
118
+ stacklevel=2,
119
+ )
120
+
121
+ self._settings = {
122
+ "instructions": params.instructions if params else instructions,
123
+ "speed": params.speed if params else speed,
124
+ }
125
+
92
126
  def can_generate_metrics(self) -> bool:
93
127
  """Check if this service can generate processing metrics.
94
128
 
@@ -133,17 +167,22 @@ class OpenAITTSService(TTSService):
133
167
  try:
134
168
  await self.start_ttfb_metrics()
135
169
 
136
- # Setup extra body parameters
137
- extra_body = {}
138
- if self._instructions:
139
- extra_body["instructions"] = self._instructions
170
+ # Setup API parameters
171
+ create_params = {
172
+ "input": text,
173
+ "model": self.model_name,
174
+ "voice": VALID_VOICES[self._voice_id],
175
+ "response_format": "pcm",
176
+ }
177
+
178
+ if self._settings["instructions"]:
179
+ create_params["instructions"] = self._settings["instructions"]
180
+
181
+ if self._settings["speed"]:
182
+ create_params["speed"] = self._settings["speed"]
140
183
 
141
184
  async with self._client.audio.speech.with_streaming_response.create(
142
- input=text,
143
- model=self.model_name,
144
- voice=VALID_VOICES[self._voice_id],
145
- response_format="pcm",
146
- extra_body=extra_body,
185
+ **create_params
147
186
  ) as r:
148
187
  if r.status_code != 200:
149
188
  error = await r.text()
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ import warnings
8
+
9
+ from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
10
+ from pipecat.services.openai.realtime.events import (
11
+ InputAudioNoiseReduction,
12
+ InputAudioTranscription,
13
+ SemanticTurnDetection,
14
+ SessionProperties,
15
+ TurnDetection,
16
+ )
17
+ from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
18
+
19
+ with warnings.catch_warnings():
20
+ warnings.simplefilter("always")
21
+ warnings.warn(
22
+ "Types in pipecat.services.openai_realtime are deprecated. "
23
+ "Please use the equivalent types from "
24
+ "pipecat.services.openai.realtime instead.",
25
+ DeprecationWarning,
26
+ stacklevel=2,
27
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Azure OpenAI Realtime LLM service implementation."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.azure.realtime.llm import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.azure are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.azure.realtime.llm instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """OpenAI Realtime LLM context and aggregator implementations."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.openai.realtime.context import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.context are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.openai.realtime.context instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Event models and data structures for OpenAI Realtime API communication."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.openai.realtime.events import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.events are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.openai.realtime.events instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Custom frame types for OpenAI Realtime API integration."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.openai.realtime.frames import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.frames are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.openai.realtime.frames instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -6,6 +6,8 @@
6
6
 
7
7
  """Azure OpenAI Realtime Beta LLM service implementation."""
8
8
 
9
+ import warnings
10
+
9
11
  from loguru import logger
10
12
 
11
13
  from .openai import OpenAIRealtimeBetaLLMService
@@ -23,6 +25,10 @@ except ModuleNotFoundError as e:
23
25
  class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
24
26
  """Azure OpenAI Realtime Beta LLM service with Azure-specific authentication.
25
27
 
28
+ .. deprecated:: 0.0.84
29
+ `AzureRealtimeBetaLLMService` is deprecated, use `AzureRealtimeLLMService` instead.
30
+ This class will be removed in version 1.0.0.
31
+
26
32
  Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
27
33
  using Azure's authentication headers and endpoint format. Provides the same
28
34
  real-time audio and text communication capabilities as the base OpenAI service.
@@ -44,6 +50,16 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
44
50
  **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
45
51
  """
46
52
  super().__init__(base_url=base_url, api_key=api_key, **kwargs)
53
+
54
+ with warnings.catch_warnings():
55
+ warnings.simplefilter("always")
56
+ warnings.warn(
57
+ "AzureRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
58
+ "Use AzureRealtimeLLMService instead.",
59
+ DeprecationWarning,
60
+ stacklevel=2,
61
+ )
62
+
47
63
  self.api_key = api_key
48
64
  self.base_url = base_url
49
65
 
@@ -9,6 +9,7 @@
9
9
  import base64
10
10
  import json
11
11
  import time
12
+ import warnings
12
13
  from dataclasses import dataclass
13
14
  from typing import Optional
14
15
 
@@ -23,6 +24,7 @@ from pipecat.frames.frames import (
23
24
  Frame,
24
25
  InputAudioRawFrame,
25
26
  InterimTranscriptionFrame,
27
+ InterruptionFrame,
26
28
  LLMContextFrame,
27
29
  LLMFullResponseEndFrame,
28
30
  LLMFullResponseStartFrame,
@@ -31,7 +33,6 @@ from pipecat.frames.frames import (
31
33
  LLMTextFrame,
32
34
  LLMUpdateSettingsFrame,
33
35
  StartFrame,
34
- StartInterruptionFrame,
35
36
  TranscriptionFrame,
36
37
  TTSAudioRawFrame,
37
38
  TTSStartedFrame,
@@ -92,6 +93,10 @@ class CurrentAudioResponse:
92
93
  class OpenAIRealtimeBetaLLMService(LLMService):
93
94
  """OpenAI Realtime Beta LLM service providing real-time audio and text communication.
94
95
 
96
+ .. deprecated:: 0.0.84
97
+ `OpenAIRealtimeBetaLLMService` is deprecated, use `OpenAIRealtimeLLMService` instead.
98
+ This class will be removed in version 1.0.0.
99
+
95
100
  Implements the OpenAI Realtime API Beta with WebSocket communication for low-latency
96
101
  bidirectional audio and text interactions. Supports function calling, conversation
97
102
  management, and real-time transcription.
@@ -124,6 +129,15 @@ class OpenAIRealtimeBetaLLMService(LLMService):
124
129
  send_transcription_frames: Whether to emit transcription frames. Defaults to True.
125
130
  **kwargs: Additional arguments passed to parent LLMService.
126
131
  """
132
+ with warnings.catch_warnings():
133
+ warnings.simplefilter("always")
134
+ warnings.warn(
135
+ "OpenAIRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
136
+ "Use OpenAIRealtimeLLMService instead.",
137
+ DeprecationWarning,
138
+ stacklevel=2,
139
+ )
140
+
127
141
  full_url = f"{base_url}?model={model}"
128
142
  super().__init__(base_url=full_url, **kwargs)
129
143
 
@@ -350,7 +364,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
350
364
  elif isinstance(frame, InputAudioRawFrame):
351
365
  if not self._audio_input_paused:
352
366
  await self._send_user_audio(frame)
353
- elif isinstance(frame, StartInterruptionFrame):
367
+ elif isinstance(frame, InterruptionFrame):
354
368
  await self._handle_interruption()
355
369
  elif isinstance(frame, UserStartedSpeakingFrame):
356
370
  await self._handle_user_started_speaking(frame)
@@ -644,14 +658,12 @@ class OpenAIRealtimeBetaLLMService(LLMService):
644
658
 
645
659
  async def _handle_evt_speech_started(self, evt):
646
660
  await self._truncate_current_audio_response()
647
- await self._start_interruption() # cancels this processor task
648
- await self.push_frame(StartInterruptionFrame()) # cancels downstream tasks
661
+ await self.push_interruption_task_frame_and_wait()
649
662
  await self.push_frame(UserStartedSpeakingFrame())
650
663
 
651
664
  async def _handle_evt_speech_stopped(self, evt):
652
665
  await self.start_ttfb_metrics()
653
666
  await self.start_processing_metrics()
654
- await self._stop_interruption()
655
667
  await self.push_frame(UserStoppedSpeakingFrame())
656
668
 
657
669
  async def _maybe_handle_evt_retrieve_conversation_item_error(self, evt: events.ErrorEvent):
@@ -14,7 +14,6 @@ from loguru import logger
14
14
  from pipecat.frames.frames import (
15
15
  ErrorFrame,
16
16
  Frame,
17
- TTSAudioRawFrame,
18
17
  TTSStartedFrame,
19
18
  TTSStoppedFrame,
20
19
  )
@@ -99,16 +98,15 @@ class PiperTTSService(TTSService):
99
98
 
100
99
  await self.start_tts_usage_metrics(text)
101
100
 
101
+ yield TTSStartedFrame()
102
+
102
103
  CHUNK_SIZE = self.chunk_size
103
104
 
104
- yield TTSStartedFrame()
105
- async for chunk in response.content.iter_chunked(CHUNK_SIZE):
106
- # remove wav header if present
107
- if chunk.startswith(b"RIFF"):
108
- chunk = chunk[44:]
109
- if len(chunk) > 0:
110
- await self.stop_ttfb_metrics()
111
- yield TTSAudioRawFrame(chunk, self.sample_rate, 1)
105
+ async for frame in self._stream_audio_frames_from_iterator(
106
+ response.content.iter_chunked(CHUNK_SIZE), strip_wav_header=True
107
+ ):
108
+ await self.stop_ttfb_metrics()
109
+ yield frame
112
110
  except Exception as e:
113
111
  logger.error(f"Error in run_tts: {e}")
114
112
  yield ErrorFrame(error=str(e))
@@ -14,6 +14,7 @@ import io
14
14
  import json
15
15
  import struct
16
16
  import uuid
17
+ import warnings
17
18
  from typing import AsyncGenerator, Optional
18
19
 
19
20
  import aiohttp
@@ -25,8 +26,8 @@ from pipecat.frames.frames import (
25
26
  EndFrame,
26
27
  ErrorFrame,
27
28
  Frame,
29
+ InterruptionFrame,
28
30
  StartFrame,
29
- StartInterruptionFrame,
30
31
  TTSAudioRawFrame,
31
32
  TTSStartedFrame,
32
33
  TTSStoppedFrame,
@@ -110,6 +111,11 @@ def language_to_playht_language(language: Language) -> Optional[str]:
110
111
  class PlayHTTTSService(InterruptibleTTSService):
111
112
  """PlayHT WebSocket-based text-to-speech service.
112
113
 
114
+ .. deprecated:: 0.0.88
115
+
116
+ This class is deprecated and will be removed in a future version.
117
+ PlayHT is shutting down their API on December 31st, 2025.
118
+
113
119
  Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
114
120
  Supports streaming audio generation with configurable voice engines and
115
121
  language settings.
@@ -158,6 +164,15 @@ class PlayHTTTSService(InterruptibleTTSService):
158
164
  **kwargs,
159
165
  )
160
166
 
167
+ with warnings.catch_warnings():
168
+ warnings.simplefilter("always")
169
+ warnings.warn(
170
+ "PlayHT is shutting down their API on December 31st, 2025. "
171
+ "'PlayHTTTSService' is deprecated and will be removed in a future version.",
172
+ DeprecationWarning,
173
+ stacklevel=2,
174
+ )
175
+
161
176
  params = params or PlayHTTTSService.InputParams()
162
177
 
163
178
  self._api_key = api_key
@@ -254,6 +269,8 @@ class PlayHTTTSService(InterruptibleTTSService):
254
269
  raise ValueError("WebSocket URL is not a string")
255
270
 
256
271
  self._websocket = await websocket_connect(self._websocket_url)
272
+
273
+ await self._call_event_handler("on_connected")
257
274
  except ValueError as e:
258
275
  logger.error(f"{self} initialization error: {e}")
259
276
  self._websocket = None
@@ -276,6 +293,7 @@ class PlayHTTTSService(InterruptibleTTSService):
276
293
  finally:
277
294
  self._request_id = None
278
295
  self._websocket = None
296
+ await self._call_event_handler("on_disconnected")
279
297
 
280
298
  async def _get_websocket_url(self):
281
299
  """Retrieve WebSocket URL from PlayHT API."""
@@ -312,7 +330,7 @@ class PlayHTTTSService(InterruptibleTTSService):
312
330
  return self._websocket
313
331
  raise Exception("Websocket not connected")
314
332
 
315
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
333
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
316
334
  """Handle interruption by stopping metrics and clearing request ID."""
317
335
  await super()._handle_interruption(frame, direction)
318
336
  await self.stop_all_metrics()
@@ -401,6 +419,11 @@ class PlayHTTTSService(InterruptibleTTSService):
401
419
  class PlayHTHttpTTSService(TTSService):
402
420
  """PlayHT HTTP-based text-to-speech service.
403
421
 
422
+ .. deprecated:: 0.0.88
423
+
424
+ This class is deprecated and will be removed in a future version.
425
+ PlayHT is shutting down their API on December 31st, 2025.
426
+
404
427
  Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
405
428
  non-streaming synthesis. Suitable for use cases where streaming is not
406
429
  required and simpler integration is preferred.
@@ -454,8 +477,6 @@ class PlayHTHttpTTSService(TTSService):
454
477
 
455
478
  # Warn about deprecated protocol parameter if explicitly provided
456
479
  if protocol:
457
- import warnings
458
-
459
480
  with warnings.catch_warnings():
460
481
  warnings.simplefilter("always")
461
482
  warnings.warn(
@@ -464,6 +485,15 @@ class PlayHTHttpTTSService(TTSService):
464
485
  stacklevel=2,
465
486
  )
466
487
 
488
+ with warnings.catch_warnings():
489
+ warnings.simplefilter("always")
490
+ warnings.warn(
491
+ "PlayHT is shutting down their API on December 31st, 2025. "
492
+ "'PlayHTHttpTTSService' is deprecated and will be removed in a future version.",
493
+ DeprecationWarning,
494
+ stacklevel=2,
495
+ )
496
+
467
497
  params = params or PlayHTHttpTTSService.InputParams()
468
498
 
469
499
  self._user_id = user_id
@@ -24,15 +24,14 @@ from pipecat.frames.frames import (
24
24
  EndFrame,
25
25
  ErrorFrame,
26
26
  Frame,
27
+ InterruptionFrame,
27
28
  StartFrame,
28
- StartInterruptionFrame,
29
29
  TTSAudioRawFrame,
30
30
  TTSStartedFrame,
31
31
  TTSStoppedFrame,
32
32
  )
33
33
  from pipecat.processors.frame_processor import FrameDirection
34
34
  from pipecat.services.tts_service import AudioContextWordTTSService, TTSService
35
- from pipecat.transcriptions import language
36
35
  from pipecat.transcriptions.language import Language
37
36
  from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
38
37
  from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator
@@ -256,6 +255,8 @@ class RimeTTSService(AudioContextWordTTSService):
256
255
  url = f"{self._url}?{params}"
257
256
  headers = {"Authorization": f"Bearer {self._api_key}"}
258
257
  self._websocket = await websocket_connect(url, additional_headers=headers)
258
+
259
+ await self._call_event_handler("on_connected")
259
260
  except Exception as e:
260
261
  logger.error(f"{self} initialization error: {e}")
261
262
  self._websocket = None
@@ -273,6 +274,7 @@ class RimeTTSService(AudioContextWordTTSService):
273
274
  finally:
274
275
  self._context_id = None
275
276
  self._websocket = None
277
+ await self._call_event_handler("on_disconnected")
276
278
 
277
279
  def _get_websocket(self):
278
280
  """Get active websocket connection or raise exception."""
@@ -280,7 +282,7 @@ class RimeTTSService(AudioContextWordTTSService):
280
282
  return self._websocket
281
283
  raise Exception("Websocket not connected")
282
284
 
283
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
285
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
284
286
  """Handle interruption by clearing current context."""
285
287
  await super()._handle_interruption(frame, direction)
286
288
  await self.stop_all_metrics()
@@ -375,7 +377,7 @@ class RimeTTSService(AudioContextWordTTSService):
375
377
  direction: The direction to push the frame.
376
378
  """
377
379
  await super().push_frame(frame, direction)
378
- if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
380
+ if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
379
381
  if isinstance(frame, TTSStoppedFrame):
380
382
  await self.add_word_timestamps([("Reset", 0)])
381
383
 
@@ -554,15 +556,13 @@ class RimeHttpTTSService(TTSService):
554
556
 
555
557
  CHUNK_SIZE = self.chunk_size
556
558
 
557
- async for chunk in response.content.iter_chunked(CHUNK_SIZE):
558
- if need_to_strip_wav_header and chunk.startswith(b"RIFF"):
559
- chunk = chunk[44:]
560
- need_to_strip_wav_header = False
559
+ async for frame in self._stream_audio_frames_from_iterator(
560
+ response.content.iter_chunked(CHUNK_SIZE),
561
+ strip_wav_header=need_to_strip_wav_header,
562
+ ):
563
+ await self.stop_ttfb_metrics()
564
+ yield frame
561
565
 
562
- if len(chunk) > 0:
563
- await self.stop_ttfb_metrics()
564
- frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
565
- yield frame
566
566
  except Exception as e:
567
567
  logger.exception(f"Error generating TTS: {e}")
568
568
  yield ErrorFrame(error=f"Rime TTS error: {str(e)}")
@@ -583,7 +583,9 @@ class RivaSegmentedSTTService(SegmentedSTTService):
583
583
  self._config.language_code = self._language
584
584
 
585
585
  @traced_stt
586
- async def _handle_transcription(self, transcript: str, language: Optional[Language] = None):
586
+ async def _handle_transcription(
587
+ self, transcript: str, is_final: bool, language: Optional[Language] = None
588
+ ):
587
589
  """Handle a transcription result with tracing."""
588
590
  pass
589
591
 
@@ -0,0 +1,9 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ from .llm import SalesforceAgentLLMService
8
+
9
+ __all__ = ["SalesforceAgentLLMService"]