dv-pipecat-ai 0.0.85.dev5__py3-none-any.whl → 0.0.85.dev698__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (157)
  1. {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/METADATA +78 -117
  2. {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/RECORD +157 -123
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  11. pipecat/audio/filters/noisereduce_filter.py +15 -0
  12. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  13. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  14. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  15. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  16. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  17. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  18. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  19. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  20. pipecat/audio/vad/data/README.md +10 -0
  21. pipecat/audio/vad/vad_analyzer.py +13 -1
  22. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  23. pipecat/frames/frames.py +120 -87
  24. pipecat/observers/loggers/debug_log_observer.py +3 -3
  25. pipecat/observers/loggers/llm_log_observer.py +7 -3
  26. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  27. pipecat/pipeline/runner.py +12 -4
  28. pipecat/pipeline/service_switcher.py +64 -36
  29. pipecat/pipeline/task.py +85 -24
  30. pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
  31. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  32. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  33. pipecat/processors/aggregators/llm_response.py +6 -7
  34. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  35. pipecat/processors/aggregators/user_response.py +6 -6
  36. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  37. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  38. pipecat/processors/filters/stt_mute_filter.py +2 -0
  39. pipecat/processors/frame_processor.py +103 -17
  40. pipecat/processors/frameworks/langchain.py +8 -2
  41. pipecat/processors/frameworks/rtvi.py +209 -68
  42. pipecat/processors/frameworks/strands_agents.py +170 -0
  43. pipecat/processors/logger.py +2 -2
  44. pipecat/processors/transcript_processor.py +4 -4
  45. pipecat/processors/user_idle_processor.py +3 -6
  46. pipecat/runner/run.py +270 -50
  47. pipecat/runner/types.py +2 -0
  48. pipecat/runner/utils.py +51 -10
  49. pipecat/serializers/exotel.py +5 -5
  50. pipecat/serializers/livekit.py +20 -0
  51. pipecat/serializers/plivo.py +6 -9
  52. pipecat/serializers/protobuf.py +6 -5
  53. pipecat/serializers/telnyx.py +2 -2
  54. pipecat/serializers/twilio.py +43 -23
  55. pipecat/services/ai_service.py +2 -6
  56. pipecat/services/anthropic/llm.py +2 -25
  57. pipecat/services/asyncai/tts.py +2 -3
  58. pipecat/services/aws/__init__.py +1 -0
  59. pipecat/services/aws/llm.py +122 -97
  60. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  61. pipecat/services/aws/nova_sonic/context.py +367 -0
  62. pipecat/services/aws/nova_sonic/frames.py +25 -0
  63. pipecat/services/aws/nova_sonic/llm.py +1155 -0
  64. pipecat/services/aws/stt.py +1 -3
  65. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  66. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  67. pipecat/services/aws_nova_sonic/context.py +13 -355
  68. pipecat/services/aws_nova_sonic/frames.py +13 -17
  69. pipecat/services/azure/realtime/__init__.py +0 -0
  70. pipecat/services/azure/realtime/llm.py +65 -0
  71. pipecat/services/azure/stt.py +15 -0
  72. pipecat/services/cartesia/tts.py +2 -2
  73. pipecat/services/deepgram/__init__.py +1 -0
  74. pipecat/services/deepgram/flux/__init__.py +0 -0
  75. pipecat/services/deepgram/flux/stt.py +636 -0
  76. pipecat/services/elevenlabs/__init__.py +2 -1
  77. pipecat/services/elevenlabs/stt.py +254 -276
  78. pipecat/services/elevenlabs/tts.py +5 -5
  79. pipecat/services/fish/tts.py +2 -2
  80. pipecat/services/gemini_multimodal_live/events.py +38 -524
  81. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  82. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  83. pipecat/services/gladia/stt.py +56 -72
  84. pipecat/services/google/__init__.py +1 -0
  85. pipecat/services/google/gemini_live/__init__.py +3 -0
  86. pipecat/services/google/gemini_live/file_api.py +189 -0
  87. pipecat/services/google/gemini_live/llm.py +1582 -0
  88. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  89. pipecat/services/google/llm.py +15 -11
  90. pipecat/services/google/llm_openai.py +3 -3
  91. pipecat/services/google/llm_vertex.py +86 -16
  92. pipecat/services/google/tts.py +7 -3
  93. pipecat/services/heygen/api.py +2 -0
  94. pipecat/services/heygen/client.py +8 -4
  95. pipecat/services/heygen/video.py +2 -0
  96. pipecat/services/hume/__init__.py +5 -0
  97. pipecat/services/hume/tts.py +220 -0
  98. pipecat/services/inworld/tts.py +6 -6
  99. pipecat/services/llm_service.py +15 -5
  100. pipecat/services/lmnt/tts.py +2 -2
  101. pipecat/services/mcp_service.py +4 -2
  102. pipecat/services/mem0/memory.py +6 -5
  103. pipecat/services/mistral/llm.py +29 -8
  104. pipecat/services/moondream/vision.py +42 -16
  105. pipecat/services/neuphonic/tts.py +2 -2
  106. pipecat/services/openai/__init__.py +1 -0
  107. pipecat/services/openai/base_llm.py +27 -20
  108. pipecat/services/openai/realtime/__init__.py +0 -0
  109. pipecat/services/openai/realtime/context.py +272 -0
  110. pipecat/services/openai/realtime/events.py +1106 -0
  111. pipecat/services/openai/realtime/frames.py +37 -0
  112. pipecat/services/openai/realtime/llm.py +829 -0
  113. pipecat/services/openai/tts.py +16 -8
  114. pipecat/services/openai_realtime/__init__.py +27 -0
  115. pipecat/services/openai_realtime/azure.py +21 -0
  116. pipecat/services/openai_realtime/context.py +21 -0
  117. pipecat/services/openai_realtime/events.py +21 -0
  118. pipecat/services/openai_realtime/frames.py +21 -0
  119. pipecat/services/openai_realtime_beta/azure.py +16 -0
  120. pipecat/services/openai_realtime_beta/openai.py +17 -5
  121. pipecat/services/playht/tts.py +31 -4
  122. pipecat/services/rime/tts.py +3 -4
  123. pipecat/services/sarvam/tts.py +2 -6
  124. pipecat/services/simli/video.py +2 -2
  125. pipecat/services/speechmatics/stt.py +1 -7
  126. pipecat/services/stt_service.py +34 -0
  127. pipecat/services/tavus/video.py +2 -2
  128. pipecat/services/tts_service.py +9 -9
  129. pipecat/services/vision_service.py +7 -6
  130. pipecat/services/vistaar/llm.py +4 -0
  131. pipecat/tests/utils.py +4 -4
  132. pipecat/transcriptions/language.py +41 -1
  133. pipecat/transports/base_input.py +17 -42
  134. pipecat/transports/base_output.py +42 -26
  135. pipecat/transports/daily/transport.py +199 -26
  136. pipecat/transports/heygen/__init__.py +0 -0
  137. pipecat/transports/heygen/transport.py +381 -0
  138. pipecat/transports/livekit/transport.py +228 -63
  139. pipecat/transports/local/audio.py +6 -1
  140. pipecat/transports/local/tk.py +11 -2
  141. pipecat/transports/network/fastapi_websocket.py +1 -1
  142. pipecat/transports/smallwebrtc/connection.py +98 -19
  143. pipecat/transports/smallwebrtc/request_handler.py +204 -0
  144. pipecat/transports/smallwebrtc/transport.py +65 -23
  145. pipecat/transports/tavus/transport.py +23 -12
  146. pipecat/transports/websocket/client.py +41 -5
  147. pipecat/transports/websocket/fastapi.py +21 -11
  148. pipecat/transports/websocket/server.py +14 -7
  149. pipecat/transports/whatsapp/api.py +8 -0
  150. pipecat/transports/whatsapp/client.py +47 -0
  151. pipecat/utils/base_object.py +54 -22
  152. pipecat/utils/string.py +12 -1
  153. pipecat/utils/tracing/service_decorators.py +21 -21
  154. {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/WHEEL +0 -0
  155. {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/licenses/LICENSE +0 -0
  156. {dv_pipecat_ai-0.0.85.dev5.dist-info → dv_pipecat_ai-0.0.85.dev698.dist-info}/top_level.txt +0 -0
  157. pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -64,6 +64,7 @@ class OpenAITTSService(TTSService):
64
64
  model: str = "gpt-4o-mini-tts",
65
65
  sample_rate: Optional[int] = None,
66
66
  instructions: Optional[str] = None,
67
+ speed: Optional[float] = None,
67
68
  **kwargs,
68
69
  ):
69
70
  """Initialize OpenAI TTS service.
@@ -75,6 +76,7 @@ class OpenAITTSService(TTSService):
75
76
  model: TTS model to use. Defaults to "gpt-4o-mini-tts".
76
77
  sample_rate: Output audio sample rate in Hz. If None, uses OpenAI's default 24kHz.
77
78
  instructions: Optional instructions to guide voice synthesis behavior.
79
+ speed: Voice speed control (0.25 to 4.0, default 1.0).
78
80
  **kwargs: Additional keyword arguments passed to TTSService.
79
81
  """
80
82
  if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
@@ -84,6 +86,7 @@ class OpenAITTSService(TTSService):
84
86
  )
85
87
  super().__init__(sample_rate=sample_rate, **kwargs)
86
88
 
89
+ self._speed = speed
87
90
  self.set_model_name(model)
88
91
  self.set_voice(voice)
89
92
  self._instructions = instructions
@@ -133,17 +136,22 @@ class OpenAITTSService(TTSService):
133
136
  try:
134
137
  await self.start_ttfb_metrics()
135
138
 
136
- # Setup extra body parameters
137
- extra_body = {}
139
+ # Setup API parameters
140
+ create_params = {
141
+ "input": text,
142
+ "model": self.model_name,
143
+ "voice": VALID_VOICES[self._voice_id],
144
+ "response_format": "pcm",
145
+ }
146
+
138
147
  if self._instructions:
139
- extra_body["instructions"] = self._instructions
148
+ create_params["instructions"] = self._instructions
149
+
150
+ if self._speed:
151
+ create_params["speed"] = self._speed
140
152
 
141
153
  async with self._client.audio.speech.with_streaming_response.create(
142
- input=text,
143
- model=self.model_name,
144
- voice=VALID_VOICES[self._voice_id],
145
- response_format="pcm",
146
- extra_body=extra_body,
154
+ **create_params
147
155
  ) as r:
148
156
  if r.status_code != 200:
149
157
  error = await r.text()
@@ -0,0 +1,27 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ import warnings
8
+
9
+ from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
10
+ from pipecat.services.openai.realtime.events import (
11
+ InputAudioNoiseReduction,
12
+ InputAudioTranscription,
13
+ SemanticTurnDetection,
14
+ SessionProperties,
15
+ TurnDetection,
16
+ )
17
+ from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
18
+
19
+ with warnings.catch_warnings():
20
+ warnings.simplefilter("always")
21
+ warnings.warn(
22
+ "Types in pipecat.services.openai_realtime are deprecated. "
23
+ "Please use the equivalent types from "
24
+ "pipecat.services.openai.realtime instead.",
25
+ DeprecationWarning,
26
+ stacklevel=2,
27
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Azure OpenAI Realtime LLM service implementation."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.azure.realtime.llm import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.azure are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.azure.realtime.llm instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """OpenAI Realtime LLM context and aggregator implementations."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.openai.realtime.context import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.context are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.openai.realtime.context instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Event models and data structures for OpenAI Realtime API communication."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.openai.realtime.events import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.events are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.openai.realtime.events instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -0,0 +1,21 @@
1
+ #
2
+ # Copyright (c) 2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Custom frame types for OpenAI Realtime API integration."""
8
+
9
+ import warnings
10
+
11
+ from pipecat.services.openai.realtime.frames import *
12
+
13
+ with warnings.catch_warnings():
14
+ warnings.simplefilter("always")
15
+ warnings.warn(
16
+ "Types in pipecat.services.openai_realtime.frames are deprecated. "
17
+ "Please use the equivalent types from "
18
+ "pipecat.services.openai.realtime.frames instead.",
19
+ DeprecationWarning,
20
+ stacklevel=2,
21
+ )
@@ -6,6 +6,8 @@
6
6
 
7
7
  """Azure OpenAI Realtime Beta LLM service implementation."""
8
8
 
9
+ import warnings
10
+
9
11
  from loguru import logger
10
12
 
11
13
  from .openai import OpenAIRealtimeBetaLLMService
@@ -23,6 +25,10 @@ except ModuleNotFoundError as e:
23
25
  class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
24
26
  """Azure OpenAI Realtime Beta LLM service with Azure-specific authentication.
25
27
 
28
+ .. deprecated:: 0.0.84
29
+ `AzureRealtimeBetaLLMService` is deprecated, use `AzureRealtimeLLMService` instead.
30
+ This class will be removed in version 1.0.0.
31
+
26
32
  Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
27
33
  using Azure's authentication headers and endpoint format. Provides the same
28
34
  real-time audio and text communication capabilities as the base OpenAI service.
@@ -44,6 +50,16 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
44
50
  **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
45
51
  """
46
52
  super().__init__(base_url=base_url, api_key=api_key, **kwargs)
53
+
54
+ with warnings.catch_warnings():
55
+ warnings.simplefilter("always")
56
+ warnings.warn(
57
+ "AzureRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
58
+ "Use AzureRealtimeLLMService instead.",
59
+ DeprecationWarning,
60
+ stacklevel=2,
61
+ )
62
+
47
63
  self.api_key = api_key
48
64
  self.base_url = base_url
49
65
 
@@ -9,6 +9,7 @@
9
9
  import base64
10
10
  import json
11
11
  import time
12
+ import warnings
12
13
  from dataclasses import dataclass
13
14
  from typing import Optional
14
15
 
@@ -23,6 +24,7 @@ from pipecat.frames.frames import (
23
24
  Frame,
24
25
  InputAudioRawFrame,
25
26
  InterimTranscriptionFrame,
27
+ InterruptionFrame,
26
28
  LLMContextFrame,
27
29
  LLMFullResponseEndFrame,
28
30
  LLMFullResponseStartFrame,
@@ -31,7 +33,6 @@ from pipecat.frames.frames import (
31
33
  LLMTextFrame,
32
34
  LLMUpdateSettingsFrame,
33
35
  StartFrame,
34
- StartInterruptionFrame,
35
36
  TranscriptionFrame,
36
37
  TTSAudioRawFrame,
37
38
  TTSStartedFrame,
@@ -92,6 +93,10 @@ class CurrentAudioResponse:
92
93
  class OpenAIRealtimeBetaLLMService(LLMService):
93
94
  """OpenAI Realtime Beta LLM service providing real-time audio and text communication.
94
95
 
96
+ .. deprecated:: 0.0.84
97
+ `OpenAIRealtimeBetaLLMService` is deprecated, use `OpenAIRealtimeLLMService` instead.
98
+ This class will be removed in version 1.0.0.
99
+
95
100
  Implements the OpenAI Realtime API Beta with WebSocket communication for low-latency
96
101
  bidirectional audio and text interactions. Supports function calling, conversation
97
102
  management, and real-time transcription.
@@ -124,6 +129,15 @@ class OpenAIRealtimeBetaLLMService(LLMService):
124
129
  send_transcription_frames: Whether to emit transcription frames. Defaults to True.
125
130
  **kwargs: Additional arguments passed to parent LLMService.
126
131
  """
132
+ with warnings.catch_warnings():
133
+ warnings.simplefilter("always")
134
+ warnings.warn(
135
+ "OpenAIRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
136
+ "Use OpenAIRealtimeLLMService instead.",
137
+ DeprecationWarning,
138
+ stacklevel=2,
139
+ )
140
+
127
141
  full_url = f"{base_url}?model={model}"
128
142
  super().__init__(base_url=full_url, **kwargs)
129
143
 
@@ -350,7 +364,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
350
364
  elif isinstance(frame, InputAudioRawFrame):
351
365
  if not self._audio_input_paused:
352
366
  await self._send_user_audio(frame)
353
- elif isinstance(frame, StartInterruptionFrame):
367
+ elif isinstance(frame, InterruptionFrame):
354
368
  await self._handle_interruption()
355
369
  elif isinstance(frame, UserStartedSpeakingFrame):
356
370
  await self._handle_user_started_speaking(frame)
@@ -644,14 +658,12 @@ class OpenAIRealtimeBetaLLMService(LLMService):
644
658
 
645
659
  async def _handle_evt_speech_started(self, evt):
646
660
  await self._truncate_current_audio_response()
647
- await self._start_interruption() # cancels this processor task
648
- await self.push_frame(StartInterruptionFrame()) # cancels downstream tasks
661
+ await self.push_interruption_task_frame_and_wait()
649
662
  await self.push_frame(UserStartedSpeakingFrame())
650
663
 
651
664
  async def _handle_evt_speech_stopped(self, evt):
652
665
  await self.start_ttfb_metrics()
653
666
  await self.start_processing_metrics()
654
- await self._stop_interruption()
655
667
  await self.push_frame(UserStoppedSpeakingFrame())
656
668
 
657
669
  async def _maybe_handle_evt_retrieve_conversation_item_error(self, evt: events.ErrorEvent):
@@ -14,6 +14,7 @@ import io
14
14
  import json
15
15
  import struct
16
16
  import uuid
17
+ import warnings
17
18
  from typing import AsyncGenerator, Optional
18
19
 
19
20
  import aiohttp
@@ -25,8 +26,8 @@ from pipecat.frames.frames import (
25
26
  EndFrame,
26
27
  ErrorFrame,
27
28
  Frame,
29
+ InterruptionFrame,
28
30
  StartFrame,
29
- StartInterruptionFrame,
30
31
  TTSAudioRawFrame,
31
32
  TTSStartedFrame,
32
33
  TTSStoppedFrame,
@@ -110,6 +111,11 @@ def language_to_playht_language(language: Language) -> Optional[str]:
110
111
  class PlayHTTTSService(InterruptibleTTSService):
111
112
  """PlayHT WebSocket-based text-to-speech service.
112
113
 
114
+ .. deprecated:: 0.0.88
115
+
116
+ This class is deprecated and will be removed in a future version.
117
+ PlayHT is shutting down their API on December 31st, 2025.
118
+
113
119
  Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
114
120
  Supports streaming audio generation with configurable voice engines and
115
121
  language settings.
@@ -158,6 +164,15 @@ class PlayHTTTSService(InterruptibleTTSService):
158
164
  **kwargs,
159
165
  )
160
166
 
167
+ with warnings.catch_warnings():
168
+ warnings.simplefilter("always")
169
+ warnings.warn(
170
+ "PlayHT is shutting down their API on December 31st, 2025. "
171
+ "'PlayHTTTSService' is deprecated and will be removed in a future version.",
172
+ DeprecationWarning,
173
+ stacklevel=2,
174
+ )
175
+
161
176
  params = params or PlayHTTTSService.InputParams()
162
177
 
163
178
  self._api_key = api_key
@@ -312,7 +327,7 @@ class PlayHTTTSService(InterruptibleTTSService):
312
327
  return self._websocket
313
328
  raise Exception("Websocket not connected")
314
329
 
315
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
330
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
316
331
  """Handle interruption by stopping metrics and clearing request ID."""
317
332
  await super()._handle_interruption(frame, direction)
318
333
  await self.stop_all_metrics()
@@ -401,6 +416,11 @@ class PlayHTTTSService(InterruptibleTTSService):
401
416
  class PlayHTHttpTTSService(TTSService):
402
417
  """PlayHT HTTP-based text-to-speech service.
403
418
 
419
+ .. deprecated:: 0.0.88
420
+
421
+ This class is deprecated and will be removed in a future version.
422
+ PlayHT is shutting down their API on December 31st, 2025.
423
+
404
424
  Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
405
425
  non-streaming synthesis. Suitable for use cases where streaming is not
406
426
  required and simpler integration is preferred.
@@ -454,8 +474,6 @@ class PlayHTHttpTTSService(TTSService):
454
474
 
455
475
  # Warn about deprecated protocol parameter if explicitly provided
456
476
  if protocol:
457
- import warnings
458
-
459
477
  with warnings.catch_warnings():
460
478
  warnings.simplefilter("always")
461
479
  warnings.warn(
@@ -464,6 +482,15 @@ class PlayHTHttpTTSService(TTSService):
464
482
  stacklevel=2,
465
483
  )
466
484
 
485
+ with warnings.catch_warnings():
486
+ warnings.simplefilter("always")
487
+ warnings.warn(
488
+ "PlayHT is shutting down their API on December 31st, 2025. "
489
+ "'PlayHTHttpTTSService' is deprecated and will be removed in a future version.",
490
+ DeprecationWarning,
491
+ stacklevel=2,
492
+ )
493
+
467
494
  params = params or PlayHTHttpTTSService.InputParams()
468
495
 
469
496
  self._user_id = user_id
@@ -24,15 +24,14 @@ from pipecat.frames.frames import (
24
24
  EndFrame,
25
25
  ErrorFrame,
26
26
  Frame,
27
+ InterruptionFrame,
27
28
  StartFrame,
28
- StartInterruptionFrame,
29
29
  TTSAudioRawFrame,
30
30
  TTSStartedFrame,
31
31
  TTSStoppedFrame,
32
32
  )
33
33
  from pipecat.processors.frame_processor import FrameDirection
34
34
  from pipecat.services.tts_service import AudioContextWordTTSService, TTSService
35
- from pipecat.transcriptions import language
36
35
  from pipecat.transcriptions.language import Language
37
36
  from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
38
37
  from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator
@@ -280,7 +279,7 @@ class RimeTTSService(AudioContextWordTTSService):
280
279
  return self._websocket
281
280
  raise Exception("Websocket not connected")
282
281
 
283
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
282
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
284
283
  """Handle interruption by clearing current context."""
285
284
  await super()._handle_interruption(frame, direction)
286
285
  await self.stop_all_metrics()
@@ -375,7 +374,7 @@ class RimeTTSService(AudioContextWordTTSService):
375
374
  direction: The direction to push the frame.
376
375
  """
377
376
  await super().push_frame(frame, direction)
378
- if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
377
+ if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
379
378
  if isinstance(frame, TTSStoppedFrame):
380
379
  await self.add_word_timestamps([("Reset", 0)])
381
380
 
@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
20
20
  EndFrame,
21
21
  ErrorFrame,
22
22
  Frame,
23
+ InterruptionFrame,
23
24
  LLMFullResponseEndFrame,
24
25
  StartFrame,
25
26
  StartInterruptionFrame,
@@ -455,7 +456,7 @@ class SarvamTTSService(InterruptibleTTSService):
455
456
  direction: The direction to push the frame.
456
457
  """
457
458
  await super().push_frame(frame, direction)
458
- if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
459
+ if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
459
460
  self._started = False
460
461
 
461
462
  async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -632,11 +633,6 @@ class SarvamTTSService(InterruptibleTTSService):
632
633
  """
633
634
  logger.debug(f"Generating TTS: [{text}]")
634
635
 
635
- # Validate text input
636
- if not text or not isinstance(text, str) or not text.strip():
637
- logger.warning(f"Invalid text input for Sarvam TTS run_tts: {repr(text)}")
638
- return
639
-
640
636
  try:
641
637
  if not self._websocket or self._websocket.state is State.CLOSED:
642
638
  await self._connect()
@@ -15,8 +15,8 @@ from pipecat.frames.frames import (
15
15
  CancelFrame,
16
16
  EndFrame,
17
17
  Frame,
18
+ InterruptionFrame,
18
19
  OutputImageRawFrame,
19
- StartInterruptionFrame,
20
20
  TTSAudioRawFrame,
21
21
  TTSStoppedFrame,
22
22
  UserStartedSpeakingFrame,
@@ -179,7 +179,7 @@ class SimliVideoService(FrameProcessor):
179
179
  return
180
180
  elif isinstance(frame, (EndFrame, CancelFrame)):
181
181
  await self._stop()
182
- elif isinstance(frame, (StartInterruptionFrame, UserStartedSpeakingFrame)):
182
+ elif isinstance(frame, (InterruptionFrame, UserStartedSpeakingFrame)):
183
183
  if not self._previously_interrupted:
184
184
  await self._simli_client.clearBuffer()
185
185
  self._previously_interrupted = self._is_trinity_avatar
@@ -19,7 +19,6 @@ from loguru import logger
19
19
  from pydantic import BaseModel
20
20
 
21
21
  from pipecat.frames.frames import (
22
- BotInterruptionFrame,
23
22
  CancelFrame,
24
23
  EndFrame,
25
24
  ErrorFrame,
@@ -749,14 +748,13 @@ class SpeechmaticsSTTService(STTService):
749
748
  return
750
749
 
751
750
  # Frames to send
752
- upstream_frames: list[Frame] = []
753
751
  downstream_frames: list[Frame] = []
754
752
 
755
753
  # If VAD is enabled, then send a speaking frame
756
754
  if self._params.enable_vad and not self._is_speaking:
757
755
  logger.debug("User started speaking")
758
756
  self._is_speaking = True
759
- upstream_frames += [BotInterruptionFrame()]
757
+ await self.push_interruption_task_frame_and_wait()
760
758
  downstream_frames += [UserStartedSpeakingFrame()]
761
759
 
762
760
  # If final, then re-parse into TranscriptionFrame
@@ -794,10 +792,6 @@ class SpeechmaticsSTTService(STTService):
794
792
  self._is_speaking = False
795
793
  downstream_frames += [UserStoppedSpeakingFrame()]
796
794
 
797
- # Send UPSTREAM frames
798
- for frame in upstream_frames:
799
- await self.push_frame(frame, FrameDirection.UPSTREAM)
800
-
801
795
  # Send the DOWNSTREAM frames
802
796
  for frame in downstream_frames:
803
797
  await self.push_frame(frame, FrameDirection.DOWNSTREAM)
@@ -16,6 +16,7 @@ from loguru import logger
16
16
  from pipecat.frames.frames import (
17
17
  AudioRawFrame,
18
18
  BotStoppedSpeakingFrame,
19
+ ErrorFrame,
19
20
  Frame,
20
21
  StartFrame,
21
22
  STTMuteFrame,
@@ -25,6 +26,7 @@ from pipecat.frames.frames import (
25
26
  )
26
27
  from pipecat.processors.frame_processor import FrameDirection
27
28
  from pipecat.services.ai_service import AIService
29
+ from pipecat.services.websocket_service import WebsocketService
28
30
  from pipecat.transcriptions.language import Language
29
31
 
30
32
 
@@ -298,3 +300,35 @@ class SegmentedSTTService(STTService):
298
300
  if not self._user_speaking and len(self._audio_buffer) > self._audio_buffer_size_1s:
299
301
  discarded = len(self._audio_buffer) - self._audio_buffer_size_1s
300
302
  self._audio_buffer = self._audio_buffer[discarded:]
303
+
304
+
305
+ class WebsocketSTTService(STTService, WebsocketService):
306
+ """Base class for websocket-based STT services.
307
+
308
+ Combines STT functionality with websocket connectivity, providing automatic
309
+ error handling and reconnection capabilities.
310
+
311
+ Event handlers:
312
+ on_connection_error: Called when a websocket connection error occurs.
313
+
314
+ Example::
315
+
316
+ @stt.event_handler("on_connection_error")
317
+ async def on_connection_error(stt: STTService, error: str):
318
+ logger.error(f"STT connection error: {error}")
319
+ """
320
+
321
+ def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
322
+ """Initialize the Websocket STT service.
323
+
324
+ Args:
325
+ reconnect_on_error: Whether to automatically reconnect on websocket errors.
326
+ **kwargs: Additional arguments passed to parent classes.
327
+ """
328
+ STTService.__init__(self, **kwargs)
329
+ WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
330
+ self._register_event_handler("on_connection_error")
331
+
332
+ async def _report_error(self, error: ErrorFrame):
333
+ await self._call_event_handler("on_connection_error", error.error)
334
+ await self.push_error(error)
@@ -23,12 +23,12 @@ from pipecat.frames.frames import (
23
23
  CancelFrame,
24
24
  EndFrame,
25
25
  Frame,
26
+ InterruptionFrame,
26
27
  OutputAudioRawFrame,
27
28
  OutputImageRawFrame,
28
29
  OutputTransportReadyFrame,
29
30
  SpeechOutputAudioRawFrame,
30
31
  StartFrame,
31
- StartInterruptionFrame,
32
32
  TTSAudioRawFrame,
33
33
  TTSStartedFrame,
34
34
  )
@@ -222,7 +222,7 @@ class TavusVideoService(AIService):
222
222
  """
223
223
  await super().process_frame(frame, direction)
224
224
 
225
- if isinstance(frame, StartInterruptionFrame):
225
+ if isinstance(frame, InterruptionFrame):
226
226
  await self._handle_interruptions()
227
227
  await self.push_frame(frame, direction)
228
228
  elif isinstance(frame, TTSAudioRawFrame):
@@ -20,10 +20,10 @@ from pipecat.frames.frames import (
20
20
  ErrorFrame,
21
21
  Frame,
22
22
  InterimTranscriptionFrame,
23
+ InterruptionFrame,
23
24
  LLMFullResponseEndFrame,
24
25
  LLMFullResponseStartFrame,
25
26
  StartFrame,
26
- StartInterruptionFrame,
27
27
  TextFrame,
28
28
  TranscriptionFrame,
29
29
  TTSAudioRawFrame,
@@ -319,7 +319,7 @@ class TTSService(AIService):
319
319
  and not isinstance(frame, TranscriptionFrame)
320
320
  ):
321
321
  await self._process_text_frame(frame)
322
- elif isinstance(frame, StartInterruptionFrame):
322
+ elif isinstance(frame, InterruptionFrame):
323
323
  await self._handle_interruption(frame, direction)
324
324
  await self.push_frame(frame, direction)
325
325
  elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
@@ -377,14 +377,14 @@ class TTSService(AIService):
377
377
  await super().push_frame(frame, direction)
378
378
 
379
379
  if self._push_stop_frames and (
380
- isinstance(frame, StartInterruptionFrame)
380
+ isinstance(frame, InterruptionFrame)
381
381
  or isinstance(frame, TTSStartedFrame)
382
382
  or isinstance(frame, TTSAudioRawFrame)
383
383
  or isinstance(frame, TTSStoppedFrame)
384
384
  ):
385
385
  await self._stop_frame_queue.put(frame)
386
386
 
387
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
387
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
388
388
  self._processing_text = False
389
389
  await self._text_aggregator.handle_interruption()
390
390
  for filter in self._text_filters:
@@ -465,7 +465,7 @@ class TTSService(AIService):
465
465
  )
466
466
  if isinstance(frame, TTSStartedFrame):
467
467
  has_started = True
468
- elif isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
468
+ elif isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
469
469
  has_started = False
470
470
  except asyncio.TimeoutError:
471
471
  if has_started:
@@ -550,7 +550,7 @@ class WordTTSService(TTSService):
550
550
  elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
551
551
  await self.flush_audio()
552
552
 
553
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
553
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
554
554
  await super()._handle_interruption(frame, direction)
555
555
  self._llm_response_started = False
556
556
  self.reset_word_timestamps()
@@ -640,7 +640,7 @@ class InterruptibleTTSService(WebsocketTTSService):
640
640
  # user interrupts we need to reconnect.
641
641
  self._bot_speaking = False
642
642
 
643
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
643
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
644
644
  await super()._handle_interruption(frame, direction)
645
645
  if self._bot_speaking:
646
646
  await self._disconnect()
@@ -712,7 +712,7 @@ class InterruptibleWordTTSService(WebsocketWordTTSService):
712
712
  # user interrupts we need to reconnect.
713
713
  self._bot_speaking = False
714
714
 
715
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
715
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
716
716
  await super()._handle_interruption(frame, direction)
717
717
  if self._bot_speaking:
718
718
  await self._disconnect()
@@ -840,7 +840,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
840
840
  await super().cancel(frame)
841
841
  await self._stop_audio_context_task()
842
842
 
843
- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
843
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
844
844
  await super()._handle_interruption(frame, direction)
845
845
  await self._stop_audio_context_task()
846
846
  self._create_audio_context_task()