dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/openai/tts.py
CHANGED
|
@@ -14,6 +14,7 @@ from typing import AsyncGenerator, Dict, Literal, Optional
|
|
|
14
14
|
|
|
15
15
|
from loguru import logger
|
|
16
16
|
from openai import AsyncOpenAI, BadRequestError
|
|
17
|
+
from pydantic import BaseModel
|
|
17
18
|
|
|
18
19
|
from pipecat.frames.frames import (
|
|
19
20
|
ErrorFrame,
|
|
@@ -55,6 +56,17 @@ class OpenAITTSService(TTSService):
|
|
|
55
56
|
|
|
56
57
|
OPENAI_SAMPLE_RATE = 24000 # OpenAI TTS always outputs at 24kHz
|
|
57
58
|
|
|
59
|
+
class InputParams(BaseModel):
|
|
60
|
+
"""Input parameters for OpenAI TTS configuration.
|
|
61
|
+
|
|
62
|
+
Parameters:
|
|
63
|
+
instructions: Instructions to guide voice synthesis behavior.
|
|
64
|
+
speed: Voice speed control (0.25 to 4.0, default 1.0).
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
instructions: Optional[str] = None
|
|
68
|
+
speed: Optional[float] = None
|
|
69
|
+
|
|
58
70
|
def __init__(
|
|
59
71
|
self,
|
|
60
72
|
*,
|
|
@@ -64,6 +76,8 @@ class OpenAITTSService(TTSService):
|
|
|
64
76
|
model: str = "gpt-4o-mini-tts",
|
|
65
77
|
sample_rate: Optional[int] = None,
|
|
66
78
|
instructions: Optional[str] = None,
|
|
79
|
+
speed: Optional[float] = None,
|
|
80
|
+
params: Optional[InputParams] = None,
|
|
67
81
|
**kwargs,
|
|
68
82
|
):
|
|
69
83
|
"""Initialize OpenAI TTS service.
|
|
@@ -75,7 +89,12 @@ class OpenAITTSService(TTSService):
|
|
|
75
89
|
model: TTS model to use. Defaults to "gpt-4o-mini-tts".
|
|
76
90
|
sample_rate: Output audio sample rate in Hz. If None, uses OpenAI's default 24kHz.
|
|
77
91
|
instructions: Optional instructions to guide voice synthesis behavior.
|
|
92
|
+
speed: Voice speed control (0.25 to 4.0, default 1.0).
|
|
93
|
+
params: Optional synthesis controls (acting instructions, speed, ...).
|
|
78
94
|
**kwargs: Additional keyword arguments passed to TTSService.
|
|
95
|
+
|
|
96
|
+
.. deprecated:: 0.0.91
|
|
97
|
+
The `instructions` and `speed` parameters are deprecated, use `InputParams` instead.
|
|
79
98
|
"""
|
|
80
99
|
if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
|
|
81
100
|
logger.warning(
|
|
@@ -86,9 +105,24 @@ class OpenAITTSService(TTSService):
|
|
|
86
105
|
|
|
87
106
|
self.set_model_name(model)
|
|
88
107
|
self.set_voice(voice)
|
|
89
|
-
self._instructions = instructions
|
|
90
108
|
self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)
|
|
91
109
|
|
|
110
|
+
if instructions or speed:
|
|
111
|
+
import warnings
|
|
112
|
+
|
|
113
|
+
with warnings.catch_warnings():
|
|
114
|
+
warnings.simplefilter("always")
|
|
115
|
+
warnings.warn(
|
|
116
|
+
"The `instructions` and `speed` parameters are deprecated, use `InputParams` instead.",
|
|
117
|
+
DeprecationWarning,
|
|
118
|
+
stacklevel=2,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
self._settings = {
|
|
122
|
+
"instructions": params.instructions if params else instructions,
|
|
123
|
+
"speed": params.speed if params else speed,
|
|
124
|
+
}
|
|
125
|
+
|
|
92
126
|
def can_generate_metrics(self) -> bool:
|
|
93
127
|
"""Check if this service can generate processing metrics.
|
|
94
128
|
|
|
@@ -133,17 +167,22 @@ class OpenAITTSService(TTSService):
|
|
|
133
167
|
try:
|
|
134
168
|
await self.start_ttfb_metrics()
|
|
135
169
|
|
|
136
|
-
# Setup
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
170
|
+
# Setup API parameters
|
|
171
|
+
create_params = {
|
|
172
|
+
"input": text,
|
|
173
|
+
"model": self.model_name,
|
|
174
|
+
"voice": VALID_VOICES[self._voice_id],
|
|
175
|
+
"response_format": "pcm",
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
if self._settings["instructions"]:
|
|
179
|
+
create_params["instructions"] = self._settings["instructions"]
|
|
180
|
+
|
|
181
|
+
if self._settings["speed"]:
|
|
182
|
+
create_params["speed"] = self._settings["speed"]
|
|
140
183
|
|
|
141
184
|
async with self._client.audio.speech.with_streaming_response.create(
|
|
142
|
-
|
|
143
|
-
model=self.model_name,
|
|
144
|
-
voice=VALID_VOICES[self._voice_id],
|
|
145
|
-
response_format="pcm",
|
|
146
|
-
extra_body=extra_body,
|
|
185
|
+
**create_params
|
|
147
186
|
) as r:
|
|
148
187
|
if r.status_code != 200:
|
|
149
188
|
error = await r.text()
|
|
pipecat/services/openai_realtime/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
import warnings
|
|
8
|
+
|
|
9
|
+
from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
|
|
10
|
+
from pipecat.services.openai.realtime.events import (
|
|
11
|
+
InputAudioNoiseReduction,
|
|
12
|
+
InputAudioTranscription,
|
|
13
|
+
SemanticTurnDetection,
|
|
14
|
+
SessionProperties,
|
|
15
|
+
TurnDetection,
|
|
16
|
+
)
|
|
17
|
+
from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
|
|
18
|
+
|
|
19
|
+
with warnings.catch_warnings():
|
|
20
|
+
warnings.simplefilter("always")
|
|
21
|
+
warnings.warn(
|
|
22
|
+
"Types in pipecat.services.openai_realtime are deprecated. "
|
|
23
|
+
"Please use the equivalent types from "
|
|
24
|
+
"pipecat.services.openai.realtime instead.",
|
|
25
|
+
DeprecationWarning,
|
|
26
|
+
stacklevel=2,
|
|
27
|
+
)
|
|
pipecat/services/openai_realtime/azure.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Azure OpenAI Realtime LLM service implementation."""
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
from pipecat.services.azure.realtime.llm import *
|
|
12
|
+
|
|
13
|
+
with warnings.catch_warnings():
|
|
14
|
+
warnings.simplefilter("always")
|
|
15
|
+
warnings.warn(
|
|
16
|
+
"Types in pipecat.services.openai_realtime.azure are deprecated. "
|
|
17
|
+
"Please use the equivalent types from "
|
|
18
|
+
"pipecat.services.azure.realtime.llm instead.",
|
|
19
|
+
DeprecationWarning,
|
|
20
|
+
stacklevel=2,
|
|
21
|
+
)
|
|
pipecat/services/openai_realtime/context.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""OpenAI Realtime LLM context and aggregator implementations."""
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
from pipecat.services.openai.realtime.context import *
|
|
12
|
+
|
|
13
|
+
with warnings.catch_warnings():
|
|
14
|
+
warnings.simplefilter("always")
|
|
15
|
+
warnings.warn(
|
|
16
|
+
"Types in pipecat.services.openai_realtime.context are deprecated. "
|
|
17
|
+
"Please use the equivalent types from "
|
|
18
|
+
"pipecat.services.openai.realtime.context instead.",
|
|
19
|
+
DeprecationWarning,
|
|
20
|
+
stacklevel=2,
|
|
21
|
+
)
|
|
pipecat/services/openai_realtime/events.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Event models and data structures for OpenAI Realtime API communication."""
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
from pipecat.services.openai.realtime.events import *
|
|
12
|
+
|
|
13
|
+
with warnings.catch_warnings():
|
|
14
|
+
warnings.simplefilter("always")
|
|
15
|
+
warnings.warn(
|
|
16
|
+
"Types in pipecat.services.openai_realtime.events are deprecated. "
|
|
17
|
+
"Please use the equivalent types from "
|
|
18
|
+
"pipecat.services.openai.realtime.events instead.",
|
|
19
|
+
DeprecationWarning,
|
|
20
|
+
stacklevel=2,
|
|
21
|
+
)
|
|
pipecat/services/openai_realtime/frames.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Custom frame types for OpenAI Realtime API integration."""
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
from pipecat.services.openai.realtime.frames import *
|
|
12
|
+
|
|
13
|
+
with warnings.catch_warnings():
|
|
14
|
+
warnings.simplefilter("always")
|
|
15
|
+
warnings.warn(
|
|
16
|
+
"Types in pipecat.services.openai_realtime.frames are deprecated. "
|
|
17
|
+
"Please use the equivalent types from "
|
|
18
|
+
"pipecat.services.openai.realtime.frames instead.",
|
|
19
|
+
DeprecationWarning,
|
|
20
|
+
stacklevel=2,
|
|
21
|
+
)
|
|
pipecat/services/openai_realtime_beta/azure.py
CHANGED
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
|
|
7
7
|
"""Azure OpenAI Realtime Beta LLM service implementation."""
|
|
8
8
|
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
9
11
|
from loguru import logger
|
|
10
12
|
|
|
11
13
|
from .openai import OpenAIRealtimeBetaLLMService
|
|
@@ -23,6 +25,10 @@ except ModuleNotFoundError as e:
|
|
|
23
25
|
class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
|
|
24
26
|
"""Azure OpenAI Realtime Beta LLM service with Azure-specific authentication.
|
|
25
27
|
|
|
28
|
+
.. deprecated:: 0.0.84
|
|
29
|
+
`AzureRealtimeBetaLLMService` is deprecated, use `AzureRealtimeLLMService` instead.
|
|
30
|
+
This class will be removed in version 1.0.0.
|
|
31
|
+
|
|
26
32
|
Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
|
|
27
33
|
using Azure's authentication headers and endpoint format. Provides the same
|
|
28
34
|
real-time audio and text communication capabilities as the base OpenAI service.
|
|
@@ -44,6 +50,16 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
|
|
|
44
50
|
**kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
|
|
45
51
|
"""
|
|
46
52
|
super().__init__(base_url=base_url, api_key=api_key, **kwargs)
|
|
53
|
+
|
|
54
|
+
with warnings.catch_warnings():
|
|
55
|
+
warnings.simplefilter("always")
|
|
56
|
+
warnings.warn(
|
|
57
|
+
"AzureRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
|
|
58
|
+
"Use AzureRealtimeLLMService instead.",
|
|
59
|
+
DeprecationWarning,
|
|
60
|
+
stacklevel=2,
|
|
61
|
+
)
|
|
62
|
+
|
|
47
63
|
self.api_key = api_key
|
|
48
64
|
self.base_url = base_url
|
|
49
65
|
|
|
pipecat/services/openai_realtime_beta/openai.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
import base64
|
|
10
10
|
import json
|
|
11
11
|
import time
|
|
12
|
+
import warnings
|
|
12
13
|
from dataclasses import dataclass
|
|
13
14
|
from typing import Optional
|
|
14
15
|
|
|
@@ -23,6 +24,7 @@ from pipecat.frames.frames import (
|
|
|
23
24
|
Frame,
|
|
24
25
|
InputAudioRawFrame,
|
|
25
26
|
InterimTranscriptionFrame,
|
|
27
|
+
InterruptionFrame,
|
|
26
28
|
LLMContextFrame,
|
|
27
29
|
LLMFullResponseEndFrame,
|
|
28
30
|
LLMFullResponseStartFrame,
|
|
@@ -31,7 +33,6 @@ from pipecat.frames.frames import (
|
|
|
31
33
|
LLMTextFrame,
|
|
32
34
|
LLMUpdateSettingsFrame,
|
|
33
35
|
StartFrame,
|
|
34
|
-
StartInterruptionFrame,
|
|
35
36
|
TranscriptionFrame,
|
|
36
37
|
TTSAudioRawFrame,
|
|
37
38
|
TTSStartedFrame,
|
|
@@ -92,6 +93,10 @@ class CurrentAudioResponse:
|
|
|
92
93
|
class OpenAIRealtimeBetaLLMService(LLMService):
|
|
93
94
|
"""OpenAI Realtime Beta LLM service providing real-time audio and text communication.
|
|
94
95
|
|
|
96
|
+
.. deprecated:: 0.0.84
|
|
97
|
+
`OpenAIRealtimeBetaLLMService` is deprecated, use `OpenAIRealtimeLLMService` instead.
|
|
98
|
+
This class will be removed in version 1.0.0.
|
|
99
|
+
|
|
95
100
|
Implements the OpenAI Realtime API Beta with WebSocket communication for low-latency
|
|
96
101
|
bidirectional audio and text interactions. Supports function calling, conversation
|
|
97
102
|
management, and real-time transcription.
|
|
@@ -124,6 +129,15 @@ class OpenAIRealtimeBetaLLMService(LLMService):
|
|
|
124
129
|
send_transcription_frames: Whether to emit transcription frames. Defaults to True.
|
|
125
130
|
**kwargs: Additional arguments passed to parent LLMService.
|
|
126
131
|
"""
|
|
132
|
+
with warnings.catch_warnings():
|
|
133
|
+
warnings.simplefilter("always")
|
|
134
|
+
warnings.warn(
|
|
135
|
+
"OpenAIRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. "
|
|
136
|
+
"Use OpenAIRealtimeLLMService instead.",
|
|
137
|
+
DeprecationWarning,
|
|
138
|
+
stacklevel=2,
|
|
139
|
+
)
|
|
140
|
+
|
|
127
141
|
full_url = f"{base_url}?model={model}"
|
|
128
142
|
super().__init__(base_url=full_url, **kwargs)
|
|
129
143
|
|
|
@@ -350,7 +364,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
|
|
|
350
364
|
elif isinstance(frame, InputAudioRawFrame):
|
|
351
365
|
if not self._audio_input_paused:
|
|
352
366
|
await self._send_user_audio(frame)
|
|
353
|
-
elif isinstance(frame, StartInterruptionFrame):
|
|
367
|
+
elif isinstance(frame, InterruptionFrame):
|
|
354
368
|
await self._handle_interruption()
|
|
355
369
|
elif isinstance(frame, UserStartedSpeakingFrame):
|
|
356
370
|
await self._handle_user_started_speaking(frame)
|
|
@@ -644,14 +658,12 @@ class OpenAIRealtimeBetaLLMService(LLMService):
|
|
|
644
658
|
|
|
645
659
|
async def _handle_evt_speech_started(self, evt):
|
|
646
660
|
await self._truncate_current_audio_response()
|
|
647
|
-
await self._start_interruption()
|
|
648
|
-
await self.push_frame(StartInterruptionFrame()) # cancels downstream tasks
|
|
661
|
+
await self.push_interruption_task_frame_and_wait()
|
|
649
662
|
await self.push_frame(UserStartedSpeakingFrame())
|
|
650
663
|
|
|
651
664
|
async def _handle_evt_speech_stopped(self, evt):
|
|
652
665
|
await self.start_ttfb_metrics()
|
|
653
666
|
await self.start_processing_metrics()
|
|
654
|
-
await self._stop_interruption()
|
|
655
667
|
await self.push_frame(UserStoppedSpeakingFrame())
|
|
656
668
|
|
|
657
669
|
async def _maybe_handle_evt_retrieve_conversation_item_error(self, evt: events.ErrorEvent):
|
pipecat/services/piper/tts.py
CHANGED
|
@@ -14,7 +14,6 @@ from loguru import logger
|
|
|
14
14
|
from pipecat.frames.frames import (
|
|
15
15
|
ErrorFrame,
|
|
16
16
|
Frame,
|
|
17
|
-
TTSAudioRawFrame,
|
|
18
17
|
TTSStartedFrame,
|
|
19
18
|
TTSStoppedFrame,
|
|
20
19
|
)
|
|
@@ -99,16 +98,15 @@ class PiperTTSService(TTSService):
|
|
|
99
98
|
|
|
100
99
|
await self.start_tts_usage_metrics(text)
|
|
101
100
|
|
|
101
|
+
yield TTSStartedFrame()
|
|
102
|
+
|
|
102
103
|
CHUNK_SIZE = self.chunk_size
|
|
103
104
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
if len(chunk) > 0:
|
|
110
|
-
await self.stop_ttfb_metrics()
|
|
111
|
-
yield TTSAudioRawFrame(chunk, self.sample_rate, 1)
|
|
105
|
+
async for frame in self._stream_audio_frames_from_iterator(
|
|
106
|
+
response.content.iter_chunked(CHUNK_SIZE), strip_wav_header=True
|
|
107
|
+
):
|
|
108
|
+
await self.stop_ttfb_metrics()
|
|
109
|
+
yield frame
|
|
112
110
|
except Exception as e:
|
|
113
111
|
logger.error(f"Error in run_tts: {e}")
|
|
114
112
|
yield ErrorFrame(error=str(e))
|
pipecat/services/playht/tts.py
CHANGED
|
@@ -14,6 +14,7 @@ import io
|
|
|
14
14
|
import json
|
|
15
15
|
import struct
|
|
16
16
|
import uuid
|
|
17
|
+
import warnings
|
|
17
18
|
from typing import AsyncGenerator, Optional
|
|
18
19
|
|
|
19
20
|
import aiohttp
|
|
@@ -25,8 +26,8 @@ from pipecat.frames.frames import (
|
|
|
25
26
|
EndFrame,
|
|
26
27
|
ErrorFrame,
|
|
27
28
|
Frame,
|
|
29
|
+
InterruptionFrame,
|
|
28
30
|
StartFrame,
|
|
29
|
-
StartInterruptionFrame,
|
|
30
31
|
TTSAudioRawFrame,
|
|
31
32
|
TTSStartedFrame,
|
|
32
33
|
TTSStoppedFrame,
|
|
@@ -110,6 +111,11 @@ def language_to_playht_language(language: Language) -> Optional[str]:
|
|
|
110
111
|
class PlayHTTTSService(InterruptibleTTSService):
|
|
111
112
|
"""PlayHT WebSocket-based text-to-speech service.
|
|
112
113
|
|
|
114
|
+
.. deprecated:: 0.0.88
|
|
115
|
+
|
|
116
|
+
This class is deprecated and will be removed in a future version.
|
|
117
|
+
PlayHT is shutting down their API on December 31st, 2025.
|
|
118
|
+
|
|
113
119
|
Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
|
|
114
120
|
Supports streaming audio generation with configurable voice engines and
|
|
115
121
|
language settings.
|
|
@@ -158,6 +164,15 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
158
164
|
**kwargs,
|
|
159
165
|
)
|
|
160
166
|
|
|
167
|
+
with warnings.catch_warnings():
|
|
168
|
+
warnings.simplefilter("always")
|
|
169
|
+
warnings.warn(
|
|
170
|
+
"PlayHT is shutting down their API on December 31st, 2025. "
|
|
171
|
+
"'PlayHTTTSService' is deprecated and will be removed in a future version.",
|
|
172
|
+
DeprecationWarning,
|
|
173
|
+
stacklevel=2,
|
|
174
|
+
)
|
|
175
|
+
|
|
161
176
|
params = params or PlayHTTTSService.InputParams()
|
|
162
177
|
|
|
163
178
|
self._api_key = api_key
|
|
@@ -254,6 +269,8 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
254
269
|
raise ValueError("WebSocket URL is not a string")
|
|
255
270
|
|
|
256
271
|
self._websocket = await websocket_connect(self._websocket_url)
|
|
272
|
+
|
|
273
|
+
await self._call_event_handler("on_connected")
|
|
257
274
|
except ValueError as e:
|
|
258
275
|
logger.error(f"{self} initialization error: {e}")
|
|
259
276
|
self._websocket = None
|
|
@@ -276,6 +293,7 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
276
293
|
finally:
|
|
277
294
|
self._request_id = None
|
|
278
295
|
self._websocket = None
|
|
296
|
+
await self._call_event_handler("on_disconnected")
|
|
279
297
|
|
|
280
298
|
async def _get_websocket_url(self):
|
|
281
299
|
"""Retrieve WebSocket URL from PlayHT API."""
|
|
@@ -312,7 +330,7 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
312
330
|
return self._websocket
|
|
313
331
|
raise Exception("Websocket not connected")
|
|
314
332
|
|
|
315
|
-
async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
|
|
333
|
+
async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
|
|
316
334
|
"""Handle interruption by stopping metrics and clearing request ID."""
|
|
317
335
|
await super()._handle_interruption(frame, direction)
|
|
318
336
|
await self.stop_all_metrics()
|
|
@@ -401,6 +419,11 @@ class PlayHTTTSService(InterruptibleTTSService):
|
|
|
401
419
|
class PlayHTHttpTTSService(TTSService):
|
|
402
420
|
"""PlayHT HTTP-based text-to-speech service.
|
|
403
421
|
|
|
422
|
+
.. deprecated:: 0.0.88
|
|
423
|
+
|
|
424
|
+
This class is deprecated and will be removed in a future version.
|
|
425
|
+
PlayHT is shutting down their API on December 31st, 2025.
|
|
426
|
+
|
|
404
427
|
Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
|
|
405
428
|
non-streaming synthesis. Suitable for use cases where streaming is not
|
|
406
429
|
required and simpler integration is preferred.
|
|
@@ -454,8 +477,6 @@ class PlayHTHttpTTSService(TTSService):
|
|
|
454
477
|
|
|
455
478
|
# Warn about deprecated protocol parameter if explicitly provided
|
|
456
479
|
if protocol:
|
|
457
|
-
import warnings
|
|
458
|
-
|
|
459
480
|
with warnings.catch_warnings():
|
|
460
481
|
warnings.simplefilter("always")
|
|
461
482
|
warnings.warn(
|
|
@@ -464,6 +485,15 @@ class PlayHTHttpTTSService(TTSService):
|
|
|
464
485
|
stacklevel=2,
|
|
465
486
|
)
|
|
466
487
|
|
|
488
|
+
with warnings.catch_warnings():
|
|
489
|
+
warnings.simplefilter("always")
|
|
490
|
+
warnings.warn(
|
|
491
|
+
"PlayHT is shutting down their API on December 31st, 2025. "
|
|
492
|
+
"'PlayHTHttpTTSService' is deprecated and will be removed in a future version.",
|
|
493
|
+
DeprecationWarning,
|
|
494
|
+
stacklevel=2,
|
|
495
|
+
)
|
|
496
|
+
|
|
467
497
|
params = params or PlayHTHttpTTSService.InputParams()
|
|
468
498
|
|
|
469
499
|
self._user_id = user_id
|
pipecat/services/rime/tts.py
CHANGED
|
@@ -24,15 +24,14 @@ from pipecat.frames.frames import (
|
|
|
24
24
|
EndFrame,
|
|
25
25
|
ErrorFrame,
|
|
26
26
|
Frame,
|
|
27
|
+
InterruptionFrame,
|
|
27
28
|
StartFrame,
|
|
28
|
-
StartInterruptionFrame,
|
|
29
29
|
TTSAudioRawFrame,
|
|
30
30
|
TTSStartedFrame,
|
|
31
31
|
TTSStoppedFrame,
|
|
32
32
|
)
|
|
33
33
|
from pipecat.processors.frame_processor import FrameDirection
|
|
34
34
|
from pipecat.services.tts_service import AudioContextWordTTSService, TTSService
|
|
35
|
-
from pipecat.transcriptions import language
|
|
36
35
|
from pipecat.transcriptions.language import Language
|
|
37
36
|
from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
|
|
38
37
|
from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator
|
|
@@ -256,6 +255,8 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
256
255
|
url = f"{self._url}?{params}"
|
|
257
256
|
headers = {"Authorization": f"Bearer {self._api_key}"}
|
|
258
257
|
self._websocket = await websocket_connect(url, additional_headers=headers)
|
|
258
|
+
|
|
259
|
+
await self._call_event_handler("on_connected")
|
|
259
260
|
except Exception as e:
|
|
260
261
|
logger.error(f"{self} initialization error: {e}")
|
|
261
262
|
self._websocket = None
|
|
@@ -273,6 +274,7 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
273
274
|
finally:
|
|
274
275
|
self._context_id = None
|
|
275
276
|
self._websocket = None
|
|
277
|
+
await self._call_event_handler("on_disconnected")
|
|
276
278
|
|
|
277
279
|
def _get_websocket(self):
|
|
278
280
|
"""Get active websocket connection or raise exception."""
|
|
@@ -280,7 +282,7 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
280
282
|
return self._websocket
|
|
281
283
|
raise Exception("Websocket not connected")
|
|
282
284
|
|
|
283
|
-
async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
|
|
285
|
+
async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
|
|
284
286
|
"""Handle interruption by clearing current context."""
|
|
285
287
|
await super()._handle_interruption(frame, direction)
|
|
286
288
|
await self.stop_all_metrics()
|
|
@@ -375,7 +377,7 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
375
377
|
direction: The direction to push the frame.
|
|
376
378
|
"""
|
|
377
379
|
await super().push_frame(frame, direction)
|
|
378
|
-
if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
|
|
380
|
+
if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
|
|
379
381
|
if isinstance(frame, TTSStoppedFrame):
|
|
380
382
|
await self.add_word_timestamps([("Reset", 0)])
|
|
381
383
|
|
|
@@ -554,15 +556,13 @@ class RimeHttpTTSService(TTSService):
|
|
|
554
556
|
|
|
555
557
|
CHUNK_SIZE = self.chunk_size
|
|
556
558
|
|
|
557
|
-
async for chunk in response.content.iter_chunked(CHUNK_SIZE):
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
559
|
+
async for frame in self._stream_audio_frames_from_iterator(
|
|
560
|
+
response.content.iter_chunked(CHUNK_SIZE),
|
|
561
|
+
strip_wav_header=need_to_strip_wav_header,
|
|
562
|
+
):
|
|
563
|
+
await self.stop_ttfb_metrics()
|
|
564
|
+
yield frame
|
|
561
565
|
|
|
562
|
-
if len(chunk) > 0:
|
|
563
|
-
await self.stop_ttfb_metrics()
|
|
564
|
-
frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
|
|
565
|
-
yield frame
|
|
566
566
|
except Exception as e:
|
|
567
567
|
logger.exception(f"Error generating TTS: {e}")
|
|
568
568
|
yield ErrorFrame(error=f"Rime TTS error: {str(e)}")
|
pipecat/services/riva/stt.py
CHANGED
|
@@ -583,7 +583,9 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
583
583
|
self._config.language_code = self._language
|
|
584
584
|
|
|
585
585
|
@traced_stt
|
|
586
|
-
async def _handle_transcription(
|
|
586
|
+
async def _handle_transcription(
|
|
587
|
+
self, transcript: str, is_final: bool, language: Optional[Language] = None
|
|
588
|
+
):
|
|
587
589
|
"""Handle a transcription result with tracing."""
|
|
588
590
|
pass
|
|
589
591
|
|