dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/openai/base_llm.py CHANGED
@@ -6,6 +6,7 @@
 
 """Base OpenAI LLM service implementation."""
 
+import asyncio
 import base64
 import json
 from typing import Any, Dict, List, Mapping, Optional
@@ -14,6 +15,7 @@ import httpx
 from loguru import logger
 from openai import (
     NOT_GIVEN,
+    APITimeoutError,
     AsyncOpenAI,
     AsyncStream,
     DefaultAsyncHttpxClient,
@@ -37,7 +39,6 @@ from pipecat.processors.aggregators.openai_llm_context import (
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_llm
 
 
@@ -48,16 +49,6 @@ class BaseOpenAILLMService(LLMService):
     to an OpenAILLMContext object. The context defines what is sent to the LLM for
     completion, including user, assistant, and system messages, as well as tool
     choices and function call configurations.
-
-    Args:
-        model: The OpenAI model name to use (e.g., "gpt-4.1", "gpt-4o").
-        api_key: OpenAI API key. If None, uses environment variable.
-        base_url: Custom base URL for OpenAI API. If None, uses default.
-        organization: OpenAI organization ID.
-        project: OpenAI project ID.
-        default_headers: Additional HTTP headers to include in requests.
-        params: Input parameters for model configuration and behavior.
-        **kwargs: Additional arguments passed to the parent LLMService.
     """
 
     class InputParams(BaseModel):
@@ -101,8 +92,24 @@ class BaseOpenAILLMService(LLMService):
         project=None,
         default_headers: Optional[Mapping[str, str]] = None,
         params: Optional[InputParams] = None,
+        retry_timeout_secs: Optional[float] = 5.0,
+        retry_on_timeout: Optional[bool] = False,
         **kwargs,
     ):
+        """Initialize the BaseOpenAILLMService.
+
+        Args:
+            model: The OpenAI model name to use (e.g., "gpt-4.1", "gpt-4o").
+            api_key: OpenAI API key. If None, uses environment variable.
+            base_url: Custom base URL for OpenAI API. If None, uses default.
+            organization: OpenAI organization ID.
+            project: OpenAI project ID.
+            default_headers: Additional HTTP headers to include in requests.
+            params: Input parameters for model configuration and behavior.
+            retry_timeout_secs: Request timeout in seconds. Defaults to 5.0 seconds.
+            retry_on_timeout: Whether to retry the request once if it times out.
+            **kwargs: Additional arguments passed to the parent LLMService.
+        """
         super().__init__(**kwargs)
 
         params = params or BaseOpenAILLMService.InputParams()
@@ -117,6 +124,8 @@ class BaseOpenAILLMService(LLMService):
             "max_completion_tokens": params.max_completion_tokens,
             "extra": params.extra if isinstance(params.extra, dict) else {},
         }
+        self._retry_timeout_secs = retry_timeout_secs
+        self._retry_on_timeout = retry_on_timeout
         self.set_model_name(model)
         self._client = self.create_client(
             api_key=api_key,
@@ -173,7 +182,7 @@ class BaseOpenAILLMService(LLMService):
     async def get_chat_completions(
         self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
     ) -> AsyncStream[ChatCompletionChunk]:
-        """Get streaming chat completions from OpenAI API.
+        """Get streaming chat completions from OpenAI API with optional timeout and retry.
 
         Args:
             context: The LLM context containing tools and configuration.
@@ -182,6 +191,37 @@ class BaseOpenAILLMService(LLMService):
         Returns:
             Async stream of chat completion chunks.
         """
+        params = self.build_chat_completion_params(context, messages)
+
+        if self._retry_on_timeout:
+            try:
+                chunks = await asyncio.wait_for(
+                    self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
+                )
+                return chunks
+            except (APITimeoutError, asyncio.TimeoutError):
+                # Retry, this time without a timeout so we get a response
+                logger.debug(f"{self}: Retrying chat completion due to timeout")
+                chunks = await self._client.chat.completions.create(**params)
+                return chunks
+        else:
+            chunks = await self._client.chat.completions.create(**params)
+            return chunks
+
+    def build_chat_completion_params(
+        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
+    ) -> dict:
+        """Build parameters for chat completion request.
+
+        Subclasses can override this to customize parameters for different providers.
+
+        Args:
+            context: The LLM context containing tools and configuration.
+            messages: List of chat completion messages to send.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
+        """
         params = {
             "model": self.model_name,
             "stream": True,
@@ -199,9 +239,7 @@ class BaseOpenAILLMService(LLMService):
         }
 
         params.update(self._settings["extra"])
-
-        chunks = await self._client.chat.completions.create(**params)
-        return chunks
+        return params
 
     async def _stream_chat_completions(
         self, context: OpenAILLMContext
@@ -245,7 +283,7 @@ class BaseOpenAILLMService(LLMService):
             context
         )
 
-        async for chunk in WatchdogAsyncIterator(chunk_stream, manager=self.task_manager):
+        async for chunk in chunk_stream:
            if chunk.usage:
                tokens = LLMTokenUsage(
                    prompt_tokens=chunk.usage.prompt_tokens,
pipecat/services/openai/image.py CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""OpenAI image generation service implementation.
+
+This module provides integration with OpenAI's DALL-E image generation API
+for creating images from text prompts.
+"""
+
 import io
 from typing import AsyncGenerator, Literal, Optional
 
@@ -21,6 +27,13 @@ from pipecat.services.image_service import ImageGenService
 
 
 class OpenAIImageGenService(ImageGenService):
+    """OpenAI DALL-E image generation service.
+
+    Provides image generation capabilities using OpenAI's DALL-E models.
+    Supports various image sizes and can generate images from text prompts
+    with configurable quality and style parameters.
+    """
+
     def __init__(
         self,
         *,
@@ -30,6 +43,15 @@ class OpenAIImageGenService(ImageGenService):
         image_size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"],
         model: str = "dall-e-3",
     ):
+        """Initialize the OpenAI image generation service.
+
+        Args:
+            api_key: OpenAI API key for authentication.
+            base_url: Custom base URL for OpenAI API. If None, uses default.
+            aiohttp_session: HTTP session for downloading generated images.
+            image_size: Target size for generated images.
+            model: DALL-E model to use for generation. Defaults to "dall-e-3".
+        """
         super().__init__()
         self.set_model_name(model)
         self._image_size = image_size
@@ -37,6 +59,14 @@ class OpenAIImageGenService(ImageGenService):
         self._aiohttp_session = aiohttp_session
 
     async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
+        """Generate an image from a text prompt using OpenAI's DALL-E.
+
+        Args:
+            prompt: Text description of the image to generate.
+
+        Yields:
+            Frame: URLImageRawFrame containing the generated image data.
+        """
        logger.debug(f"Generating image from prompt: {prompt}")
 
        image = await self._client.images.generate(
pipecat/services/openai/llm.py CHANGED
@@ -61,11 +61,6 @@ class OpenAILLMService(BaseOpenAILLMService):
     Provides a complete OpenAI LLM service with context aggregation support.
     Uses the BaseOpenAILLMService for core functionality and adds OpenAI-specific
     context aggregator creation.
-
-    Args:
-        model: The OpenAI model name to use. Defaults to "gpt-4.1".
-        params: Input parameters for model configuration.
-        **kwargs: Additional arguments passed to the parent BaseOpenAILLMService.
     """
 
     def __init__(
@@ -75,6 +70,13 @@ class OpenAILLMService(BaseOpenAILLMService):
         params: Optional[BaseOpenAILLMService.InputParams] = None,
         **kwargs,
     ):
+        """Initialize OpenAI LLM service.
+
+        Args:
+            model: The OpenAI model name to use. Defaults to "gpt-4.1".
+            params: Input parameters for model configuration.
+            **kwargs: Additional arguments passed to the parent BaseOpenAILLMService.
+        """
         super().__init__(model=model, params=params, **kwargs)
 
     def create_context_aggregator(
pipecat/services/openai/stt.py CHANGED
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""OpenAI Speech-to-Text service implementation using OpenAI's transcription API."""
+
 from typing import Optional
 
 from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription
@@ -15,15 +17,6 @@ class OpenAISTTService(BaseWhisperSTTService):
 
     Uses OpenAI's transcription API to convert audio to text. Requires an OpenAI API key
     set via the api_key parameter or OPENAI_API_KEY environment variable.
-
-    Args:
-        model: Model to use — either gpt-4o or Whisper. Defaults to "gpt-4o-transcribe".
-        api_key: OpenAI API key. Defaults to None.
-        base_url: API base URL. Defaults to None.
-        language: Language of the audio input. Defaults to English.
-        prompt: Optional text to guide the model's style or continue a previous segment.
-        temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
-        **kwargs: Additional arguments passed to BaseWhisperSTTService.
     """
 
     def __init__(
@@ -37,6 +30,17 @@ class OpenAISTTService(BaseWhisperSTTService):
         temperature: Optional[float] = None,
         **kwargs,
     ):
+        """Initialize OpenAI STT service.
+
+        Args:
+            model: Model to use — either gpt-4o or Whisper. Defaults to "gpt-4o-transcribe".
+            api_key: OpenAI API key. Defaults to None.
+            base_url: API base URL. Defaults to None.
+            language: Language of the audio input. Defaults to English.
+            prompt: Optional text to guide the model's style or continue a previous segment.
+            temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
+            **kwargs: Additional arguments passed to BaseWhisperSTTService.
+        """
         super().__init__(
             model=model,
             api_key=api_key,
pipecat/services/openai/tts.py CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""OpenAI text-to-speech service implementation.
+
+This module provides integration with OpenAI's text-to-speech API for
+generating high-quality synthetic speech from text input.
+"""
+
 from typing import AsyncGenerator, Dict, Literal, Optional
 
 from loguru import logger
@@ -43,16 +49,8 @@ class OpenAITTSService(TTSService):
     """OpenAI Text-to-Speech service that generates audio from text.
 
     This service uses the OpenAI TTS API to generate PCM-encoded audio at 24kHz.
-
-    Args:
-        api_key: OpenAI API key. Defaults to None.
-        voice: Voice ID to use. Defaults to "alloy".
-        model: TTS model to use. Defaults to "gpt-4o-mini-tts".
-        sample_rate: Output audio sample rate in Hz. Defaults to None.
-        **kwargs: Additional keyword arguments passed to TTSService.
-
-    The service returns PCM-encoded audio at the specified sample rate.
-
+    Supports multiple voice models and configurable parameters for high-quality
+    speech synthesis with streaming audio output.
     """
 
     OPENAI_SAMPLE_RATE = 24000  # OpenAI TTS always outputs at 24kHz
@@ -68,6 +66,17 @@ class OpenAITTSService(TTSService):
         instructions: Optional[str] = None,
         **kwargs,
     ):
+        """Initialize OpenAI TTS service.
+
+        Args:
+            api_key: OpenAI API key for authentication. If None, uses environment variable.
+            base_url: Custom base URL for OpenAI API. If None, uses default.
+            voice: Voice ID to use for synthesis. Defaults to "alloy".
+            model: TTS model to use. Defaults to "gpt-4o-mini-tts".
+            sample_rate: Output audio sample rate in Hz. If None, uses OpenAI's default 24kHz.
+            instructions: Optional instructions to guide voice synthesis behavior.
+            **kwargs: Additional keyword arguments passed to TTSService.
+        """
         if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
             logger.warning(
                 f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. "
@@ -81,13 +90,28 @@
         self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)
 
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as OpenAI TTS service supports metrics generation.
+        """
         return True
 
     async def set_model(self, model: str):
+        """Set the TTS model to use.
+
+        Args:
+            model: The model name to use for text-to-speech synthesis.
+        """
         logger.info(f"Switching TTS model to: [{model}]")
         self.set_model_name(model)
 
     async def start(self, frame: StartFrame):
+        """Start the OpenAI TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         if self.sample_rate != self.OPENAI_SAMPLE_RATE:
             logger.warning(
@@ -97,6 +121,14 @@
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using OpenAI's TTS API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech data.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
         try:
             await self.start_ttfb_metrics()
pipecat/services/openai_realtime_beta/azure.py CHANGED
@@ -11,7 +11,7 @@ from loguru import logger
 from .openai import OpenAIRealtimeBetaLLMService
 
 try:
-    import websockets
+    from websockets.asyncio.client import connect as websocket_connect
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error(
@@ -26,12 +26,6 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
     Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
     using Azure's authentication headers and endpoint format. Provides the same
     real-time audio and text communication capabilities as the base OpenAI service.
-
-    Args:
-        api_key: The API key for the Azure OpenAI service.
-        base_url: The full Azure WebSocket endpoint URL including api-version and deployment.
-            Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment"
-        **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
     """
 
     def __init__(
@@ -41,6 +35,14 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
         base_url: str,
         **kwargs,
     ):
+        """Initialize Azure Realtime Beta LLM service.
+
+        Args:
+            api_key: The API key for the Azure OpenAI service.
+            base_url: The full Azure WebSocket endpoint URL including api-version and deployment.
+                Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment"
+            **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
+        """
         super().__init__(base_url=base_url, api_key=api_key, **kwargs)
         self.api_key = api_key
         self.base_url = base_url
@@ -53,9 +55,9 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
             return
 
         logger.info(f"Connecting to {self.base_url}, api key: {self.api_key}")
-        self._websocket = await websockets.connect(
+        self._websocket = await websocket_connect(
             uri=self.base_url,
-            extra_headers={
+            additional_headers={
                 "api-key": self.api_key,
             },
         )
pipecat/services/openai_realtime_beta/context.py CHANGED
@@ -37,14 +37,16 @@ class OpenAIRealtimeLLMContext(OpenAILLMContext):
     Extends the standard OpenAI LLM context to support real-time session properties,
     instruction management, and conversion between standard message formats and
     realtime conversation items.
-
-    Args:
-        messages: Initial conversation messages. Defaults to None.
-        tools: Available function tools. Defaults to None.
-        **kwargs: Additional arguments passed to parent OpenAILLMContext.
     """
 
     def __init__(self, messages=None, tools=None, **kwargs):
+        """Initialize the OpenAIRealtimeLLMContext.
+
+        Args:
+            messages: Initial conversation messages. Defaults to None.
+            tools: Available function tools. Defaults to None.
+            **kwargs: Additional arguments passed to parent OpenAILLMContext.
+        """
         super().__init__(messages=messages, tools=tools, **kwargs)
         self.__setup_local()
 
pipecat/services/openai_realtime_beta/events.py CHANGED
@@ -18,13 +18,7 @@ from pydantic import BaseModel, ConfigDict, Field
 
 
 class InputAudioTranscription(BaseModel):
-    """Configuration for audio transcription settings.
-
-    Parameters:
-        model: Transcription model to use (e.g., "gpt-4o-transcribe", "whisper-1").
-        language: Optional language code for transcription.
-        prompt: Optional transcription hint text.
-    """
+    """Configuration for audio transcription settings."""
 
     model: str = "gpt-4o-transcribe"
     language: Optional[str]
@@ -36,6 +30,13 @@ class InputAudioTranscription(BaseModel):
         language: Optional[str] = None,
         prompt: Optional[str] = None,
     ):
+        """Initialize InputAudioTranscription.
+
+        Args:
+            model: Transcription model to use (e.g., "gpt-4o-transcribe", "whisper-1").
+            language: Optional language code for transcription.
+            prompt: Optional transcription hint text.
+        """
         super().__init__(model=model, language=language, prompt=prompt)
 
 
@@ -881,6 +882,8 @@ class TokenDetails(BaseModel):
     audio_tokens: Optional[int] = 0
 
     class Config:
+        """Pydantic configuration for TokenDetails."""
+
         extra = "allow"
 
 
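Review note: extra = "allow" keeps unrecognized usage fields on the parsed model instead of rejecting them, which is useful as the Realtime API adds token-accounting fields. A small illustration of the standard pydantic behavior here (the undeclared field name is only an example):

    from pipecat.services.openai_realtime_beta.events import TokenDetails

    # "reasoning_tokens" is not declared on TokenDetails; extra="allow" keeps it.
    details = TokenDetails(audio_tokens=12, reasoning_tokens=3)
    print(details.audio_tokens)      # 12
    print(details.reasoning_tokens)  # 3, preserved rather than raising a validation error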
pipecat/services/openai_realtime_beta/openai.py CHANGED
@@ -53,7 +53,6 @@ from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
 from pipecat.services.openai.llm import OpenAIContextAggregatorPair
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.time import time_now_iso8601
 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt
 
@@ -66,7 +65,7 @@ from .context import (
 from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame
 
 try:
-    import websockets
+    from websockets.asyncio.client import connect as websocket_connect
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use OpenAI, you need to `pip install pipecat-ai[openai]`.")
@@ -96,17 +95,6 @@ class OpenAIRealtimeBetaLLMService(LLMService):
     Implements the OpenAI Realtime API Beta with WebSocket communication for low-latency
     bidirectional audio and text interactions. Supports function calling, conversation
     management, and real-time transcription.
-
-    Args:
-        api_key: OpenAI API key for authentication.
-        model: OpenAI model name. Defaults to "gpt-4o-realtime-preview-2025-06-03".
-        base_url: WebSocket base URL for the realtime API.
-            Defaults to "wss://api.openai.com/v1/realtime".
-        session_properties: Configuration properties for the realtime session.
-            If None, uses default SessionProperties.
-        start_audio_paused: Whether to start with audio input paused. Defaults to False.
-        send_transcription_frames: Whether to emit transcription frames. Defaults to True.
-        **kwargs: Additional arguments passed to parent LLMService.
     """
 
     # Overriding the default adapter to use the OpenAIRealtimeLLMAdapter one.
@@ -123,6 +111,19 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         send_transcription_frames: bool = True,
         **kwargs,
     ):
+        """Initialize the OpenAI Realtime Beta LLM service.
+
+        Args:
+            api_key: OpenAI API key for authentication.
+            model: OpenAI model name. Defaults to "gpt-4o-realtime-preview-2025-06-03".
+            base_url: WebSocket base URL for the realtime API.
+                Defaults to "wss://api.openai.com/v1/realtime".
+            session_properties: Configuration properties for the realtime session.
+                If None, uses default SessionProperties.
+            start_audio_paused: Whether to start with audio input paused. Defaults to False.
+            send_transcription_frames: Whether to emit transcription frames. Defaults to True.
+            **kwargs: Additional arguments passed to parent LLMService.
+        """
         full_url = f"{base_url}?model={model}"
         super().__init__(base_url=full_url, **kwargs)
 
@@ -169,6 +170,15 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         """
         self._audio_input_paused = paused
 
+    def _is_modality_enabled(self, modality: str) -> bool:
+        """Check if a specific modality is enabled, "text" or "audio"."""
+        modalities = self._session_properties.modalities or ["audio", "text"]
+        return modality in modalities
+
+    def _get_enabled_modalities(self) -> list[str]:
+        """Get the list of enabled modalities."""
+        return self._session_properties.modalities or ["audio", "text"]
+
     async def retrieve_conversation_item(self, item_id: str):
         """Retrieve a conversation item by ID from the server.
 
@@ -241,7 +251,9 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         await self.stop_all_metrics()
         if self._current_assistant_response:
             await self.push_frame(LLMFullResponseEndFrame())
-            await self.push_frame(TTSStoppedFrame())
+            # Only push TTSStoppedFrame if audio modality is enabled
+            if self._is_modality_enabled("audio"):
+                await self.push_frame(TTSStoppedFrame())
 
     async def _handle_user_started_speaking(self, frame):
         pass
@@ -385,9 +397,9 @@ class OpenAIRealtimeBetaLLMService(LLMService):
             # Here we assume that if we have a websocket, we are connected. We
             # handle disconnections in the send/recv code paths.
             return
-        self._websocket = await websockets.connect(
+        self._websocket = await websocket_connect(
             uri=self.base_url,
-            extra_headers={
+            additional_headers={
                 "Authorization": f"Bearer {self.api_key}",
                 "OpenAI-Beta": "realtime=v1",
             },
@@ -443,7 +455,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
     #
 
     async def _receive_task_handler(self):
-        async for message in WatchdogAsyncIterator(self._websocket, manager=self.task_manager):
+        async for message in self._websocket:
             evt = events.parse_server_event(message)
             if evt.type == "session.created":
                 await self._handle_evt_session_created(evt)
@@ -467,6 +479,8 @@ class OpenAIRealtimeBetaLLMService(LLMService):
             await self._handle_evt_speech_started(evt)
         elif evt.type == "input_audio_buffer.speech_stopped":
             await self._handle_evt_speech_stopped(evt)
+        elif evt.type == "response.text.delta":
+            await self._handle_evt_text_delta(evt)
         elif evt.type == "response.audio_transcript.delta":
             await self._handle_evt_audio_transcript_delta(evt)
         elif evt.type == "error":
@@ -615,6 +629,10 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         # Response message without preceding user message. Add it to the context.
         await self._handle_assistant_output(evt.response.output)
 
+    async def _handle_evt_text_delta(self, evt):
+        if evt.delta:
+            await self.push_frame(LLMTextFrame(evt.delta))
+
     async def _handle_evt_audio_transcript_delta(self, evt):
         if evt.delta:
             await self.push_frame(LLMTextFrame(evt.delta))
@@ -637,6 +655,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         """Maybe handle an error event related to retrieving a conversation item.
 
         If the given error event is an error retrieving a conversation item:
+
         - set an exception on the future that retrieve_conversation_item() is waiting on
         - return true
         Otherwise:
@@ -720,7 +739,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         await self.start_ttfb_metrics()
         await self.send_client_event(
             events.ResponseCreateEvent(
-                response=events.ResponseProperties(modalities=["audio", "text"])
+                response=events.ResponseProperties(modalities=self._get_enabled_modalities())
             )
         )
 
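Review note: taken together, the changes above make the session's modalities setting drive the text-only path end to end: responses are created with the enabled modalities, response.text.delta events are forwarded as LLMTextFrame, and TTSStoppedFrame is skipped when audio is disabled. A configuration sketch (SessionProperties fields assumed from this package's events module):

    import os

    from pipecat.services.openai_realtime_beta.events import SessionProperties
    from pipecat.services.openai_realtime_beta.openai import OpenAIRealtimeBetaLLMService

    llm = OpenAIRealtimeBetaLLMService(
        api_key=os.environ["OPENAI_API_KEY"],
        session_properties=SessionProperties(modalities=["text"]),
    )
    # _get_enabled_modalities() now returns ["text"], so responses are requested
    # text-only and no TTSStoppedFrame is pushed at the end of a response.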