PyPI - dv-pipecat-ai - Versions diffs - 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show

{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
pipecat/__init__.py +17 -0
pipecat/adapters/base_llm_adapter.py +36 -1
pipecat/adapters/schemas/direct_function.py +296 -0
pipecat/adapters/schemas/function_schema.py +15 -6
pipecat/adapters/schemas/tools_schema.py +55 -7
pipecat/adapters/services/anthropic_adapter.py +22 -3
pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
pipecat/adapters/services/bedrock_adapter.py +22 -3
pipecat/adapters/services/gemini_adapter.py +16 -3
pipecat/adapters/services/open_ai_adapter.py +17 -2
pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
pipecat/audio/filters/base_audio_filter.py +30 -6
pipecat/audio/filters/koala_filter.py +37 -2
pipecat/audio/filters/krisp_filter.py +59 -6
pipecat/audio/filters/noisereduce_filter.py +37 -0
pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
pipecat/audio/mixers/base_audio_mixer.py +30 -7
pipecat/audio/mixers/soundfile_mixer.py +53 -6
pipecat/audio/resamplers/base_audio_resampler.py +17 -9
pipecat/audio/resamplers/resampy_resampler.py +26 -1
pipecat/audio/resamplers/soxr_resampler.py +32 -1
pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
pipecat/audio/utils.py +194 -1
pipecat/audio/vad/silero.py +60 -3
pipecat/audio/vad/vad_analyzer.py +114 -30
pipecat/clocks/base_clock.py +19 -0
pipecat/clocks/system_clock.py +25 -0
pipecat/extensions/voicemail/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py +707 -0
pipecat/frames/frames.py +590 -156
pipecat/metrics/metrics.py +64 -1
pipecat/observers/base_observer.py +58 -19
pipecat/observers/loggers/debug_log_observer.py +56 -64
pipecat/observers/loggers/llm_log_observer.py +8 -1
pipecat/observers/loggers/transcription_log_observer.py +19 -7
pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
pipecat/observers/turn_tracking_observer.py +26 -1
pipecat/pipeline/base_pipeline.py +5 -7
pipecat/pipeline/base_task.py +52 -9
pipecat/pipeline/parallel_pipeline.py +121 -177
pipecat/pipeline/pipeline.py +129 -20
pipecat/pipeline/runner.py +50 -1
pipecat/pipeline/sync_parallel_pipeline.py +132 -32
pipecat/pipeline/task.py +263 -280
pipecat/pipeline/task_observer.py +85 -34
pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
pipecat/processors/aggregators/gated.py +25 -24
pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
pipecat/processors/aggregators/llm_response.py +398 -89
pipecat/processors/aggregators/openai_llm_context.py +161 -13
pipecat/processors/aggregators/sentence.py +25 -14
pipecat/processors/aggregators/user_response.py +28 -3
pipecat/processors/aggregators/vision_image_frame.py +24 -14
pipecat/processors/async_generator.py +28 -0
pipecat/processors/audio/audio_buffer_processor.py +78 -37
pipecat/processors/consumer_processor.py +25 -6
pipecat/processors/filters/frame_filter.py +23 -0
pipecat/processors/filters/function_filter.py +30 -0
pipecat/processors/filters/identity_filter.py +17 -2
pipecat/processors/filters/null_filter.py +24 -1
pipecat/processors/filters/stt_mute_filter.py +56 -21
pipecat/processors/filters/wake_check_filter.py +46 -3
pipecat/processors/filters/wake_notifier_filter.py +21 -3
pipecat/processors/frame_processor.py +488 -131
pipecat/processors/frameworks/langchain.py +38 -3
pipecat/processors/frameworks/rtvi.py +719 -34
pipecat/processors/gstreamer/pipeline_source.py +41 -0
pipecat/processors/idle_frame_processor.py +26 -3
pipecat/processors/logger.py +23 -0
pipecat/processors/metrics/frame_processor_metrics.py +77 -4
pipecat/processors/metrics/sentry.py +42 -4
pipecat/processors/producer_processor.py +34 -14
pipecat/processors/text_transformer.py +22 -10
pipecat/processors/transcript_processor.py +48 -29
pipecat/processors/user_idle_processor.py +31 -21
pipecat/runner/__init__.py +1 -0
pipecat/runner/daily.py +132 -0
pipecat/runner/livekit.py +148 -0
pipecat/runner/run.py +543 -0
pipecat/runner/types.py +67 -0
pipecat/runner/utils.py +515 -0
pipecat/serializers/base_serializer.py +42 -0
pipecat/serializers/exotel.py +17 -6
pipecat/serializers/genesys.py +95 -0
pipecat/serializers/livekit.py +33 -0
pipecat/serializers/plivo.py +16 -15
pipecat/serializers/protobuf.py +37 -1
pipecat/serializers/telnyx.py +18 -17
pipecat/serializers/twilio.py +32 -16
pipecat/services/ai_service.py +5 -3
pipecat/services/anthropic/llm.py +113 -43
pipecat/services/assemblyai/models.py +63 -5
pipecat/services/assemblyai/stt.py +64 -11
pipecat/services/asyncai/__init__.py +0 -0
pipecat/services/asyncai/tts.py +501 -0
pipecat/services/aws/llm.py +185 -111
pipecat/services/aws/stt.py +217 -23
pipecat/services/aws/tts.py +118 -52
pipecat/services/aws/utils.py +101 -5
pipecat/services/aws_nova_sonic/aws.py +82 -64
pipecat/services/aws_nova_sonic/context.py +15 -6
pipecat/services/azure/common.py +10 -2
pipecat/services/azure/image.py +32 -0
pipecat/services/azure/llm.py +9 -7
pipecat/services/azure/stt.py +65 -2
pipecat/services/azure/tts.py +154 -23
pipecat/services/cartesia/stt.py +125 -8
pipecat/services/cartesia/tts.py +102 -38
pipecat/services/cerebras/llm.py +15 -23
pipecat/services/deepgram/stt.py +19 -11
pipecat/services/deepgram/tts.py +36 -0
pipecat/services/deepseek/llm.py +14 -23
pipecat/services/elevenlabs/tts.py +330 -64
pipecat/services/fal/image.py +43 -0
pipecat/services/fal/stt.py +48 -10
pipecat/services/fireworks/llm.py +14 -21
pipecat/services/fish/tts.py +109 -9
pipecat/services/gemini_multimodal_live/__init__.py +1 -0
pipecat/services/gemini_multimodal_live/events.py +83 -2
pipecat/services/gemini_multimodal_live/file_api.py +189 -0
pipecat/services/gemini_multimodal_live/gemini.py +218 -21
pipecat/services/gladia/config.py +17 -10
pipecat/services/gladia/stt.py +82 -36
pipecat/services/google/frames.py +40 -0
pipecat/services/google/google.py +2 -0
pipecat/services/google/image.py +39 -2
pipecat/services/google/llm.py +176 -58
pipecat/services/google/llm_openai.py +26 -4
pipecat/services/google/llm_vertex.py +37 -15
pipecat/services/google/rtvi.py +41 -0
pipecat/services/google/stt.py +65 -17
pipecat/services/google/test-google-chirp.py +45 -0
pipecat/services/google/tts.py +390 -19
pipecat/services/grok/llm.py +8 -6
pipecat/services/groq/llm.py +8 -6
pipecat/services/groq/stt.py +13 -9
pipecat/services/groq/tts.py +40 -0
pipecat/services/hamsa/__init__.py +9 -0
pipecat/services/hamsa/stt.py +241 -0
pipecat/services/heygen/__init__.py +5 -0
pipecat/services/heygen/api.py +281 -0
pipecat/services/heygen/client.py +620 -0
pipecat/services/heygen/video.py +338 -0
pipecat/services/image_service.py +5 -3
pipecat/services/inworld/__init__.py +1 -0
pipecat/services/inworld/tts.py +592 -0
pipecat/services/llm_service.py +127 -45
pipecat/services/lmnt/tts.py +80 -7
pipecat/services/mcp_service.py +85 -44
pipecat/services/mem0/memory.py +42 -13
pipecat/services/minimax/tts.py +74 -15
pipecat/services/mistral/__init__.py +0 -0
pipecat/services/mistral/llm.py +185 -0
pipecat/services/moondream/vision.py +55 -10
pipecat/services/neuphonic/tts.py +275 -48
pipecat/services/nim/llm.py +8 -6
pipecat/services/ollama/llm.py +27 -7
pipecat/services/openai/base_llm.py +54 -16
pipecat/services/openai/image.py +30 -0
pipecat/services/openai/llm.py +7 -5
pipecat/services/openai/stt.py +13 -9
pipecat/services/openai/tts.py +42 -10
pipecat/services/openai_realtime_beta/azure.py +11 -9
pipecat/services/openai_realtime_beta/context.py +7 -5
pipecat/services/openai_realtime_beta/events.py +10 -7
pipecat/services/openai_realtime_beta/openai.py +37 -18
pipecat/services/openpipe/llm.py +30 -24
pipecat/services/openrouter/llm.py +9 -7
pipecat/services/perplexity/llm.py +15 -19
pipecat/services/piper/tts.py +26 -12
pipecat/services/playht/tts.py +227 -65
pipecat/services/qwen/llm.py +8 -6
pipecat/services/rime/tts.py +128 -17
pipecat/services/riva/stt.py +160 -22
pipecat/services/riva/tts.py +67 -2
pipecat/services/sambanova/llm.py +19 -17
pipecat/services/sambanova/stt.py +14 -8
pipecat/services/sarvam/tts.py +60 -13
pipecat/services/simli/video.py +82 -21
pipecat/services/soniox/__init__.py +0 -0
pipecat/services/soniox/stt.py +398 -0
pipecat/services/speechmatics/stt.py +29 -17
pipecat/services/stt_service.py +47 -11
pipecat/services/tavus/video.py +94 -25
pipecat/services/together/llm.py +8 -6
pipecat/services/tts_service.py +77 -53
pipecat/services/ultravox/stt.py +46 -43
pipecat/services/vision_service.py +5 -3
pipecat/services/websocket_service.py +12 -11
pipecat/services/whisper/base_stt.py +58 -12
pipecat/services/whisper/stt.py +69 -58
pipecat/services/xtts/tts.py +59 -2
pipecat/sync/base_notifier.py +19 -0
pipecat/sync/event_notifier.py +24 -0
pipecat/tests/utils.py +73 -5
pipecat/transcriptions/language.py +24 -0
pipecat/transports/base_input.py +112 -8
pipecat/transports/base_output.py +235 -13
pipecat/transports/base_transport.py +119 -0
pipecat/transports/local/audio.py +76 -0
pipecat/transports/local/tk.py +84 -0
pipecat/transports/network/fastapi_websocket.py +174 -15
pipecat/transports/network/small_webrtc.py +383 -39
pipecat/transports/network/webrtc_connection.py +214 -8
pipecat/transports/network/websocket_client.py +171 -1
pipecat/transports/network/websocket_server.py +147 -9
pipecat/transports/services/daily.py +792 -70
pipecat/transports/services/helpers/daily_rest.py +122 -129
pipecat/transports/services/livekit.py +339 -4
pipecat/transports/services/tavus.py +273 -38
pipecat/utils/asyncio/task_manager.py +92 -186
pipecat/utils/base_object.py +83 -1
pipecat/utils/network.py +2 -0
pipecat/utils/string.py +114 -58
pipecat/utils/text/base_text_aggregator.py +44 -13
pipecat/utils/text/base_text_filter.py +46 -0
pipecat/utils/text/markdown_text_filter.py +70 -14
pipecat/utils/text/pattern_pair_aggregator.py +18 -14
pipecat/utils/text/simple_text_aggregator.py +43 -2
pipecat/utils/text/skip_tags_aggregator.py +21 -13
pipecat/utils/time.py +36 -0
pipecat/utils/tracing/class_decorators.py +32 -7
pipecat/utils/tracing/conversation_context_provider.py +12 -2
pipecat/utils/tracing/service_attributes.py +80 -64
pipecat/utils/tracing/service_decorators.py +48 -21
pipecat/utils/tracing/setup.py +13 -7
pipecat/utils/tracing/turn_context_provider.py +12 -2
pipecat/utils/tracing/turn_trace_observer.py +27 -0
pipecat/utils/utils.py +14 -14
dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
pipecat/examples/daily_runner.py +0 -64
pipecat/examples/run.py +0 -265
pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
pipecat/utils/asyncio/watchdog_event.py +0 -42
pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
pipecat/utils/asyncio/watchdog_queue.py +0 -48
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
/pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/services/openai/base_llm.py CHANGED Viewed

@@ -6,6 +6,7 @@
 """Base OpenAI LLM service implementation."""
+import asyncio
 import base64
 import json
 from typing import Any, Dict, List, Mapping, Optional
@@ -14,6 +15,7 @@ import httpx
 from loguru import logger
 from openai import (
     NOT_GIVEN,
+    APITimeoutError,
     AsyncOpenAI,
     AsyncStream,
     DefaultAsyncHttpxClient,
@@ -37,7 +39,6 @@ from pipecat.processors.aggregators.openai_llm_context import (
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_llm
@@ -48,16 +49,6 @@ class BaseOpenAILLMService(LLMService):
     to an OpenAILLMContext object. The context defines what is sent to the LLM for
     completion, including user, assistant, and system messages, as well as tool
     choices and function call configurations.
-    Args:
-        model: The OpenAI model name to use (e.g., "gpt-4.1", "gpt-4o").
-        api_key: OpenAI API key. If None, uses environment variable.
-        base_url: Custom base URL for OpenAI API. If None, uses default.
-        organization: OpenAI organization ID.
-        project: OpenAI project ID.
-        default_headers: Additional HTTP headers to include in requests.
-        params: Input parameters for model configuration and behavior.
-        **kwargs: Additional arguments passed to the parent LLMService.
     """
     class InputParams(BaseModel):
@@ -101,8 +92,24 @@ class BaseOpenAILLMService(LLMService):
         project=None,
         default_headers: Optional[Mapping[str, str]] = None,
         params: Optional[InputParams] = None,
+        retry_timeout_secs: Optional[float] = 5.0,
+        retry_on_timeout: Optional[bool] = False,
         **kwargs,
     ):
+        """Initialize the BaseOpenAILLMService.
+        Args:
+            model: The OpenAI model name to use (e.g., "gpt-4.1", "gpt-4o").
+            api_key: OpenAI API key. If None, uses environment variable.
+            base_url: Custom base URL for OpenAI API. If None, uses default.
+            organization: OpenAI organization ID.
+            project: OpenAI project ID.
+            default_headers: Additional HTTP headers to include in requests.
+            params: Input parameters for model configuration and behavior.
+            retry_timeout_secs: Request timeout in seconds. Defaults to 5.0 seconds.
+            retry_on_timeout: Whether to retry the request once if it times out.
+            **kwargs: Additional arguments passed to the parent LLMService.
+        """
         super().__init__(**kwargs)
         params = params or BaseOpenAILLMService.InputParams()
@@ -117,6 +124,8 @@ class BaseOpenAILLMService(LLMService):
             "max_completion_tokens": params.max_completion_tokens,
             "extra": params.extra if isinstance(params.extra, dict) else {},
         }
+        self._retry_timeout_secs = retry_timeout_secs
+        self._retry_on_timeout = retry_on_timeout
         self.set_model_name(model)
         self._client = self.create_client(
             api_key=api_key,
@@ -173,7 +182,7 @@ class BaseOpenAILLMService(LLMService):
     async def get_chat_completions(
         self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
     ) -> AsyncStream[ChatCompletionChunk]:
-        """Get streaming chat completions from OpenAI API.
+        """Get streaming chat completions from OpenAI API with optional timeout and retry.
         Args:
             context: The LLM context containing tools and configuration.
@@ -182,6 +191,37 @@ class BaseOpenAILLMService(LLMService):
         Returns:
             Async stream of chat completion chunks.
         """
+        params = self.build_chat_completion_params(context, messages)
+        if self._retry_on_timeout:
+            try:
+                chunks = await asyncio.wait_for(
+                    self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
+                )
+                return chunks
+            except (APITimeoutError, asyncio.TimeoutError):
+                # Retry, this time without a timeout so we get a response
+                logger.debug(f"{self}: Retrying chat completion due to timeout")
+                chunks = await self._client.chat.completions.create(**params)
+                return chunks
+        else:
+            chunks = await self._client.chat.completions.create(**params)
+            return chunks
+    def build_chat_completion_params(
+        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
+    ) -> dict:
+        """Build parameters for chat completion request.
+        Subclasses can override this to customize parameters for different providers.
+        Args:
+            context: The LLM context containing tools and configuration.
+            messages: List of chat completion messages to send.
+        Returns:
+            Dictionary of parameters for the chat completion request.
+        """
         params = {
             "model": self.model_name,
             "stream": True,
@@ -199,9 +239,7 @@ class BaseOpenAILLMService(LLMService):
         }
         params.update(self._settings["extra"])
-        chunks = await self._client.chat.completions.create(**params)
-        return chunks
+        return params
     async def _stream_chat_completions(
         self, context: OpenAILLMContext
@@ -245,7 +283,7 @@ class BaseOpenAILLMService(LLMService):
             context
         )
-        async for chunk in WatchdogAsyncIterator(chunk_stream, manager=self.task_manager):
+        async for chunk in chunk_stream:
             if chunk.usage:
                 tokens = LLMTokenUsage(
                     prompt_tokens=chunk.usage.prompt_tokens,

pipecat/services/openai/image.py CHANGED Viewed

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""OpenAI image generation service implementation.
+This module provides integration with OpenAI's DALL-E image generation API
+for creating images from text prompts.
+"""
 import io
 from typing import AsyncGenerator, Literal, Optional
@@ -21,6 +27,13 @@ from pipecat.services.image_service import ImageGenService
 class OpenAIImageGenService(ImageGenService):
+    """OpenAI DALL-E image generation service.
+    Provides image generation capabilities using OpenAI's DALL-E models.
+    Supports various image sizes and can generate images from text prompts
+    with configurable quality and style parameters.
+    """
     def __init__(
         self,
         *,
@@ -30,6 +43,15 @@ class OpenAIImageGenService(ImageGenService):
         image_size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"],
         model: str = "dall-e-3",
     ):
+        """Initialize the OpenAI image generation service.
+        Args:
+            api_key: OpenAI API key for authentication.
+            base_url: Custom base URL for OpenAI API. If None, uses default.
+            aiohttp_session: HTTP session for downloading generated images.
+            image_size: Target size for generated images.
+            model: DALL-E model to use for generation. Defaults to "dall-e-3".
+        """
         super().__init__()
         self.set_model_name(model)
         self._image_size = image_size
@@ -37,6 +59,14 @@ class OpenAIImageGenService(ImageGenService):
         self._aiohttp_session = aiohttp_session
     async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
+        """Generate an image from a text prompt using OpenAI's DALL-E.
+        Args:
+            prompt: Text description of the image to generate.
+        Yields:
+            Frame: URLImageRawFrame containing the generated image data.
+        """
         logger.debug(f"Generating image from prompt: {prompt}")
         image = await self._client.images.generate(

pipecat/services/openai/llm.py CHANGED Viewed

@@ -61,11 +61,6 @@ class OpenAILLMService(BaseOpenAILLMService):
     Provides a complete OpenAI LLM service with context aggregation support.
     Uses the BaseOpenAILLMService for core functionality and adds OpenAI-specific
     context aggregator creation.
-    Args:
-        model: The OpenAI model name to use. Defaults to "gpt-4.1".
-        params: Input parameters for model configuration.
-        **kwargs: Additional arguments passed to the parent BaseOpenAILLMService.
     """
     def __init__(
@@ -75,6 +70,13 @@ class OpenAILLMService(BaseOpenAILLMService):
         params: Optional[BaseOpenAILLMService.InputParams] = None,
         **kwargs,
     ):
+        """Initialize OpenAI LLM service.
+        Args:
+            model: The OpenAI model name to use. Defaults to "gpt-4.1".
+            params: Input parameters for model configuration.
+            **kwargs: Additional arguments passed to the parent BaseOpenAILLMService.
+        """
         super().__init__(model=model, params=params, **kwargs)
     def create_context_aggregator(

pipecat/services/openai/stt.py CHANGED Viewed

@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""OpenAI Speech-to-Text service implementation using OpenAI's transcription API."""
 from typing import Optional
 from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription
@@ -15,15 +17,6 @@ class OpenAISTTService(BaseWhisperSTTService):
     Uses OpenAI's transcription API to convert audio to text. Requires an OpenAI API key
     set via the api_key parameter or OPENAI_API_KEY environment variable.
-    Args:
-        model: Model to use — either gpt-4o or Whisper. Defaults to "gpt-4o-transcribe".
-        api_key: OpenAI API key. Defaults to None.
-        base_url: API base URL. Defaults to None.
-        language: Language of the audio input. Defaults to English.
-        prompt: Optional text to guide the model's style or continue a previous segment.
-        temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
-        **kwargs: Additional arguments passed to BaseWhisperSTTService.
     """
     def __init__(
@@ -37,6 +30,17 @@ class OpenAISTTService(BaseWhisperSTTService):
         temperature: Optional[float] = None,
         **kwargs,
     ):
+        """Initialize OpenAI STT service.
+        Args:
+            model: Model to use — either gpt-4o or Whisper. Defaults to "gpt-4o-transcribe".
+            api_key: OpenAI API key. Defaults to None.
+            base_url: API base URL. Defaults to None.
+            language: Language of the audio input. Defaults to English.
+            prompt: Optional text to guide the model's style or continue a previous segment.
+            temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
+            **kwargs: Additional arguments passed to BaseWhisperSTTService.
+        """
         super().__init__(
             model=model,
             api_key=api_key,

pipecat/services/openai/tts.py CHANGED Viewed

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""OpenAI text-to-speech service implementation.
+This module provides integration with OpenAI's text-to-speech API for
+generating high-quality synthetic speech from text input.
+"""
 from typing import AsyncGenerator, Dict, Literal, Optional
 from loguru import logger
@@ -43,16 +49,8 @@ class OpenAITTSService(TTSService):
     """OpenAI Text-to-Speech service that generates audio from text.
     This service uses the OpenAI TTS API to generate PCM-encoded audio at 24kHz.
-    Args:
-        api_key: OpenAI API key. Defaults to None.
-        voice: Voice ID to use. Defaults to "alloy".
-        model: TTS model to use. Defaults to "gpt-4o-mini-tts".
-        sample_rate: Output audio sample rate in Hz. Defaults to None.
-        **kwargs: Additional keyword arguments passed to TTSService.
-    The service returns PCM-encoded audio at the specified sample rate.
+    Supports multiple voice models and configurable parameters for high-quality
+    speech synthesis with streaming audio output.
     """
     OPENAI_SAMPLE_RATE = 24000  # OpenAI TTS always outputs at 24kHz
@@ -68,6 +66,17 @@ class OpenAITTSService(TTSService):
         instructions: Optional[str] = None,
         **kwargs,
     ):
+        """Initialize OpenAI TTS service.
+        Args:
+            api_key: OpenAI API key for authentication. If None, uses environment variable.
+            base_url: Custom base URL for OpenAI API. If None, uses default.
+            voice: Voice ID to use for synthesis. Defaults to "alloy".
+            model: TTS model to use. Defaults to "gpt-4o-mini-tts".
+            sample_rate: Output audio sample rate in Hz. If None, uses OpenAI's default 24kHz.
+            instructions: Optional instructions to guide voice synthesis behavior.
+            **kwargs: Additional keyword arguments passed to TTSService.
+        """
         if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
             logger.warning(
                 f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. "
@@ -81,13 +90,28 @@ class OpenAITTSService(TTSService):
         self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as OpenAI TTS service supports metrics generation.
+        """
         return True
     async def set_model(self, model: str):
+        """Set the TTS model to use.
+        Args:
+            model: The model name to use for text-to-speech synthesis.
+        """
         logger.info(f"Switching TTS model to: [{model}]")
         self.set_model_name(model)
     async def start(self, frame: StartFrame):
+        """Start the OpenAI TTS service.
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         if self.sample_rate != self.OPENAI_SAMPLE_RATE:
             logger.warning(
@@ -97,6 +121,14 @@ class OpenAITTSService(TTSService):
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using OpenAI's TTS API.
+        Args:
+            text: The text to synthesize into speech.
+        Yields:
+            Frame: Audio frames containing the synthesized speech data.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
         try:
             await self.start_ttfb_metrics()

pipecat/services/openai_realtime_beta/azure.py CHANGED Viewed

@@ -11,7 +11,7 @@ from loguru import logger
 from .openai import OpenAIRealtimeBetaLLMService
 try:
-    import websockets
+    from websockets.asyncio.client import connect as websocket_connect
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error(
@@ -26,12 +26,6 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
     Extends the OpenAI Realtime service to work with Azure OpenAI endpoints,
     using Azure's authentication headers and endpoint format. Provides the same
     real-time audio and text communication capabilities as the base OpenAI service.
-    Args:
-        api_key: The API key for the Azure OpenAI service.
-        base_url: The full Azure WebSocket endpoint URL including api-version and deployment.
-            Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment"
-        **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
     """
     def __init__(
@@ -41,6 +35,14 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
         base_url: str,
         **kwargs,
     ):
+        """Initialize Azure Realtime Beta LLM service.
+        Args:
+            api_key: The API key for the Azure OpenAI service.
+            base_url: The full Azure WebSocket endpoint URL including api-version and deployment.
+                Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment"
+            **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService.
+        """
         super().__init__(base_url=base_url, api_key=api_key, **kwargs)
         self.api_key = api_key
         self.base_url = base_url
@@ -53,9 +55,9 @@ class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService):
                 return
             logger.info(f"Connecting to {self.base_url}, api key: {self.api_key}")
-            self._websocket = await websockets.connect(
+            self._websocket = await websocket_connect(
                 uri=self.base_url,
-                extra_headers={
+                additional_headers={
                     "api-key": self.api_key,
                 },
             )

pipecat/services/openai_realtime_beta/context.py CHANGED Viewed

@@ -37,14 +37,16 @@ class OpenAIRealtimeLLMContext(OpenAILLMContext):
     Extends the standard OpenAI LLM context to support real-time session properties,
     instruction management, and conversion between standard message formats and
     realtime conversation items.
-    Args:
-        messages: Initial conversation messages. Defaults to None.
-        tools: Available function tools. Defaults to None.
-        **kwargs: Additional arguments passed to parent OpenAILLMContext.
     """
     def __init__(self, messages=None, tools=None, **kwargs):
+        """Initialize the OpenAIRealtimeLLMContext.
+        Args:
+            messages: Initial conversation messages. Defaults to None.
+            tools: Available function tools. Defaults to None.
+            **kwargs: Additional arguments passed to parent OpenAILLMContext.
+        """
         super().__init__(messages=messages, tools=tools, **kwargs)
         self.__setup_local()

pipecat/services/openai_realtime_beta/events.py CHANGED Viewed

@@ -18,13 +18,7 @@ from pydantic import BaseModel, ConfigDict, Field
 class InputAudioTranscription(BaseModel):
-    """Configuration for audio transcription settings.
-    Parameters:
-        model: Transcription model to use (e.g., "gpt-4o-transcribe", "whisper-1").
-        language: Optional language code for transcription.
-        prompt: Optional transcription hint text.
-    """
+    """Configuration for audio transcription settings."""
     model: str = "gpt-4o-transcribe"
     language: Optional[str]
@@ -36,6 +30,13 @@ class InputAudioTranscription(BaseModel):
         language: Optional[str] = None,
         prompt: Optional[str] = None,
     ):
+        """Initialize InputAudioTranscription.
+        Args:
+            model: Transcription model to use (e.g., "gpt-4o-transcribe", "whisper-1").
+            language: Optional language code for transcription.
+            prompt: Optional transcription hint text.
+        """
         super().__init__(model=model, language=language, prompt=prompt)
@@ -881,6 +882,8 @@ class TokenDetails(BaseModel):
     audio_tokens: Optional[int] = 0
     class Config:
+        """Pydantic configuration for TokenDetails."""
         extra = "allow"

pipecat/services/openai_realtime_beta/openai.py CHANGED Viewed

@@ -53,7 +53,6 @@ from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
 from pipecat.services.openai.llm import OpenAIContextAggregatorPair
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.time import time_now_iso8601
 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt
@@ -66,7 +65,7 @@ from .context import (
 from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame
 try:
-    import websockets
+    from websockets.asyncio.client import connect as websocket_connect
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use OpenAI, you need to `pip install pipecat-ai[openai]`.")
@@ -96,17 +95,6 @@ class OpenAIRealtimeBetaLLMService(LLMService):
     Implements the OpenAI Realtime API Beta with WebSocket communication for low-latency
     bidirectional audio and text interactions. Supports function calling, conversation
     management, and real-time transcription.
-    Args:
-        api_key: OpenAI API key for authentication.
-        model: OpenAI model name. Defaults to "gpt-4o-realtime-preview-2025-06-03".
-        base_url: WebSocket base URL for the realtime API.
-            Defaults to "wss://api.openai.com/v1/realtime".
-        session_properties: Configuration properties for the realtime session.
-            If None, uses default SessionProperties.
-        start_audio_paused: Whether to start with audio input paused. Defaults to False.
-        send_transcription_frames: Whether to emit transcription frames. Defaults to True.
-        **kwargs: Additional arguments passed to parent LLMService.
     """
     # Overriding the default adapter to use the OpenAIRealtimeLLMAdapter one.
@@ -123,6 +111,19 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         send_transcription_frames: bool = True,
         **kwargs,
     ):
+        """Initialize the OpenAI Realtime Beta LLM service.
+        Args:
+            api_key: OpenAI API key for authentication.
+            model: OpenAI model name. Defaults to "gpt-4o-realtime-preview-2025-06-03".
+            base_url: WebSocket base URL for the realtime API.
+                Defaults to "wss://api.openai.com/v1/realtime".
+            session_properties: Configuration properties for the realtime session.
+                If None, uses default SessionProperties.
+            start_audio_paused: Whether to start with audio input paused. Defaults to False.
+            send_transcription_frames: Whether to emit transcription frames. Defaults to True.
+            **kwargs: Additional arguments passed to parent LLMService.
+        """
         full_url = f"{base_url}?model={model}"
         super().__init__(base_url=full_url, **kwargs)
@@ -169,6 +170,15 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         """
         self._audio_input_paused = paused
+    def _is_modality_enabled(self, modality: str) -> bool:
+        """Check if a specific modality is enabled, "text" or "audio"."""
+        modalities = self._session_properties.modalities or ["audio", "text"]
+        return modality in modalities
+    def _get_enabled_modalities(self) -> list[str]:
+        """Get the list of enabled modalities."""
+        return self._session_properties.modalities or ["audio", "text"]
     async def retrieve_conversation_item(self, item_id: str):
         """Retrieve a conversation item by ID from the server.
@@ -241,7 +251,9 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         await self.stop_all_metrics()
         if self._current_assistant_response:
             await self.push_frame(LLMFullResponseEndFrame())
-            await self.push_frame(TTSStoppedFrame())
+            # Only push TTSStoppedFrame if audio modality is enabled
+            if self._is_modality_enabled("audio"):
+                await self.push_frame(TTSStoppedFrame())
     async def _handle_user_started_speaking(self, frame):
         pass
@@ -385,9 +397,9 @@ class OpenAIRealtimeBetaLLMService(LLMService):
                 # Here we assume that if we have a websocket, we are connected. We
                 # handle disconnections in the send/recv code paths.
                 return
-            self._websocket = await websockets.connect(
+            self._websocket = await websocket_connect(
                 uri=self.base_url,
-                extra_headers={
+                additional_headers={
                     "Authorization": f"Bearer {self.api_key}",
                     "OpenAI-Beta": "realtime=v1",
                 },
@@ -443,7 +455,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
     #
     async def _receive_task_handler(self):
-        async for message in WatchdogAsyncIterator(self._websocket, manager=self.task_manager):
+        async for message in self._websocket:
             evt = events.parse_server_event(message)
             if evt.type == "session.created":
                 await self._handle_evt_session_created(evt)
@@ -467,6 +479,8 @@ class OpenAIRealtimeBetaLLMService(LLMService):
                 await self._handle_evt_speech_started(evt)
             elif evt.type == "input_audio_buffer.speech_stopped":
                 await self._handle_evt_speech_stopped(evt)
+            elif evt.type == "response.text.delta":
+                await self._handle_evt_text_delta(evt)
             elif evt.type == "response.audio_transcript.delta":
                 await self._handle_evt_audio_transcript_delta(evt)
             elif evt.type == "error":
@@ -615,6 +629,10 @@ class OpenAIRealtimeBetaLLMService(LLMService):
             # Response message without preceding user message. Add it to the context.
             await self._handle_assistant_output(evt.response.output)
+    async def _handle_evt_text_delta(self, evt):
+        if evt.delta:
+            await self.push_frame(LLMTextFrame(evt.delta))
     async def _handle_evt_audio_transcript_delta(self, evt):
         if evt.delta:
             await self.push_frame(LLMTextFrame(evt.delta))
@@ -637,6 +655,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         """Maybe handle an error event related to retrieving a conversation item.
         If the given error event is an error retrieving a conversation item:
         - set an exception on the future that retrieve_conversation_item() is waiting on
         - return true
         Otherwise:
@@ -720,7 +739,7 @@ class OpenAIRealtimeBetaLLMService(LLMService):
         await self.start_ttfb_metrics()
         await self.send_client_event(
             events.ResponseCreateEvent(
-                response=events.ResponseProperties(modalities=["audio", "text"])
+                response=events.ResponseProperties(modalities=self._get_enabled_modalities())
             )
         )

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl