dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698 (full removal shown in the hunk below)
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
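The single largest removal in this release is pipecat/services/openai.py (698 lines, file 191 above; the full deletion appears in the hunk below), while the pipecat/services/openai/ package gains or extends base_llm.py, tts.py and a new realtime/ subpackage. The snippet below is an illustrative migration sketch only: it assumes the deleted module's classes (for example OpenAITTSService) kept their names and now live in submodules of that package; the exact new import paths are not part of this diff.

    # Hypothetical compatibility sketch -- module paths are assumptions, not taken
    # from this diff; verify them against the 0.0.85 package layout before use.
    try:
        # assumed new location inside the pipecat.services.openai package
        from pipecat.services.openai.tts import OpenAITTSService
    except ImportError:
        # pre-0.0.85 layout, where pipecat/services/openai.py provided the class
        from pipecat.services.openai import OpenAITTSService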
@@ -1,698 +0,0 @@
- #
- # Copyright (c) 2024–2025, Daily
- #
- # SPDX-License-Identifier: BSD 2-Clause License
- #
-
- import base64
- import io
- import json
- from dataclasses import dataclass
- from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
-
- import aiohttp
- import httpx
- from loguru import logger
- from openai import (
-     NOT_GIVEN,
-     AsyncOpenAI,
-     AsyncStream,
-     BadRequestError,
-     DefaultAsyncHttpxClient,
- )
- from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
- from PIL import Image
- from pydantic import BaseModel, Field
-
- from pipecat.frames.frames import (
-     ErrorFrame,
-     Frame,
-     FunctionCallInProgressFrame,
-     FunctionCallResultFrame,
-     FunctionCallResultProperties,
-     LLMFullResponseEndFrame,
-     LLMFullResponseStartFrame,
-     LLMMessagesFrame,
-     LLMTextFrame,
-     LLMUpdateSettingsFrame,
-     OpenAILLMContextAssistantTimestampFrame,
-     StartFrame,
-     StartInterruptionFrame,
-     TTSAudioRawFrame,
-     TTSStartedFrame,
-     TTSStoppedFrame,
-     URLImageRawFrame,
-     UserImageRawFrame,
-     UserImageRequestFrame,
-     VisionImageRawFrame,
- )
- from pipecat.metrics.metrics import LLMTokenUsage
- from pipecat.processors.aggregators.llm_response import (
-     LLMAssistantContextAggregator,
-     LLMUserContextAggregator,
- )
- from pipecat.processors.aggregators.openai_llm_context import (
-     OpenAILLMContext,
-     OpenAILLMContextFrame,
- )
- from pipecat.processors.frame_processor import FrameDirection
- from pipecat.services.ai_services import (
-     ImageGenService,
-     LLMService,
-     TTSService,
- )
- from pipecat.services.base_whisper import BaseWhisperSTTService, Transcription
- from pipecat.transcriptions.language import Language
- from pipecat.utils.time import time_now_iso8601
-
- ValidVoice = Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
-
- VALID_VOICES: Dict[str, ValidVoice] = {
-     "alloy": "alloy",
-     "echo": "echo",
-     "fable": "fable",
-     "onyx": "onyx",
-     "nova": "nova",
-     "shimmer": "shimmer",
- }
-
-
- class OpenAIUnhandledFunctionException(Exception):
-     pass
-
-
- class BaseOpenAILLMService(LLMService):
-     """This is the base for all services that use the AsyncOpenAI client.
-
-     This service consumes OpenAILLMContextFrame frames, which contain a reference
-     to an OpenAILLMContext frame. The OpenAILLMContext object defines the context
-     sent to the LLM for a completion. This includes user, assistant and system messages
-     as well as tool choices and the tool, which is used if requesting function
-     calls from the LLM.
-     """
-
-     class InputParams(BaseModel):
-         frequency_penalty: Optional[float] = Field(
-             default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0
-         )
-         presence_penalty: Optional[float] = Field(
-             default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0
-         )
-         seed: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
-         temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=2.0)
-         # Note: top_k is currently not supported by the OpenAI client library,
-         # so top_k is ignored right now.
-         top_k: Optional[int] = Field(default=None, ge=0)
-         top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
-         max_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1)
-         max_completion_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1)
-         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
-
-     def __init__(
-         self,
-         *,
-         model: str,
-         api_key=None,
-         base_url=None,
-         organization=None,
-         project=None,
-         params: InputParams = InputParams(),
-         **kwargs,
-     ):
-         super().__init__(**kwargs)
-         self._settings = {
-             "frequency_penalty": params.frequency_penalty,
-             "presence_penalty": params.presence_penalty,
-             "seed": params.seed,
-             "temperature": params.temperature,
-             "top_p": params.top_p,
-             "max_tokens": params.max_tokens,
-             "max_completion_tokens": params.max_completion_tokens,
-             "extra": params.extra if isinstance(params.extra, dict) else {},
-         }
-         self.set_model_name(model)
-         self._client = self.create_client(
-             api_key=api_key, base_url=base_url, organization=organization, project=project, **kwargs
-         )
-
-     def create_client(self, api_key=None, base_url=None, organization=None, project=None, **kwargs):
-         return AsyncOpenAI(
-             api_key=api_key,
-             base_url=base_url,
-             organization=organization,
-             project=project,
-             http_client=DefaultAsyncHttpxClient(
-                 limits=httpx.Limits(
-                     max_keepalive_connections=100, max_connections=1000, keepalive_expiry=None
-                 )
-             ),
-         )
-
-     def can_generate_metrics(self) -> bool:
-         return True
-
-     async def get_chat_completions(
-         self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-     ) -> AsyncStream[ChatCompletionChunk]:
-         params = {
-             "model": self.model_name,
-             "stream": True,
-             "messages": messages,
-             "tools": context.tools,
-             "tool_choice": context.tool_choice,
-             "stream_options": {"include_usage": True},
-             "frequency_penalty": self._settings["frequency_penalty"],
-             "presence_penalty": self._settings["presence_penalty"],
-             "seed": self._settings["seed"],
-             "temperature": self._settings["temperature"],
-             "top_p": self._settings["top_p"],
-             "max_tokens": self._settings["max_tokens"],
-             "max_completion_tokens": self._settings["max_completion_tokens"],
-         }
-
-         params.update(self._settings["extra"])
-
-         chunks = await self._client.chat.completions.create(**params)
-         return chunks
-
-     async def _stream_chat_completions(
-         self, context: OpenAILLMContext
-     ) -> AsyncStream[ChatCompletionChunk]:
-         self.logger.debug(f"Generating chat: {context.get_messages_for_logging()}")
-
-         messages: List[ChatCompletionMessageParam] = context.get_messages()
-
-         # base64 encode any images
-         for message in messages:
-             if message.get("mime_type") == "image/jpeg":
-                 encoded_image = base64.b64encode(message["data"].getvalue()).decode("utf-8")
-                 text = message["content"]
-                 message["content"] = [
-                     {"type": "text", "text": text},
-                     {
-                         "type": "image_url",
-                         "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
-                     },
-                 ]
-                 del message["data"]
-                 del message["mime_type"]
-
-         chunks = await self.get_chat_completions(context, messages)
-
-         return chunks
-
-     async def _process_context(self, context: OpenAILLMContext):
-         functions_list = []
-         arguments_list = []
-         tool_id_list = []
-         func_idx = 0
-         function_name = ""
-         arguments = ""
-         tool_call_id = ""
-
-         await self.start_ttfb_metrics()
-
-         chunk_stream: AsyncStream[ChatCompletionChunk] = await self._stream_chat_completions(
-             context
-         )
-
-         async for chunk in chunk_stream:
-             if chunk.usage:
-                 tokens = LLMTokenUsage(
-                     prompt_tokens=chunk.usage.prompt_tokens,
-                     completion_tokens=chunk.usage.completion_tokens,
-                     total_tokens=chunk.usage.total_tokens,
-                 )
-                 await self.start_llm_usage_metrics(tokens)
-
-             if chunk.choices is None or len(chunk.choices) == 0:
-                 continue
-
-             await self.stop_ttfb_metrics()
-
-             if not chunk.choices[0].delta:
-                 continue
-
-             if chunk.choices[0].delta.tool_calls:
-                 # We're streaming the LLM response to enable the fastest response times.
-                 # For text, we just yield each chunk as we receive it and count on consumers
-                 # to do whatever coalescing they need (eg. to pass full sentences to TTS)
-                 #
-                 # If the LLM is a function call, we'll do some coalescing here.
-                 # If the response contains a function name, we'll yield a frame to tell consumers
-                 # that they can start preparing to call the function with that name.
-                 # We accumulate all the arguments for the rest of the streamed response, then when
-                 # the response is done, we package up all the arguments and the function name and
-                 # yield a frame containing the function name and the arguments.
-
-                 tool_call = chunk.choices[0].delta.tool_calls[0]
-                 if tool_call.index != func_idx:
-                     functions_list.append(function_name)
-                     arguments_list.append(arguments)
-                     tool_id_list.append(tool_call_id)
-                     function_name = ""
-                     arguments = ""
-                     tool_call_id = ""
-                     func_idx += 1
-                 if tool_call.function and tool_call.function.name:
-                     function_name += tool_call.function.name
-                     tool_call_id = tool_call.id
-                 if tool_call.function and tool_call.function.arguments:
-                     # Keep iterating through the response to collect all the argument fragments
-                     arguments += tool_call.function.arguments
-             elif chunk.choices[0].delta.content:
-                 await self.push_frame(LLMTextFrame(chunk.choices[0].delta.content))
-
-         # if we got a function name and arguments, check to see if it's a function with
-         # a registered handler. If so, run the registered callback, save the result to
-         # the context, and re-prompt to get a chat answer. If we don't have a registered
-         # handler, raise an exception.
-         if function_name and arguments:
-             # added to the list as last function name and arguments not added to the list
-             functions_list.append(function_name)
-             arguments_list.append(arguments)
-             tool_id_list.append(tool_call_id)
-
-             for index, (function_name, arguments, tool_id) in enumerate(
-                 zip(functions_list, arguments_list, tool_id_list), start=1
-             ):
-                 if self.has_function(function_name):
-                     run_llm = False
-                     arguments = json.loads(arguments)
-                     await self.call_function(
-                         context=context,
-                         function_name=function_name,
-                         arguments=arguments,
-                         tool_call_id=tool_id,
-                         run_llm=run_llm,
-                     )
-                 else:
-                     raise OpenAIUnhandledFunctionException(
-                         f"The LLM tried to call a function named '{function_name}', but there isn't a callback registered for that function."
-                     )
-
-     async def process_frame(self, frame: Frame, direction: FrameDirection):
-         await super().process_frame(frame, direction)
-
-         context = None
-         if isinstance(frame, OpenAILLMContextFrame):
-             context: OpenAILLMContext = frame.context
-         elif isinstance(frame, LLMMessagesFrame):
-             context = OpenAILLMContext.from_messages(frame.messages)
-         elif isinstance(frame, VisionImageRawFrame):
-             context = OpenAILLMContext()
-             context.add_image_frame_message(
-                 format=frame.format, size=frame.size, image=frame.image, text=frame.text
-             )
-         elif isinstance(frame, LLMUpdateSettingsFrame):
-             await self._update_settings(frame.settings)
-         else:
-             await self.push_frame(frame, direction)
-
-         if context:
-             try:
-                 await self.push_frame(LLMFullResponseStartFrame())
-                 await self.start_processing_metrics()
-                 await self._process_context(context)
-             except httpx.TimeoutException:
-                 await self._call_event_handler("on_completion_timeout")
-             finally:
-                 await self.stop_processing_metrics()
-                 await self.push_frame(LLMFullResponseEndFrame())
-
-
- @dataclass
- class OpenAIContextAggregatorPair:
-     _user: "OpenAIUserContextAggregator"
-     _assistant: "OpenAIAssistantContextAggregator"
-
-     def user(self) -> "OpenAIUserContextAggregator":
-         return self._user
-
-     def assistant(self) -> "OpenAIAssistantContextAggregator":
-         return self._assistant
-
-
- class OpenAILLMService(BaseOpenAILLMService):
-     def __init__(
-         self,
-         *,
-         model: str = "gpt-4o",
-         params: BaseOpenAILLMService.InputParams = BaseOpenAILLMService.InputParams(),
-         **kwargs,
-     ):
-         super().__init__(model=model, params=params, **kwargs)
-
-     @staticmethod
-     def create_context_aggregator(
-         context: OpenAILLMContext, *, assistant_expect_stripped_words: bool = True
-     ) -> OpenAIContextAggregatorPair:
-         user = OpenAIUserContextAggregator(context)
-         assistant = OpenAIAssistantContextAggregator(
-             context, expect_stripped_words=assistant_expect_stripped_words
-         )
-         return OpenAIContextAggregatorPair(_user=user, _assistant=assistant)
-
-
- class OpenAIImageGenService(ImageGenService):
-     def __init__(
-         self,
-         *,
-         api_key: str,
-         aiohttp_session: aiohttp.ClientSession,
-         image_size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"],
-         model: str = "dall-e-3",
-     ):
-         super().__init__()
-         self.set_model_name(model)
-         self._image_size = image_size
-         self._client = AsyncOpenAI(api_key=api_key)
-         self._aiohttp_session = aiohttp_session
-
-     async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
-         logger.debug(f"Generating image from prompt: {prompt}")
-
-         image = await self._client.images.generate(
-             prompt=prompt, model=self.model_name, n=1, size=self._image_size
-         )
-
-         image_url = image.data[0].url
-
-         if not image_url:
-             logger.error(f"{self} No image provided in response: {image}")
-             yield ErrorFrame("Image generation failed")
-             return
-
-         # Load the image from the url
-         async with self._aiohttp_session.get(image_url) as response:
-             image_stream = io.BytesIO(await response.content.read())
-             image = Image.open(image_stream)
-             frame = URLImageRawFrame(image_url, image.tobytes(), image.size, image.format)
-             yield frame
-
-
- class OpenAISTTService(BaseWhisperSTTService):
-     """OpenAI Whisper speech-to-text service.
-
-     Uses OpenAI's Whisper API to convert audio to text. Requires an OpenAI API key
-     set via the api_key parameter or OPENAI_API_KEY environment variable.
-
-     Args:
-         model: Whisper model to use. Defaults to "whisper-1".
-         api_key: OpenAI API key. Defaults to None.
-         base_url: API base URL. Defaults to None.
-         language: Language of the audio input. Defaults to English.
-         prompt: Optional text to guide the model's style or continue a previous segment.
-         temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
-         **kwargs: Additional arguments passed to BaseWhisperSTTService.
-     """
-
-     def __init__(
-         self,
-         *,
-         model: str = "whisper-1",
-         api_key: Optional[str] = None,
-         base_url: Optional[str] = None,
-         language: Optional[Language] = Language.EN,
-         prompt: Optional[str] = None,
-         temperature: Optional[float] = None,
-         **kwargs,
-     ):
-         super().__init__(
-             model=model,
-             api_key=api_key,
-             base_url=base_url,
-             language=language,
-             prompt=prompt,
-             temperature=temperature,
-             **kwargs,
-         )
-
-     async def _transcribe(self, audio: bytes) -> Transcription:
-         assert self._language is not None  # Assigned in the BaseWhisperSTTService class
-
-         # Build kwargs dict with only set parameters
-         kwargs = {
-             "file": ("audio.wav", audio, "audio/wav"),
-             "model": self.model_name,
-             "language": self._language,
-         }
-
-         if self._prompt is not None:
-             kwargs["prompt"] = self._prompt
-
-         if self._temperature is not None:
-             kwargs["temperature"] = self._temperature
-
-         return await self._client.audio.transcriptions.create(**kwargs)
-
-
- class OpenAITTSService(TTSService):
-     """OpenAI Text-to-Speech service that generates audio from text.
-
-     This service uses the OpenAI TTS API to generate PCM-encoded audio at 24kHz.
-     When using with DailyTransport, configure the sample rate in DailyParams
-     as shown below:
-
-     DailyParams(
-         audio_out_enabled=True,
-         audio_out_sample_rate=24_000,
-     )
-
-     Args:
-         api_key: OpenAI API key. Defaults to None.
-         voice: Voice ID to use. Defaults to "alloy".
-         model: TTS model to use ("tts-1" or "tts-1-hd"). Defaults to "tts-1".
-         sample_rate: Output audio sample rate in Hz. Defaults to 24000.
-         **kwargs: Additional keyword arguments passed to TTSService.
-
-     The service returns PCM-encoded audio at the specified sample rate.
-     """
-
-     OPENAI_SAMPLE_RATE = 24000  # OpenAI TTS always outputs at 24kHz
-
-     def __init__(
-         self,
-         *,
-         api_key: Optional[str] = None,
-         voice: str = "alloy",
-         model: Literal["tts-1", "tts-1-hd"] = "tts-1",
-         sample_rate: Optional[int] = None,
-         **kwargs,
-     ):
-         if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
-             logger.warning(
-                 f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. "
-                 f"Current rate of {self.sample_rate}Hz may cause issues."
-             )
-         super().__init__(sample_rate=sample_rate, **kwargs)
-
-         self.set_model_name(model)
-         self.set_voice(voice)
-
-         self._client = AsyncOpenAI(api_key=api_key)
-
-     def can_generate_metrics(self) -> bool:
-         return True
-
-     async def set_model(self, model: str):
-         logger.info(f"Switching TTS model to: [{model}]")
-         self.set_model_name(model)
-
-     async def start(self, frame: StartFrame):
-         await super().start(frame)
-         if self.sample_rate != self.OPENAI_SAMPLE_RATE:
-             logger.warning(
-                 f"OpenAI TTS requires {self.OPENAI_SAMPLE_RATE}Hz sample rate. "
-                 f"Current rate of {self.sample_rate}Hz may cause issues."
-             )
-
-     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
-         logger.debug(f"Generating TTS: [{text}]")
-         try:
-             await self.start_ttfb_metrics()
-
-             async with self._client.audio.speech.with_streaming_response.create(
-                 input=text or " ",  # Text must contain at least one character
-                 model=self.model_name,
-                 voice=VALID_VOICES[self._voice_id],
-                 response_format="pcm",
-             ) as r:
-                 if r.status_code != 200:
-                     error = await r.text()
-                     logger.error(
-                         f"{self} error getting audio (status: {r.status_code}, error: {error})"
-                     )
-                     yield ErrorFrame(
-                         f"Error getting audio (status: {r.status_code}, error: {error})"
-                     )
-                     return
-
-                 await self.start_tts_usage_metrics(text)
-
-                 CHUNK_SIZE = 1024
-
-                 yield TTSStartedFrame()
-                 async for chunk in r.iter_bytes(CHUNK_SIZE):
-                     if len(chunk) > 0:
-                         await self.stop_ttfb_metrics()
-                         frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
-                         yield frame
-                 yield TTSStoppedFrame()
-         except BadRequestError as e:
-             logger.exception(f"{self} error generating TTS: {e}")
-
-
- # internal use only -- todo: refactor
- @dataclass
- class OpenAIImageMessageFrame(Frame):
-     user_image_raw_frame: UserImageRawFrame
-     text: Optional[str] = None
-
-
- class OpenAIUserContextAggregator(LLMUserContextAggregator):
-     def __init__(self, context: OpenAILLMContext, **kwargs):
-         super().__init__(context=context, **kwargs)
-
-     async def process_frame(self, frame, direction):
-         await super().process_frame(frame, direction)
-         # Our parent method has already called push_frame(). So we can't interrupt the
-         # flow here and we don't need to call push_frame() ourselves.
-         try:
-             if isinstance(frame, UserImageRequestFrame):
-                 # The LLM sends a UserImageRequestFrame upstream. Cache any context provided with
-                 # that frame so we can use it when we assemble the image message in the assistant
-                 # context aggregator.
-                 if frame.context:
-                     if isinstance(frame.context, str):
-                         self._context._user_image_request_context[frame.user_id] = frame.context
-                     else:
-                         self.logger.error(
-                             f"Unexpected UserImageRequestFrame context type: {type(frame.context)}"
-                         )
-                         del self._context._user_image_request_context[frame.user_id]
-                 else:
-                     if frame.user_id in self._context._user_image_request_context:
-                         del self._context._user_image_request_context[frame.user_id]
-             elif isinstance(frame, UserImageRawFrame):
-                 # Push a new OpenAIImageMessageFrame with the text context we cached
-                 # downstream to be handled by our assistant context aggregator. This is
-                 # necessary so that we add the message to the context in the right order.
-                 text = self._context._user_image_request_context.get(frame.user_id) or ""
-                 if text:
-                     del self._context._user_image_request_context[frame.user_id]
-                 frame = OpenAIImageMessageFrame(user_image_raw_frame=frame, text=text)
-                 await self.push_frame(frame)
-         except Exception as e:
-             self.logger.error(f"Error processing frame: {e}")
-
-
- class OpenAIAssistantContextAggregator(LLMAssistantContextAggregator):
-     def __init__(self, context: OpenAILLMContext, **kwargs):
-         super().__init__(context=context, **kwargs)
-         self._function_calls_in_progress = {}
-         self._function_call_result = None
-         self._pending_image_frame_message = None
-
-     async def process_frame(self, frame, direction):
-         await super().process_frame(frame, direction)
-         # See note above about not calling push_frame() here.
-         if isinstance(frame, StartInterruptionFrame):
-             self._function_calls_in_progress.clear()
-             self._function_call_finished = None
-         elif isinstance(frame, FunctionCallInProgressFrame):
-             self.logger.debug(f"FunctionCallInProgressFrame: {frame}")
-             self._function_calls_in_progress[frame.tool_call_id] = frame
-         elif isinstance(frame, FunctionCallResultFrame):
-             self.logger.debug(f"FunctionCallResultFrame: {frame}")
-             if frame.tool_call_id in self._function_calls_in_progress:
-                 del self._function_calls_in_progress[frame.tool_call_id]
-                 self._function_call_result = frame
-                 # TODO-CB: Kwin wants us to refactor this out of here but I REFUSE
-                 await self.push_aggregation()
-             else:
-                 self.logger.warning(
-                     "FunctionCallResultFrame tool_call_id does not match any function call in progress"
-                 )
-                 self._function_call_result = None
-         elif isinstance(frame, OpenAIImageMessageFrame):
-             self._pending_image_frame_message = frame
-             await self.push_aggregation()
-
-     async def push_aggregation(self):
-         if not (
-             self._aggregation or self._function_call_result or self._pending_image_frame_message
-         ):
-             return
-
-         run_llm = False
-         properties: Optional[FunctionCallResultProperties] = None
-
-         aggregation = self._aggregation.strip()
-         self.reset()
-
-         try:
-             if aggregation:
-                 self._context.add_message({"role": "assistant", "content": aggregation})
-
-             if self._function_call_result:
-                 frame = self._function_call_result
-                 properties = frame.properties
-                 self._function_call_result = None
-                 if frame.result:
-                     self._context.add_message(
-                         {
-                             "role": "assistant",
-                             "tool_calls": [
-                                 {
-                                     "id": frame.tool_call_id,
-                                     "function": {
-                                         "name": frame.function_name,
-                                         "arguments": json.dumps(frame.arguments),
-                                     },
-                                     "type": "function",
-                                 }
-                             ],
-                         }
-                     )
-                     self._context.add_message(
-                         {
-                             "role": "tool",
-                             "content": json.dumps(frame.result),
-                             "tool_call_id": frame.tool_call_id,
-                         }
-                     )
-                     if properties and properties.run_llm is not None:
-                         # If the tool call result has a run_llm property, use it
-                         run_llm = properties.run_llm
-                     else:
-                         # Default behavior is to run the LLM if there are no function calls in progress
-                         run_llm = not bool(self._function_calls_in_progress)
-
-             if self._pending_image_frame_message:
-                 frame = self._pending_image_frame_message
-                 self._pending_image_frame_message = None
-                 self._context.add_image_frame_message(
-                     format=frame.user_image_raw_frame.format,
-                     size=frame.user_image_raw_frame.size,
-                     image=frame.user_image_raw_frame.image,
-                     text=frame.text,
-                 )
-                 run_llm = True
-
-             if run_llm:
-                 await self.push_context_frame(FrameDirection.UPSTREAM)
-
-             # Emit the on_context_updated callback once the function call result is added to the context
-             if properties and properties.on_context_updated is not None:
-                 await properties.on_context_updated()
-
-             # Push context frame
-             await self.push_context_frame()
-
-             # Push timestamp frame with current time
-             timestamp_frame = OpenAILLMContextAssistantTimestampFrame(timestamp=time_now_iso8601())
-             await self.push_frame(timestamp_frame)
-
-         except Exception as e:
-             self.logger.error(f"Error processing frame: {e}")