dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic; see the registry's advisory page for more details.

Files changed (244)
  1. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
  2. dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
  3. pipecat/__init__.py +17 -0
  4. pipecat/adapters/base_llm_adapter.py +36 -1
  5. pipecat/adapters/schemas/direct_function.py +296 -0
  6. pipecat/adapters/schemas/function_schema.py +15 -6
  7. pipecat/adapters/schemas/tools_schema.py +55 -7
  8. pipecat/adapters/services/anthropic_adapter.py +22 -3
  9. pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
  10. pipecat/adapters/services/bedrock_adapter.py +22 -3
  11. pipecat/adapters/services/gemini_adapter.py +16 -3
  12. pipecat/adapters/services/open_ai_adapter.py +17 -2
  13. pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
  14. pipecat/audio/filters/base_audio_filter.py +30 -6
  15. pipecat/audio/filters/koala_filter.py +37 -2
  16. pipecat/audio/filters/krisp_filter.py +59 -6
  17. pipecat/audio/filters/noisereduce_filter.py +37 -0
  18. pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
  19. pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
  20. pipecat/audio/mixers/base_audio_mixer.py +30 -7
  21. pipecat/audio/mixers/soundfile_mixer.py +53 -6
  22. pipecat/audio/resamplers/base_audio_resampler.py +17 -9
  23. pipecat/audio/resamplers/resampy_resampler.py +26 -1
  24. pipecat/audio/resamplers/soxr_resampler.py +32 -1
  25. pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
  26. pipecat/audio/utils.py +194 -1
  27. pipecat/audio/vad/silero.py +60 -3
  28. pipecat/audio/vad/vad_analyzer.py +114 -30
  29. pipecat/clocks/base_clock.py +19 -0
  30. pipecat/clocks/system_clock.py +25 -0
  31. pipecat/extensions/voicemail/__init__.py +0 -0
  32. pipecat/extensions/voicemail/voicemail_detector.py +707 -0
  33. pipecat/frames/frames.py +590 -156
  34. pipecat/metrics/metrics.py +64 -1
  35. pipecat/observers/base_observer.py +58 -19
  36. pipecat/observers/loggers/debug_log_observer.py +56 -64
  37. pipecat/observers/loggers/llm_log_observer.py +8 -1
  38. pipecat/observers/loggers/transcription_log_observer.py +19 -7
  39. pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
  40. pipecat/observers/turn_tracking_observer.py +26 -1
  41. pipecat/pipeline/base_pipeline.py +5 -7
  42. pipecat/pipeline/base_task.py +52 -9
  43. pipecat/pipeline/parallel_pipeline.py +121 -177
  44. pipecat/pipeline/pipeline.py +129 -20
  45. pipecat/pipeline/runner.py +50 -1
  46. pipecat/pipeline/sync_parallel_pipeline.py +132 -32
  47. pipecat/pipeline/task.py +263 -280
  48. pipecat/pipeline/task_observer.py +85 -34
  49. pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
  50. pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
  51. pipecat/processors/aggregators/gated.py +25 -24
  52. pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
  53. pipecat/processors/aggregators/llm_response.py +398 -89
  54. pipecat/processors/aggregators/openai_llm_context.py +161 -13
  55. pipecat/processors/aggregators/sentence.py +25 -14
  56. pipecat/processors/aggregators/user_response.py +28 -3
  57. pipecat/processors/aggregators/vision_image_frame.py +24 -14
  58. pipecat/processors/async_generator.py +28 -0
  59. pipecat/processors/audio/audio_buffer_processor.py +78 -37
  60. pipecat/processors/consumer_processor.py +25 -6
  61. pipecat/processors/filters/frame_filter.py +23 -0
  62. pipecat/processors/filters/function_filter.py +30 -0
  63. pipecat/processors/filters/identity_filter.py +17 -2
  64. pipecat/processors/filters/null_filter.py +24 -1
  65. pipecat/processors/filters/stt_mute_filter.py +56 -21
  66. pipecat/processors/filters/wake_check_filter.py +46 -3
  67. pipecat/processors/filters/wake_notifier_filter.py +21 -3
  68. pipecat/processors/frame_processor.py +488 -131
  69. pipecat/processors/frameworks/langchain.py +38 -3
  70. pipecat/processors/frameworks/rtvi.py +719 -34
  71. pipecat/processors/gstreamer/pipeline_source.py +41 -0
  72. pipecat/processors/idle_frame_processor.py +26 -3
  73. pipecat/processors/logger.py +23 -0
  74. pipecat/processors/metrics/frame_processor_metrics.py +77 -4
  75. pipecat/processors/metrics/sentry.py +42 -4
  76. pipecat/processors/producer_processor.py +34 -14
  77. pipecat/processors/text_transformer.py +22 -10
  78. pipecat/processors/transcript_processor.py +48 -29
  79. pipecat/processors/user_idle_processor.py +31 -21
  80. pipecat/runner/__init__.py +1 -0
  81. pipecat/runner/daily.py +132 -0
  82. pipecat/runner/livekit.py +148 -0
  83. pipecat/runner/run.py +543 -0
  84. pipecat/runner/types.py +67 -0
  85. pipecat/runner/utils.py +515 -0
  86. pipecat/serializers/base_serializer.py +42 -0
  87. pipecat/serializers/exotel.py +17 -6
  88. pipecat/serializers/genesys.py +95 -0
  89. pipecat/serializers/livekit.py +33 -0
  90. pipecat/serializers/plivo.py +16 -15
  91. pipecat/serializers/protobuf.py +37 -1
  92. pipecat/serializers/telnyx.py +18 -17
  93. pipecat/serializers/twilio.py +32 -16
  94. pipecat/services/ai_service.py +5 -3
  95. pipecat/services/anthropic/llm.py +113 -43
  96. pipecat/services/assemblyai/models.py +63 -5
  97. pipecat/services/assemblyai/stt.py +64 -11
  98. pipecat/services/asyncai/__init__.py +0 -0
  99. pipecat/services/asyncai/tts.py +501 -0
  100. pipecat/services/aws/llm.py +185 -111
  101. pipecat/services/aws/stt.py +217 -23
  102. pipecat/services/aws/tts.py +118 -52
  103. pipecat/services/aws/utils.py +101 -5
  104. pipecat/services/aws_nova_sonic/aws.py +82 -64
  105. pipecat/services/aws_nova_sonic/context.py +15 -6
  106. pipecat/services/azure/common.py +10 -2
  107. pipecat/services/azure/image.py +32 -0
  108. pipecat/services/azure/llm.py +9 -7
  109. pipecat/services/azure/stt.py +65 -2
  110. pipecat/services/azure/tts.py +154 -23
  111. pipecat/services/cartesia/stt.py +125 -8
  112. pipecat/services/cartesia/tts.py +102 -38
  113. pipecat/services/cerebras/llm.py +15 -23
  114. pipecat/services/deepgram/stt.py +19 -11
  115. pipecat/services/deepgram/tts.py +36 -0
  116. pipecat/services/deepseek/llm.py +14 -23
  117. pipecat/services/elevenlabs/tts.py +330 -64
  118. pipecat/services/fal/image.py +43 -0
  119. pipecat/services/fal/stt.py +48 -10
  120. pipecat/services/fireworks/llm.py +14 -21
  121. pipecat/services/fish/tts.py +109 -9
  122. pipecat/services/gemini_multimodal_live/__init__.py +1 -0
  123. pipecat/services/gemini_multimodal_live/events.py +83 -2
  124. pipecat/services/gemini_multimodal_live/file_api.py +189 -0
  125. pipecat/services/gemini_multimodal_live/gemini.py +218 -21
  126. pipecat/services/gladia/config.py +17 -10
  127. pipecat/services/gladia/stt.py +82 -36
  128. pipecat/services/google/frames.py +40 -0
  129. pipecat/services/google/google.py +2 -0
  130. pipecat/services/google/image.py +39 -2
  131. pipecat/services/google/llm.py +176 -58
  132. pipecat/services/google/llm_openai.py +26 -4
  133. pipecat/services/google/llm_vertex.py +37 -15
  134. pipecat/services/google/rtvi.py +41 -0
  135. pipecat/services/google/stt.py +65 -17
  136. pipecat/services/google/test-google-chirp.py +45 -0
  137. pipecat/services/google/tts.py +390 -19
  138. pipecat/services/grok/llm.py +8 -6
  139. pipecat/services/groq/llm.py +8 -6
  140. pipecat/services/groq/stt.py +13 -9
  141. pipecat/services/groq/tts.py +40 -0
  142. pipecat/services/hamsa/__init__.py +9 -0
  143. pipecat/services/hamsa/stt.py +241 -0
  144. pipecat/services/heygen/__init__.py +5 -0
  145. pipecat/services/heygen/api.py +281 -0
  146. pipecat/services/heygen/client.py +620 -0
  147. pipecat/services/heygen/video.py +338 -0
  148. pipecat/services/image_service.py +5 -3
  149. pipecat/services/inworld/__init__.py +1 -0
  150. pipecat/services/inworld/tts.py +592 -0
  151. pipecat/services/llm_service.py +127 -45
  152. pipecat/services/lmnt/tts.py +80 -7
  153. pipecat/services/mcp_service.py +85 -44
  154. pipecat/services/mem0/memory.py +42 -13
  155. pipecat/services/minimax/tts.py +74 -15
  156. pipecat/services/mistral/__init__.py +0 -0
  157. pipecat/services/mistral/llm.py +185 -0
  158. pipecat/services/moondream/vision.py +55 -10
  159. pipecat/services/neuphonic/tts.py +275 -48
  160. pipecat/services/nim/llm.py +8 -6
  161. pipecat/services/ollama/llm.py +27 -7
  162. pipecat/services/openai/base_llm.py +54 -16
  163. pipecat/services/openai/image.py +30 -0
  164. pipecat/services/openai/llm.py +7 -5
  165. pipecat/services/openai/stt.py +13 -9
  166. pipecat/services/openai/tts.py +42 -10
  167. pipecat/services/openai_realtime_beta/azure.py +11 -9
  168. pipecat/services/openai_realtime_beta/context.py +7 -5
  169. pipecat/services/openai_realtime_beta/events.py +10 -7
  170. pipecat/services/openai_realtime_beta/openai.py +37 -18
  171. pipecat/services/openpipe/llm.py +30 -24
  172. pipecat/services/openrouter/llm.py +9 -7
  173. pipecat/services/perplexity/llm.py +15 -19
  174. pipecat/services/piper/tts.py +26 -12
  175. pipecat/services/playht/tts.py +227 -65
  176. pipecat/services/qwen/llm.py +8 -6
  177. pipecat/services/rime/tts.py +128 -17
  178. pipecat/services/riva/stt.py +160 -22
  179. pipecat/services/riva/tts.py +67 -2
  180. pipecat/services/sambanova/llm.py +19 -17
  181. pipecat/services/sambanova/stt.py +14 -8
  182. pipecat/services/sarvam/tts.py +60 -13
  183. pipecat/services/simli/video.py +82 -21
  184. pipecat/services/soniox/__init__.py +0 -0
  185. pipecat/services/soniox/stt.py +398 -0
  186. pipecat/services/speechmatics/stt.py +29 -17
  187. pipecat/services/stt_service.py +47 -11
  188. pipecat/services/tavus/video.py +94 -25
  189. pipecat/services/together/llm.py +8 -6
  190. pipecat/services/tts_service.py +77 -53
  191. pipecat/services/ultravox/stt.py +46 -43
  192. pipecat/services/vision_service.py +5 -3
  193. pipecat/services/websocket_service.py +12 -11
  194. pipecat/services/whisper/base_stt.py +58 -12
  195. pipecat/services/whisper/stt.py +69 -58
  196. pipecat/services/xtts/tts.py +59 -2
  197. pipecat/sync/base_notifier.py +19 -0
  198. pipecat/sync/event_notifier.py +24 -0
  199. pipecat/tests/utils.py +73 -5
  200. pipecat/transcriptions/language.py +24 -0
  201. pipecat/transports/base_input.py +112 -8
  202. pipecat/transports/base_output.py +235 -13
  203. pipecat/transports/base_transport.py +119 -0
  204. pipecat/transports/local/audio.py +76 -0
  205. pipecat/transports/local/tk.py +84 -0
  206. pipecat/transports/network/fastapi_websocket.py +174 -15
  207. pipecat/transports/network/small_webrtc.py +383 -39
  208. pipecat/transports/network/webrtc_connection.py +214 -8
  209. pipecat/transports/network/websocket_client.py +171 -1
  210. pipecat/transports/network/websocket_server.py +147 -9
  211. pipecat/transports/services/daily.py +792 -70
  212. pipecat/transports/services/helpers/daily_rest.py +122 -129
  213. pipecat/transports/services/livekit.py +339 -4
  214. pipecat/transports/services/tavus.py +273 -38
  215. pipecat/utils/asyncio/task_manager.py +92 -186
  216. pipecat/utils/base_object.py +83 -1
  217. pipecat/utils/network.py +2 -0
  218. pipecat/utils/string.py +114 -58
  219. pipecat/utils/text/base_text_aggregator.py +44 -13
  220. pipecat/utils/text/base_text_filter.py +46 -0
  221. pipecat/utils/text/markdown_text_filter.py +70 -14
  222. pipecat/utils/text/pattern_pair_aggregator.py +18 -14
  223. pipecat/utils/text/simple_text_aggregator.py +43 -2
  224. pipecat/utils/text/skip_tags_aggregator.py +21 -13
  225. pipecat/utils/time.py +36 -0
  226. pipecat/utils/tracing/class_decorators.py +32 -7
  227. pipecat/utils/tracing/conversation_context_provider.py +12 -2
  228. pipecat/utils/tracing/service_attributes.py +80 -64
  229. pipecat/utils/tracing/service_decorators.py +48 -21
  230. pipecat/utils/tracing/setup.py +13 -7
  231. pipecat/utils/tracing/turn_context_provider.py +12 -2
  232. pipecat/utils/tracing/turn_trace_observer.py +27 -0
  233. pipecat/utils/utils.py +14 -14
  234. dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
  235. pipecat/examples/daily_runner.py +0 -64
  236. pipecat/examples/run.py +0 -265
  237. pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
  238. pipecat/utils/asyncio/watchdog_event.py +0 -42
  239. pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
  240. pipecat/utils/asyncio/watchdog_queue.py +0 -48
  241. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
  242. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
  243. {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
  244. /pipecat/{examples → extensions}/__init__.py +0 -0
@@ -26,6 +26,7 @@ from pydantic import BaseModel, Field
26
26
 
27
27
  from pipecat.adapters.services.anthropic_adapter import AnthropicLLMAdapter
28
28
  from pipecat.frames.frames import (
29
+ ErrorFrame,
29
30
  Frame,
30
31
  FunctionCallCancelFrame,
31
32
  FunctionCallInProgressFrame,
@@ -52,11 +53,10 @@ from pipecat.processors.aggregators.openai_llm_context import (
52
53
  )
53
54
  from pipecat.processors.frame_processor import FrameDirection
54
55
  from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
55
- from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
56
56
  from pipecat.utils.tracing.service_decorators import traced_llm
57
57
 
58
58
  try:
59
- from anthropic import NOT_GIVEN, AsyncAnthropic, NotGiven
59
+ from anthropic import NOT_GIVEN, APITimeoutError, AsyncAnthropic, NotGiven
60
60
  except ModuleNotFoundError as e:
61
61
  logger.error(f"Exception: {e}")
62
62
  logger.error("In order to use Anthropic, you need to `pip install pipecat-ai[anthropic]`.")
@@ -101,13 +101,6 @@ class AnthropicLLMService(LLMService):
101
101
  Provides inference capabilities with Claude models including support for
102
102
  function calling, vision processing, streaming responses, and prompt caching.
103
103
  Can use custom clients like AsyncAnthropicBedrock and AsyncAnthropicVertex.
104
-
105
- Args:
106
- api_key: Anthropic API key for authentication.
107
- model: Model name to use. Defaults to "claude-sonnet-4-20250514".
108
- params: Optional model parameters for inference.
109
- client: Optional custom Anthropic client instance.
110
- **kwargs: Additional arguments passed to parent LLMService.
111
104
  """
112
105
 
113
106
  # Overriding the default adapter to use the Anthropic one.
@@ -139,14 +132,29 @@ class AnthropicLLMService(LLMService):
139
132
  model: str = "claude-sonnet-4-20250514",
140
133
  params: Optional[InputParams] = None,
141
134
  client=None,
135
+ retry_timeout_secs: Optional[float] = 5.0,
136
+ retry_on_timeout: Optional[bool] = False,
142
137
  **kwargs,
143
138
  ):
139
+ """Initialize the Anthropic LLM service.
140
+
141
+ Args:
142
+ api_key: Anthropic API key for authentication.
143
+ model: Model name to use. Defaults to "claude-sonnet-4-20250514".
144
+ params: Optional model parameters for inference.
145
+ client: Optional custom Anthropic client instance.
146
+ retry_timeout_secs: Request timeout in seconds for retry logic.
147
+ retry_on_timeout: Whether to retry the request once if it times out.
148
+ **kwargs: Additional arguments passed to parent LLMService.
149
+ """
144
150
  super().__init__(**kwargs)
145
151
  params = params or AnthropicLLMService.InputParams()
146
152
  self._client = client or AsyncAnthropic(
147
153
  api_key=api_key
148
154
  ) # if the client is provided, use it and remove it, otherwise create a new one
149
155
  self.set_model_name(model)
156
+ self._retry_timeout_secs = retry_timeout_secs
157
+ self._retry_on_timeout = retry_on_timeout
150
158
  self._settings = {
151
159
  "max_tokens": params.max_tokens,
152
160
  "enable_prompt_caching_beta": params.enable_prompt_caching_beta or False,
@@ -164,6 +172,31 @@ class AnthropicLLMService(LLMService):
164
172
  """
165
173
  return True
166
174
 
175
+ async def _create_message_stream(self, api_call, params):
176
+ """Create message stream with optional timeout and retry.
177
+
178
+ Args:
179
+ api_call: The Anthropic API method to call.
180
+ params: Parameters for the API call.
181
+
182
+ Returns:
183
+ Async stream of message events.
184
+ """
185
+ if self._retry_on_timeout:
186
+ try:
187
+ response = await asyncio.wait_for(
188
+ api_call(**params), timeout=self._retry_timeout_secs
189
+ )
190
+ return response
191
+ except (APITimeoutError, asyncio.TimeoutError):
192
+ # Retry, this time without a timeout so we get a response
193
+ logger.debug(f"{self}: Retrying message creation due to timeout")
194
+ response = await api_call(**params)
195
+ return response
196
+ else:
197
+ response = await api_call(**params)
198
+ return response
199
+
167
200
  @property
168
201
  def enable_prompt_caching_beta(self) -> bool:
169
202
  """Check if prompt caching beta feature is enabled.
@@ -247,7 +280,7 @@ class AnthropicLLMService(LLMService):
247
280
 
248
281
  params.update(self._settings["extra"])
249
282
 
250
- response = await api_call(**params)
283
+ response = await self._create_message_stream(api_call, params)
251
284
 
252
285
  await self.stop_ttfb_metrics()
253
286
 
@@ -256,7 +289,7 @@ class AnthropicLLMService(LLMService):
256
289
  json_accumulator = ""
257
290
 
258
291
  function_calls = []
259
- async for event in WatchdogAsyncIterator(response, manager=self.task_manager):
292
+ async for event in response:
260
293
  # Aggregate streaming content, create frames, trigger events
261
294
 
262
295
  if event.type == "content_block_delta":
@@ -344,6 +377,7 @@ class AnthropicLLMService(LLMService):
344
377
  await self._call_event_handler("on_completion_timeout")
345
378
  except Exception as e:
346
379
  self.logger.exception(f"{self} exception: {e}")
380
+ await self.push_error(ErrorFrame(f"{e}"))
347
381
  finally:
348
382
  await self.stop_processing_metrics()
349
383
  await self.push_frame(LLMFullResponseEndFrame())
@@ -425,12 +459,6 @@ class AnthropicLLMContext(OpenAILLMContext):
425
459
  Extends OpenAILLMContext to handle Anthropic-specific features like
426
460
  system messages, prompt caching, and message format conversions.
427
461
  Manages conversation state and message history formatting.
428
-
429
- Args:
430
- messages: Initial list of conversation messages.
431
- tools: Available function calling tools.
432
- tool_choice: Tool selection preference.
433
- system: System message content.
434
462
  """
435
463
 
436
464
  def __init__(
@@ -441,15 +469,25 @@ class AnthropicLLMContext(OpenAILLMContext):
441
469
  *,
442
470
  system: Union[str, NotGiven] = NOT_GIVEN,
443
471
  ):
472
+ """Initialize the Anthropic LLM context.
473
+
474
+ Args:
475
+ messages: Initial list of conversation messages.
476
+ tools: Available function calling tools.
477
+ tool_choice: Tool selection preference.
478
+ system: System message content.
479
+ """
444
480
  super().__init__(messages=messages, tools=tools, tool_choice=tool_choice)
481
+ self.__setup_local()
482
+ self.system = system
445
483
 
484
+ def __setup_local(self):
446
485
  # For beta prompt caching. This is a counter that tracks the number of turns
447
486
  # we've seen above the cache threshold. We reset this when we reset the
448
487
  # messages list. We only care about this number being 0, 1, or 2. But
449
488
  # it's easiest just to treat it as a counter.
450
489
  self.turns_above_cache_threshold = 0
451
-
452
- self.system = system
490
+ return
453
491
 
454
492
  @staticmethod
455
493
  def upgrade_to_anthropic(obj: OpenAILLMContext) -> "AnthropicLLMContext":
@@ -466,6 +504,7 @@ class AnthropicLLMContext(OpenAILLMContext):
466
504
  logger.debug(f"Upgrading to Anthropic: {obj}")
467
505
  if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AnthropicLLMContext):
468
506
  obj.__class__ = AnthropicLLMContext
507
+ obj.__setup_local()
469
508
  obj._restructure_from_openai_messages()
470
509
  return obj
471
510
 
@@ -534,20 +573,37 @@ class AnthropicLLMContext(OpenAILLMContext):
534
573
  Handles text content and function calls for both user and assistant messages.
535
574
 
536
575
  Args:
537
- obj: Message in Anthropic format:
538
- {
539
- "role": "user/assistant",
540
- "content": str | [{"type": "text/tool_use/tool_result", ...}]
541
- }
576
+ obj: Message in Anthropic format.
542
577
 
543
578
  Returns:
544
- List of messages in standard format:
545
- [
579
+ List of messages in standard format.
580
+
581
+ Examples:
582
+ Input Anthropic format::
583
+
546
584
  {
547
- "role": "user/assistant/tool",
548
- "content": [{"type": "text", "text": str}]
585
+ "role": "assistant",
586
+ "content": [
587
+ {"type": "text", "text": "Hello"},
588
+ {"type": "tool_use", "id": "123", "name": "search", "input": {"q": "test"}}
589
+ ]
549
590
  }
550
- ]
591
+
592
+ Output standard format::
593
+
594
+ [
595
+ {"role": "assistant", "content": [{"type": "text", "text": "Hello"}]},
596
+ {
597
+ "role": "assistant",
598
+ "tool_calls": [
599
+ {
600
+ "type": "function",
601
+ "id": "123",
602
+ "function": {"name": "search", "arguments": '{"q": "test"}'}
603
+ }
604
+ ]
605
+ }
606
+ ]
551
607
  """
552
608
  # todo: image format (?)
553
609
  # tool_use
@@ -609,23 +665,37 @@ class AnthropicLLMContext(OpenAILLMContext):
609
665
  Empty text content is converted to "(empty)".
610
666
 
611
667
  Args:
612
- message: Message in standard format:
668
+ message: Message in standard format.
669
+
670
+ Returns:
671
+ Message in Anthropic format.
672
+
673
+ Examples:
674
+ Input standard format::
675
+
613
676
  {
614
- "role": "user/assistant/tool",
615
- "content": str | [{"type": "text", ...}],
616
- "tool_calls": [{"id": str, "function": {"name": str, "arguments": str}}]
677
+ "role": "assistant",
678
+ "tool_calls": [
679
+ {
680
+ "id": "123",
681
+ "function": {"name": "search", "arguments": '{"q": "test"}'}
682
+ }
683
+ ]
617
684
  }
618
685
 
619
- Returns:
620
- Message in Anthropic format:
621
- {
622
- "role": "user/assistant",
623
- "content": str | [
624
- {"type": "text", "text": str} |
625
- {"type": "tool_use", "id": str, "name": str, "input": dict} |
626
- {"type": "tool_result", "tool_use_id": str, "content": str}
627
- ]
628
- }
686
+ Output Anthropic format::
687
+
688
+ {
689
+ "role": "assistant",
690
+ "content": [
691
+ {
692
+ "type": "tool_use",
693
+ "id": "123",
694
+ "name": "search",
695
+ "input": {"q": "test"}
696
+ }
697
+ ]
698
+ }
629
699
  """
630
700
  # todo: image messages (?)
631
701
  if message["role"] == "tool":
@@ -1,10 +1,30 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """AssemblyAI WebSocket API message models and connection parameters.
8
+
9
+ This module defines Pydantic models for handling AssemblyAI's real-time
10
+ transcription WebSocket messages and connection configuration.
11
+ """
12
+
1
13
  from typing import List, Literal, Optional
2
14
 
3
15
  from pydantic import BaseModel, Field
4
16
 
5
17
 
6
18
  class Word(BaseModel):
7
- """Represents a single word in a transcription with timing and confidence."""
19
+ """Represents a single word in a transcription with timing and confidence.
20
+
21
+ Parameters:
22
+ start: Start time of the word in milliseconds.
23
+ end: End time of the word in milliseconds.
24
+ text: The transcribed word text.
25
+ confidence: Confidence score for the word (0.0 to 1.0).
26
+ word_is_final: Whether this word is finalized and won't change.
27
+ """
8
28
 
9
29
  start: int
10
30
  end: int
@@ -14,13 +34,23 @@ class Word(BaseModel):
14
34
 
15
35
 
16
36
  class BaseMessage(BaseModel):
17
- """Base class for all AssemblyAI WebSocket messages."""
37
+ """Base class for all AssemblyAI WebSocket messages.
38
+
39
+ Parameters:
40
+ type: The message type identifier.
41
+ """
18
42
 
19
43
  type: str
20
44
 
21
45
 
22
46
  class BeginMessage(BaseMessage):
23
- """Message sent when a new session begins."""
47
+ """Message sent when a new session begins.
48
+
49
+ Parameters:
50
+ type: Always "Begin" for this message type.
51
+ id: Unique session identifier.
52
+ expires_at: Unix timestamp when the session expires.
53
+ """
24
54
 
25
55
  type: Literal["Begin"] = "Begin"
26
56
  id: str
@@ -28,7 +58,17 @@ class BeginMessage(BaseMessage):
28
58
 
29
59
 
30
60
  class TurnMessage(BaseMessage):
31
- """Message containing transcription data for a turn of speech."""
61
+ """Message containing transcription data for a turn of speech.
62
+
63
+ Parameters:
64
+ type: Always "Turn" for this message type.
65
+ turn_order: Sequential number of this turn in the session.
66
+ turn_is_formatted: Whether the transcript has been formatted.
67
+ end_of_turn: Whether this marks the end of a speaking turn.
68
+ transcript: The transcribed text for this turn.
69
+ end_of_turn_confidence: Confidence score for end-of-turn detection.
70
+ words: List of individual words with timing and confidence data.
71
+ """
32
72
 
33
73
  type: Literal["Turn"] = "Turn"
34
74
  turn_order: int
@@ -40,7 +80,13 @@ class TurnMessage(BaseMessage):
40
80
 
41
81
 
42
82
  class TerminationMessage(BaseMessage):
43
- """Message sent when the session is terminated."""
83
+ """Message sent when the session is terminated.
84
+
85
+ Parameters:
86
+ type: Always "Termination" for this message type.
87
+ audio_duration_seconds: Total duration of audio processed.
88
+ session_duration_seconds: Total duration of the session.
89
+ """
44
90
 
45
91
  type: Literal["Termination"] = "Termination"
46
92
  audio_duration_seconds: float
@@ -52,6 +98,18 @@ AnyMessage = BeginMessage | TurnMessage | TerminationMessage
52
98
 
53
99
 
54
100
  class AssemblyAIConnectionParams(BaseModel):
101
+ """Configuration parameters for AssemblyAI WebSocket connection.
102
+
103
+ Parameters:
104
+ sample_rate: Audio sample rate in Hz. Defaults to 16000.
105
+ encoding: Audio encoding format. Defaults to "pcm_s16le".
106
+ formatted_finals: Whether to enable transcript formatting. Defaults to True.
107
+ word_finalization_max_wait_time: Maximum time to wait for word finalization in milliseconds.
108
+ end_of_turn_confidence_threshold: Confidence threshold for end-of-turn detection.
109
+ min_end_of_turn_silence_when_confident: Minimum silence duration when confident about end-of-turn.
110
+ max_turn_silence: Maximum silence duration before forcing end-of-turn.
111
+ """
112
+
55
113
  sample_rate: int = 16000
56
114
  encoding: Literal["pcm_s16le", "pcm_mulaw"] = "pcm_s16le"
57
115
  formatted_finals: bool = True
@@ -4,6 +4,12 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """AssemblyAI speech-to-text service implementation.
8
+
9
+ This module provides integration with AssemblyAI's real-time speech-to-text
10
+ WebSocket API for streaming audio transcription.
11
+ """
12
+
7
13
  import asyncio
8
14
  import json
9
15
  from typing import Any, AsyncGenerator, Dict
@@ -38,6 +44,7 @@ from .models import (
38
44
 
39
45
  try:
40
46
  import websockets
47
+ from websockets.asyncio.client import connect as websocket_connect
41
48
  except ModuleNotFoundError as e:
42
49
  logger.error(f"Exception: {e}")
43
50
  logger.error('In order to use AssemblyAI, you need to `pip install "pipecat-ai[assemblyai]"`.')
@@ -45,6 +52,13 @@ except ModuleNotFoundError as e:
45
52
 
46
53
 
47
54
  class AssemblyAISTTService(STTService):
55
+ """AssemblyAI real-time speech-to-text service.
56
+
57
+ Provides real-time speech transcription using AssemblyAI's WebSocket API.
58
+ Supports both interim and final transcriptions with configurable parameters
59
+ for audio processing and connection management.
60
+ """
61
+
48
62
  def __init__(
49
63
  self,
50
64
  *,
@@ -55,6 +69,16 @@ class AssemblyAISTTService(STTService):
55
69
  vad_force_turn_endpoint: bool = True,
56
70
  **kwargs,
57
71
  ):
72
+ """Initialize the AssemblyAI STT service.
73
+
74
+ Args:
75
+ api_key: AssemblyAI API key for authentication.
76
+ language: Language code for transcription. Defaults to English (Language.EN).
77
+ api_endpoint_base_url: WebSocket endpoint URL. Defaults to AssemblyAI's streaming endpoint.
78
+ connection_params: Connection configuration parameters. Defaults to AssemblyAIConnectionParams().
79
+ vad_force_turn_endpoint: Whether to force turn endpoint on VAD stop. Defaults to True.
80
+ **kwargs: Additional arguments passed to parent STTService class.
81
+ """
58
82
  self._api_key = api_key
59
83
  self._language = language
60
84
  self._api_endpoint_base_url = api_endpoint_base_url
@@ -75,22 +99,50 @@ class AssemblyAISTTService(STTService):
75
99
  self._chunk_size_bytes = 0
76
100
 
77
101
  def can_generate_metrics(self) -> bool:
102
+ """Check if the service can generate metrics.
103
+
104
+ Returns:
105
+ True if metrics generation is supported.
106
+ """
78
107
  return True
79
108
 
80
109
  async def start(self, frame: StartFrame):
110
+ """Start the speech-to-text service.
111
+
112
+ Args:
113
+ frame: Start frame to begin processing.
114
+ """
81
115
  await super().start(frame)
82
116
  self._chunk_size_bytes = int(self._chunk_size_ms * self._sample_rate * 2 / 1000)
83
117
  await self._connect()
84
118
 
85
119
  async def stop(self, frame: EndFrame):
120
+ """Stop the speech-to-text service.
121
+
122
+ Args:
123
+ frame: End frame to stop processing.
124
+ """
86
125
  await super().stop(frame)
87
126
  await self._disconnect()
88
127
 
89
128
  async def cancel(self, frame: CancelFrame):
129
+ """Cancel the speech-to-text service.
130
+
131
+ Args:
132
+ frame: Cancel frame to abort processing.
133
+ """
90
134
  await super().cancel(frame)
91
135
  await self._disconnect()
92
136
 
93
137
  async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
138
+ """Process audio data for speech-to-text conversion.
139
+
140
+ Args:
141
+ audio: Raw audio bytes to process.
142
+
143
+ Yields:
144
+ None (processing handled via WebSocket messages).
145
+ """
94
146
  self._audio_buffer.extend(audio)
95
147
 
96
148
  while len(self._audio_buffer) >= self._chunk_size_bytes:
@@ -101,6 +153,12 @@ class AssemblyAISTTService(STTService):
101
153
  yield None
102
154
 
103
155
  async def process_frame(self, frame: Frame, direction: FrameDirection):
156
+ """Process frames for VAD and metrics handling.
157
+
158
+ Args:
159
+ frame: Frame to process.
160
+ direction: Direction of frame processing.
161
+ """
104
162
  await super().process_frame(frame, direction)
105
163
  if isinstance(frame, UserStartedSpeakingFrame):
106
164
  await self.start_ttfb_metrics()
@@ -133,9 +191,9 @@ class AssemblyAISTTService(STTService):
133
191
  "Authorization": self._api_key,
134
192
  "User-Agent": f"AssemblyAI/1.0 (integration=Pipecat/{pipecat_version})",
135
193
  }
136
- self._websocket = await websockets.connect(
194
+ self._websocket = await websocket_connect(
137
195
  ws_url,
138
- extra_headers=headers,
196
+ additional_headers=headers,
139
197
  )
140
198
  self._connected = True
141
199
  self._receive_task = self.create_task(self._receive_task_handler())
@@ -161,10 +219,7 @@ class AssemblyAISTTService(STTService):
161
219
  await self._websocket.send(json.dumps({"type": "Terminate"}))
162
220
 
163
221
  try:
164
- await asyncio.wait_for(
165
- self._termination_event.wait(),
166
- timeout=5.0,
167
- )
222
+ await asyncio.wait_for(self._termination_event.wait(), timeout=5.0)
168
223
  except asyncio.TimeoutError:
169
224
  logger.warning("Timed out waiting for termination message from server")
170
225
 
@@ -189,11 +244,9 @@ class AssemblyAISTTService(STTService):
189
244
  try:
190
245
  while self._connected:
191
246
  try:
192
- message = await asyncio.wait_for(self._websocket.recv(), timeout=1.0)
247
+ message = await self._websocket.recv()
193
248
  data = json.loads(message)
194
249
  await self._handle_message(data)
195
- except asyncio.TimeoutError:
196
- self.reset_watchdog()
197
250
  except websockets.exceptions.ConnectionClosedOK:
198
251
  break
199
252
  except Exception as e:
@@ -254,7 +307,7 @@ class AssemblyAISTTService(STTService):
254
307
  await self.push_frame(
255
308
  TranscriptionFrame(
256
309
  message.transcript,
257
- "", # participant
310
+ self._user_id,
258
311
  time_now_iso8601(),
259
312
  self._language,
260
313
  message,
@@ -266,7 +319,7 @@ class AssemblyAISTTService(STTService):
266
319
  await self.push_frame(
267
320
  InterimTranscriptionFrame(
268
321
  message.transcript,
269
- "", # participant
322
+ self._user_id,
270
323
  time_now_iso8601(),
271
324
  self._language,
272
325
  message,
File without changes