dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/riva/tts.py
CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""NVIDIA Riva text-to-speech service implementation.
+
+This module provides integration with NVIDIA Riva's TTS services through
+gRPC API for high-quality speech synthesis.
+"""
+
 import asyncio
 import os
 from typing import AsyncGenerator, Mapping, Optional
@@ -37,7 +43,21 @@ RIVA_TTS_TIMEOUT_SECS = 5
 
 
 class RivaTTSService(TTSService):
+    """NVIDIA Riva text-to-speech service.
+
+    Provides high-quality text-to-speech synthesis using NVIDIA Riva's
+    cloud-based TTS models. Supports multiple voices, languages, and
+    configurable quality settings.
+    """
+
     class InputParams(BaseModel):
+        """Input parameters for Riva TTS configuration.
+
+        Parameters:
+            language: Language code for synthesis. Defaults to US English.
+            quality: Audio quality setting (0-100). Defaults to 20.
+        """
+
         language: Optional[Language] = Language.EN_US
         quality: Optional[int] = 20
 
@@ -55,6 +75,17 @@ class RivaTTSService(TTSService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the NVIDIA Riva TTS service.
+
+        Args:
+            api_key: NVIDIA API key for authentication.
+            server: gRPC server endpoint. Defaults to NVIDIA's cloud endpoint.
+            voice_id: Voice model identifier. Defaults to multilingual Ray voice.
+            sample_rate: Audio sample rate. If None, uses service default.
+            model_function_map: Dictionary containing function_id and model_name for the TTS model.
+            params: Additional configuration parameters for TTS synthesis.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
 
         params = params or RivaTTSService.InputParams()
@@ -82,6 +113,13 @@ class RivaTTSService(TTSService):
         )
 
     async def set_model(self, model: str):
+        """Attempt to set the TTS model.
+
+        Note: Model cannot be changed after initialization for Riva service.
+
+        Args:
+            model: The model name to set (operation not supported).
+        """
         logger.warning(f"Cannot set model after initialization. Set model and function id like so:")
         example = {"function_id": "<UUID>", "model_name": "<model_name>"}
         logger.warning(
@@ -90,6 +128,15 @@ class RivaTTSService(TTSService):
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using NVIDIA Riva TTS.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech data.
+        """
+
         def read_audio_responses(queue: asyncio.Queue):
             def add_response(r):
                 asyncio.run_coroutine_threadsafe(queue.put(r), self.get_event_loop())
@@ -121,7 +168,7 @@ class RivaTTSService(TTSService):
             await asyncio.to_thread(read_audio_responses, queue)
 
             # Wait for the thread to start.
-            resp = await asyncio.wait_for(queue.get(), RIVA_TTS_TIMEOUT_SECS)
+            resp = await asyncio.wait_for(queue.get(), timeout=RIVA_TTS_TIMEOUT_SECS)
             while resp:
                 await self.stop_ttfb_metrics()
                 frame = TTSAudioRawFrame(
@@ -130,7 +177,7 @@ class RivaTTSService(TTSService):
                     num_channels=1,
                 )
                 yield frame
-                resp = await asyncio.wait_for(queue.get(), RIVA_TTS_TIMEOUT_SECS)
+                resp = await asyncio.wait_for(queue.get(), timeout=RIVA_TTS_TIMEOUT_SECS)
         except asyncio.TimeoutError:
             logger.error(f"{self} timeout waiting for audio response")
 
@@ -139,6 +186,13 @@ class RivaTTSService(TTSService):
 
 
 class FastPitchTTSService(RivaTTSService):
+    """Deprecated FastPitch TTS service.
+
+    .. deprecated:: 0.0.66
+        This class is deprecated. Use RivaTTSService instead for new implementations.
+        Provides backward compatibility for existing FastPitch TTS integrations.
+    """
+
     def __init__(
         self,
         *,
@@ -153,6 +207,17 @@ class FastPitchTTSService(RivaTTSService):
         params: Optional[RivaTTSService.InputParams] = None,
         **kwargs,
     ):
+        """Initialize the deprecated FastPitch TTS service.
+
+        Args:
+            api_key: NVIDIA API key for authentication.
+            server: gRPC server endpoint. Defaults to NVIDIA's cloud endpoint.
+            voice_id: Voice model identifier. Defaults to Female-1 voice.
+            sample_rate: Audio sample rate. If None, uses service default.
+            model_function_map: Dictionary containing function_id and model_name for FastPitch model.
+            params: Additional configuration parameters for TTS synthesis.
+            **kwargs: Additional arguments passed to parent RivaTTSService.
+        """
         super().__init__(
            api_key=api_key,
            server=server,
|
|
|
20
20
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
|
21
21
|
from pipecat.services.llm_service import FunctionCallFromLLM
|
|
22
22
|
from pipecat.services.openai.llm import OpenAILLMService
|
|
23
|
-
from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
|
|
24
23
|
from pipecat.utils.tracing.service_decorators import traced_llm
|
|
25
24
|
|
|
26
25
|
|
|
@@ -29,12 +28,6 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
|
|
|
29
28
|
|
|
30
29
|
This service extends OpenAILLMService to connect to SambaNova's API endpoint while
|
|
31
30
|
maintaining full compatibility with OpenAI's interface and functionality.
|
|
32
|
-
|
|
33
|
-
Args:
|
|
34
|
-
api_key: The API key for accessing SambaNova API.
|
|
35
|
-
model: The model identifier to use. Defaults to "Llama-4-Maverick-17B-128E-Instruct".
|
|
36
|
-
base_url: The base URL for SambaNova API. Defaults to "https://api.sambanova.ai/v1".
|
|
37
|
-
**kwargs: Additional keyword arguments passed to OpenAILLMService.
|
|
38
31
|
"""
|
|
39
32
|
|
|
40
33
|
def __init__(
|
|
@@ -45,6 +38,14 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
|
|
|
45
38
|
base_url: str = "https://api.sambanova.ai/v1",
|
|
46
39
|
**kwargs: Dict[Any, Any],
|
|
47
40
|
) -> None:
|
|
41
|
+
"""Initialize SambaNova LLM service.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
api_key: The API key for accessing SambaNova API.
|
|
45
|
+
model: The model identifier to use. Defaults to "Llama-4-Maverick-17B-128E-Instruct".
|
|
46
|
+
base_url: The base URL for SambaNova API. Defaults to "https://api.sambanova.ai/v1".
|
|
47
|
+
**kwargs: Additional keyword arguments passed to OpenAILLMService.
|
|
48
|
+
"""
|
|
48
49
|
super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
|
|
49
50
|
|
|
50
51
|
def create_client(
|
|
@@ -66,17 +67,20 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
|
|
|
66
67
|
logger.debug(f"Creating SambaNova client with API {base_url}")
|
|
67
68
|
return super().create_client(api_key, base_url, **kwargs)
|
|
68
69
|
|
|
69
|
-
|
|
70
|
+
def build_chat_completion_params(
|
|
70
71
|
self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
|
|
71
|
-
) ->
|
|
72
|
-
"""
|
|
72
|
+
) -> dict:
|
|
73
|
+
"""Build parameters for SambaNova chat completion request.
|
|
74
|
+
|
|
75
|
+
SambaNova doesn't support some OpenAI parameters like frequency_penalty,
|
|
76
|
+
presence_penalty, and seed.
|
|
73
77
|
|
|
74
78
|
Args:
|
|
75
|
-
context:
|
|
76
|
-
messages: List of chat completion
|
|
79
|
+
context: The LLM context containing tools and configuration.
|
|
80
|
+
messages: List of chat completion messages to send.
|
|
77
81
|
|
|
78
82
|
Returns:
|
|
79
|
-
|
|
83
|
+
Dictionary of parameters for the chat completion request.
|
|
80
84
|
"""
|
|
81
85
|
params = {
|
|
82
86
|
"model": self.model_name,
|
|
@@ -92,9 +96,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
|
|
|
92
96
|
}
|
|
93
97
|
|
|
94
98
|
params.update(self._settings["extra"])
|
|
95
|
-
|
|
96
|
-
chunks = await self._client.chat.completions.create(**params)
|
|
97
|
-
return chunks
|
|
99
|
+
return params
|
|
98
100
|
|
|
99
101
|
@traced_llm # type: ignore
|
|
100
102
|
async def _process_context(self, context: OpenAILLMContext) -> AsyncStream[ChatCompletionChunk]:
|
|
@@ -124,7 +126,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
|
|
|
124
126
|
context
|
|
125
127
|
)
|
|
126
128
|
|
|
127
|
-
async for chunk in
|
|
129
|
+
async for chunk in chunk_stream:
|
|
128
130
|
if chunk.usage:
|
|
129
131
|
tokens = LLMTokenUsage(
|
|
130
132
|
prompt_tokens=chunk.usage.prompt_tokens,
|
|
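The refactor above moves request construction into build_chat_completion_params(), which now returns the parameter dict instead of awaiting the completion itself, and _process_context() iterates the stream directly now that the WatchdogAsyncIterator wrapper is removed. A hedged instantiation sketch using only the defaults stated in the docstring; the API key is a placeholder:

    from pipecat.services.sambanova.llm import SambaNovaLLMService

    llm = SambaNovaLLMService(
        api_key="...",  # placeholder SambaNova API key
        model="Llama-4-Maverick-17B-128E-Instruct",  # docstring default
        base_url="https://api.sambanova.ai/v1",  # docstring default
    )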
pipecat/services/sambanova/stt.py
CHANGED

@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""SambaNova's Speech-to-Text service implementation for real-time transcription."""
+
 from typing import Any, Optional
 
 from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription
@@ -12,16 +14,9 @@ from pipecat.transcriptions.language import Language
 
 class SambaNovaSTTService(BaseWhisperSTTService):  # type: ignore
     """SambaNova Whisper speech-to-text service.
+
     Uses SambaNova's Whisper API to convert audio to text.
     Requires a SambaNova API key set via the api_key parameter or SAMBANOVA_API_KEY environment variable.
-    Args:
-        model: Whisper model to use. Defaults to "Whisper-Large-v3".
-        api_key: SambaNova API key. Defaults to None.
-        base_url: API base URL. Defaults to "https://api.sambanova.ai/v1".
-        language: Language of the audio input. Defaults to English.
-        prompt: Optional text to guide the model's style or continue a previous segment.
-        temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
-        **kwargs: Additional arguments passed to `pipecat.services.whisper.base_stt.BaseWhisperSTTService`.
     """
 
     def __init__(
@@ -35,6 +30,17 @@ class SambaNovaSTTService(BaseWhisperSTTService):  # type: ignore
         temperature: Optional[float] = None,
         **kwargs: Any,
     ) -> None:
+        """Initialize SambaNova STT service.
+
+        Args:
+            model: Whisper model to use. Defaults to "Whisper-Large-v3".
+            api_key: SambaNova API key. Defaults to None.
+            base_url: API base URL. Defaults to "https://api.sambanova.ai/v1".
+            language: Language of the audio input. Defaults to English.
+            prompt: Optional text to guide the model's style or continue a previous segment.
+            temperature: Optional sampling temperature between 0 and 1. Defaults to 0.0.
+            **kwargs: Additional arguments passed to `pipecat.services.whisper.base_stt.BaseWhisperSTTService`.
+        """
         super().__init__(
             model=model,
             api_key=api_key,
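A short usage sketch for the STT service, assuming only the constructor arguments listed in the new docstring; the key is a placeholder and can instead come from SAMBANOVA_API_KEY:

    from pipecat.services.sambanova.stt import SambaNovaSTTService
    from pipecat.transcriptions.language import Language

    stt = SambaNovaSTTService(
        api_key="...",  # placeholder; or set SAMBANOVA_API_KEY
        model="Whisper-Large-v3",  # docstring default
        language=Language.EN,  # docstring default is English
    )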
pipecat/services/sarvam/tts.py
CHANGED
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Sarvam AI text-to-speech service implementation."""
+
 import base64
 from typing import AsyncGenerator, Optional
 
@@ -25,7 +27,14 @@ from pipecat.utils.tracing.service_decorators import traced_tts
 
 
 def language_to_sarvam_language(language: Language) -> Optional[str]:
-    """Convert Pipecat Language enum to Sarvam AI language codes.
+    """Convert Pipecat Language enum to Sarvam AI language codes.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding Sarvam AI language code, or None if not supported.
+    """
     LANGUAGE_MAP = {
         Language.BN: "bn-IN",  # Bengali
         Language.EN: "en-IN",  # English (India)
@@ -50,17 +59,8 @@ class SarvamTTSService(TTSService):
    Indian languages. Provides control over voice characteristics like pitch, pace,
    and loudness.
 
-
-
-        voice_id: Speaker voice ID (e.g., "anushka", "meera").
-        model: TTS model to use ("bulbul:v1" or "bulbul:v2").
-        aiohttp_session: Shared aiohttp session for making requests.
-        base_url: Sarvam AI API base URL.
-        sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000).
-        params: Additional voice and preprocessing parameters.
-
-    Example:
-        ```python
+    Example::
+
         tts = SarvamTTSService(
             api_key="your-api-key",
             voice_id="anushka",
@@ -72,10 +72,19 @@ class SarvamTTSService(TTSService):
                 pace=1.2
             )
         )
-        ```
     """
 
     class InputParams(BaseModel):
+        """Input parameters for Sarvam TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English (India).
+            pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
+            pace: Speech pace multiplier (0.3 to 3.0). Defaults to 1.0.
+            loudness: Volume multiplier (0.1 to 3.0). Defaults to 1.0.
+            enable_preprocessing: Whether to enable text preprocessing. Defaults to False.
+        """
+
         language: Optional[Language] = Language.EN
         pitch: Optional[float] = Field(default=0.0, ge=-0.75, le=0.75)
         pace: Optional[float] = Field(default=1.0, ge=0.3, le=3.0)
@@ -94,6 +103,18 @@ class SarvamTTSService(TTSService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Sarvam TTS service.
+
+        Args:
+            api_key: Sarvam AI API subscription key.
+            voice_id: Speaker voice ID (e.g., "anushka", "meera"). Defaults to "anushka".
+            model: TTS model to use ("bulbul:v1" or "bulbul:v2"). Defaults to "bulbul:v2".
+            aiohttp_session: Shared aiohttp session for making requests.
+            base_url: Sarvam AI API base URL. Defaults to "https://api.sarvam.ai".
+            sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000). If None, uses default.
+            params: Additional voice and preprocessing parameters. If None, uses defaults.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
 
         params = params or SarvamTTSService.InputParams()
@@ -116,17 +137,43 @@ class SarvamTTSService(TTSService):
         self.set_voice(voice_id)
 
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Sarvam service supports metrics generation.
+        """
         return True
 
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Sarvam AI language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Sarvam AI-specific language code, or None if not supported.
+        """
         return language_to_sarvam_language(language)
 
     async def start(self, frame: StartFrame):
+        """Start the Sarvam TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         self._settings["sample_rate"] = self.sample_rate
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Sarvam AI's API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
 
         try:
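The InputParams model above enforces its ranges with pydantic Field bounds, so out-of-range values fail at construction. A sketch of valid values; the loudness range is taken from the docstring since its Field line falls outside the hunk shown:

    from pipecat.services.sarvam.tts import SarvamTTSService

    params = SarvamTTSService.InputParams(
        pitch=0.25,    # validated range: -0.75 to 0.75
        pace=1.2,      # validated range: 0.3 to 3.0
        loudness=1.0,  # docstring range: 0.1 to 3.0
    )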
pipecat/services/simli/video.py
CHANGED
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Simli video service for real-time avatar generation."""
+
 import asyncio
 
 import numpy as np
@@ -16,9 +18,10 @@ from pipecat.frames.frames import (
     OutputImageRawFrame,
     StartInterruptionFrame,
     TTSAudioRawFrame,
+    TTSStoppedFrame,
+    UserStartedSpeakingFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, StartFrame
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 
 try:
     from av.audio.frame import AudioFrame
@@ -31,39 +34,68 @@ except ModuleNotFoundError as e:
 
 
 class SimliVideoService(FrameProcessor):
+    """Simli video service for real-time avatar generation.
+
+    Provides real-time avatar video generation by processing audio frames
+    and producing synchronized video output using the Simli API. Handles
+    audio resampling, video frame processing, and connection management.
+    """
+
     def __init__(
         self,
         simli_config: SimliConfig,
         use_turn_server: bool = False,
         latency_interval: int = 0,
+        simli_url: str = "https://api.simli.ai",
+        is_trinity_avatar: bool = False,
     ):
+        """Initialize the Simli video service.
+
+        Args:
+            simli_config: Configuration object for Simli client settings.
+            use_turn_server: Whether to use TURN server for connection. Defaults to False.
+            latency_interval: Latency interval setting for sending health checks to check the latency to Simli Servers. Defaults to 0.
+            simli_url: URL of the simli servers. Can be changed for custom deployments of enterprise users.
+            is_trinity_avatar: boolean to tell simli client that this is a Trinity avatar which reduces latency when using Trinity.
+
+        """
         super().__init__()
-        self.
+        self._initialized = False
+        simli_config.maxIdleTime += 5
+        simli_config.maxSessionLength += 5
+        self._simli_client = SimliClient(
+            simli_config,
+            use_turn_server,
+            latency_interval,
+            simliURL=simli_url,
+        )
 
-        self._pipecat_resampler_event = asyncio.Event()
         self._pipecat_resampler: AudioResampler = None
+        self._pipecat_resampler_event = asyncio.Event()
         self._simli_resampler = AudioResampler("s16", "mono", 16000)
 
-        self._initialized = False
         self._audio_task: asyncio.Task = None
         self._video_task: asyncio.Task = None
+        self._is_trinity_avatar = is_trinity_avatar
+        self._previously_interrupted = is_trinity_avatar
+        self._audio_buffer = bytearray()
 
     async def _start_connection(self):
+        """Start the connection to Simli service and begin processing tasks."""
         if not self._initialized:
             await self._simli_client.Initialize()
             self._initialized = True
 
         # Create task to consume and process audio and video
-
-
-
-        if not self._video_task:
-            self._video_task = self.create_task(self._consume_and_process_video())
+        await self._simli_client.sendSilence()
+        self._audio_task = self.create_task(self._consume_and_process_audio())
+        self._video_task = self.create_task(self._consume_and_process_video())
 
     async def _consume_and_process_audio(self):
+        """Consume audio frames from Simli and push them downstream."""
         await self._pipecat_resampler_event.wait()
         audio_iterator = self._simli_client.getAudioStreamIterator()
-        async for audio_frame in
+        async for audio_frame in audio_iterator:
             resampled_frames = self._pipecat_resampler.resample(audio_frame)
             for resampled_frame in resampled_frames:
                 audio_array = resampled_frame.to_ndarray()
@@ -78,9 +110,10 @@ class SimliVideoService(FrameProcessor):
                 )
 
     async def _consume_and_process_video(self):
+        """Consume video frames from Simli and convert them to output frames."""
         await self._pipecat_resampler_event.wait()
         video_iterator = self._simli_client.getVideoStreamIterator(targetFormat="rgb24")
-        async for video_frame in
+        async for video_frame in video_iterator:
             # Process the video frame
             convertedFrame: OutputImageRawFrame = OutputImageRawFrame(
                 image=video_frame.to_rgb().to_image().tobytes(),
@@ -91,9 +124,14 @@ class SimliVideoService(FrameProcessor):
             await self.push_frame(convertedFrame)
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle Simli video generation.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
         await super().process_frame(frame, direction)
         if isinstance(frame, StartFrame):
-            await self.push_frame(frame, direction)
             await self._start_connection()
         elif isinstance(frame, TTSAudioRawFrame):
             # Send audio frame to Simli
@@ -112,21 +150,44 @@ class SimliVideoService(FrameProcessor):
 
                 resampled_frames = self._simli_resampler.resample(old_frame)
                 for resampled_frame in resampled_frames:
-
-
-
+                    audioBytes = resampled_frame.to_ndarray().astype(np.int16).tobytes()
+                    if self._previously_interrupted:
+                        self._audio_buffer.extend(audioBytes)
+                        if len(self._audio_buffer) >= 128000:
+                            try:
+                                for flushFrame in self._simli_resampler.resample(None):
+                                    self._audio_buffer.extend(
+                                        flushFrame.to_ndarray().astype(np.int16).tobytes()
+                                    )
+                            finally:
+                                await self._simli_client.playImmediate(self._audio_buffer)
+                                self._previously_interrupted = False
+                                self._audio_buffer = bytearray()
+                    else:
+                        await self._simli_client.send(audioBytes)
+                return
             except Exception as e:
                 logger.exception(f"{self} exception: {e}")
+        elif isinstance(frame, TTSStoppedFrame):
+            try:
+                if self._previously_interrupted and len(self._audio_buffer) > 0:
+                    await self._simli_client.playImmediate(self._audio_buffer)
+                    self._previously_interrupted = False
+                    self._audio_buffer = bytearray()
+            except Exception as e:
+                logger.exception(f"{self} exception: {e}")
+            return
         elif isinstance(frame, (EndFrame, CancelFrame)):
             await self._stop()
-
-
-
-
-
-
+        elif isinstance(frame, (StartInterruptionFrame, UserStartedSpeakingFrame)):
+            if not self._previously_interrupted:
+                await self._simli_client.clearBuffer()
+            self._previously_interrupted = self._is_trinity_avatar
+
+        await self.push_frame(frame, direction)
 
     async def _stop(self):
+        """Stop the Simli client and cancel processing tasks."""
         await self._simli_client.stop()
         if self._audio_task:
             await self.cancel_task(self._audio_task)
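For Trinity avatars, the new path above buffers post-interruption audio and flushes it with playImmediate() once 128000 bytes accumulate, which is 4 seconds of 16 kHz, 16-bit mono audio (16000 samples/s x 2 bytes/sample). A construction sketch; the SimliConfig import location and its fields are assumptions based on the simli SDK this module wraps:

    from simli import SimliConfig  # assumed import; the diff only shows the type annotation
    from pipecat.services.simli.video import SimliVideoService

    video = SimliVideoService(
        simli_config=SimliConfig(apiKey="...", faceId="..."),  # placeholder credentials
        simli_url="https://api.simli.ai",  # default; override for enterprise deployments
        is_trinity_avatar=True,  # enables the buffered playImmediate() path
    )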