PyPI - dv-pipecat-ai - Versions diffs - 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show

{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
pipecat/__init__.py +17 -0
pipecat/adapters/base_llm_adapter.py +36 -1
pipecat/adapters/schemas/direct_function.py +296 -0
pipecat/adapters/schemas/function_schema.py +15 -6
pipecat/adapters/schemas/tools_schema.py +55 -7
pipecat/adapters/services/anthropic_adapter.py +22 -3
pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
pipecat/adapters/services/bedrock_adapter.py +22 -3
pipecat/adapters/services/gemini_adapter.py +16 -3
pipecat/adapters/services/open_ai_adapter.py +17 -2
pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
pipecat/audio/filters/base_audio_filter.py +30 -6
pipecat/audio/filters/koala_filter.py +37 -2
pipecat/audio/filters/krisp_filter.py +59 -6
pipecat/audio/filters/noisereduce_filter.py +37 -0
pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
pipecat/audio/mixers/base_audio_mixer.py +30 -7
pipecat/audio/mixers/soundfile_mixer.py +53 -6
pipecat/audio/resamplers/base_audio_resampler.py +17 -9
pipecat/audio/resamplers/resampy_resampler.py +26 -1
pipecat/audio/resamplers/soxr_resampler.py +32 -1
pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
pipecat/audio/utils.py +194 -1
pipecat/audio/vad/silero.py +60 -3
pipecat/audio/vad/vad_analyzer.py +114 -30
pipecat/clocks/base_clock.py +19 -0
pipecat/clocks/system_clock.py +25 -0
pipecat/extensions/voicemail/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py +707 -0
pipecat/frames/frames.py +590 -156
pipecat/metrics/metrics.py +64 -1
pipecat/observers/base_observer.py +58 -19
pipecat/observers/loggers/debug_log_observer.py +56 -64
pipecat/observers/loggers/llm_log_observer.py +8 -1
pipecat/observers/loggers/transcription_log_observer.py +19 -7
pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
pipecat/observers/turn_tracking_observer.py +26 -1
pipecat/pipeline/base_pipeline.py +5 -7
pipecat/pipeline/base_task.py +52 -9
pipecat/pipeline/parallel_pipeline.py +121 -177
pipecat/pipeline/pipeline.py +129 -20
pipecat/pipeline/runner.py +50 -1
pipecat/pipeline/sync_parallel_pipeline.py +132 -32
pipecat/pipeline/task.py +263 -280
pipecat/pipeline/task_observer.py +85 -34
pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
pipecat/processors/aggregators/gated.py +25 -24
pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
pipecat/processors/aggregators/llm_response.py +398 -89
pipecat/processors/aggregators/openai_llm_context.py +161 -13
pipecat/processors/aggregators/sentence.py +25 -14
pipecat/processors/aggregators/user_response.py +28 -3
pipecat/processors/aggregators/vision_image_frame.py +24 -14
pipecat/processors/async_generator.py +28 -0
pipecat/processors/audio/audio_buffer_processor.py +78 -37
pipecat/processors/consumer_processor.py +25 -6
pipecat/processors/filters/frame_filter.py +23 -0
pipecat/processors/filters/function_filter.py +30 -0
pipecat/processors/filters/identity_filter.py +17 -2
pipecat/processors/filters/null_filter.py +24 -1
pipecat/processors/filters/stt_mute_filter.py +56 -21
pipecat/processors/filters/wake_check_filter.py +46 -3
pipecat/processors/filters/wake_notifier_filter.py +21 -3
pipecat/processors/frame_processor.py +488 -131
pipecat/processors/frameworks/langchain.py +38 -3
pipecat/processors/frameworks/rtvi.py +719 -34
pipecat/processors/gstreamer/pipeline_source.py +41 -0
pipecat/processors/idle_frame_processor.py +26 -3
pipecat/processors/logger.py +23 -0
pipecat/processors/metrics/frame_processor_metrics.py +77 -4
pipecat/processors/metrics/sentry.py +42 -4
pipecat/processors/producer_processor.py +34 -14
pipecat/processors/text_transformer.py +22 -10
pipecat/processors/transcript_processor.py +48 -29
pipecat/processors/user_idle_processor.py +31 -21
pipecat/runner/__init__.py +1 -0
pipecat/runner/daily.py +132 -0
pipecat/runner/livekit.py +148 -0
pipecat/runner/run.py +543 -0
pipecat/runner/types.py +67 -0
pipecat/runner/utils.py +515 -0
pipecat/serializers/base_serializer.py +42 -0
pipecat/serializers/exotel.py +17 -6
pipecat/serializers/genesys.py +95 -0
pipecat/serializers/livekit.py +33 -0
pipecat/serializers/plivo.py +16 -15
pipecat/serializers/protobuf.py +37 -1
pipecat/serializers/telnyx.py +18 -17
pipecat/serializers/twilio.py +32 -16
pipecat/services/ai_service.py +5 -3
pipecat/services/anthropic/llm.py +113 -43
pipecat/services/assemblyai/models.py +63 -5
pipecat/services/assemblyai/stt.py +64 -11
pipecat/services/asyncai/__init__.py +0 -0
pipecat/services/asyncai/tts.py +501 -0
pipecat/services/aws/llm.py +185 -111
pipecat/services/aws/stt.py +217 -23
pipecat/services/aws/tts.py +118 -52
pipecat/services/aws/utils.py +101 -5
pipecat/services/aws_nova_sonic/aws.py +82 -64
pipecat/services/aws_nova_sonic/context.py +15 -6
pipecat/services/azure/common.py +10 -2
pipecat/services/azure/image.py +32 -0
pipecat/services/azure/llm.py +9 -7
pipecat/services/azure/stt.py +65 -2
pipecat/services/azure/tts.py +154 -23
pipecat/services/cartesia/stt.py +125 -8
pipecat/services/cartesia/tts.py +102 -38
pipecat/services/cerebras/llm.py +15 -23
pipecat/services/deepgram/stt.py +19 -11
pipecat/services/deepgram/tts.py +36 -0
pipecat/services/deepseek/llm.py +14 -23
pipecat/services/elevenlabs/tts.py +330 -64
pipecat/services/fal/image.py +43 -0
pipecat/services/fal/stt.py +48 -10
pipecat/services/fireworks/llm.py +14 -21
pipecat/services/fish/tts.py +109 -9
pipecat/services/gemini_multimodal_live/__init__.py +1 -0
pipecat/services/gemini_multimodal_live/events.py +83 -2
pipecat/services/gemini_multimodal_live/file_api.py +189 -0
pipecat/services/gemini_multimodal_live/gemini.py +218 -21
pipecat/services/gladia/config.py +17 -10
pipecat/services/gladia/stt.py +82 -36
pipecat/services/google/frames.py +40 -0
pipecat/services/google/google.py +2 -0
pipecat/services/google/image.py +39 -2
pipecat/services/google/llm.py +176 -58
pipecat/services/google/llm_openai.py +26 -4
pipecat/services/google/llm_vertex.py +37 -15
pipecat/services/google/rtvi.py +41 -0
pipecat/services/google/stt.py +65 -17
pipecat/services/google/test-google-chirp.py +45 -0
pipecat/services/google/tts.py +390 -19
pipecat/services/grok/llm.py +8 -6
pipecat/services/groq/llm.py +8 -6
pipecat/services/groq/stt.py +13 -9
pipecat/services/groq/tts.py +40 -0
pipecat/services/hamsa/__init__.py +9 -0
pipecat/services/hamsa/stt.py +241 -0
pipecat/services/heygen/__init__.py +5 -0
pipecat/services/heygen/api.py +281 -0
pipecat/services/heygen/client.py +620 -0
pipecat/services/heygen/video.py +338 -0
pipecat/services/image_service.py +5 -3
pipecat/services/inworld/__init__.py +1 -0
pipecat/services/inworld/tts.py +592 -0
pipecat/services/llm_service.py +127 -45
pipecat/services/lmnt/tts.py +80 -7
pipecat/services/mcp_service.py +85 -44
pipecat/services/mem0/memory.py +42 -13
pipecat/services/minimax/tts.py +74 -15
pipecat/services/mistral/__init__.py +0 -0
pipecat/services/mistral/llm.py +185 -0
pipecat/services/moondream/vision.py +55 -10
pipecat/services/neuphonic/tts.py +275 -48
pipecat/services/nim/llm.py +8 -6
pipecat/services/ollama/llm.py +27 -7
pipecat/services/openai/base_llm.py +54 -16
pipecat/services/openai/image.py +30 -0
pipecat/services/openai/llm.py +7 -5
pipecat/services/openai/stt.py +13 -9
pipecat/services/openai/tts.py +42 -10
pipecat/services/openai_realtime_beta/azure.py +11 -9
pipecat/services/openai_realtime_beta/context.py +7 -5
pipecat/services/openai_realtime_beta/events.py +10 -7
pipecat/services/openai_realtime_beta/openai.py +37 -18
pipecat/services/openpipe/llm.py +30 -24
pipecat/services/openrouter/llm.py +9 -7
pipecat/services/perplexity/llm.py +15 -19
pipecat/services/piper/tts.py +26 -12
pipecat/services/playht/tts.py +227 -65
pipecat/services/qwen/llm.py +8 -6
pipecat/services/rime/tts.py +128 -17
pipecat/services/riva/stt.py +160 -22
pipecat/services/riva/tts.py +67 -2
pipecat/services/sambanova/llm.py +19 -17
pipecat/services/sambanova/stt.py +14 -8
pipecat/services/sarvam/tts.py +60 -13
pipecat/services/simli/video.py +82 -21
pipecat/services/soniox/__init__.py +0 -0
pipecat/services/soniox/stt.py +398 -0
pipecat/services/speechmatics/stt.py +29 -17
pipecat/services/stt_service.py +47 -11
pipecat/services/tavus/video.py +94 -25
pipecat/services/together/llm.py +8 -6
pipecat/services/tts_service.py +77 -53
pipecat/services/ultravox/stt.py +46 -43
pipecat/services/vision_service.py +5 -3
pipecat/services/websocket_service.py +12 -11
pipecat/services/whisper/base_stt.py +58 -12
pipecat/services/whisper/stt.py +69 -58
pipecat/services/xtts/tts.py +59 -2
pipecat/sync/base_notifier.py +19 -0
pipecat/sync/event_notifier.py +24 -0
pipecat/tests/utils.py +73 -5
pipecat/transcriptions/language.py +24 -0
pipecat/transports/base_input.py +112 -8
pipecat/transports/base_output.py +235 -13
pipecat/transports/base_transport.py +119 -0
pipecat/transports/local/audio.py +76 -0
pipecat/transports/local/tk.py +84 -0
pipecat/transports/network/fastapi_websocket.py +174 -15
pipecat/transports/network/small_webrtc.py +383 -39
pipecat/transports/network/webrtc_connection.py +214 -8
pipecat/transports/network/websocket_client.py +171 -1
pipecat/transports/network/websocket_server.py +147 -9
pipecat/transports/services/daily.py +792 -70
pipecat/transports/services/helpers/daily_rest.py +122 -129
pipecat/transports/services/livekit.py +339 -4
pipecat/transports/services/tavus.py +273 -38
pipecat/utils/asyncio/task_manager.py +92 -186
pipecat/utils/base_object.py +83 -1
pipecat/utils/network.py +2 -0
pipecat/utils/string.py +114 -58
pipecat/utils/text/base_text_aggregator.py +44 -13
pipecat/utils/text/base_text_filter.py +46 -0
pipecat/utils/text/markdown_text_filter.py +70 -14
pipecat/utils/text/pattern_pair_aggregator.py +18 -14
pipecat/utils/text/simple_text_aggregator.py +43 -2
pipecat/utils/text/skip_tags_aggregator.py +21 -13
pipecat/utils/time.py +36 -0
pipecat/utils/tracing/class_decorators.py +32 -7
pipecat/utils/tracing/conversation_context_provider.py +12 -2
pipecat/utils/tracing/service_attributes.py +80 -64
pipecat/utils/tracing/service_decorators.py +48 -21
pipecat/utils/tracing/setup.py +13 -7
pipecat/utils/tracing/turn_context_provider.py +12 -2
pipecat/utils/tracing/turn_trace_observer.py +27 -0
pipecat/utils/utils.py +14 -14
dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
pipecat/examples/daily_runner.py +0 -64
pipecat/examples/run.py +0 -265
pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
pipecat/utils/asyncio/watchdog_event.py +0 -42
pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
pipecat/utils/asyncio/watchdog_queue.py +0 -48
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
/pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/services/rime/tts.py CHANGED Viewed

@@ -4,10 +4,16 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""Rime text-to-speech service implementations.
+This module provides both WebSocket and HTTP-based text-to-speech services
+using Rime's API for streaming and batch audio synthesis.
+"""
 import base64
 import json
 import uuid
-from typing import AsyncGenerator, Optional
+from typing import Any, AsyncGenerator, Mapping, Optional
 import aiohttp
 from loguru import logger
@@ -33,7 +39,8 @@ from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator
 from pipecat.utils.tracing.service_decorators import traced_tts
 try:
-    import websockets
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Rime, you need to `pip install pipecat-ai[rime]`.")
@@ -47,7 +54,7 @@ def language_to_rime_language(language: Language) -> str:
         language: The pipecat Language enum value.
     Returns:
-        str: Three-letter language code used by Rime (e.g., 'eng' for English).
+        Three-letter language code used by Rime (e.g., 'eng' for English).
     """
     LANGUAGE_MAP = {
         Language.DE: "ger",
@@ -67,7 +74,15 @@ class RimeTTSService(AudioContextWordTTSService):
     """
     class InputParams(BaseModel):
-        """Configuration parameters for Rime TTS service."""
+        """Configuration parameters for Rime TTS service.
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed_alpha: Speech speed multiplier. Defaults to 1.0.
+            reduce_latency: Whether to reduce latency at potential quality cost.
+            pause_between_brackets: Whether to add pauses between bracketed content.
+            phonemize_between_brackets: Whether to phonemize bracketed content.
+        """
         language: Optional[Language] = Language.EN
         speed_alpha: Optional[float] = 1.0
@@ -85,6 +100,7 @@ class RimeTTSService(AudioContextWordTTSService):
         sample_rate: Optional[int] = None,
         params: Optional[InputParams] = None,
         text_aggregator: Optional[BaseTextAggregator] = None,
+        aggregate_sentences: Optional[bool] = True,
         **kwargs,
     ):
         """Initialize Rime TTS service.
@@ -96,10 +112,13 @@ class RimeTTSService(AudioContextWordTTSService):
             model: Model ID to use for synthesis.
             sample_rate: Audio sample rate in Hz.
             params: Additional configuration parameters.
+            text_aggregator: Custom text aggregator for processing input text.
+            aggregate_sentences: Whether to aggregate sentences within the TTSService.
+            **kwargs: Additional arguments passed to parent class.
         """
         # Initialize with parent class settings for proper frame handling
         super().__init__(
-            aggregate_sentences=True,
+            aggregate_sentences=aggregate_sentences,
             push_text_frames=False,
             push_stop_frames=True,
             pause_frame_processing=True,
@@ -135,17 +154,43 @@ class RimeTTSService(AudioContextWordTTSService):
         self._cumulative_time = 0  # Accumulates time across messages
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as Rime service supports metrics generation.
+        """
         return True
     def language_to_service_language(self, language: Language) -> str | None:
-        """Convert pipecat language to Rime language code."""
+        """Convert pipecat language to Rime language code.
+        Args:
+            language: The language to convert.
+        Returns:
+            The Rime-specific language code, or None if not supported.
+        """
         return language_to_rime_language(language)
     async def set_model(self, model: str):
-        """Update the TTS model."""
+        """Update the TTS model.
+        Args:
+            model: The model name to use for synthesis.
+        """
         self._model = model
         await super().set_model(model)
+    async def _update_settings(self, settings: Mapping[str, Any]):
+        """Update service settings and reconnect if voice changed."""
+        prev_voice = self._voice_id
+        await super()._update_settings(settings)
+        if not prev_voice == self._voice_id:
+            self._settings["speaker"] = self._voice_id
+            logger.info(f"Switching TTS voice to: [{self._voice_id}]")
+            await self._disconnect()
+            await self._connect()
     def _build_msg(self, text: str = "") -> dict:
         """Build JSON message for Rime API."""
         return {"text": text, "contextId": self._context_id}
@@ -159,18 +204,30 @@ class RimeTTSService(AudioContextWordTTSService):
         return {"operation": "eos"}
     async def start(self, frame: StartFrame):
-        """Start the service and establish websocket connection."""
+        """Start the service and establish websocket connection.
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         self._settings["samplingRate"] = self.sample_rate
         await self._connect()
     async def stop(self, frame: EndFrame):
-        """Stop the service and close connection."""
+        """Stop the service and close connection.
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._disconnect()
     async def cancel(self, frame: CancelFrame):
-        """Cancel current operation and clean up."""
+        """Cancel current operation and clean up.
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._disconnect()
@@ -192,13 +249,13 @@ class RimeTTSService(AudioContextWordTTSService):
     async def _connect_websocket(self):
         """Connect to Rime websocket API with configured settings."""
         try:
-            if self._websocket and self._websocket.open:
+            if self._websocket and self._websocket.state is State.OPEN:
                 return
             params = "&".join(f"{k}={v}" for k, v in self._settings.items())
             url = f"{self._url}?{params}"
             headers = {"Authorization": f"Bearer {self._api_key}"}
-            self._websocket = await websockets.connect(url, extra_headers=headers)
+            self._websocket = await websocket_connect(url, additional_headers=headers)
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -261,6 +318,7 @@ class RimeTTSService(AudioContextWordTTSService):
         return word_pairs
     async def flush_audio(self):
+        """Flush any pending audio synthesis."""
         if not self._context_id or not self._websocket:
             return
@@ -310,7 +368,12 @@ class RimeTTSService(AudioContextWordTTSService):
                 self._context_id = None
     async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
-        """Push frame and handle end-of-turn conditions."""
+        """Push frame and handle end-of-turn conditions.
+        Args:
+            frame: The frame to push.
+            direction: The direction to push the frame.
+        """
         await super().push_frame(frame, direction)
         if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
             if isinstance(frame, TTSStoppedFrame):
@@ -318,17 +381,17 @@ class RimeTTSService(AudioContextWordTTSService):
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
-        """Generate speech from text.
+        """Generate speech from text using Rime's streaming API.
         Args:
             text: The text to convert to speech.
         Yields:
-            Frames containing audio data and timing information.
+            Frame: Audio frames containing the synthesized speech.
         """
         logger.debug(f"{self}: Generating TTS [{text}]")
         try:
-            if not self._websocket or self._websocket.closed:
+            if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()
             try:
@@ -354,7 +417,24 @@ class RimeTTSService(AudioContextWordTTSService):
 class RimeHttpTTSService(TTSService):
+    """Rime HTTP-based text-to-speech service.
+    Provides text-to-speech synthesis using Rime's HTTP API for batch processing.
+    Suitable for use cases where streaming is not required.
+    """
     class InputParams(BaseModel):
+        """Configuration parameters for Rime HTTP TTS service.
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            pause_between_brackets: Whether to add pauses between bracketed content.
+            phonemize_between_brackets: Whether to phonemize bracketed content.
+            inline_speed_alpha: Inline speed control markup.
+            speed_alpha: Speech speed multiplier. Defaults to 1.0.
+            reduce_latency: Whether to reduce latency at potential quality cost.
+        """
         language: Optional[Language] = Language.EN
         pause_between_brackets: Optional[bool] = False
         phonemize_between_brackets: Optional[bool] = False
@@ -373,6 +453,17 @@ class RimeHttpTTSService(TTSService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize Rime HTTP TTS service.
+        Args:
+            api_key: Rime API key for authentication.
+            voice_id: ID of the voice to use.
+            aiohttp_session: Shared aiohttp session for HTTP requests.
+            model: Model ID to use for synthesis.
+            sample_rate: Audio sample rate in Hz.
+            params: Additional configuration parameters.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
         params = params or RimeHttpTTSService.InputParams()
@@ -396,14 +487,34 @@ class RimeHttpTTSService(TTSService):
             self._settings["inlineSpeedAlpha"] = params.inline_speed_alpha
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as Rime HTTP service supports metrics generation.
+        """
         return True
     def language_to_service_language(self, language: Language) -> str | None:
-        """Convert pipecat language to Rime language code."""
+        """Convert pipecat language to Rime language code.
+        Args:
+            language: The language to convert.
+        Returns:
+            The Rime-specific language code, or None if not supported.
+        """
         return language_to_rime_language(language)
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Rime's HTTP API.
+        Args:
+            text: The text to synthesize into speech.
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
         headers = {

pipecat/services/riva/stt.py CHANGED Viewed

@@ -4,7 +4,10 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""NVIDIA Riva Speech-to-Text service implementations for real-time and batch transcription."""
 import asyncio
+from concurrent.futures import CancelledError as FuturesCancelledError
 from typing import AsyncGenerator, List, Mapping, Optional
 from loguru import logger
@@ -21,7 +24,6 @@ from pipecat.frames.frames import (
 )
 from pipecat.services.stt_service import SegmentedSTTService, STTService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
 from pipecat.utils.time import time_now_iso8601
 from pipecat.utils.tracing.service_decorators import traced_stt
@@ -87,7 +89,20 @@ def language_to_riva_language(language: Language) -> Optional[str]:
 class RivaSTTService(STTService):
+    """Real-time speech-to-text service using NVIDIA Riva streaming ASR.
+    Provides real-time transcription capabilities using NVIDIA's Riva ASR models
+    through streaming recognition. Supports interim results and continuous audio
+    processing for low-latency applications.
+    """
     class InputParams(BaseModel):
+        """Configuration parameters for Riva STT service.
+        Parameters:
+            language: Target language for transcription. Defaults to EN_US.
+        """
         language: Optional[Language] = Language.EN_US
     def __init__(
@@ -103,6 +118,16 @@ class RivaSTTService(STTService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Riva STT service.
+        Args:
+            api_key: NVIDIA API key for authentication.
+            server: Riva server address. Defaults to NVIDIA Cloud Function endpoint.
+            model_function_map: Mapping containing 'function_id' and 'model_name' for the ASR model.
+            sample_rate: Audio sample rate in Hz. If None, uses pipeline default.
+            params: Additional configuration parameters for Riva.
+            **kwargs: Additional arguments passed to STTService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
         params = params or RivaSTTService.InputParams()
@@ -142,15 +167,29 @@ class RivaSTTService(STTService):
         self._asr_service = riva.client.ASRService(auth)
-        self._queue = asyncio.Queue()
+        self._queue = None
         self._config = None
         self._thread_task = None
         self._response_task = None
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            False - this service does not support metrics generation.
+        """
         return False
     async def set_model(self, model: str):
+        """Set the ASR model for transcription.
+        Args:
+            model: Model name to set.
+        Note:
+            Model cannot be changed after initialization. Use model_function_map
+            parameter in constructor instead.
+        """
         logger.warning(f"Cannot set model after initialization. Set model and function id like so:")
         example = {"function_id": "<UUID>", "model_name": "<model_name>"}
         logger.warning(
@@ -158,6 +197,11 @@ class RivaSTTService(STTService):
         )
     async def start(self, frame: StartFrame):
+        """Start the Riva STT service and initialize streaming configuration.
+        Args:
+            frame: StartFrame indicating pipeline start.
+        """
         await super().start(frame)
         if self._config:
@@ -194,19 +238,30 @@ class RivaSTTService(STTService):
         riva.client.add_custom_configuration_to_config(config, self._custom_configuration)
         self._config = config
+        self._queue = asyncio.Queue()
         if not self._thread_task:
             self._thread_task = self.create_task(self._thread_task_handler())
         if not self._response_task:
-            self._response_queue = WatchdogQueue(self.task_manager)
+            self._response_queue = asyncio.Queue()
             self._response_task = self.create_task(self._response_task_handler())
     async def stop(self, frame: EndFrame):
+        """Stop the Riva STT service and clean up resources.
+        Args:
+            frame: EndFrame indicating pipeline stop.
+        """
         await super().stop(frame)
         await self._stop_tasks()
     async def cancel(self, frame: CancelFrame):
+        """Cancel the Riva STT service operation.
+        Args:
+            frame: CancelFrame indicating operation cancellation.
+        """
         await super().cancel(frame)
         await self._stop_tasks()
@@ -225,7 +280,6 @@ class RivaSTTService(STTService):
             streaming_config=self._config,
         )
         for response in responses:
-            self.reset_watchdog()
             if not response.results:
                 continue
             asyncio.run_coroutine_threadsafe(
@@ -260,7 +314,7 @@ class RivaSTTService(STTService):
                     await self.push_frame(
                         TranscriptionFrame(
                             transcript,
-                            "",
+                            self._user_id,
                             time_now_iso8601(),
                             self._language_code,
                             result=result,
@@ -275,7 +329,7 @@ class RivaSTTService(STTService):
                     await self.push_frame(
                         InterimTranscriptionFrame(
                             transcript,
-                            "",
+                            self._user_id,
                             time_now_iso8601(),
                             self._language_code,
                             result=result,
@@ -289,18 +343,43 @@ class RivaSTTService(STTService):
             self._response_queue.task_done()
     async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
+        """Process audio data for speech-to-text transcription.
+        Args:
+            audio: Raw audio bytes to transcribe.
+        Yields:
+            None - transcription results are pushed to the pipeline via frames.
+        """
         await self.start_ttfb_metrics()
         await self.start_processing_metrics()
         await self._queue.put(audio)
         yield None
     def __next__(self) -> bytes:
+        """Get the next audio chunk for Riva processing.
+        Returns:
+            Audio bytes from the queue.
+        Raises:
+            StopIteration: When the thread is no longer running.
+        """
         if not self._thread_running:
             raise StopIteration
-        future = asyncio.run_coroutine_threadsafe(self._queue.get(), self.get_event_loop())
-        return future.result()
+        try:
+            future = asyncio.run_coroutine_threadsafe(self._queue.get(), self.get_event_loop())
+            return future.result()
+        except FuturesCancelledError:
+            raise StopIteration
     def __iter__(self):
+        """Return iterator for audio chunk processing.
+        Returns:
+            Self as iterator.
+        """
         return self
@@ -310,17 +389,20 @@ class RivaSegmentedSTTService(SegmentedSTTService):
     By default, this service uses NVIDIA's Riva Canary ASR API to perform speech-to-text
     transcription on audio segments. It inherits from SegmentedSTTService to handle
     audio buffering and speech detection.
-    Args:
-        api_key: NVIDIA API key for authentication
-        server: Riva server address (defaults to NVIDIA Cloud Function endpoint)
-        model_function_map: Mapping of model name and its corresponding NVIDIA Cloud Function ID
-        sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate
-        params: Additional configuration parameters for Riva
-        **kwargs: Additional arguments passed to SegmentedSTTService
     """
     class InputParams(BaseModel):
+        """Configuration parameters for Riva segmented STT service.
+        Parameters:
+            language: Target language for transcription. Defaults to EN_US.
+            profanity_filter: Whether to filter profanity from results.
+            automatic_punctuation: Whether to add automatic punctuation.
+            verbatim_transcripts: Whether to return verbatim transcripts.
+            boosted_lm_words: List of words to boost in language model.
+            boosted_lm_score: Score boost for specified words.
+        """
         language: Optional[Language] = Language.EN_US
         profanity_filter: bool = False
         automatic_punctuation: bool = True
@@ -341,6 +423,16 @@ class RivaSegmentedSTTService(SegmentedSTTService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Riva segmented STT service.
+        Args:
+            api_key: NVIDIA API key for authentication
+            server: Riva server address (defaults to NVIDIA Cloud Function endpoint)
+            model_function_map: Mapping of model name and its corresponding NVIDIA Cloud Function ID
+            sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate
+            params: Additional configuration parameters for Riva
+            **kwargs: Additional arguments passed to SegmentedSTTService
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
         params = params or RivaSegmentedSTTService.InputParams()
@@ -380,7 +472,14 @@ class RivaSegmentedSTTService(SegmentedSTTService):
         self._settings = {"language": self._language_enum}
     def language_to_service_language(self, language: Language) -> Optional[str]:
-        """Convert pipecat Language enum to Riva's language code."""
+        """Convert pipecat Language enum to Riva's language code.
+        Args:
+            language: Language enum value.
+        Returns:
+            Riva language code or None if not supported.
+        """
         return language_to_riva_language(language)
     def _initialize_client(self):
@@ -435,10 +534,23 @@ class RivaSegmentedSTTService(SegmentedSTTService):
         return config
     def can_generate_metrics(self) -> bool:
-        """Indicates whether this service can generate processing metrics."""
+        """Check if this service can generate processing metrics.
+        Returns:
+            True - this service supports metrics generation.
+        """
         return True
     async def set_model(self, model: str):
+        """Set the ASR model for transcription.
+        Args:
+            model: Model name to set.
+        Note:
+            Model cannot be changed after initialization. Use model_function_map
+            parameter in constructor instead.
+        """
         logger.warning(f"Cannot set model after initialization. Set model and function id like so:")
         example = {"function_id": "<UUID>", "model_name": "<model_name>"}
         logger.warning(
@@ -446,13 +558,21 @@ class RivaSegmentedSTTService(SegmentedSTTService):
         )
     async def start(self, frame: StartFrame):
-        """Initialize the service when the pipeline starts."""
+        """Initialize the service when the pipeline starts.
+        Args:
+            frame: StartFrame indicating pipeline start.
+        """
         await super().start(frame)
         self._initialize_client()
         self._config = self._create_recognition_config()
     async def set_language(self, language: Language):
-        """Set the language for the STT service."""
+        """Set the language for the STT service.
+        Args:
+            language: Target language for transcription.
+        """
         logger.info(f"Switching STT language to: [{language}]")
         self._language_enum = language
         self._language = self.language_to_service_language(language) or "en-US"
@@ -520,7 +640,10 @@ class RivaSegmentedSTTService(SegmentedSTTService):
                         if text:
                             logger.debug(f"Transcription: [{text}]")
                             yield TranscriptionFrame(
-                                text, "", time_now_iso8601(), self._language_enum
+                                text,
+                                self._user_id,
+                                time_now_iso8601(),
+                                self._language_enum,
                             )
                             transcription_found = True
@@ -539,7 +662,12 @@ class RivaSegmentedSTTService(SegmentedSTTService):
 class ParakeetSTTService(RivaSTTService):
-    """Deprecated: Use RivaSTTService instead."""
+    """Deprecated speech-to-text service using NVIDIA Parakeet models.
+    .. deprecated:: 0.0.66
+        This class is deprecated. Use `RivaSTTService` instead for equivalent functionality
+        with Parakeet models by specifying the appropriate model_function_map.
+    """
     def __init__(
         self,
@@ -554,6 +682,16 @@ class ParakeetSTTService(RivaSTTService):
         params: Optional[RivaSTTService.InputParams] = None,  # Use parent class's type
         **kwargs,
     ):
+        """Initialize the Parakeet STT service.
+        Args:
+            api_key: NVIDIA API key for authentication.
+            server: Riva server address. Defaults to NVIDIA Cloud Function endpoint.
+            model_function_map: Mapping containing 'function_id' and 'model_name' for Parakeet model.
+            sample_rate: Audio sample rate in Hz. If None, uses pipeline default.
+            params: Additional configuration parameters for Riva.
+            **kwargs: Additional arguments passed to RivaSTTService.
+        """
         super().__init__(
             api_key=api_key,
             server=server,

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl