PyPI - dv-pipecat-ai - Versions diffs - 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show

{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
pipecat/__init__.py +17 -0
pipecat/adapters/base_llm_adapter.py +36 -1
pipecat/adapters/schemas/direct_function.py +296 -0
pipecat/adapters/schemas/function_schema.py +15 -6
pipecat/adapters/schemas/tools_schema.py +55 -7
pipecat/adapters/services/anthropic_adapter.py +22 -3
pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
pipecat/adapters/services/bedrock_adapter.py +22 -3
pipecat/adapters/services/gemini_adapter.py +16 -3
pipecat/adapters/services/open_ai_adapter.py +17 -2
pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
pipecat/audio/filters/base_audio_filter.py +30 -6
pipecat/audio/filters/koala_filter.py +37 -2
pipecat/audio/filters/krisp_filter.py +59 -6
pipecat/audio/filters/noisereduce_filter.py +37 -0
pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
pipecat/audio/mixers/base_audio_mixer.py +30 -7
pipecat/audio/mixers/soundfile_mixer.py +53 -6
pipecat/audio/resamplers/base_audio_resampler.py +17 -9
pipecat/audio/resamplers/resampy_resampler.py +26 -1
pipecat/audio/resamplers/soxr_resampler.py +32 -1
pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
pipecat/audio/utils.py +194 -1
pipecat/audio/vad/silero.py +60 -3
pipecat/audio/vad/vad_analyzer.py +114 -30
pipecat/clocks/base_clock.py +19 -0
pipecat/clocks/system_clock.py +25 -0
pipecat/extensions/voicemail/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py +707 -0
pipecat/frames/frames.py +590 -156
pipecat/metrics/metrics.py +64 -1
pipecat/observers/base_observer.py +58 -19
pipecat/observers/loggers/debug_log_observer.py +56 -64
pipecat/observers/loggers/llm_log_observer.py +8 -1
pipecat/observers/loggers/transcription_log_observer.py +19 -7
pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
pipecat/observers/turn_tracking_observer.py +26 -1
pipecat/pipeline/base_pipeline.py +5 -7
pipecat/pipeline/base_task.py +52 -9
pipecat/pipeline/parallel_pipeline.py +121 -177
pipecat/pipeline/pipeline.py +129 -20
pipecat/pipeline/runner.py +50 -1
pipecat/pipeline/sync_parallel_pipeline.py +132 -32
pipecat/pipeline/task.py +263 -280
pipecat/pipeline/task_observer.py +85 -34
pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
pipecat/processors/aggregators/gated.py +25 -24
pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
pipecat/processors/aggregators/llm_response.py +398 -89
pipecat/processors/aggregators/openai_llm_context.py +161 -13
pipecat/processors/aggregators/sentence.py +25 -14
pipecat/processors/aggregators/user_response.py +28 -3
pipecat/processors/aggregators/vision_image_frame.py +24 -14
pipecat/processors/async_generator.py +28 -0
pipecat/processors/audio/audio_buffer_processor.py +78 -37
pipecat/processors/consumer_processor.py +25 -6
pipecat/processors/filters/frame_filter.py +23 -0
pipecat/processors/filters/function_filter.py +30 -0
pipecat/processors/filters/identity_filter.py +17 -2
pipecat/processors/filters/null_filter.py +24 -1
pipecat/processors/filters/stt_mute_filter.py +56 -21
pipecat/processors/filters/wake_check_filter.py +46 -3
pipecat/processors/filters/wake_notifier_filter.py +21 -3
pipecat/processors/frame_processor.py +488 -131
pipecat/processors/frameworks/langchain.py +38 -3
pipecat/processors/frameworks/rtvi.py +719 -34
pipecat/processors/gstreamer/pipeline_source.py +41 -0
pipecat/processors/idle_frame_processor.py +26 -3
pipecat/processors/logger.py +23 -0
pipecat/processors/metrics/frame_processor_metrics.py +77 -4
pipecat/processors/metrics/sentry.py +42 -4
pipecat/processors/producer_processor.py +34 -14
pipecat/processors/text_transformer.py +22 -10
pipecat/processors/transcript_processor.py +48 -29
pipecat/processors/user_idle_processor.py +31 -21
pipecat/runner/__init__.py +1 -0
pipecat/runner/daily.py +132 -0
pipecat/runner/livekit.py +148 -0
pipecat/runner/run.py +543 -0
pipecat/runner/types.py +67 -0
pipecat/runner/utils.py +515 -0
pipecat/serializers/base_serializer.py +42 -0
pipecat/serializers/exotel.py +17 -6
pipecat/serializers/genesys.py +95 -0
pipecat/serializers/livekit.py +33 -0
pipecat/serializers/plivo.py +16 -15
pipecat/serializers/protobuf.py +37 -1
pipecat/serializers/telnyx.py +18 -17
pipecat/serializers/twilio.py +32 -16
pipecat/services/ai_service.py +5 -3
pipecat/services/anthropic/llm.py +113 -43
pipecat/services/assemblyai/models.py +63 -5
pipecat/services/assemblyai/stt.py +64 -11
pipecat/services/asyncai/__init__.py +0 -0
pipecat/services/asyncai/tts.py +501 -0
pipecat/services/aws/llm.py +185 -111
pipecat/services/aws/stt.py +217 -23
pipecat/services/aws/tts.py +118 -52
pipecat/services/aws/utils.py +101 -5
pipecat/services/aws_nova_sonic/aws.py +82 -64
pipecat/services/aws_nova_sonic/context.py +15 -6
pipecat/services/azure/common.py +10 -2
pipecat/services/azure/image.py +32 -0
pipecat/services/azure/llm.py +9 -7
pipecat/services/azure/stt.py +65 -2
pipecat/services/azure/tts.py +154 -23
pipecat/services/cartesia/stt.py +125 -8
pipecat/services/cartesia/tts.py +102 -38
pipecat/services/cerebras/llm.py +15 -23
pipecat/services/deepgram/stt.py +19 -11
pipecat/services/deepgram/tts.py +36 -0
pipecat/services/deepseek/llm.py +14 -23
pipecat/services/elevenlabs/tts.py +330 -64
pipecat/services/fal/image.py +43 -0
pipecat/services/fal/stt.py +48 -10
pipecat/services/fireworks/llm.py +14 -21
pipecat/services/fish/tts.py +109 -9
pipecat/services/gemini_multimodal_live/__init__.py +1 -0
pipecat/services/gemini_multimodal_live/events.py +83 -2
pipecat/services/gemini_multimodal_live/file_api.py +189 -0
pipecat/services/gemini_multimodal_live/gemini.py +218 -21
pipecat/services/gladia/config.py +17 -10
pipecat/services/gladia/stt.py +82 -36
pipecat/services/google/frames.py +40 -0
pipecat/services/google/google.py +2 -0
pipecat/services/google/image.py +39 -2
pipecat/services/google/llm.py +176 -58
pipecat/services/google/llm_openai.py +26 -4
pipecat/services/google/llm_vertex.py +37 -15
pipecat/services/google/rtvi.py +41 -0
pipecat/services/google/stt.py +65 -17
pipecat/services/google/test-google-chirp.py +45 -0
pipecat/services/google/tts.py +390 -19
pipecat/services/grok/llm.py +8 -6
pipecat/services/groq/llm.py +8 -6
pipecat/services/groq/stt.py +13 -9
pipecat/services/groq/tts.py +40 -0
pipecat/services/hamsa/__init__.py +9 -0
pipecat/services/hamsa/stt.py +241 -0
pipecat/services/heygen/__init__.py +5 -0
pipecat/services/heygen/api.py +281 -0
pipecat/services/heygen/client.py +620 -0
pipecat/services/heygen/video.py +338 -0
pipecat/services/image_service.py +5 -3
pipecat/services/inworld/__init__.py +1 -0
pipecat/services/inworld/tts.py +592 -0
pipecat/services/llm_service.py +127 -45
pipecat/services/lmnt/tts.py +80 -7
pipecat/services/mcp_service.py +85 -44
pipecat/services/mem0/memory.py +42 -13
pipecat/services/minimax/tts.py +74 -15
pipecat/services/mistral/__init__.py +0 -0
pipecat/services/mistral/llm.py +185 -0
pipecat/services/moondream/vision.py +55 -10
pipecat/services/neuphonic/tts.py +275 -48
pipecat/services/nim/llm.py +8 -6
pipecat/services/ollama/llm.py +27 -7
pipecat/services/openai/base_llm.py +54 -16
pipecat/services/openai/image.py +30 -0
pipecat/services/openai/llm.py +7 -5
pipecat/services/openai/stt.py +13 -9
pipecat/services/openai/tts.py +42 -10
pipecat/services/openai_realtime_beta/azure.py +11 -9
pipecat/services/openai_realtime_beta/context.py +7 -5
pipecat/services/openai_realtime_beta/events.py +10 -7
pipecat/services/openai_realtime_beta/openai.py +37 -18
pipecat/services/openpipe/llm.py +30 -24
pipecat/services/openrouter/llm.py +9 -7
pipecat/services/perplexity/llm.py +15 -19
pipecat/services/piper/tts.py +26 -12
pipecat/services/playht/tts.py +227 -65
pipecat/services/qwen/llm.py +8 -6
pipecat/services/rime/tts.py +128 -17
pipecat/services/riva/stt.py +160 -22
pipecat/services/riva/tts.py +67 -2
pipecat/services/sambanova/llm.py +19 -17
pipecat/services/sambanova/stt.py +14 -8
pipecat/services/sarvam/tts.py +60 -13
pipecat/services/simli/video.py +82 -21
pipecat/services/soniox/__init__.py +0 -0
pipecat/services/soniox/stt.py +398 -0
pipecat/services/speechmatics/stt.py +29 -17
pipecat/services/stt_service.py +47 -11
pipecat/services/tavus/video.py +94 -25
pipecat/services/together/llm.py +8 -6
pipecat/services/tts_service.py +77 -53
pipecat/services/ultravox/stt.py +46 -43
pipecat/services/vision_service.py +5 -3
pipecat/services/websocket_service.py +12 -11
pipecat/services/whisper/base_stt.py +58 -12
pipecat/services/whisper/stt.py +69 -58
pipecat/services/xtts/tts.py +59 -2
pipecat/sync/base_notifier.py +19 -0
pipecat/sync/event_notifier.py +24 -0
pipecat/tests/utils.py +73 -5
pipecat/transcriptions/language.py +24 -0
pipecat/transports/base_input.py +112 -8
pipecat/transports/base_output.py +235 -13
pipecat/transports/base_transport.py +119 -0
pipecat/transports/local/audio.py +76 -0
pipecat/transports/local/tk.py +84 -0
pipecat/transports/network/fastapi_websocket.py +174 -15
pipecat/transports/network/small_webrtc.py +383 -39
pipecat/transports/network/webrtc_connection.py +214 -8
pipecat/transports/network/websocket_client.py +171 -1
pipecat/transports/network/websocket_server.py +147 -9
pipecat/transports/services/daily.py +792 -70
pipecat/transports/services/helpers/daily_rest.py +122 -129
pipecat/transports/services/livekit.py +339 -4
pipecat/transports/services/tavus.py +273 -38
pipecat/utils/asyncio/task_manager.py +92 -186
pipecat/utils/base_object.py +83 -1
pipecat/utils/network.py +2 -0
pipecat/utils/string.py +114 -58
pipecat/utils/text/base_text_aggregator.py +44 -13
pipecat/utils/text/base_text_filter.py +46 -0
pipecat/utils/text/markdown_text_filter.py +70 -14
pipecat/utils/text/pattern_pair_aggregator.py +18 -14
pipecat/utils/text/simple_text_aggregator.py +43 -2
pipecat/utils/text/skip_tags_aggregator.py +21 -13
pipecat/utils/time.py +36 -0
pipecat/utils/tracing/class_decorators.py +32 -7
pipecat/utils/tracing/conversation_context_provider.py +12 -2
pipecat/utils/tracing/service_attributes.py +80 -64
pipecat/utils/tracing/service_decorators.py +48 -21
pipecat/utils/tracing/setup.py +13 -7
pipecat/utils/tracing/turn_context_provider.py +12 -2
pipecat/utils/tracing/turn_trace_observer.py +27 -0
pipecat/utils/utils.py +14 -14
dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
pipecat/examples/daily_runner.py +0 -64
pipecat/examples/run.py +0 -265
pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
pipecat/utils/asyncio/watchdog_event.py +0 -42
pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
pipecat/utils/asyncio/watchdog_queue.py +0 -48
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
/pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/services/aws/stt.py CHANGED Viewed

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""AWS Transcribe Speech-to-Text service implementation.
+This module provides a WebSocket-based connection to AWS Transcribe for real-time
+speech-to-text transcription with support for multiple languages and audio formats.
+"""
 import asyncio
 import json
 import os
@@ -30,6 +36,8 @@ from pipecat.utils.tracing.service_decorators import traced_stt
 try:
     import websockets
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use AWS services, you need to `pip install pipecat-ai[aws]`.")
@@ -37,6 +45,13 @@ except ModuleNotFoundError as e:
 class AWSTranscribeSTTService(STTService):
+    """AWS Transcribe Speech-to-Text service using WebSocket streaming.
+    Provides real-time speech transcription using AWS Transcribe's streaming API.
+    Supports multiple languages, configurable sample rates, and both interim and
+    final transcription results.
+    """
     def __init__(
         self,
         *,
@@ -48,6 +63,17 @@ class AWSTranscribeSTTService(STTService):
         language: Language = Language.EN,
         **kwargs,
     ):
+        """Initialize the AWS Transcribe STT service.
+        Args:
+            api_key: AWS secret access key. If None, uses AWS_SECRET_ACCESS_KEY environment variable.
+            aws_access_key_id: AWS access key ID. If None, uses AWS_ACCESS_KEY_ID environment variable.
+            aws_session_token: AWS session token for temporary credentials. If None, uses AWS_SESSION_TOKEN environment variable.
+            region: AWS region for the service. Defaults to "us-east-1".
+            sample_rate: Audio sample rate in Hz. Must be 8000 or 16000. Defaults to 16000.
+            language: Language for transcription. Defaults to English.
+            **kwargs: Additional arguments passed to parent STTService class.
+        """
         super().__init__(**kwargs)
         self._settings = {
@@ -79,14 +105,28 @@ class AWSTranscribeSTTService(STTService):
         self._receive_task = None
     def get_service_encoding(self, encoding: str) -> str:
-        """Convert internal encoding format to AWS Transcribe format."""
+        """Convert internal encoding format to AWS Transcribe format.
+        Args:
+            encoding: Internal encoding format string.
+        Returns:
+            AWS Transcribe compatible encoding format.
+        """
         encoding_map = {
             "linear16": "pcm",  # AWS expects "pcm" for 16-bit linear PCM
         }
         return encoding_map.get(encoding, encoding)
     async def start(self, frame: StartFrame):
-        """Initialize the connection when the service starts."""
+        """Initialize the connection when the service starts.
+        Args:
+            frame: Start frame signaling service initialization.
+        Raises:
+            RuntimeError: If WebSocket connection cannot be established after retries.
+        """
         await super().start(frame)
         logger.info("Starting AWS Transcribe service...")
         retry_count = 0
@@ -95,7 +135,7 @@ class AWSTranscribeSTTService(STTService):
         while retry_count < max_retries:
             try:
                 await self._connect()
-                if self._ws_client and self._ws_client.open:
+                if self._ws_client and self._ws_client.state is State.OPEN:
                     logger.info("Successfully established WebSocket connection")
                     return
                 logger.warning("WebSocket connection not established after connect")
@@ -108,18 +148,35 @@ class AWSTranscribeSTTService(STTService):
         raise RuntimeError("Failed to establish WebSocket connection after multiple attempts")
     async def stop(self, frame: EndFrame):
+        """Stop the service and disconnect from AWS Transcribe.
+        Args:
+            frame: End frame signaling service shutdown.
+        """
         await super().stop(frame)
         await self._disconnect()
     async def cancel(self, frame: CancelFrame):
+        """Cancel the service and disconnect from AWS Transcribe.
+        Args:
+            frame: Cancel frame signaling service cancellation.
+        """
         await super().cancel(frame)
         await self._disconnect()
     async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
-        """Process audio data and send to AWS Transcribe"""
+        """Process audio data and send to AWS Transcribe.
+        Args:
+            audio: Raw audio bytes to transcribe.
+        Yields:
+            ErrorFrame: If processing fails or connection issues occur.
+        """
         try:
             # Ensure WebSocket is connected
-            if not self._ws_client or not self._ws_client.open:
+            if not self._ws_client or self._ws_client.state is State.CLOSED:
                 logger.debug("WebSocket not connected, attempting to reconnect...")
                 try:
                     await self._connect()
@@ -153,7 +210,7 @@ class AWSTranscribeSTTService(STTService):
     async def _connect(self):
         """Connect to AWS Transcribe with connection state management."""
-        if self._ws_client and self._ws_client.open and self._receive_task:
+        if self._ws_client and self._ws_client.state is State.OPEN and self._receive_task:
             logger.debug(f"{self} Already connected")
             return
@@ -183,7 +240,7 @@ class AWSTranscribeSTTService(STTService):
                 )
                 # Add required headers
-                extra_headers = {
+                additional_headers = {
                     "Origin": "https://localhost",
                     "Sec-WebSocket-Key": websocket_key,
                     "Sec-WebSocket-Version": "13",
@@ -213,9 +270,9 @@ class AWSTranscribeSTTService(STTService):
                 logger.debug(f"{self} Connecting to WebSocket with URL: {presigned_url[:100]}...")
                 # Connect with the required headers and settings
-                self._ws_client = await websockets.connect(
+                self._ws_client = await websocket_connect(
                     presigned_url,
-                    extra_headers=extra_headers,
+                    additional_headers=additional_headers,
                     subprotocols=["mqtt"],
                     ping_interval=None,
                     ping_timeout=None,
@@ -244,7 +301,7 @@ class AWSTranscribeSTTService(STTService):
             self._receive_task = None
         try:
-            if self._ws_client and self._ws_client.open:
+            if self._ws_client and self._ws_client.state is State.OPEN:
                 # Send end-stream message
                 end_stream = {"message-type": "event", "event": "end"}
                 await self._ws_client.send(json.dumps(end_stream))
@@ -255,19 +312,158 @@ class AWSTranscribeSTTService(STTService):
             self._ws_client = None
     def language_to_service_language(self, language: Language) -> str | None:
-        """Convert internal language enum to AWS Transcribe language code."""
+        """Convert internal language enum to AWS Transcribe language code.
+        Source:
+        https://docs.aws.amazon.com/transcribe/latest/dg/supported-languages.html
+        All language codes that support streaming are included.
+        Args:
+            language: Internal language enumeration value.
+        Returns:
+            AWS Transcribe compatible language code, or None if unsupported.
+        """
         language_map = {
-            Language.EN: "en-US",
-            Language.ES: "es-US",
-            Language.FR: "fr-FR",
-            Language.DE: "de-DE",
+            # Afrikaans
+            Language.AF: "af-ZA",
+            Language.AF_ZA: "af-ZA",
+            # Arabic
+            Language.AR: "ar-SA",  # Default to Modern Standard Arabic
+            Language.AR_AE: "ar-AE",  # Gulf Arabic
+            Language.AR_SA: "ar-SA",  # Modern Standard Arabic
+            # Basque
+            Language.EU: "eu-ES",
+            Language.EU_ES: "eu-ES",
+            # Catalan
+            Language.CA: "ca-ES",
+            Language.CA_ES: "ca-ES",
+            # Chinese
+            Language.ZH: "zh-CN",  # Default to Simplified
+            Language.ZH_CN: "zh-CN",  # Simplified
+            Language.ZH_TW: "zh-TW",  # Traditional
+            Language.ZH_HK: "zh-HK",  # Cantonese (also yue-HK)
+            Language.YUE: "zh-HK",  # Cantonese fallback
+            # Croatian
+            Language.HR: "hr-HR",
+            Language.HR_HR: "hr-HR",
+            # Czech
+            Language.CS: "cs-CZ",
+            Language.CS_CZ: "cs-CZ",
+            # Danish
+            Language.DA: "da-DK",
+            Language.DA_DK: "da-DK",
+            # Dutch
+            Language.NL: "nl-NL",
+            Language.NL_NL: "nl-NL",
+            # English
+            Language.EN: "en-US",  # Default to US
+            Language.EN_AU: "en-AU",  # Australian
+            Language.EN_GB: "en-GB",  # British
+            Language.EN_IN: "en-IN",  # Indian
+            Language.EN_IE: "en-IE",  # Irish
+            Language.EN_NZ: "en-NZ",  # New Zealand
+            # Note: Scottish (en-AB) and Welsh (en-WL) don't have direct Language enum matches
+            Language.EN_ZA: "en-ZA",  # South African
+            Language.EN_US: "en-US",  # US
+            # Persian/Farsi
+            Language.FA: "fa-IR",
+            Language.FA_IR: "fa-IR",
+            # Finnish
+            Language.FI: "fi-FI",
+            Language.FI_FI: "fi-FI",
+            # French
+            Language.FR: "fr-FR",  # Default to France
+            Language.FR_FR: "fr-FR",
+            Language.FR_CA: "fr-CA",  # Canadian
+            # Galician
+            Language.GL: "gl-ES",
+            Language.GL_ES: "gl-ES",
+            # Georgian
+            Language.KA: "ka-GE",
+            Language.KA_GE: "ka-GE",
+            # German
+            Language.DE: "de-DE",  # Default to Germany
+            Language.DE_DE: "de-DE",
+            Language.DE_CH: "de-CH",  # Swiss
+            # Greek
+            Language.EL: "el-GR",
+            Language.EL_GR: "el-GR",
+            # Hebrew
+            Language.HE: "he-IL",
+            Language.HE_IL: "he-IL",
+            # Hindi
+            Language.HI: "hi-IN",
+            Language.HI_IN: "hi-IN",
+            # Indonesian
+            Language.ID: "id-ID",
+            Language.ID_ID: "id-ID",
+            # Italian
             Language.IT: "it-IT",
-            Language.PT: "pt-BR",
+            Language.IT_IT: "it-IT",
+            # Japanese
             Language.JA: "ja-JP",
+            Language.JA_JP: "ja-JP",
+            # Korean
             Language.KO: "ko-KR",
-            Language.ZH: "zh-CN",
+            Language.KO_KR: "ko-KR",
+            # Latvian
+            Language.LV: "lv-LV",
+            Language.LV_LV: "lv-LV",
+            # Malay
+            Language.MS: "ms-MY",
+            Language.MS_MY: "ms-MY",
+            # Norwegian
+            Language.NB: "no-NO",  # Norwegian Bokmål
+            Language.NB_NO: "no-NO",
+            Language.NO: "no-NO",
+            # Polish
             Language.PL: "pl-PL",
+            Language.PL_PL: "pl-PL",
+            # Portuguese
+            Language.PT: "pt-PT",  # Default to Portugal
+            Language.PT_PT: "pt-PT",
+            Language.PT_BR: "pt-BR",  # Brazilian
+            # Romanian
+            Language.RO: "ro-RO",
+            Language.RO_RO: "ro-RO",
+            # Russian
+            Language.RU: "ru-RU",
+            Language.RU_RU: "ru-RU",
+            # Serbian
+            Language.SR: "sr-RS",
+            Language.SR_RS: "sr-RS",
+            # Slovak
+            Language.SK: "sk-SK",
+            Language.SK_SK: "sk-SK",
+            # Somali
+            Language.SO: "so-SO",
+            Language.SO_SO: "so-SO",
+            # Spanish
+            Language.ES: "es-ES",  # Default to Spain
+            Language.ES_ES: "es-ES",
+            Language.ES_US: "es-US",  # US Spanish
+            # Swedish
+            Language.SV: "sv-SE",
+            Language.SV_SE: "sv-SE",
+            # Tagalog/Filipino
+            Language.TL: "tl-PH",
+            Language.FIL: "tl-PH",  # Filipino maps to Tagalog
+            Language.FIL_PH: "tl-PH",
+            # Thai
+            Language.TH: "th-TH",
+            Language.TH_TH: "th-TH",
+            # Ukrainian
+            Language.UK: "uk-UA",
+            Language.UK_UA: "uk-UA",
+            # Vietnamese
+            Language.VI: "vi-VN",
+            Language.VI_VN: "vi-VN",
+            # Zulu
+            Language.ZU: "zu-ZA",
+            Language.ZU_ZA: "zu-ZA",
         }
         return language_map.get(language)
     @traced_stt
@@ -279,12 +475,12 @@ class AWSTranscribeSTTService(STTService):
     async def _receive_loop(self):
         """Background task to receive and process messages from AWS Transcribe."""
         while True:
-            if not self._ws_client or not self._ws_client.open:
+            if not self._ws_client or self._ws_client.state is State.CLOSED:
                 logger.warning(f"{self} WebSocket closed in receive loop")
                 break
             try:
-                response = await asyncio.wait_for(self._ws_client.recv(), timeout=1.0)
+                response = await self._ws_client.recv()
                 headers, payload = decode_event(response)
@@ -304,7 +500,7 @@ class AWSTranscribeSTTService(STTService):
                                     await self.push_frame(
                                         TranscriptionFrame(
                                             transcript,
-                                            "",
+                                            self._user_id,
                                             time_now_iso8601(),
                                             self._settings["language"],
                                             result=result,
@@ -320,7 +516,7 @@ class AWSTranscribeSTTService(STTService):
                                     await self.push_frame(
                                         InterimTranscriptionFrame(
                                             transcript,
-                                            "",
+                                            self._user_id,
                                             time_now_iso8601(),
                                             self._settings["language"],
                                             result=result,
@@ -335,8 +531,6 @@ class AWSTranscribeSTTService(STTService):
                 else:
                     logger.debug(f"{self} Other message type received: {headers}")
                     logger.debug(f"{self} Payload: {payload}")
-            except asyncio.TimeoutError:
-                self.reset_watchdog()
             except websockets.exceptions.ConnectionClosed as e:
                 logger.error(
                     f"{self} WebSocket connection closed in receive loop with code {e.code}: {e.reason}"

pipecat/services/aws/tts.py CHANGED Viewed

@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""AWS Polly text-to-speech service implementation.
+This module provides integration with Amazon Polly for text-to-speech synthesis,
+supporting multiple languages, voices, and SSML features.
+"""
 import asyncio
 import os
 from typing import AsyncGenerator, List, Optional
@@ -11,7 +17,7 @@ from typing import AsyncGenerator, List, Optional
 from loguru import logger
 from pydantic import BaseModel
-from pipecat.audio.utils import create_default_resampler
+from pipecat.audio.utils import create_stream_resampler
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
@@ -24,7 +30,7 @@ from pipecat.transcriptions.language import Language
 from pipecat.utils.tracing.service_decorators import traced_tts
 try:
-    import boto3
+    import aioboto3
     from botocore.exceptions import BotoCoreError, ClientError
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
@@ -33,6 +39,14 @@ except ModuleNotFoundError as e:
 def language_to_aws_language(language: Language) -> Optional[str]:
+    """Convert a Language enum to AWS Polly language code.
+    Args:
+        language: The Language enum value to convert.
+    Returns:
+        The corresponding AWS Polly language code, or None if not supported.
+    """
     language_map = {
         # Arabic
         Language.AR: "arb",
@@ -109,7 +123,25 @@ def language_to_aws_language(language: Language) -> Optional[str]:
 class AWSPollyTTSService(TTSService):
+    """AWS Polly text-to-speech service.
+    Provides text-to-speech synthesis using Amazon Polly with support for
+    multiple languages, voices, SSML features, and voice customization
+    options including prosody controls.
+    """
     class InputParams(BaseModel):
+        """Input parameters for AWS Polly TTS configuration.
+        Parameters:
+            engine: TTS engine to use ('standard', 'neural', etc.).
+            language: Language for synthesis. Defaults to English.
+            pitch: Voice pitch adjustment (for standard engine only).
+            rate: Speech rate adjustment.
+            volume: Voice volume adjustment.
+            lexicon_names: List of pronunciation lexicons to apply.
+        """
         engine: Optional[str] = None
         language: Optional[Language] = Language.EN
         pitch: Optional[str] = None
@@ -129,54 +161,73 @@ class AWSPollyTTSService(TTSService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initializes the AWS Polly TTS service.
+        Args:
+            api_key: AWS secret access key. If None, uses AWS_SECRET_ACCESS_KEY environment variable.
+            aws_access_key_id: AWS access key ID. If None, uses AWS_ACCESS_KEY_ID environment variable.
+            aws_session_token: AWS session token for temporary credentials.
+            region: AWS region for Polly service. Defaults to 'us-east-1'.
+            voice_id: Voice ID to use for synthesis. Defaults to 'Joanna'.
+            sample_rate: Audio sample rate. If None, uses service default.
+            params: Additional input parameters for voice customization.
+            **kwargs: Additional arguments passed to parent TTSService class.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
         params = params or AWSPollyTTSService.InputParams()
-        self._polly_client = boto3.client(
-            "polly",
-            aws_access_key_id=aws_access_key_id,
-            aws_secret_access_key=api_key,
-            aws_session_token=aws_session_token,
-            region_name=region,
-        )
-        self._settings = {
-            "engine": params.engine,
-            "language": self.language_to_service_language(params.language)
-            if params.language
-            else "en-US",
-            "pitch": params.pitch,
-            "rate": params.rate,
-            "volume": params.volume,
-            "lexicon_names": params.lexicon_names,
-        }
-        self._resampler = create_default_resampler()
-        self.set_voice(voice_id)
         # Get credentials from environment variables if not provided
-        self._credentials = {
+        self._aws_params = {
             "aws_access_key_id": aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"),
             "aws_secret_access_key": api_key or os.getenv("AWS_SECRET_ACCESS_KEY"),
             "aws_session_token": aws_session_token or os.getenv("AWS_SESSION_TOKEN"),
-            "region": region or os.getenv("AWS_REGION", "us-east-1"),
+            "region_name": region or os.getenv("AWS_REGION", "us-east-1"),
         }
         # Validate that we have the required credentials
         if (
-            not self._credentials["aws_access_key_id"]
-            or not self._credentials["aws_secret_access_key"]
+            not self._aws_params["aws_access_key_id"]
+            or not self._aws_params["aws_secret_access_key"]
         ):
             raise ValueError(
                 "AWS credentials not found. Please provide them either through constructor parameters "
                 "or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables."
             )
+        self._aws_session = aioboto3.Session()
+        self._settings = {
+            "engine": params.engine,
+            "language": self.language_to_service_language(params.language)
+            if params.language
+            else "en-US",
+            "pitch": params.pitch,
+            "rate": params.rate,
+            "volume": params.volume,
+            "lexicon_names": params.lexicon_names,
+        }
+        self._resampler = create_stream_resampler()
+        self.set_voice(voice_id)
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as AWS Polly service supports metrics generation.
+        """
         return True
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to AWS Polly language format.
+        Args:
+            language: The language to convert.
+        Returns:
+            The AWS Polly-specific language code, or None if not supported.
+        """
         return language_to_aws_language(language)
     def _construct_ssml(self, text: str) -> str:
@@ -214,13 +265,14 @@ class AWSPollyTTSService(TTSService):
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
-        def read_audio_data(**args):
-            response = self._polly_client.synthesize_speech(**args)
-            if "AudioStream" in response:
-                audio_data = response["AudioStream"].read()
-                return audio_data
-            return None
+        """Generate speech from text using AWS Polly.
+        Args:
+            text: The text to synthesize into speech.
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
         try:
@@ -243,30 +295,32 @@ class AWSPollyTTSService(TTSService):
             # Filter out None values
             filtered_params = {k: v for k, v in params.items() if v is not None}
-            audio_data = await asyncio.to_thread(read_audio_data, **filtered_params)
-            if not audio_data:
-                logger.error(f"{self} No audio data returned")
-                yield None
-                return
+            async with self._aws_session.client("polly", **self._aws_params) as polly:
+                response = await polly.synthesize_speech(**filtered_params)
+                if "AudioStream" in response:
+                    # Get the streaming body and read it
+                    stream = response["AudioStream"]
+                    audio_data = await stream.read()
+                else:
+                    logger.error(f"{self} No audio stream in response")
+                    audio_data = None
-            audio_data = await self._resampler.resample(audio_data, 16000, self.sample_rate)
+                audio_data = await self._resampler.resample(audio_data, 16000, self.sample_rate)
-            await self.start_tts_usage_metrics(text)
+                await self.start_tts_usage_metrics(text)
-            yield TTSStartedFrame()
+                yield TTSStartedFrame()
-            CHUNK_SIZE = self.chunk_size
+                CHUNK_SIZE = self.chunk_size
-            for i in range(0, len(audio_data), CHUNK_SIZE):
-                chunk = audio_data[i : i + CHUNK_SIZE]
-                if len(chunk) > 0:
-                    await self.stop_ttfb_metrics()
-                    frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
-                    yield frame
-            yield TTSStoppedFrame()
+                for i in range(0, len(audio_data), CHUNK_SIZE):
+                    chunk = audio_data[i : i + CHUNK_SIZE]
+                    if len(chunk) > 0:
+                        await self.stop_ttfb_metrics()
+                        frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
+                        yield frame
+                yield TTSStoppedFrame()
         except (BotoCoreError, ClientError) as error:
             logger.exception(f"{self} error generating TTS: {error}")
             error_message = f"AWS Polly TTS error: {str(error)}"
@@ -277,7 +331,19 @@ class AWSPollyTTSService(TTSService):
 class PollyTTSService(AWSPollyTTSService):
+    """Deprecated alias for AWSPollyTTSService.
+    .. deprecated:: 0.0.67
+        `PollyTTSService` is deprecated, use `AWSPollyTTSService` instead.
+    """
     def __init__(self, **kwargs):
+        """Initialize the deprecated PollyTTSService.
+        Args:
+            **kwargs: All arguments passed to AWSPollyTTSService.
+        """
         super().__init__(**kwargs)
         import warnings

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl