PyPI - dv-pipecat-ai - Versions diffs - 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (244) hide show

{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
pipecat/__init__.py +17 -0
pipecat/adapters/base_llm_adapter.py +36 -1
pipecat/adapters/schemas/direct_function.py +296 -0
pipecat/adapters/schemas/function_schema.py +15 -6
pipecat/adapters/schemas/tools_schema.py +55 -7
pipecat/adapters/services/anthropic_adapter.py +22 -3
pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
pipecat/adapters/services/bedrock_adapter.py +22 -3
pipecat/adapters/services/gemini_adapter.py +16 -3
pipecat/adapters/services/open_ai_adapter.py +17 -2
pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
pipecat/audio/filters/base_audio_filter.py +30 -6
pipecat/audio/filters/koala_filter.py +37 -2
pipecat/audio/filters/krisp_filter.py +59 -6
pipecat/audio/filters/noisereduce_filter.py +37 -0
pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
pipecat/audio/mixers/base_audio_mixer.py +30 -7
pipecat/audio/mixers/soundfile_mixer.py +53 -6
pipecat/audio/resamplers/base_audio_resampler.py +17 -9
pipecat/audio/resamplers/resampy_resampler.py +26 -1
pipecat/audio/resamplers/soxr_resampler.py +32 -1
pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
pipecat/audio/utils.py +194 -1
pipecat/audio/vad/silero.py +60 -3
pipecat/audio/vad/vad_analyzer.py +114 -30
pipecat/clocks/base_clock.py +19 -0
pipecat/clocks/system_clock.py +25 -0
pipecat/extensions/voicemail/__init__.py +0 -0
pipecat/extensions/voicemail/voicemail_detector.py +707 -0
pipecat/frames/frames.py +590 -156
pipecat/metrics/metrics.py +64 -1
pipecat/observers/base_observer.py +58 -19
pipecat/observers/loggers/debug_log_observer.py +56 -64
pipecat/observers/loggers/llm_log_observer.py +8 -1
pipecat/observers/loggers/transcription_log_observer.py +19 -7
pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
pipecat/observers/turn_tracking_observer.py +26 -1
pipecat/pipeline/base_pipeline.py +5 -7
pipecat/pipeline/base_task.py +52 -9
pipecat/pipeline/parallel_pipeline.py +121 -177
pipecat/pipeline/pipeline.py +129 -20
pipecat/pipeline/runner.py +50 -1
pipecat/pipeline/sync_parallel_pipeline.py +132 -32
pipecat/pipeline/task.py +263 -280
pipecat/pipeline/task_observer.py +85 -34
pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
pipecat/processors/aggregators/gated.py +25 -24
pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
pipecat/processors/aggregators/llm_response.py +398 -89
pipecat/processors/aggregators/openai_llm_context.py +161 -13
pipecat/processors/aggregators/sentence.py +25 -14
pipecat/processors/aggregators/user_response.py +28 -3
pipecat/processors/aggregators/vision_image_frame.py +24 -14
pipecat/processors/async_generator.py +28 -0
pipecat/processors/audio/audio_buffer_processor.py +78 -37
pipecat/processors/consumer_processor.py +25 -6
pipecat/processors/filters/frame_filter.py +23 -0
pipecat/processors/filters/function_filter.py +30 -0
pipecat/processors/filters/identity_filter.py +17 -2
pipecat/processors/filters/null_filter.py +24 -1
pipecat/processors/filters/stt_mute_filter.py +56 -21
pipecat/processors/filters/wake_check_filter.py +46 -3
pipecat/processors/filters/wake_notifier_filter.py +21 -3
pipecat/processors/frame_processor.py +488 -131
pipecat/processors/frameworks/langchain.py +38 -3
pipecat/processors/frameworks/rtvi.py +719 -34
pipecat/processors/gstreamer/pipeline_source.py +41 -0
pipecat/processors/idle_frame_processor.py +26 -3
pipecat/processors/logger.py +23 -0
pipecat/processors/metrics/frame_processor_metrics.py +77 -4
pipecat/processors/metrics/sentry.py +42 -4
pipecat/processors/producer_processor.py +34 -14
pipecat/processors/text_transformer.py +22 -10
pipecat/processors/transcript_processor.py +48 -29
pipecat/processors/user_idle_processor.py +31 -21
pipecat/runner/__init__.py +1 -0
pipecat/runner/daily.py +132 -0
pipecat/runner/livekit.py +148 -0
pipecat/runner/run.py +543 -0
pipecat/runner/types.py +67 -0
pipecat/runner/utils.py +515 -0
pipecat/serializers/base_serializer.py +42 -0
pipecat/serializers/exotel.py +17 -6
pipecat/serializers/genesys.py +95 -0
pipecat/serializers/livekit.py +33 -0
pipecat/serializers/plivo.py +16 -15
pipecat/serializers/protobuf.py +37 -1
pipecat/serializers/telnyx.py +18 -17
pipecat/serializers/twilio.py +32 -16
pipecat/services/ai_service.py +5 -3
pipecat/services/anthropic/llm.py +113 -43
pipecat/services/assemblyai/models.py +63 -5
pipecat/services/assemblyai/stt.py +64 -11
pipecat/services/asyncai/__init__.py +0 -0
pipecat/services/asyncai/tts.py +501 -0
pipecat/services/aws/llm.py +185 -111
pipecat/services/aws/stt.py +217 -23
pipecat/services/aws/tts.py +118 -52
pipecat/services/aws/utils.py +101 -5
pipecat/services/aws_nova_sonic/aws.py +82 -64
pipecat/services/aws_nova_sonic/context.py +15 -6
pipecat/services/azure/common.py +10 -2
pipecat/services/azure/image.py +32 -0
pipecat/services/azure/llm.py +9 -7
pipecat/services/azure/stt.py +65 -2
pipecat/services/azure/tts.py +154 -23
pipecat/services/cartesia/stt.py +125 -8
pipecat/services/cartesia/tts.py +102 -38
pipecat/services/cerebras/llm.py +15 -23
pipecat/services/deepgram/stt.py +19 -11
pipecat/services/deepgram/tts.py +36 -0
pipecat/services/deepseek/llm.py +14 -23
pipecat/services/elevenlabs/tts.py +330 -64
pipecat/services/fal/image.py +43 -0
pipecat/services/fal/stt.py +48 -10
pipecat/services/fireworks/llm.py +14 -21
pipecat/services/fish/tts.py +109 -9
pipecat/services/gemini_multimodal_live/__init__.py +1 -0
pipecat/services/gemini_multimodal_live/events.py +83 -2
pipecat/services/gemini_multimodal_live/file_api.py +189 -0
pipecat/services/gemini_multimodal_live/gemini.py +218 -21
pipecat/services/gladia/config.py +17 -10
pipecat/services/gladia/stt.py +82 -36
pipecat/services/google/frames.py +40 -0
pipecat/services/google/google.py +2 -0
pipecat/services/google/image.py +39 -2
pipecat/services/google/llm.py +176 -58
pipecat/services/google/llm_openai.py +26 -4
pipecat/services/google/llm_vertex.py +37 -15
pipecat/services/google/rtvi.py +41 -0
pipecat/services/google/stt.py +65 -17
pipecat/services/google/test-google-chirp.py +45 -0
pipecat/services/google/tts.py +390 -19
pipecat/services/grok/llm.py +8 -6
pipecat/services/groq/llm.py +8 -6
pipecat/services/groq/stt.py +13 -9
pipecat/services/groq/tts.py +40 -0
pipecat/services/hamsa/__init__.py +9 -0
pipecat/services/hamsa/stt.py +241 -0
pipecat/services/heygen/__init__.py +5 -0
pipecat/services/heygen/api.py +281 -0
pipecat/services/heygen/client.py +620 -0
pipecat/services/heygen/video.py +338 -0
pipecat/services/image_service.py +5 -3
pipecat/services/inworld/__init__.py +1 -0
pipecat/services/inworld/tts.py +592 -0
pipecat/services/llm_service.py +127 -45
pipecat/services/lmnt/tts.py +80 -7
pipecat/services/mcp_service.py +85 -44
pipecat/services/mem0/memory.py +42 -13
pipecat/services/minimax/tts.py +74 -15
pipecat/services/mistral/__init__.py +0 -0
pipecat/services/mistral/llm.py +185 -0
pipecat/services/moondream/vision.py +55 -10
pipecat/services/neuphonic/tts.py +275 -48
pipecat/services/nim/llm.py +8 -6
pipecat/services/ollama/llm.py +27 -7
pipecat/services/openai/base_llm.py +54 -16
pipecat/services/openai/image.py +30 -0
pipecat/services/openai/llm.py +7 -5
pipecat/services/openai/stt.py +13 -9
pipecat/services/openai/tts.py +42 -10
pipecat/services/openai_realtime_beta/azure.py +11 -9
pipecat/services/openai_realtime_beta/context.py +7 -5
pipecat/services/openai_realtime_beta/events.py +10 -7
pipecat/services/openai_realtime_beta/openai.py +37 -18
pipecat/services/openpipe/llm.py +30 -24
pipecat/services/openrouter/llm.py +9 -7
pipecat/services/perplexity/llm.py +15 -19
pipecat/services/piper/tts.py +26 -12
pipecat/services/playht/tts.py +227 -65
pipecat/services/qwen/llm.py +8 -6
pipecat/services/rime/tts.py +128 -17
pipecat/services/riva/stt.py +160 -22
pipecat/services/riva/tts.py +67 -2
pipecat/services/sambanova/llm.py +19 -17
pipecat/services/sambanova/stt.py +14 -8
pipecat/services/sarvam/tts.py +60 -13
pipecat/services/simli/video.py +82 -21
pipecat/services/soniox/__init__.py +0 -0
pipecat/services/soniox/stt.py +398 -0
pipecat/services/speechmatics/stt.py +29 -17
pipecat/services/stt_service.py +47 -11
pipecat/services/tavus/video.py +94 -25
pipecat/services/together/llm.py +8 -6
pipecat/services/tts_service.py +77 -53
pipecat/services/ultravox/stt.py +46 -43
pipecat/services/vision_service.py +5 -3
pipecat/services/websocket_service.py +12 -11
pipecat/services/whisper/base_stt.py +58 -12
pipecat/services/whisper/stt.py +69 -58
pipecat/services/xtts/tts.py +59 -2
pipecat/sync/base_notifier.py +19 -0
pipecat/sync/event_notifier.py +24 -0
pipecat/tests/utils.py +73 -5
pipecat/transcriptions/language.py +24 -0
pipecat/transports/base_input.py +112 -8
pipecat/transports/base_output.py +235 -13
pipecat/transports/base_transport.py +119 -0
pipecat/transports/local/audio.py +76 -0
pipecat/transports/local/tk.py +84 -0
pipecat/transports/network/fastapi_websocket.py +174 -15
pipecat/transports/network/small_webrtc.py +383 -39
pipecat/transports/network/webrtc_connection.py +214 -8
pipecat/transports/network/websocket_client.py +171 -1
pipecat/transports/network/websocket_server.py +147 -9
pipecat/transports/services/daily.py +792 -70
pipecat/transports/services/helpers/daily_rest.py +122 -129
pipecat/transports/services/livekit.py +339 -4
pipecat/transports/services/tavus.py +273 -38
pipecat/utils/asyncio/task_manager.py +92 -186
pipecat/utils/base_object.py +83 -1
pipecat/utils/network.py +2 -0
pipecat/utils/string.py +114 -58
pipecat/utils/text/base_text_aggregator.py +44 -13
pipecat/utils/text/base_text_filter.py +46 -0
pipecat/utils/text/markdown_text_filter.py +70 -14
pipecat/utils/text/pattern_pair_aggregator.py +18 -14
pipecat/utils/text/simple_text_aggregator.py +43 -2
pipecat/utils/text/skip_tags_aggregator.py +21 -13
pipecat/utils/time.py +36 -0
pipecat/utils/tracing/class_decorators.py +32 -7
pipecat/utils/tracing/conversation_context_provider.py +12 -2
pipecat/utils/tracing/service_attributes.py +80 -64
pipecat/utils/tracing/service_decorators.py +48 -21
pipecat/utils/tracing/setup.py +13 -7
pipecat/utils/tracing/turn_context_provider.py +12 -2
pipecat/utils/tracing/turn_trace_observer.py +27 -0
pipecat/utils/utils.py +14 -14
dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
pipecat/examples/daily_runner.py +0 -64
pipecat/examples/run.py +0 -265
pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
pipecat/utils/asyncio/watchdog_event.py +0 -42
pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
pipecat/utils/asyncio/watchdog_queue.py +0 -48
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
/pipecat/{examples → extensions}/__init__.py +0 -0

pipecat/services/playht/tts.py CHANGED Viewed

@@ -4,14 +4,20 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
+"""PlayHT text-to-speech service implementations.
+This module provides integration with PlayHT's text-to-speech API
+supporting both WebSocket streaming and HTTP-based synthesis.
+"""
 import io
 import json
 import struct
 import uuid
+import warnings
 from typing import AsyncGenerator, Optional
 import aiohttp
-import websockets
 from loguru import logger
 from pydantic import BaseModel
@@ -32,16 +38,23 @@ from pipecat.transcriptions.language import Language
 from pipecat.utils.tracing.service_decorators import traced_tts
 try:
-    from pyht.async_client import AsyncClient
-    from pyht.client import Format, TTSOptions
-    from pyht.client import Language as PlayHTLanguage
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
-    logger.error("In order to use PlayHT, you need to `pip install pipecat-ai[playht]`.")
+    logger.error("In order to use PlayHTTTSService, you need to `pip install pipecat-ai[playht]`.")
     raise Exception(f"Missing module: {e}")
 def language_to_playht_language(language: Language) -> Optional[str]:
+    """Convert a Language enum to PlayHT language code.
+    Args:
+        language: The Language enum value to convert.
+    Returns:
+        The corresponding PlayHT language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.AF: "afrikans",
         Language.AM: "amharic",
@@ -96,7 +109,22 @@ def language_to_playht_language(language: Language) -> Optional[str]:
 class PlayHTTTSService(InterruptibleTTSService):
+    """PlayHT WebSocket-based text-to-speech service.
+    Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
+    Supports streaming audio generation with configurable voice engines and
+    language settings.
+    """
     class InputParams(BaseModel):
+        """Input parameters for PlayHT TTS configuration.
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+            seed: Random seed for voice consistency.
+        """
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0
         seed: Optional[int] = None
@@ -113,6 +141,18 @@ class PlayHTTTSService(InterruptibleTTSService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the PlayHT WebSocket TTS service.
+        Args:
+            api_key: PlayHT API key for authentication.
+            user_id: PlayHT user ID for authentication.
+            voice_url: URL of the voice to use for synthesis.
+            voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
+            sample_rate: Audio sample rate. If None, uses default.
+            output_format: Audio output format. Defaults to "wav".
+            params: Additional input parameters for voice customization.
+            **kwargs: Additional arguments passed to parent InterruptibleTTSService.
+        """
         super().__init__(
             pause_frame_processing=True,
             sample_rate=sample_rate,
@@ -140,30 +180,60 @@ class PlayHTTTSService(InterruptibleTTSService):
         self.set_voice(voice_url)
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as PlayHT service supports metrics generation.
+        """
         return True
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to PlayHT service language format.
+        Args:
+            language: The language to convert.
+        Returns:
+            The PlayHT-specific language code, or None if not supported.
+        """
         return language_to_playht_language(language)
     async def start(self, frame: StartFrame):
+        """Start the PlayHT TTS service.
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         await self._connect()
     async def stop(self, frame: EndFrame):
+        """Stop the PlayHT TTS service.
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._disconnect()
     async def cancel(self, frame: CancelFrame):
+        """Cancel the PlayHT TTS service.
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._disconnect()
     async def _connect(self):
+        """Connect to PlayHT WebSocket and start receive task."""
         await self._connect_websocket()
         if self._websocket and not self._receive_task:
             self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
     async def _disconnect(self):
+        """Disconnect from PlayHT WebSocket and clean up tasks."""
         if self._receive_task:
             await self.cancel_task(self._receive_task)
             self._receive_task = None
@@ -171,8 +241,9 @@ class PlayHTTTSService(InterruptibleTTSService):
         await self._disconnect_websocket()
     async def _connect_websocket(self):
+        """Connect to PlayHT websocket."""
         try:
-            if self._websocket and self._websocket.open:
+            if self._websocket and self._websocket.state is State.OPEN:
                 return
             logger.debug("Connecting to PlayHT")
@@ -183,7 +254,7 @@ class PlayHTTTSService(InterruptibleTTSService):
             if not isinstance(self._websocket_url, str):
                 raise ValueError("WebSocket URL is not a string")
-            self._websocket = await websockets.connect(self._websocket_url)
+            self._websocket = await websocket_connect(self._websocket_url)
         except ValueError as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -194,6 +265,7 @@ class PlayHTTTSService(InterruptibleTTSService):
             await self._call_event_handler("on_connection_error", f"{e}")
     async def _disconnect_websocket(self):
+        """Disconnect from PlayHT websocket."""
         try:
             await self.stop_all_metrics()
@@ -207,6 +279,7 @@ class PlayHTTTSService(InterruptibleTTSService):
             self._websocket = None
     async def _get_websocket_url(self):
+        """Retrieve WebSocket URL from PlayHT API."""
         async with aiohttp.ClientSession() as session:
             async with session.post(
                 "https://api.play.ht/api/v4/websocket-auth",
@@ -235,16 +308,19 @@ class PlayHTTTSService(InterruptibleTTSService):
                     raise Exception(f"Failed to get WebSocket URL: {response.status}")
     def _get_websocket(self):
+        """Get the WebSocket connection if available."""
         if self._websocket:
             return self._websocket
         raise Exception("Websocket not connected")
     async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+        """Handle interruption by stopping metrics and clearing request ID."""
         await super()._handle_interruption(frame, direction)
         await self.stop_all_metrics()
         self._request_id = None
     async def _receive_messages(self):
+        """Receive messages from PlayHT websocket."""
         async for message in self._get_websocket():
             if isinstance(message, bytes):
                 # Skip the WAV header message
@@ -273,11 +349,19 @@ class PlayHTTTSService(InterruptibleTTSService):
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate TTS audio from text using PlayHT's WebSocket API.
+        Args:
+            text: The text to synthesize into speech.
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
         try:
             # Reconnect if the websocket is closed
-            if not self._websocket or self._websocket.closed:
+            if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()
             if not self._request_id:
@@ -316,7 +400,22 @@ class PlayHTTTSService(InterruptibleTTSService):
 class PlayHTHttpTTSService(TTSService):
+    """PlayHT HTTP-based text-to-speech service.
+    Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
+    non-streaming synthesis. Suitable for use cases where streaming is not
+    required and simpler integration is preferred.
+    """
     class InputParams(BaseModel):
+        """Input parameters for PlayHT HTTP TTS configuration.
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+            seed: Random seed for voice consistency.
+        """
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0
         seed: Optional[int] = None
@@ -328,40 +427,59 @@ class PlayHTHttpTTSService(TTSService):
         user_id: str,
         voice_url: str,
         voice_engine: str = "Play3.0-mini",
-        protocol: str = "http",  # Options: http, ws
+        protocol: Optional[str] = None,
+        output_format: str = "wav",
         sample_rate: Optional[int] = None,
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the PlayHT HTTP TTS service.
+        Args:
+            api_key: PlayHT API key for authentication.
+            user_id: PlayHT user ID for authentication.
+            voice_url: URL of the voice to use for synthesis.
+            voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
+            protocol: Protocol to use ("http" or "ws").
+                .. deprecated:: 0.0.80
+                    This parameter no longer has any effect and will be removed in a future version.
+                    Use PlayHTTTSService for WebSocket or PlayHTHttpTTSService for HTTP.
+            output_format: Audio output format. Defaults to "wav".
+            sample_rate: Audio sample rate. If None, uses default.
+            params: Additional input parameters for voice customization.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
+        # Warn about deprecated protocol parameter if explicitly provided
+        if protocol:
+            warnings.warn(
+                "The 'protocol' parameter is deprecated and will be removed in a future version.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         params = params or PlayHTHttpTTSService.InputParams()
         self._user_id = user_id
         self._api_key = api_key
-        self._client = AsyncClient(
-            user_id=self._user_id,
-            api_key=self._api_key,
-        )
         # Check if voice_engine contains protocol information (backward compatibility)
         if "-http" in voice_engine:
             # Extract the base engine name
             voice_engine = voice_engine.replace("-http", "")
-            protocol = "http"
         elif "-ws" in voice_engine:
             # Extract the base engine name
             voice_engine = voice_engine.replace("-ws", "")
-            protocol = "ws"
         self._settings = {
             "language": self.language_to_service_language(params.language)
             if params.language
             else "english",
-            "format": Format.FORMAT_WAV,
+            "output_format": output_format,
             "voice_engine": voice_engine,
-            "protocol": protocol,
             "speed": params.speed,
             "seed": params.seed,
         }
@@ -369,74 +487,118 @@ class PlayHTHttpTTSService(TTSService):
         self.set_voice(voice_url)
     async def start(self, frame: StartFrame):
+        """Start the PlayHT HTTP TTS service.
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         self._settings["sample_rate"] = self.sample_rate
-    def _create_options(self) -> TTSOptions:
-        language_str = self._settings["language"]
-        playht_language = None
-        if language_str:
-            # Convert string to PlayHT Language enum
-            for lang in PlayHTLanguage:
-                if lang.value == language_str:
-                    playht_language = lang
-                    break
-        return TTSOptions(
-            voice=self._voice_id,
-            language=playht_language,
-            sample_rate=self.sample_rate,
-            format=self._settings["format"],
-            speed=self._settings["speed"],
-            seed=self._settings["seed"],
-        )
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+        Returns:
+            True, as PlayHT HTTP service supports metrics generation.
+        """
         return True
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to PlayHT service language format.
+        Args:
+            language: The language to convert.
+        Returns:
+            The PlayHT-specific language code, or None if not supported.
+        """
         return language_to_playht_language(language)
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate TTS audio from text using PlayHT's HTTP API.
+        Args:
+            text: The text to synthesize into speech.
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
         try:
-            options = self._create_options()
             await self.start_ttfb_metrics()
-            playht_gen = self._client.tts(
-                text,
-                voice_engine=self._settings["voice_engine"],
-                protocol=self._settings["protocol"],
-                options=options,
-            )
+            # Prepare the request payload
+            payload = {
+                "text": text,
+                "voice": self._voice_id,
+                "voice_engine": self._settings["voice_engine"],
+                "output_format": self._settings["output_format"],
+                "sample_rate": self.sample_rate,
+                "language": self._settings["language"],
+            }
+            # Add optional parameters if they exist
+            if self._settings["speed"] is not None:
+                payload["speed"] = self._settings["speed"]
+            if self._settings["seed"] is not None:
+                payload["seed"] = self._settings["seed"]
+            headers = {
+                "Authorization": f"Bearer {self._api_key}",
+                "X-User-Id": self._user_id,
+                "Content-Type": "application/json",
+                "Accept": "*/*",
+            }
             await self.start_tts_usage_metrics(text)
             yield TTSStartedFrame()
-            b = bytearray()
-            in_header = True
-            async for chunk in playht_gen:
-                # skip the RIFF header.
-                if in_header:
-                    b.extend(chunk)
-                    if len(b) <= 36:
-                        continue
-                    else:
-                        fh = io.BytesIO(b)
-                        fh.seek(36)
-                        (data, size) = struct.unpack("<4sI", fh.read(8))
-                        while data != b"data":
-                            fh.read(size)
-                            (data, size) = struct.unpack("<4sI", fh.read(8))
-                        in_header = False
-                elif len(chunk) > 0:
-                    await self.stop_ttfb_metrics()
-                    frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
-                    yield frame
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    "https://api.play.ht/api/v2/tts/stream",
+                    headers=headers,
+                    json=payload,
+                ) as response:
+                    if response.status not in (200, 201):
+                        error_text = await response.text()
+                        raise Exception(f"PlayHT API error {response.status}: {error_text}")
+                    in_header = True
+                    buffer = b""
+                    CHUNK_SIZE = self.chunk_size
+                    async for chunk in response.content.iter_chunked(CHUNK_SIZE):
+                        if len(chunk) == 0:
+                            continue
+                        # Skip the RIFF header
+                        if in_header:
+                            buffer += chunk
+                            if len(buffer) <= 36:
+                                continue
+                            else:
+                                fh = io.BytesIO(buffer)
+                                fh.seek(36)
+                                (data, size) = struct.unpack("<4sI", fh.read(8))
+                                while data != b"data":
+                                    fh.read(size)
+                                    (data, size) = struct.unpack("<4sI", fh.read(8))
+                                # Extract audio data after header
+                                audio_data = buffer[fh.tell() :]
+                                if len(audio_data) > 0:
+                                    await self.stop_ttfb_metrics()
+                                    frame = TTSAudioRawFrame(audio_data, self.sample_rate, 1)
+                                    yield frame
+                                in_header = False
+                        elif len(chunk) > 0:
+                            await self.stop_ttfb_metrics()
+                            frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
+                            yield frame
         except Exception as e:
             logger.error(f"{self} error generating TTS: {e}")
         finally:

pipecat/services/qwen/llm.py CHANGED Viewed

@@ -16,12 +16,6 @@ class QwenLLMService(OpenAILLMService):
     This service extends OpenAILLMService to connect to Qwen's API endpoint while
     maintaining full compatibility with OpenAI's interface and functionality.
-    Args:
-        api_key: The API key for accessing Qwen's API (DashScope API key).
-        base_url: Base URL for Qwen API. Defaults to "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".
-        model: The model identifier to use. Defaults to "qwen-plus".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """
     def __init__(
@@ -32,6 +26,14 @@ class QwenLLMService(OpenAILLMService):
         model: str = "qwen-plus",
         **kwargs,
     ):
+        """Initialize the Qwen LLM service.
+        Args:
+            api_key: The API key for accessing Qwen's API (DashScope API key).
+            base_url: Base URL for Qwen API. Defaults to "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".
+            model: The model identifier to use. Defaults to "qwen-plus".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
         logger.info(f"Initialized Qwen LLM service with model: {model}")

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl