dv-pipecat-ai 0.0.74.dev770-py3-none-any.whl → 0.0.82.dev776-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai has been flagged as potentially problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/cartesia/tts.py
CHANGED
@@ -30,15 +30,19 @@ from pipecat.frames.frames import (
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.tts_service import AudioContextWordTTSService, TTSService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.text.base_text_aggregator import BaseTextAggregator
 from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator
 from pipecat.utils.tracing.service_decorators import traced_tts
 
+# Suppress regex warnings from pydub (used by cartesia)
+warnings.filterwarnings("ignore", message="invalid escape sequence", category=SyntaxWarning)
+
+
 # See .env.example for Cartesia configuration needed
 try:
-    import websockets
     from cartesia import AsyncCartesia
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Cartesia, you need to `pip install pipecat-ai[cartesia]`.")
@@ -91,19 +95,6 @@ class CartesiaTTSService(AudioContextWordTTSService):
     Provides text-to-speech using Cartesia's streaming WebSocket API.
     Supports word-level timestamps, audio context management, and various voice
     customization options including speed and emotion controls.
-
-    Args:
-        api_key: Cartesia API key for authentication.
-        voice_id: ID of the voice to use for synthesis.
-        cartesia_version: API version string for Cartesia service.
-        url: WebSocket URL for Cartesia TTS API.
-        model: TTS model to use (e.g., "sonic-2").
-        sample_rate: Audio sample rate. If None, uses default.
-        encoding: Audio encoding format.
-        container: Audio container format.
-        params: Additional input parameters for voice customization.
-        text_aggregator: Custom text aggregator for processing input text.
-        **kwargs: Additional arguments passed to the parent service.
     """
 
     class InputParams(BaseModel):
@@ -112,7 +103,10 @@ class CartesiaTTSService(AudioContextWordTTSService):
         Parameters:
             language: Language to use for synthesis.
             speed: Voice speed control (string or float).
-            emotion: List of emotion controls
+            emotion: List of emotion controls.
+
+            .. deprecated:: 0.0.68
+                The `emotion` parameter is deprecated and will be removed in a future version.
         """
 
         language: Optional[Language] = Language.EN
@@ -132,8 +126,25 @@ class CartesiaTTSService(AudioContextWordTTSService):
         container: str = "raw",
         params: Optional[InputParams] = None,
         text_aggregator: Optional[BaseTextAggregator] = None,
+        aggregate_sentences: Optional[bool] = True,
         **kwargs,
     ):
+        """Initialize the Cartesia TTS service.
+
+        Args:
+            api_key: Cartesia API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            cartesia_version: API version string for Cartesia service.
+            url: WebSocket URL for Cartesia TTS API.
+            model: TTS model to use (e.g., "sonic-2").
+            sample_rate: Audio sample rate. If None, uses default.
+            encoding: Audio encoding format.
+            container: Audio container format.
+            params: Additional input parameters for voice customization.
+            text_aggregator: Custom text aggregator for processing input text.
+            aggregate_sentences: Whether to aggregate sentences within the TTSService.
+            **kwargs: Additional arguments passed to the parent service.
+        """
         # Aggregating sentences still gives cleaner-sounding results and fewer
         # artifacts than streaming one word at a time. On average, waiting for a
         # full sentence should only "cost" us 15ms or so with GPT-4o or a Llama
@@ -145,7 +156,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         # can use those to generate text frames ourselves aligned with the
         # playout timing of the audio!
         super().__init__(
-            aggregate_sentences=
+            aggregate_sentences=aggregate_sentences,
             push_text_frames=False,
             pause_frame_processing=True,
             sample_rate=sample_rate,
@@ -205,6 +216,54 @@ class CartesiaTTSService(AudioContextWordTTSService):
         """
         return language_to_cartesia_language(language)
 
+    def _is_cjk_language(self, language: str) -> bool:
+        """Check if the given language is CJK (Chinese, Japanese, Korean).
+
+        Args:
+            language: The language code to check.
+
+        Returns:
+            True if the language is Chinese, Japanese, or Korean.
+        """
+        cjk_languages = {"zh", "ja", "ko"}
+        base_lang = language.split("-")[0].lower()
+        return base_lang in cjk_languages
+
+    def _process_word_timestamps_for_language(
+        self, words: List[str], starts: List[float]
+    ) -> List[tuple[str, float]]:
+        """Process word timestamps based on the current language.
+
+        For CJK languages, Cartesia groups related characters in the same timestamp message.
+        For example, in Japanese a single message might be `['こ', 'ん', 'に', 'ち', 'は', '。']`.
+        We combine these into single words so the downstream aggregator can add natural
+        spacing between meaningful units rather than individual characters.
+
+        For non-CJK languages, words are already properly separated and are used as-is.
+
+        Args:
+            words: List of words/characters from Cartesia.
+            starts: List of start timestamps for each word/character.
+
+        Returns:
+            List of (word, start_time) tuples processed for the language.
+        """
+        current_language = self._settings.get("language", "en")
+
+        # Check if this is a CJK language
+        if self._is_cjk_language(current_language):
+            # For CJK languages, combine all characters in this message into one word
+            # using the first character's start time
+            if words and starts:
+                combined_word = "".join(words)
+                first_start = starts[0]
+                return [(combined_word, first_start)]
+            else:
+                return []
+        else:
+            # For non-CJK languages, use as-is
+            return list(zip(words, starts))
+
     def _build_msg(
         self, text: str = "", continue_transcript: bool = True, add_timestamps: bool = True
     ):
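The hunk above adds two helpers whose logic is fully visible in the diff. A minimal standalone sketch of that CJK grouping behavior (module-level functions instead of methods; the sample strings are illustrative). The remaining hunks for this file continue below.

# Sketch of the CJK timestamp grouping added above, outside the class.
def is_cjk_language(language: str) -> bool:
    # "zh-CN" -> "zh"; only the base language tag matters.
    return language.split("-")[0].lower() in {"zh", "ja", "ko"}


def process_word_timestamps(language: str, words: list, starts: list):
    if is_cjk_language(language):
        # Cartesia sends one character per entry for CJK; merge the whole
        # message into a single "word" stamped with the first start time.
        return [("".join(words), starts[0])] if words and starts else []
    # Non-CJK words are already separated; pair each with its timestamp.
    return list(zip(words, starts))


print(process_word_timestamps("ja", ["こ", "ん", "に", "ち", "は", "。"], [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]))
# [('こんにちは。', 0.0)]
print(process_word_timestamps("en-US", ["hello", "world"], [0.0, 0.4]))
# [('hello', 0.0), ('world', 0.4)]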
@@ -282,10 +341,10 @@ class CartesiaTTSService(AudioContextWordTTSService):
 
     async def _connect_websocket(self):
         try:
-            if self._websocket and self._websocket.
+            if self._websocket and self._websocket.state is State.OPEN:
                 return
             logger.debug("Connecting to Cartesia")
-            self._websocket = await
+            self._websocket = await websocket_connect(
                 f"{self._url}?api_key={self._api_key}&cartesia_version={self._cartesia_version}"
             )
         except Exception as e:
@@ -329,9 +388,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         self._context_id = None
 
     async def _receive_messages(self):
-        async for message in
-            self._get_websocket(), manager=self.task_manager
-        ):
+        async for message in self._get_websocket():
             msg = json.loads(message)
             if not msg or not self.audio_context_available(msg["context_id"]):
                 continue
@@ -340,9 +397,11 @@ class CartesiaTTSService(AudioContextWordTTSService):
                 await self.add_word_timestamps([("TTSStoppedFrame", 0), ("Reset", 0)])
                 await self.remove_audio_context(msg["context_id"])
             elif msg["type"] == "timestamps":
-
-
+                # Process the timestamps based on language before adding them
+                processed_timestamps = self._process_word_timestamps_for_language(
+                    msg["word_timestamps"]["words"], msg["word_timestamps"]["start"]
                 )
+                await self.add_word_timestamps(processed_timestamps)
             elif msg["type"] == "chunk":
                 await self.stop_ttfb_metrics()
                 self.start_word_timestamps()
@@ -375,7 +434,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         logger.debug(f"{self}: Generating TTS [{text}]")
 
         try:
-            if not self._websocket or self._websocket.
+            if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()
 
             if not self._context_id:
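These hunks migrate the service off the legacy top-level `websockets` API onto `websockets.asyncio.client.connect` with explicit `State` checks (the removed attribute accesses are truncated in this rendering and left as-is). A minimal sketch of the new pattern, assuming a placeholder endpoint URL; the HTTP-service hunks for this file follow below.

import asyncio

from websockets.asyncio.client import connect as websocket_connect
from websockets.protocol import State


async def main():
    # Hypothetical endpoint; Cartesia's real URL also carries query params.
    ws = await websocket_connect("wss://echo.example.com")
    try:
        if ws.state is State.OPEN:  # replaces the legacy boolean attribute check
            await ws.send("hello")
            print(await ws.recv())
    finally:
        await ws.close()
        assert ws.state is State.CLOSED


asyncio.run(main())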
@@ -406,18 +465,6 @@ class CartesiaHttpTTSService(TTSService):
     Provides text-to-speech using Cartesia's HTTP API for simpler, non-streaming
     synthesis. Suitable for use cases where streaming is not required and simpler
     integration is preferred.
-
-    Args:
-        api_key: Cartesia API key for authentication.
-        voice_id: ID of the voice to use for synthesis.
-        model: TTS model to use (e.g., "sonic-2").
-        base_url: Base URL for Cartesia HTTP API.
-        cartesia_version: API version string for Cartesia service.
-        sample_rate: Audio sample rate. If None, uses default.
-        encoding: Audio encoding format.
-        container: Audio container format.
-        params: Additional input parameters for voice customization.
-        **kwargs: Additional arguments passed to the parent TTSService.
     """
 
     class InputParams(BaseModel):
@@ -426,7 +473,10 @@ class CartesiaHttpTTSService(TTSService):
         Parameters:
             language: Language to use for synthesis.
             speed: Voice speed control (string or float).
-            emotion: List of emotion controls
+            emotion: List of emotion controls.
+
+            .. deprecated:: 0.0.68
+                The `emotion` parameter is deprecated and will be removed in a future version.
         """
 
         language: Optional[Language] = Language.EN
@@ -447,6 +497,20 @@ class CartesiaHttpTTSService(TTSService):
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Cartesia HTTP TTS service.
+
+        Args:
+            api_key: Cartesia API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            model: TTS model to use (e.g., "sonic-2").
+            base_url: Base URL for Cartesia HTTP API.
+            cartesia_version: API version string for Cartesia service.
+            sample_rate: Audio sample rate. If None, uses default.
+            encoding: Audio encoding format.
+            container: Audio container format.
+            params: Additional input parameters for voice customization.
+            **kwargs: Additional arguments passed to the parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
 
         params = params or CartesiaHttpTTSService.InputParams()
pipecat/services/cerebras/llm.py
CHANGED
@@ -9,8 +9,7 @@
 from typing import List
 
 from loguru import logger
-from openai import
-from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
+from openai.types.chat import ChatCompletionMessageParam
 
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
@@ -21,12 +20,6 @@ class CerebrasLLMService(OpenAILLMService):
 
     This service extends OpenAILLMService to connect to Cerebras's API endpoint while
     maintaining full compatibility with OpenAI's interface and functionality.
-
-    Args:
-        api_key: The API key for accessing Cerebras's API.
-        base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
-        model: The model identifier to use. Defaults to "llama-3.3-70b".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """
 
     def __init__(
@@ -37,6 +30,14 @@ class CerebrasLLMService(OpenAILLMService):
         model: str = "llama-3.3-70b",
         **kwargs,
     ):
+        """Initialize the Cerebras LLM service.
+
+        Args:
+            api_key: The API key for accessing Cerebras's API.
+            base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
+            model: The model identifier to use. Defaults to "llama-3.3-70b".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     def create_client(self, api_key=None, base_url=None, **kwargs):
@@ -53,20 +54,13 @@ class CerebrasLLMService(OpenAILLMService):
         logger.debug(f"Creating Cerebras client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-
+    def build_chat_completion_params(
         self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) ->
-        """
+    ) -> dict:
+        """Build parameters for Cerebras chat completion request.
 
-
-
-                and other settings for the chat completion.
-            messages: The list of messages comprising
-                the conversation history and current request.
-
-        Returns:
-            A streaming response of chat completion
-                chunks that can be processed asynchronously.
+        Cerebras supports a subset of OpenAI parameters, focusing on core
+        completion settings without advanced features like frequency/presence penalties.
         """
         params = {
             "model": self.model_name,
@@ -81,6 +75,4 @@ class CerebrasLLMService(OpenAILLMService):
         }
 
         params.update(self._settings["extra"])
-
-        chunks = await self._client.chat.completions.create(**params)
-        return chunks
+        return params
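The key refactor here: the method now returns the request parameters as a dict instead of performing the streaming call itself (the removed `chat.completions.create` call presumably moves into the shared base implementation). A sketch of what an OpenAI-compatible backend subclass looks like under this pattern; the class name is hypothetical and the base-class call site is assumed from the diff. The DeepSeek diff below follows the same pattern.

from typing import List

from openai.types.chat import ChatCompletionMessageParam

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai.llm import OpenAILLMService


class MyCompatLLMService(OpenAILLMService):  # hypothetical subclass
    def build_chat_completion_params(
        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
    ) -> dict:
        # Advertise only the parameters this backend supports; the base
        # service executes the actual streaming completion request.
        params = {
            "model": self.model_name,
            "messages": messages,
            # Backend-specific keys (temperature, top_p, ...) go here.
        }
        params.update(self._settings["extra"])
        return params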
pipecat/services/deepgram/stt.py
CHANGED
@@ -15,6 +15,7 @@ from loguru import logger
 from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
+    ErrorFrame,
     Frame,
     InterimTranscriptionFrame,
     StartFrame,
@@ -50,15 +51,6 @@ class DeepgramSTTService(STTService):
     Provides real-time speech recognition using Deepgram's WebSocket API.
     Supports configurable models, languages, VAD events, and various audio
     processing options.
-
-    Args:
-        api_key: Deepgram API key for authentication.
-        url: Deprecated. Use base_url instead.
-        base_url: Custom Deepgram API base URL.
-        sample_rate: Audio sample rate. If None, uses default or live_options value.
-        live_options: Deepgram LiveOptions for detailed configuration.
-        addons: Additional Deepgram features to enable.
-        **kwargs: Additional arguments passed to the parent STTService.
     """
 
     def __init__(
@@ -72,6 +64,21 @@ class DeepgramSTTService(STTService):
         addons: Optional[Dict] = None,
         **kwargs,
     ):
+        """Initialize the Deepgram STT service.
+
+        Args:
+            api_key: Deepgram API key for authentication.
+            url: Custom Deepgram API base URL.
+
+                .. deprecated:: 0.0.64
+                    Parameter `url` is deprecated, use `base_url` instead.
+
+            base_url: Custom Deepgram API base URL.
+            sample_rate: Audio sample rate. If None, uses default or live_options value.
+            live_options: Deepgram LiveOptions for detailed configuration.
+            addons: Additional Deepgram features to enable.
+            **kwargs: Additional arguments passed to the parent STTService.
+        """
         sample_rate = sample_rate or (live_options.sample_rate if live_options else None)
         super().__init__(sample_rate=sample_rate, **kwargs)
 
@@ -279,6 +286,7 @@ class DeepgramSTTService(STTService):
     async def _on_error(self, *args, **kwargs):
         error: ErrorResponse = kwargs["error"]
         self.logger.warning(f"{self} connection error, will retry: {error}")
+        await self.push_error(ErrorFrame(f"{error}"))
         await self.stop_all_metrics()
         # NOTE(aleix): we don't disconnect (i.e. call finish on the connection)
         # because this triggers more errors internally in the Deepgram SDK. So,
@@ -316,7 +324,7 @@ class DeepgramSTTService(STTService):
             await self.push_frame(
                 TranscriptionFrame(
                     transcript,
-
+                    self._user_id,
                     time_now_iso8601(),
                     language,
                     result=result,
@@ -330,7 +338,7 @@ class DeepgramSTTService(STTService):
             await self.push_frame(
                 InterimTranscriptionFrame(
                     transcript,
-
+                    self._user_id,
                     time_now_iso8601(),
                     language,
                     result=result,
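Beyond the docstring moves, two behavioral changes stand out: connection errors now surface as an `ErrorFrame` via `push_error`, and transcription frames carry `self._user_id` (the replaced argument is elided in this rendering). A minimal hypothetical processor that reacts to error frames passing through it, following the usual pipecat custom-processor idiom:

from pipecat.frames.frames import ErrorFrame, Frame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class ErrorWatcher(FrameProcessor):  # hypothetical
    """Logs any ErrorFrame that passes through this processor."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        if isinstance(frame, ErrorFrame):
            print(f"error observed: {frame.error}")
        await self.push_frame(frame, direction)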
pipecat/services/deepgram/tts.py
CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Deepgram text-to-speech service implementation.
+
+This module provides integration with Deepgram's text-to-speech API
+for generating speech from text using various voice models.
+"""
+
 from typing import AsyncGenerator, Optional
 
 from loguru import logger
@@ -27,6 +33,13 @@ except ModuleNotFoundError as e:
 
 
 class DeepgramTTSService(TTSService):
+    """Deepgram text-to-speech service.
+
+    Provides text-to-speech synthesis using Deepgram's streaming API.
+    Supports various voice models and audio encoding formats with
+    configurable sample rates and quality settings.
+    """
+
     def __init__(
         self,
         *,
@@ -37,6 +50,16 @@ class DeepgramTTSService(TTSService):
         encoding: str = "linear16",
         **kwargs,
     ):
+        """Initialize the Deepgram TTS service.
+
+        Args:
+            api_key: Deepgram API key for authentication.
+            voice: Voice model to use for synthesis. Defaults to "aura-2-helena-en".
+            base_url: Custom base URL for Deepgram API. Uses default if empty.
+            sample_rate: Audio sample rate in Hz. If None, uses service default.
+            encoding: Audio encoding format. Defaults to "linear16".
+            **kwargs: Additional arguments passed to parent TTSService class.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
 
         self._settings = {
@@ -48,10 +71,23 @@ class DeepgramTTSService(TTSService):
         self._deepgram_client = DeepgramClient(api_key, config=client_options)
 
     def can_generate_metrics(self) -> bool:
+        """Check if the service can generate metrics.
+
+        Returns:
+            True, as Deepgram TTS service supports metrics generation.
+        """
        return True
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Deepgram's TTS API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech, plus start/stop frames.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
 
         options = SpeakOptions(
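This file's changes are documentation-only: a module docstring, a class docstring, and method docstrings for `can_generate_metrics` and `run_tts`. The `run_tts` docstring describes an async generator; a sketch of consuming it directly (placeholder API key; in practice the service runs inside a pipeline, which handles start-up details such as sample-rate negotiation):

import asyncio

from pipecat.frames.frames import TTSAudioRawFrame
from pipecat.services.deepgram.tts import DeepgramTTSService


async def main():
    tts = DeepgramTTSService(api_key="YOUR_DEEPGRAM_API_KEY")  # placeholder
    audio_bytes = 0
    async for frame in tts.run_tts("Hello from Deepgram!"):
        # Start/stop frames bracket the raw audio frames.
        if isinstance(frame, TTSAudioRawFrame):
            audio_bytes += len(frame.audio)
    print(f"received {audio_bytes} bytes of audio")


asyncio.run(main())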
pipecat/services/deepseek/llm.py
CHANGED
@@ -9,8 +9,7 @@
 from typing import List
 
 from loguru import logger
-from openai import
-from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
+from openai.types.chat import ChatCompletionMessageParam
 
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
@@ -21,12 +20,6 @@ class DeepSeekLLMService(OpenAILLMService):
 
     This service extends OpenAILLMService to connect to DeepSeek's API endpoint while
     maintaining full compatibility with OpenAI's interface and functionality.
-
-    Args:
-        api_key: The API key for accessing DeepSeek's API.
-        base_url: The base URL for DeepSeek API. Defaults to "https://api.deepseek.com/v1".
-        model: The model identifier to use. Defaults to "deepseek-chat".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """
 
     def __init__(
@@ -37,6 +30,14 @@ class DeepSeekLLMService(OpenAILLMService):
         model: str = "deepseek-chat",
         **kwargs,
     ):
+        """Initialize the DeepSeek LLM service.
+
+        Args:
+            api_key: The API key for accessing DeepSeek's API.
+            base_url: The base URL for DeepSeek API. Defaults to "https://api.deepseek.com/v1".
+            model: The model identifier to use. Defaults to "deepseek-chat".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     def create_client(self, api_key=None, base_url=None, **kwargs):
@@ -53,20 +54,12 @@ class DeepSeekLLMService(OpenAILLMService):
         logger.debug(f"Creating DeepSeek client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-
+    def _build_chat_completion_params(
         self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) ->
-        """
+    ) -> dict:
+        """Build parameters for DeepSeek chat completion request.
 
-
-            context: The context object containing tools configuration
-                and other settings for the chat completion.
-            messages: The list of messages comprising the conversation
-                history and current request.
-
-        Returns:
-            A streaming response of chat completion chunks that can be
-            processed asynchronously.
+        DeepSeek doesn't support some OpenAI parameters like seed and max_completion_tokens.
         """
         params = {
             "model": self.model_name,
@@ -83,6 +76,4 @@ class DeepSeekLLMService(OpenAILLMService):
         }
 
         params.update(self._settings["extra"])
-
-        chunks = await self._client.chat.completions.create(**params)
-        return chunks
+        return params