dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/neuphonic/tts.py
CHANGED

@@ -4,11 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #

+"""Neuphonic text-to-speech service implementations.
+
+This module provides WebSocket and HTTP-based integrations with Neuphonic's
+text-to-speech API for real-time audio synthesis.
+"""
+
 import asyncio
 import base64
 import json
 from typing import Any, AsyncGenerator, Mapping, Optional

+import aiohttp
 from loguru import logger
 from pydantic import BaseModel

@@ -29,12 +36,11 @@ from pipecat.frames.frames import (
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.tts_service import InterruptibleTTSService, TTSService
 from pipecat.transcriptions.language import Language
-from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
 from pipecat.utils.tracing.service_decorators import traced_tts

 try:
-    import
-    from
+    from websockets.asyncio.client import connect as websocket_connect
+    from websockets.protocol import State
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error("In order to use Neuphonic, you need to `pip install pipecat-ai[neuphonic]`.")
@@ -42,6 +48,14 @@ except ModuleNotFoundError as e:


 def language_to_neuphonic_lang_code(language: Language) -> Optional[str]:
+    """Convert a Language enum to Neuphonic language code.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding Neuphonic language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.DE: "de",
         Language.EN: "en",
@@ -69,7 +83,21 @@ def language_to_neuphonic_lang_code(language: Language) -> Optional[str]:


 class NeuphonicTTSService(InterruptibleTTSService):
+    """Neuphonic real-time text-to-speech service using WebSocket streaming.
+
+    Provides real-time text-to-speech synthesis using Neuphonic's WebSocket API.
+    Supports interruption handling, keepalive connections, and configurable voice
+    parameters for high-quality speech generation.
+    """
+
     class InputParams(BaseModel):
+        """Input parameters for Neuphonic TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+        """
+
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0

@@ -82,10 +110,23 @@ class NeuphonicTTSService(InterruptibleTTSService):
         sample_rate: Optional[int] = 22050,
         encoding: str = "pcm_linear",
         params: Optional[InputParams] = None,
+        aggregate_sentences: Optional[bool] = True,
         **kwargs,
     ):
+        """Initialize the Neuphonic TTS service.
+
+        Args:
+            api_key: Neuphonic API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            url: WebSocket URL for the Neuphonic API.
+            sample_rate: Audio sample rate in Hz. Defaults to 22050.
+            encoding: Audio encoding format. Defaults to "pcm_linear".
+            params: Additional input parameters for TTS configuration.
+            aggregate_sentences: Whether to aggregate sentences within the TTSService.
+            **kwargs: Additional arguments passed to parent InterruptibleTTSService.
+        """
         super().__init__(
-            aggregate_sentences=
+            aggregate_sentences=aggregate_sentences,
             push_text_frames=False,
             push_stop_frames=True,
             stop_frame_timeout_s=2.0,
@@ -114,12 +155,26 @@ class NeuphonicTTSService(InterruptibleTTSService):
         self._keepalive_task = None

     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Neuphonic service supports metrics generation.
+        """
         return True

     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Neuphonic service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Neuphonic-specific language code, or None if not supported.
+        """
         return language_to_neuphonic_lang_code(language)

     async def _update_settings(self, settings: Mapping[str, Any]):
+        """Update service settings and reconnect with new configuration."""
         if "voice_id" in settings:
             self.set_voice(settings["voice_id"])

@@ -129,28 +184,56 @@ class NeuphonicTTSService(InterruptibleTTSService):
         logger.info(f"Switching TTS to settings: [{self._settings}]")

     async def start(self, frame: StartFrame):
+        """Start the Neuphonic TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)
         await self._connect()

     async def stop(self, frame: EndFrame):
+        """Stop the Neuphonic TTS service.
+
+        Args:
+            frame: The end frame.
+        """
         await super().stop(frame)
         await self._disconnect()

     async def cancel(self, frame: CancelFrame):
+        """Cancel the Neuphonic TTS service.
+
+        Args:
+            frame: The cancel frame.
+        """
         await super().cancel(frame)
         await self._disconnect()

     async def flush_audio(self):
+        """Flush any pending audio synthesis by sending stop command."""
         if self._websocket:
             msg = {"text": "<STOP>"}
             await self._websocket.send(json.dumps(msg))

     async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Push a frame downstream with special handling for stop conditions.
+
+        Args:
+            frame: The frame to push.
+            direction: The direction to push the frame.
+        """
         await super().push_frame(frame, direction)
         if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
             self._started = False

     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames with special handling for speech control.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
         await super().process_frame(frame, direction)

         # If we received a TTSSpeakFrame and the LLM response included text (it
@@ -164,6 +247,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
             await self.resume_processing_frames()

     async def _connect(self):
+        """Connect to Neuphonic WebSocket and start background tasks."""
         await self._connect_websocket()

         if self._websocket and not self._receive_task:
@@ -173,6 +257,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
             self._keepalive_task = self.create_task(self._keepalive_task_handler())

     async def _disconnect(self):
+        """Disconnect from Neuphonic WebSocket and clean up tasks."""
         if self._receive_task:
             await self.cancel_task(self._receive_task)
             self._receive_task = None
@@ -184,8 +269,9 @@ class NeuphonicTTSService(InterruptibleTTSService):
         await self._disconnect_websocket()

     async def _connect_websocket(self):
+        """Establish WebSocket connection to Neuphonic API."""
         try:
-            if self._websocket and self._websocket.
+            if self._websocket and self._websocket.state is State.OPEN:
                 return

             logger.debug("Connecting to Neuphonic")
@@ -195,20 +281,25 @@ class NeuphonicTTSService(InterruptibleTTSService):
                 "voice_id": self._voice_id,
             }

-            query_params = [
+            query_params = []
             for key, value in tts_config.items():
                 if value is not None:
                     query_params.append(f"{key}={value}")

-            url = f"{self._url}/speak/{self._settings['lang_code']}
+            url = f"{self._url}/speak/{self._settings['lang_code']}"
+            if query_params:
+                url += f"?{'&'.join(query_params)}"

-
+            headers = {"x-api-key": self._api_key}
+
+            self._websocket = await websocket_connect(url, additional_headers=headers)
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
             await self._call_event_handler("on_connection_error", f"{e}")

     async def _disconnect_websocket(self):
+        """Close WebSocket connection and clean up state."""
         try:
             await self.stop_all_metrics()

@@ -222,10 +313,11 @@ class NeuphonicTTSService(InterruptibleTTSService):
             self._websocket = None

     async def _receive_messages(self):
-
+        """Receive and process messages from Neuphonic WebSocket."""
+        async for message in self._websocket:
             if isinstance(message, str):
                 msg = json.loads(message)
-                if msg.get("data"
+                if msg.get("data") and msg["data"].get("audio"):
                     await self.stop_ttfb_metrics()

                     audio = base64.b64decode(msg["data"]["audio"])
@@ -233,24 +325,40 @@ class NeuphonicTTSService(InterruptibleTTSService):
                     await self.push_frame(frame)

     async def _keepalive_task_handler(self):
-
+        """Handle keepalive messages to maintain WebSocket connection."""
+        KEEPALIVE_SLEEP = 10
         while True:
-            self.reset_watchdog()
             await asyncio.sleep(KEEPALIVE_SLEEP)
-            await self.
+            await self._send_keepalive()
+
+    async def _send_keepalive(self):
+        """Send keepalive message to maintain connection."""
+        if self._websocket:
+            # Send empty text for keepalive
+            msg = {"text": ""}
+            await self._websocket.send(json.dumps(msg))

     async def _send_text(self, text: str):
+        """Send text to Neuphonic WebSocket for synthesis."""
         if self._websocket:
-            msg = {"text": text}
+            msg = {"text": f"{text} <STOP>"}
             logger.debug(f"Sending text to websocket: {msg}")
             await self._websocket.send(json.dumps(msg))

     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using Neuphonic's streaming API.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"Generating TTS: [{text}]")

         try:
-            if not self._websocket or self._websocket.
+            if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()

             try:
@@ -274,19 +382,21 @@ class NeuphonicTTSService(InterruptibleTTSService):


 class NeuphonicHttpTTSService(TTSService):
-    """Neuphonic
+    """Neuphonic text-to-speech service using HTTP streaming.

-
-
-
-        url: Base URL for the Neuphonic API (default: "https://api.neuphonic.com")
-        sample_rate: Sample rate for audio output (default: 22050Hz)
-        encoding: Audio encoding format (default: "pcm_linear")
-        params: Additional parameters for TTS generation including language and speed
-        **kwargs: Additional keyword arguments passed to the parent class
+    Provides text-to-speech synthesis using Neuphonic's HTTP API with server-sent
+    events for streaming audio delivery. Suitable for applications that prefer
+    HTTP-based communication over WebSocket connections.
     """

     class InputParams(BaseModel):
+        """Input parameters for Neuphonic HTTP TTS configuration.
+
+        Parameters:
+            language: Language for synthesis. Defaults to English.
+            speed: Speech speed multiplier. Defaults to 1.0.
+        """
+
         language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0

@@ -295,66 +405,183 @@ class NeuphonicHttpTTSService(TTSService):
         *,
         api_key: str,
         voice_id: Optional[str] = None,
+        aiohttp_session: aiohttp.ClientSession,
         url: str = "https://api.neuphonic.com",
         sample_rate: Optional[int] = 22050,
-        encoding: str = "pcm_linear",
+        encoding: Optional[str] = "pcm_linear",
         params: Optional[InputParams] = None,
         **kwargs,
     ):
+        """Initialize the Neuphonic HTTP TTS service.
+
+        Args:
+            api_key: Neuphonic API key for authentication.
+            voice_id: ID of the voice to use for synthesis.
+            aiohttp_session: Shared aiohttp session for HTTP requests.
+            url: Base URL for the Neuphonic HTTP API.
+            sample_rate: Audio sample rate in Hz. Defaults to 22050.
+            encoding: Audio encoding format. Defaults to "pcm_linear".
+            params: Additional input parameters for TTS configuration.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)

         params = params or NeuphonicHttpTTSService.InputParams()

         self._api_key = api_key
-        self.
-        self.
-
-
-
-            "sampling_rate": sample_rate,
-        }
+        self._session = aiohttp_session
+        self._base_url = url.rstrip("/")
+        self._lang_code = self.language_to_service_language(params.language) or "en"
+        self._speed = params.speed
+        self._encoding = encoding
         self.set_voice(voice_id)

     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as Neuphonic HTTP service supports metrics generation.
+        """
         return True

+    def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to Neuphonic service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The Neuphonic-specific language code, or None if not supported.
+        """
+        return language_to_neuphonic_lang_code(language)
+
     async def start(self, frame: StartFrame):
+        """Start the Neuphonic HTTP TTS service.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         await super().start(frame)

     async def flush_audio(self):
+        """Flush any pending audio synthesis.
+
+        Note:
+            HTTP-based service doesn't require explicit flushing.
+        """
         pass

+    def _parse_sse_message(self, message: str) -> dict | None:
+        """Parse a Server-Sent Event message.
+
+        Args:
+            message: The SSE message to parse.
+
+        Returns:
+            Parsed message dictionary or None if not a data message.
+        """
+        message = message.strip()
+
+        if not message or "data" not in message:
+            return None
+
+        try:
+            # Split on ": " and take the part after "data: "
+            _, data_content = message.split(": ", 1)
+
+            if not data_content or data_content == "[DONE]":
+                return None
+
+            message_dict = json.loads(data_content)
+
+            # Check for errors in the response
+            if message_dict.get("errors") is not None:
+                raise Exception(
+                    f"Neuphonic API error {message_dict.get('status_code', 'unknown')}: {message_dict['errors']}"
+                )
+
+            return message_dict
+        except (ValueError, json.JSONDecodeError) as e:
+            logger.warning(f"Failed to parse SSE message: {e}")
+            return None
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Neuphonic streaming API.

         Args:
-            text: The text to convert to speech
+            text: The text to convert to speech.
+
         Yields:
-
+            Frame: Audio frames containing the synthesized speech and status information.
         """
         logger.debug(f"Generating TTS: [{text}]")

-
+        url = f"{self._base_url}/sse/speak/{self._lang_code}"

-
+        headers = {
+            "X-API-KEY": self._api_key,
+            "Content-Type": "application/json",
+        }
+
+        payload = {
+            "text": text,
+            "lang_code": self._lang_code,
+            "encoding": self._encoding,
+            "sampling_rate": self.sample_rate,
+            "speed": self._speed,
+        }
+
+        if self._voice_id:
+            payload["voice_id"] = self._voice_id

         try:
             await self.start_ttfb_metrics()
-            response = sse.send(text, TTSConfig(**self._settings, voice_id=self._voice_id))

-
-
+            async with self._session.post(url, json=payload, headers=headers) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    error_message = f"Neuphonic API error: HTTP {response.status} - {error_text}"
+                    logger.error(error_message)
+                    yield ErrorFrame(error=error_message)
+                    return

-
-
-
-
-
-
-
+                await self.start_tts_usage_metrics(text)
+                yield TTSStartedFrame()
+
+                # Process SSE stream line by line
+                async for line in response.content:
+                    if not line:
+                        continue
+
+                    message = line.decode("utf-8", errors="ignore")
+                    if not message.strip():
+                        continue
+
+                    try:
+                        parsed_message = self._parse_sse_message(message)
+
+                        if (
+                            parsed_message is not None
+                            and parsed_message.get("data", {}).get("audio") is not None
+                        ):
+                            audio_b64 = parsed_message["data"]["audio"]
+                            audio_bytes = base64.b64decode(audio_b64)
+
+                            await self.stop_ttfb_metrics()
+                            yield TTSAudioRawFrame(audio_bytes, self.sample_rate, 1)
+
+                    except Exception as e:
+                        logger.error(f"Error processing SSE message: {e}")
+                        # Don't yield error frame for individual message failures
+                        continue
+
+        except asyncio.CancelledError:
+            logger.debug("TTS generation cancelled")
+            raise
         except Exception as e:
-            logger.
-            yield ErrorFrame(error=str(e))
+            logger.exception(f"Error in run_tts: {e}")
+            yield ErrorFrame(error=f"Neuphonic TTS error: {str(e)}")
         finally:
+            await self.stop_ttfb_metrics()
             yield TTSStoppedFrame()
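For context on the constructor hunk above: `aiohttp_session` is now a required argument of `NeuphonicHttpTTSService`. A minimal usage sketch (not part of the diff), based only on the signature shown in the hunk; the helper name `make_neuphonic_http_tts`, the module path taken from the file list, and the inline session creation are illustrative assumptions:

import aiohttp

from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService


async def make_neuphonic_http_tts(api_key: str) -> NeuphonicHttpTTSService:
    # The caller now owns the HTTP session and passes it in via the new
    # required `aiohttp_session` parameter introduced in this diff.
    session = aiohttp.ClientSession()
    return NeuphonicHttpTTSService(
        api_key=api_key,
        aiohttp_session=session,
        sample_rate=22050,       # defaults shown in the diff
        encoding="pcm_linear",
    )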
pipecat/services/nim/llm.py
CHANGED

@@ -21,12 +21,6 @@ class NimLLMService(OpenAILLMService):
     This service extends OpenAILLMService to work with NVIDIA's NIM API while maintaining
     compatibility with the OpenAI-style interface. It specifically handles the difference
     in token usage reporting between NIM (incremental) and OpenAI (final summary).
-
-    Args:
-        api_key: The API key for accessing NVIDIA's NIM API.
-        base_url: The base URL for NIM API. Defaults to "https://integrate.api.nvidia.com/v1".
-        model: The model identifier to use. Defaults to "nvidia/llama-3.1-nemotron-70b-instruct".
-        **kwargs: Additional keyword arguments passed to OpenAILLMService.
     """

     def __init__(
@@ -37,6 +31,14 @@ class NimLLMService(OpenAILLMService):
         model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
         **kwargs,
     ):
+        """Initialize the NimLLMService.
+
+        Args:
+            api_key: The API key for accessing NVIDIA's NIM API.
+            base_url: The base URL for NIM API. Defaults to "https://integrate.api.nvidia.com/v1".
+            model: The model identifier to use. Defaults to "nvidia/llama-3.1-nemotron-70b-instruct".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
         # Counters for accumulating token usage metrics
         self._prompt_tokens = 0

pipecat/services/ollama/llm.py
CHANGED

@@ -6,6 +6,8 @@

 """OLLama LLM service implementation for Pipecat AI framework."""

+from loguru import logger
+
 from pipecat.services.openai.llm import OpenAILLMService


@@ -14,12 +16,30 @@ class OLLamaLLMService(OpenAILLMService):

     This service extends OpenAILLMService to work with locally hosted OLLama models,
     providing a compatible interface for running large language models locally.
-
-    Args:
-        model: The OLLama model to use. Defaults to "llama2".
-        base_url: The base URL for the OLLama API endpoint.
-            Defaults to "http://localhost:11434/v1".
     """

-    def __init__(
-
+    def __init__(
+        self, *, model: str = "llama2", base_url: str = "http://localhost:11434/v1", **kwargs
+    ):
+        """Initialize OLLama LLM service.
+
+        Args:
+            model: The OLLama model to use. Defaults to "llama2".
+            base_url: The base URL for the OLLama API endpoint.
+                Defaults to "http://localhost:11434/v1".
+            **kwargs: Additional keyword arguments passed to OpenAILLMService.
+        """
+        super().__init__(model=model, base_url=base_url, api_key="ollama", **kwargs)
+
+    def create_client(self, base_url=None, **kwargs):
+        """Create OpenAI-compatible client for Ollama.
+
+        Args:
+            base_url: The base URL for the API. If None, uses instance base_url.
+            **kwargs: Additional keyword arguments passed to the parent create_client method.
+
+        Returns:
+            An OpenAI-compatible client configured for Ollama.
+        """
+        logger.debug(f"Creating Ollama client with api {base_url}")
+        return super().create_client(base_url=base_url, **kwargs)