PyPI - dv-pipecat-ai - Versions diffs - 0.0.85.dev840__py3-none-any.whl → 0.0.85.dev842__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.85.dev840__py3-none-any.whl → 0.0.85.dev842__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (7) hide show

{dv_pipecat_ai-0.0.85.dev840.dist-info → dv_pipecat_ai-0.0.85.dev842.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dv-pipecat-ai
-Version: 0.0.85.dev840
+Version: 0.0.85.dev842
 Summary: An open source framework for voice (and multimodal) assistants
 License-Expression: BSD-2-Clause
 Project-URL: Source, https://github.com/pipecat-ai/pipecat

{dv_pipecat_ai-0.0.85.dev840.dist-info → dv_pipecat_ai-0.0.85.dev842.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-dv_pipecat_ai-0.0.85.dev840.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
+dv_pipecat_ai-0.0.85.dev842.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
 pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
 pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -210,7 +210,7 @@ pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_o
 pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
 pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
 pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
-pipecat/services/deepgram/stt.py,sha256=IvdKvo23PxhKoWTJDxuK4Uoo0wCtkFGAE_QrMUoGdYM,13732
+pipecat/services/deepgram/stt.py,sha256=fzKirTjTopwXNQEEPuUOIgk4AMvTJQcrh6H11w13q2c,16185
 pipecat/services/deepgram/tts.py,sha256=H_2WCJEx3_L4ytrHHRNkA-6GKTd1coou_vvTfiEodpQ,3745
 pipecat/services/deepgram/flux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/services/deepgram/flux/stt.py,sha256=yCZodrHAOShgYy_GbdviX8iAuh36dBgDL41gHMXVxEM,25887
@@ -329,7 +329,7 @@ pipecat/services/sarvam/tts.py,sha256=lrwfdC53kZ7f2QPgNRxzryISNkrJCvNtlZ-19-iXg9
 pipecat/services/simli/__init__.py,sha256=cbDcqOaGsEgKbGYKpJ1Vv7LN4ZjOWA04sE84WW5vgQI,257
 pipecat/services/simli/video.py,sha256=Zu2XLvl2Y6VHaWzT9wEdzW9d0EYoZyzYLxjQFyV8vho,8320
 pipecat/services/soniox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pipecat/services/soniox/stt.py,sha256=AhJF2YOzmqgB80x22jocgzr3neYCBMyxzP_WjkYR9Gc,15441
+pipecat/services/soniox/stt.py,sha256=Ndml6QvPQ1WZBvdGT3LSg-LLWwrZ8KlqW8wBBFsQrrM,16509
 pipecat/services/speechmatics/__init__.py,sha256=Jgq1fqrZVkpWC21D79L1cn5Ub8PnYgnnCaqC5pOlbIc,89
 pipecat/services/speechmatics/stt.py,sha256=GLGJzlMSeZ1WzTOMjhKXDl5JYkqGhnFTbP3o0ez0hSw,44696
 pipecat/services/tavus/__init__.py,sha256=SNyyi2Xq6tXIihDG2Bwvmg6Srbd-uWd1RwG-NKWcPuI,257
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
 pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
 pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
 pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
-dv_pipecat_ai-0.0.85.dev840.dist-info/METADATA,sha256=ALfdKasSbWLkqVlc0XSkl9lo6qzO7Wpior0WxVFzWZk,32955
-dv_pipecat_ai-0.0.85.dev840.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dv_pipecat_ai-0.0.85.dev840.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
-dv_pipecat_ai-0.0.85.dev840.dist-info/RECORD,,
+dv_pipecat_ai-0.0.85.dev842.dist-info/METADATA,sha256=8uxfODboEJQwB04vWViiyMIo4KyT-pGag_ChfLt8STo,32955
+dv_pipecat_ai-0.0.85.dev842.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dv_pipecat_ai-0.0.85.dev842.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
+dv_pipecat_ai-0.0.85.dev842.dist-info/RECORD,,

pipecat/services/deepgram/stt.py CHANGED Viewed

@@ -62,6 +62,8 @@ class DeepgramSTTService(STTService):
         sample_rate: Optional[int] = None,
         live_options: Optional[LiveOptions] = None,
         addons: Optional[Dict] = None,
+        max_connect_retries: int = 3,
+        connect_timeout_s: float = 2.5,
         **kwargs,
     ):
         """Initialize the Deepgram STT service.
@@ -77,6 +79,9 @@ class DeepgramSTTService(STTService):
             sample_rate: Audio sample rate. If None, uses default or live_options value.
             live_options: Deepgram LiveOptions for detailed configuration.
             addons: Additional Deepgram features to enable.
+            max_connect_retries: Maximum number of connection attempts before giving up.
+            connect_timeout_s: Maximum time in seconds to wait for a connection attempt.
+                Connection retries wait 100ms between attempts.
             **kwargs: Additional arguments passed to the parent STTService.
         """
         sample_rate = sample_rate or (live_options.sample_rate if live_options else None)
@@ -121,9 +126,9 @@ class DeepgramSTTService(STTService):
         self._settings = merged_options
         self._addons = addons
-        # Connection retry settings
-        self._max_connect_retries = 3
-        self._connect_retry_delay_s = 0.1
+        # Connection retry settings (100ms delay between retries)
+        self._max_connect_retries = max_connect_retries
+        self._connect_timeout_s = connect_timeout_s
         self._client = DeepgramClient(
             api_key,
@@ -131,8 +136,8 @@ class DeepgramSTTService(STTService):
                 url=base_url,
                 options={
                     "keepalive": "true",
-                    "open_timeout": 3,  # Max wait for only 3 seconds for the connection to establish #
-                    # "termination_exception_connect": True,  # Enable exception propagation
+                    # Note: Connection timeout is enforced by asyncio.wait_for() in _connect()
+                    # with the connect_timeout_s parameter (default 2.5s)
                 },
                 verbose=logging.ERROR,  # Enable error level and above logging
             ),
@@ -227,6 +232,11 @@ class DeepgramSTTService(STTService):
         for attempt in range(self._max_connect_retries):
             try:
+                # Clean up any previous connection attempt in background (non-blocking)
+                if hasattr(self, "_connection") and self._connection is not None:
+                    old_conn = self._connection
+                    asyncio.create_task(self._cleanup_abandoned_connection(old_conn))
                 # Create a new connection object for a clean attempt
                 self._connection: AsyncListenWebSocketClient = self._client.listen.asyncwebsocket.v(
                     "1"
@@ -250,10 +260,25 @@ class DeepgramSTTService(STTService):
                         self._on_utterance_end,
                     )
-                # Attempt to start the connection (timeout handled by open_timeout config)
-                if await self._connection.start(options=self._settings, addons=self._addons):
-                    self.logger.info("Successfully connected to Deepgram.")
-                    return  # Exit the method on success
+                try:
+                    start_result = await asyncio.wait_for(
+                        self._connection.start(options=self._settings, addons=self._addons),
+                        timeout=self._connect_timeout_s,
+                    )
+                except asyncio.TimeoutError:
+                    self.logger.warning(
+                        f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} timed out after {self._connect_timeout_s} second(s)."
+                    )
+                    start_result = False
+                except Exception as start_error:
+                    self.logger.warning(
+                        f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed with an exception: {start_error}"
+                    )
+                    start_result = False
+                else:
+                    if start_result:
+                        self.logger.info("Successfully connected to Deepgram.")
+                        return  # Exit the method on success
                 self.logger.warning(
                     f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed."
@@ -264,17 +289,21 @@ class DeepgramSTTService(STTService):
                     f"Deepgram connection attempt {attempt + 1}/{self._max_connect_retries} failed with an exception: {e}"
                 )
-            # If this is not the last attempt, wait briefly before retrying
+            # If this is not the last attempt, wait 100ms before retrying
             if attempt < self._max_connect_retries - 1:
-                self.logger.info(f"Retrying in {self._connect_retry_delay_s} second(s)...")
-                await asyncio.sleep(self._connect_retry_delay_s)
+                self.logger.info("Retrying in 0.1 second(s)...")
+                await asyncio.sleep(0.1)
-        self.logger.error(
+        error_msg = (
             f"{self}: unable to connect to Deepgram after {self._max_connect_retries} attempts."
         )
+        self.logger.error(error_msg)
+        await self.push_error(ErrorFrame(error_msg, fatal=True))
     async def _disconnect(self):
-        if self._connection.is_connected:
+        # Guard against missing connection instance and ensure proper async check
+        connection: AsyncListenWebSocketClient = getattr(self, "_connection", None)
+        if connection and await connection.is_connected():
             self.logger.debug("Disconnecting from Deepgram")
             # Deepgram swallows asyncio.CancelledError internally which prevents
             # proper cancellation propagation. This issue was found with
@@ -284,7 +313,25 @@ class DeepgramSTTService(STTService):
             # Deepgram disconnection was still finishing and therefore
             # preventing the task cancellation that occurs during `cleanup()`.
             # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
-            await self._connection.finish()
+            await connection.finish()
+    async def _cleanup_abandoned_connection(self, conn: AsyncListenWebSocketClient):
+        """Clean up abandoned connection attempt in background (non-blocking).
+        This prevents zombie connections from triggering spurious error events
+        when they eventually timeout and call _on_error().
+        Args:
+            conn: The abandoned connection object to clean up.
+        """
+        try:
+            # Try to finish with short timeout
+            await asyncio.wait_for(conn.finish(), timeout=5)
+            self.logger.debug("Successfully cleaned up abandoned connection")
+        except Exception as e:
+            # Ignore all cleanup errors - connection might not be fully started
+            # This is expected and fine - we just want best-effort cleanup
+            self.logger.debug(f"Abandoned connection cleanup failed: {e}")
     async def start_metrics(self):
         """Start TTFB and processing metrics collection."""

pipecat/services/soniox/stt.py CHANGED Viewed

@@ -49,6 +49,33 @@ END_TOKEN = "<end>"
 FINALIZED_TOKEN = "<fin>"
+class SonioxContextGeneralItem(BaseModel):
+    """Represents a key-value pair for structured general context information."""
+    key: str
+    value: str
+class SonioxContextTranslationTerm(BaseModel):
+    """Represents a custom translation mapping for ambiguous or domain-specific terms."""
+    source: str
+    target: str
+class SonioxContextObject(BaseModel):
+    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
+    Learn more about context in the documentation:
+    https://soniox.com/docs/stt/concepts/context
+    """
+    general: Optional[List[SonioxContextGeneralItem]] = None
+    text: Optional[str] = None
+    terms: Optional[List[str]] = None
+    translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
 class SonioxInputParams(BaseModel):
     """Real-time transcription settings.
@@ -60,9 +87,9 @@ class SonioxInputParams(BaseModel):
         audio_format: Audio format to use for transcription.
         num_channels: Number of channels to use for transcription.
         language_hints: List of language hints to use for transcription.
-        context: Customization for transcription.
-        enable_non_final_tokens: Whether to enable non-final tokens. If false, only final tokens will be returned.
-        max_non_final_tokens_duration_ms: Maximum duration of non-final tokens.
+        context: Customization for transcription. String for models with context_version 1 and ContextObject for models with context_version 2.
+        enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
+        enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
         client_reference_id: Client reference ID to use for transcription.
     """
@@ -72,10 +99,10 @@ class SonioxInputParams(BaseModel):
     num_channels: Optional[int] = 1
     language_hints: Optional[List[Language]] = None
-    context: Optional[str] = None
+    context: Optional[SonioxContextObject | str] = None
-    enable_non_final_tokens: Optional[bool] = True
-    max_non_final_tokens_duration_ms: Optional[int] = None
+    enable_speaker_diarization: Optional[bool] = False
+    enable_language_identification: Optional[bool] = False
     client_reference_id: Optional[str] = None
@@ -173,6 +200,10 @@ class SonioxSTTService(STTService):
         # Either one or the other is required.
         enable_endpoint_detection = not self._vad_force_turn_endpoint
+        context = self._params.context
+        if isinstance(context, SonioxContextObject):
+            context = context.model_dump()
         # Send the initial configuration message.
         config = {
             "api_key": self._api_key,
@@ -182,9 +213,9 @@ class SonioxSTTService(STTService):
             "enable_endpoint_detection": enable_endpoint_detection,
             "sample_rate": self.sample_rate,
             "language_hints": _prepare_language_hints(self._params.language_hints),
-            "context": self._params.context,
-            "enable_non_final_tokens": self._params.enable_non_final_tokens,
-            "max_non_final_tokens_duration_ms": self._params.max_non_final_tokens_duration_ms,
+            "context": context,
+            "enable_speaker_diarization": self._params.enable_speaker_diarization,
+            "enable_language_identification": self._params.enable_language_identification,
             "client_reference_id": self._params.client_reference_id,
         }

{dv_pipecat_ai-0.0.85.dev840.dist-info → dv_pipecat_ai-0.0.85.dev842.dist-info}/WHEEL RENAMED Viewed

File without changes

{dv_pipecat_ai-0.0.85.dev840.dist-info → dv_pipecat_ai-0.0.85.dev842.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dv_pipecat_ai-0.0.85.dev840.dist-info → dv_pipecat_ai-0.0.85.dev842.dist-info}/top_level.txt RENAMED Viewed

File without changes

dv-pipecat-ai 0.0.85.dev840__py3-none-any.whl → 0.0.85.dev842__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.85.dev840__py3-none-any.whl → 0.0.85.dev842__py3-none-any.whl