dv-pipecat-ai 0.0.85.dev837__py3-none-any.whl → 0.0.85.dev841__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dv-pipecat-ai
- Version: 0.0.85.dev837
+ Version: 0.0.85.dev841
  Summary: An open source framework for voice (and multimodal) assistants
  License-Expression: BSD-2-Clause
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -26,6 +26,7 @@ Requires-Dist: numpy<3,>=1.26.4
  Requires-Dist: Pillow<12,>=11.1.0
  Requires-Dist: protobuf~=5.29.3
  Requires-Dist: pydantic<3,>=2.10.6
+ Requires-Dist: PyJWT<3,>=2.8.0
  Requires-Dist: pyloudnorm~=0.1.1
  Requires-Dist: resampy~=0.4.3
  Requires-Dist: soxr~=0.5.0
@@ -1,4 +1,4 @@
- dv_pipecat_ai-0.0.85.dev837.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
+ dv_pipecat_ai-0.0.85.dev841.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -329,7 +329,7 @@ pipecat/services/sarvam/tts.py,sha256=lrwfdC53kZ7f2QPgNRxzryISNkrJCvNtlZ-19-iXg9
  pipecat/services/simli/__init__.py,sha256=cbDcqOaGsEgKbGYKpJ1Vv7LN4ZjOWA04sE84WW5vgQI,257
  pipecat/services/simli/video.py,sha256=Zu2XLvl2Y6VHaWzT9wEdzW9d0EYoZyzYLxjQFyV8vho,8320
  pipecat/services/soniox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pipecat/services/soniox/stt.py,sha256=AhJF2YOzmqgB80x22jocgzr3neYCBMyxzP_WjkYR9Gc,15441
+ pipecat/services/soniox/stt.py,sha256=Ndml6QvPQ1WZBvdGT3LSg-LLWwrZ8KlqW8wBBFsQrrM,16509
  pipecat/services/speechmatics/__init__.py,sha256=Jgq1fqrZVkpWC21D79L1cn5Ub8PnYgnnCaqC5pOlbIc,89
  pipecat/services/speechmatics/stt.py,sha256=GLGJzlMSeZ1WzTOMjhKXDl5JYkqGhnFTbP3o0ez0hSw,44696
  pipecat/services/tavus/__init__.py,sha256=SNyyi2Xq6tXIihDG2Bwvmg6Srbd-uWd1RwG-NKWcPuI,257
@@ -339,7 +339,7 @@ pipecat/services/together/llm.py,sha256=VSayO-U6g9Ld0xK9CXRQPUsd5gWJKtiA8qDAyXgs
  pipecat/services/ultravox/__init__.py,sha256=EoHCSXI2o0DFQslELgkhAGZtxDj63gZi-9ZEhXljaKE,259
  pipecat/services/ultravox/stt.py,sha256=uCQm_-LbycXdXRV6IE1a6Mymis6tyww7V8PnPzAQtx8,16586
  pipecat/services/vistaar/__init__.py,sha256=UFfSWFN5rbzl6NN-E_OH_MFaSYodZWNlenAU0wk-rAI,110
- pipecat/services/vistaar/llm.py,sha256=GNVKaelbpNH7NW7iOpBj2rJjmhMVUsPqfnBI-YgIjjw,19326
+ pipecat/services/vistaar/llm.py,sha256=PrJIPPBh6PSKMtGRd2nYu1aIzk2covbwLEuUbZvDAVM,23114
  pipecat/services/whisper/__init__.py,sha256=smADmw0Fv98k7cGRuHTEcljKTO2WdZqLpJd0qsTCwH8,281
  pipecat/services/whisper/base_stt.py,sha256=VhslESPnYIeVbmnQTzmlZPV35TH49duxYTvJe0epNnE,7850
  pipecat/services/whisper/stt.py,sha256=9Qd56vWMzg3LtHikQnfgyMtl4odE6BCHDbpAn3HSWjw,17480
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
- dv_pipecat_ai-0.0.85.dev837.dist-info/METADATA,sha256=dQC8Y4gHZ3jPBKpybN1R9aKRUbb9mQpb0cPuLQo5KUc,32924
- dv_pipecat_ai-0.0.85.dev837.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dv_pipecat_ai-0.0.85.dev837.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
- dv_pipecat_ai-0.0.85.dev837.dist-info/RECORD,,
+ dv_pipecat_ai-0.0.85.dev841.dist-info/METADATA,sha256=xq4O-F0nWpeT1pXQ6uVPqT-eYvdnGrm_ktReRPPqrYo,32955
+ dv_pipecat_ai-0.0.85.dev841.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dv_pipecat_ai-0.0.85.dev841.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
+ dv_pipecat_ai-0.0.85.dev841.dist-info/RECORD,,
@@ -49,6 +49,33 @@ END_TOKEN = "<end>"
  FINALIZED_TOKEN = "<fin>"


+ class SonioxContextGeneralItem(BaseModel):
+     """Represents a key-value pair for structured general context information."""
+
+     key: str
+     value: str
+
+
+ class SonioxContextTranslationTerm(BaseModel):
+     """Represents a custom translation mapping for ambiguous or domain-specific terms."""
+
+     source: str
+     target: str
+
+
+ class SonioxContextObject(BaseModel):
+     """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
+
+     Learn more about context in the documentation:
+     https://soniox.com/docs/stt/concepts/context
+     """
+
+     general: Optional[List[SonioxContextGeneralItem]] = None
+     text: Optional[str] = None
+     terms: Optional[List[str]] = None
+     translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
+
+
  class SonioxInputParams(BaseModel):
      """Real-time transcription settings.
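The three Pydantic models added above live in pipecat/services/soniox/stt.py (the Soniox file whose hash changes in the RECORD). They are plain data holders; a minimal sketch of building a version-2 context from them, with purely illustrative field values:

```python
# Illustrative values only; the fields mirror the models introduced in this hunk.
context = SonioxContextObject(
    general=[
        SonioxContextGeneralItem(key="domain", value="telecom support"),
        SonioxContextGeneralItem(key="caller_type", value="existing customer"),
    ],
    text="The agent helps callers with billing and SIM activation issues.",
    terms=["eSIM", "MNP", "prepaid recharge"],
    translation_terms=[
        SonioxContextTranslationTerm(source="recharge", target="top-up"),
    ],
)
```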
 
@@ -60,9 +87,9 @@ class SonioxInputParams(BaseModel):
          audio_format: Audio format to use for transcription.
          num_channels: Number of channels to use for transcription.
          language_hints: List of language hints to use for transcription.
-         context: Customization for transcription.
-         enable_non_final_tokens: Whether to enable non-final tokens. If false, only final tokens will be returned.
-         max_non_final_tokens_duration_ms: Maximum duration of non-final tokens.
+         context: Customization for transcription. String for models with context_version 1 and ContextObject for models with context_version 2.
+         enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
+         enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
          client_reference_id: Client reference ID to use for transcription.
      """
@@ -72,10 +99,10 @@ class SonioxInputParams(BaseModel):
      num_channels: Optional[int] = 1

      language_hints: Optional[List[Language]] = None
-     context: Optional[str] = None
+     context: Optional[SonioxContextObject | str] = None

-     enable_non_final_tokens: Optional[bool] = True
-     max_non_final_tokens_duration_ms: Optional[int] = None
+     enable_speaker_diarization: Optional[bool] = False
+     enable_language_identification: Optional[bool] = False

      client_reference_id: Optional[str] = None
@@ -173,6 +200,10 @@ class SonioxSTTService(STTService):
          # Either one or the other is required.
          enable_endpoint_detection = not self._vad_force_turn_endpoint

+         context = self._params.context
+         if isinstance(context, SonioxContextObject):
+             context = context.model_dump()
+
          # Send the initial configuration message.
          config = {
              "api_key": self._api_key,
@@ -182,9 +213,9 @@ class SonioxSTTService(STTService):
              "enable_endpoint_detection": enable_endpoint_detection,
              "sample_rate": self.sample_rate,
              "language_hints": _prepare_language_hints(self._params.language_hints),
-             "context": self._params.context,
-             "enable_non_final_tokens": self._params.enable_non_final_tokens,
-             "max_non_final_tokens_duration_ms": self._params.max_non_final_tokens_duration_ms,
+             "context": context,
+             "enable_speaker_diarization": self._params.enable_speaker_diarization,
+             "enable_language_identification": self._params.enable_language_identification,
              "client_reference_id": self._params.client_reference_id,
          }
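Together, these hunks accept either a plain string (context_version 1) or a SonioxContextObject (context_version 2), converting the latter to a dict with model_dump() before it is sent in the websocket config, and they replace the non-final-token options with diarization and language-identification flags. A sketch of enabling the new options, assuming the service keeps its usual api_key/params constructor:

```python
# Sketch only: assumes SonioxSTTService(api_key=..., params=...) as in other pipecat STT services.
params = SonioxInputParams(
    language_hints=[Language.EN, Language.HI],  # illustrative hints
    context=context,  # SonioxContextObject from the sketch above, or a plain string for context_version 1 models
    enable_speaker_diarization=True,  # tokens annotated with speaker IDs
    enable_language_identification=True,  # tokens annotated with language IDs
)
stt = SonioxSTTService(api_key="SONIOX_API_KEY", params=params)
```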
 
@@ -10,9 +10,17 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
  from urllib.parse import urlencode

  import httpx
+ import jwt
  from loguru import logger
  from pydantic import BaseModel, Field

+ try:
+     import redis.asyncio as redis
+     REDIS_AVAILABLE = True
+ except ImportError:
+     REDIS_AVAILABLE = False
+     redis = None
+
  from pipecat.frames.frames import (
      CancelFrame,
      EndFrame,
@@ -55,7 +63,9 @@ class VistaarLLMService(LLMService):
          Parameters:
              source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
              target_lang: Target language code for responses.
-             session_id: Session ID for maintaining conversation context.
+             session_id: Session ID for maintaining conversation context (also used for JWT caching).
+             pre_query_response_phrases: List of phrases to say while waiting for response.
+             phone_number: Phone number for JWT subject claim.
              extra: Additional model-specific parameters
          """
@@ -63,6 +73,7 @@ class VistaarLLMService(LLMService):
          target_lang: Optional[str] = Field(default="mr")
          session_id: Optional[str] = Field(default=None)
          pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
+         phone_number: Optional[str] = Field(default="UNKNOWN")
          extra: Optional[Dict[str, Any]] = Field(default_factory=dict)

      def __init__(
@@ -72,6 +83,9 @@ class VistaarLLMService(LLMService):
          params: Optional[InputParams] = None,
          timeout: float = 30.0,
          interim_timeout: float = 5.0,
+         redis_client: Optional[Any] = None,  # redis.Redis type
+         jwt_private_key: Optional[str] = None,
+         jwt_token_expiry: int = 3600,
          **kwargs,
      ):
          """Initialize Vistaar LLM service.
@@ -81,6 +95,9 @@ class VistaarLLMService(LLMService):
              params: Input parameters for model configuration and behavior.
              timeout: Request timeout in seconds. Defaults to 30.0 seconds.
              interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
+             redis_client: Optional Redis client for JWT token caching.
+             jwt_private_key: Optional RSA private key in PEM format for JWT signing.
+             jwt_token_expiry: JWT token expiry time in seconds. Defaults to 3600 (1 hour).
              **kwargs: Additional arguments passed to the parent LLMService.
          """
          super().__init__(**kwargs)
@@ -95,6 +112,16 @@ class VistaarLLMService(LLMService):
          self._extra = params.extra if isinstance(params.extra, dict) else {}
          self._timeout = timeout
          self._interim_timeout = interim_timeout
+         self._phone_number = params.phone_number
+
+         # JWT authentication setup
+         self._redis_client = redis_client
+         self._jwt_private_key = jwt_private_key
+         self._jwt_token_expiry = jwt_token_expiry
+         self._jwt_issuer = "voice-provider"
+
+         if self._jwt_private_key and not self._redis_client:
+             logger.warning("JWT private key provided but no Redis client for caching. JWT auth will regenerate tokens on each request.")

          # Create an async HTTP client
          self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
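These constructor changes wire optional Redis caching and RS256 JWT signing into VistaarLLMService (pipecat/services/vistaar/llm.py per the RECORD). A sketch of passing the new arguments; the Redis connection details and key path are illustrative, and the service's pre-existing arguments are elided:

```python
# Sketch only: existing constructor arguments (base URL, params, timeouts, ...) are unchanged and elided here.
import redis.asyncio as redis  # optional dependency, mirroring the guarded import above

redis_client = redis.Redis(host="localhost", port=6379)  # illustrative connection details

llm = VistaarLLMService(
    # ... existing arguments ...
    redis_client=redis_client,  # tokens cached under vistaar_jwt:<session_id>
    jwt_private_key=open("vistaar_rsa_private.pem").read(),  # RS256 signing key; path is illustrative
    jwt_token_expiry=3600,  # also used as the Redis TTL when caching
)
```

Without a Redis client, the warning added above fires and a fresh token is signed on every request.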
@@ -112,6 +139,53 @@ class VistaarLLMService(LLMService):
              f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
          )

+     async def _get_jwt_token(self) -> Optional[str]:
+         """Generate or retrieve a cached JWT token.
+
+         Returns:
+             JWT token string or None if JWT auth is not configured.
+         """
+         if not self._jwt_private_key:
+             return None
+
+         # Try to get from Redis cache if available
+         if self._redis_client and self._session_id:
+             redis_key = f"vistaar_jwt:{self._session_id}"
+             try:
+                 cached_token = await self._redis_client.get(redis_key)
+                 if cached_token:
+                     logger.debug(f"Retrieved JWT token from Redis cache for session_id: {self._session_id}")
+                     return cached_token.decode('utf-8') if isinstance(cached_token, bytes) else cached_token
+             except Exception as e:
+                 logger.warning(f"Redis cache retrieval failed: {e}. Generating new token.")
+
+         # Generate new token
+         current_time = int(time.time())
+         payload = {
+             "sub": self._phone_number,  # Subject identifier (phone number)
+             "iss": self._jwt_issuer,  # Issuer
+             "iat": current_time,  # Issued at timestamp
+             "exp": current_time + self._jwt_token_expiry  # Expiration timestamp
+         }
+
+         token = jwt.encode(payload, self._jwt_private_key, algorithm="RS256")
+         logger.info(f"Generated new JWT token for {self._phone_number}, expires in {self._jwt_token_expiry}s")
+
+         # Cache in Redis if available
+         if self._redis_client and self._session_id:
+             redis_key = f"vistaar_jwt:{self._session_id}"
+             try:
+                 await self._redis_client.setex(
+                     redis_key,
+                     self._jwt_token_expiry,
+                     token
+                 )
+                 logger.debug(f"Cached JWT token in Redis for session_id: {self._session_id} with {self._jwt_token_expiry}s TTL")
+             except Exception as e:
+                 logger.warning(f"Redis cache storage failed: {e}. Continuing without cache.")
+
+         return token
+
      async def _extract_messages_to_query(self, context: OpenAILLMContext) -> str:
          """Extract only the last user message from context.
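_get_jwt_token signs a token carrying sub, iss, iat, and exp claims and caches it for its lifetime. For reference, a service holding the matching public key could validate such a token with PyJWT along these lines (a sketch, not part of this package):

```python
import jwt  # PyJWT, now declared in the wheel's requirements

def verify_vistaar_token(token: str, public_key_pem: str) -> dict:
    """Check signature, expiry, and issuer; return the decoded claims."""
    return jwt.decode(
        token,
        public_key_pem,
        algorithms=["RS256"],  # must match the signing algorithm used by _get_jwt_token
        issuer="voice-provider",  # rejects tokens from a different issuer
    )
```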
 
@@ -259,9 +333,20 @@ class VistaarLLMService(LLMService):
          self._interim_in_progress = False
          self._interim_completion_event.clear()  # Reset the event for new request

+         # Prepare headers with JWT authentication if configured
+         headers = {}
+         try:
+             jwt_token = await self._get_jwt_token()
+             if jwt_token:
+                 headers["Authorization"] = f"Bearer {jwt_token}"
+                 logger.debug(f"Added JWT authentication header for session_id: {self._session_id}")
+         except Exception as e:
+             logger.error(f"Failed to generate JWT token: {e}")
+             raise
+
          try:
              # Use httpx to handle SSE streaming
-             async with self._client.stream("GET", url) as response:
+             async with self._client.stream("GET", url, headers=headers) as response:
                  self._current_response = response  # Store for potential cancellation
                  response.raise_for_status()