dv-pipecat-ai 0.0.85.dev837__py3-none-any.whl → 0.0.85.dev841__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dv-pipecat-ai
- Version: 0.0.85.dev837
+ Version: 0.0.85.dev841
  Summary: An open source framework for voice (and multimodal) assistants
  License-Expression: BSD-2-Clause
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -26,6 +26,7 @@ Requires-Dist: numpy<3,>=1.26.4
  Requires-Dist: Pillow<12,>=11.1.0
  Requires-Dist: protobuf~=5.29.3
  Requires-Dist: pydantic<3,>=2.10.6
+ Requires-Dist: PyJWT<3,>=2.8.0
  Requires-Dist: pyloudnorm~=0.1.1
  Requires-Dist: resampy~=0.4.3
  Requires-Dist: soxr~=0.5.0
@@ -1,4 +1,4 @@
- dv_pipecat_ai-0.0.85.dev837.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
+ dv_pipecat_ai-0.0.85.dev841.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -329,7 +329,7 @@ pipecat/services/sarvam/tts.py,sha256=lrwfdC53kZ7f2QPgNRxzryISNkrJCvNtlZ-19-iXg9
  pipecat/services/simli/__init__.py,sha256=cbDcqOaGsEgKbGYKpJ1Vv7LN4ZjOWA04sE84WW5vgQI,257
  pipecat/services/simli/video.py,sha256=Zu2XLvl2Y6VHaWzT9wEdzW9d0EYoZyzYLxjQFyV8vho,8320
  pipecat/services/soniox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pipecat/services/soniox/stt.py,sha256=AhJF2YOzmqgB80x22jocgzr3neYCBMyxzP_WjkYR9Gc,15441
+ pipecat/services/soniox/stt.py,sha256=Ndml6QvPQ1WZBvdGT3LSg-LLWwrZ8KlqW8wBBFsQrrM,16509
  pipecat/services/speechmatics/__init__.py,sha256=Jgq1fqrZVkpWC21D79L1cn5Ub8PnYgnnCaqC5pOlbIc,89
  pipecat/services/speechmatics/stt.py,sha256=GLGJzlMSeZ1WzTOMjhKXDl5JYkqGhnFTbP3o0ez0hSw,44696
  pipecat/services/tavus/__init__.py,sha256=SNyyi2Xq6tXIihDG2Bwvmg6Srbd-uWd1RwG-NKWcPuI,257
@@ -339,7 +339,7 @@ pipecat/services/together/llm.py,sha256=VSayO-U6g9Ld0xK9CXRQPUsd5gWJKtiA8qDAyXgs
  pipecat/services/ultravox/__init__.py,sha256=EoHCSXI2o0DFQslELgkhAGZtxDj63gZi-9ZEhXljaKE,259
  pipecat/services/ultravox/stt.py,sha256=uCQm_-LbycXdXRV6IE1a6Mymis6tyww7V8PnPzAQtx8,16586
  pipecat/services/vistaar/__init__.py,sha256=UFfSWFN5rbzl6NN-E_OH_MFaSYodZWNlenAU0wk-rAI,110
- pipecat/services/vistaar/llm.py,sha256=GNVKaelbpNH7NW7iOpBj2rJjmhMVUsPqfnBI-YgIjjw,19326
+ pipecat/services/vistaar/llm.py,sha256=PrJIPPBh6PSKMtGRd2nYu1aIzk2covbwLEuUbZvDAVM,23114
  pipecat/services/whisper/__init__.py,sha256=smADmw0Fv98k7cGRuHTEcljKTO2WdZqLpJd0qsTCwH8,281
  pipecat/services/whisper/base_stt.py,sha256=VhslESPnYIeVbmnQTzmlZPV35TH49duxYTvJe0epNnE,7850
  pipecat/services/whisper/stt.py,sha256=9Qd56vWMzg3LtHikQnfgyMtl4odE6BCHDbpAn3HSWjw,17480
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
- dv_pipecat_ai-0.0.85.dev837.dist-info/METADATA,sha256=dQC8Y4gHZ3jPBKpybN1R9aKRUbb9mQpb0cPuLQo5KUc,32924
- dv_pipecat_ai-0.0.85.dev837.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dv_pipecat_ai-0.0.85.dev837.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
- dv_pipecat_ai-0.0.85.dev837.dist-info/RECORD,,
+ dv_pipecat_ai-0.0.85.dev841.dist-info/METADATA,sha256=xq4O-F0nWpeT1pXQ6uVPqT-eYvdnGrm_ktReRPPqrYo,32955
+ dv_pipecat_ai-0.0.85.dev841.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dv_pipecat_ai-0.0.85.dev841.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
+ dv_pipecat_ai-0.0.85.dev841.dist-info/RECORD,,
@@ -49,6 +49,33 @@ END_TOKEN = "<end>"
  FINALIZED_TOKEN = "<fin>"


+ class SonioxContextGeneralItem(BaseModel):
+     """Represents a key-value pair for structured general context information."""
+
+     key: str
+     value: str
+
+
+ class SonioxContextTranslationTerm(BaseModel):
+     """Represents a custom translation mapping for ambiguous or domain-specific terms."""
+
+     source: str
+     target: str
+
+
+ class SonioxContextObject(BaseModel):
+     """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
+
+     Learn more about context in the documentation:
+     https://soniox.com/docs/stt/concepts/context
+     """
+
+     general: Optional[List[SonioxContextGeneralItem]] = None
+     text: Optional[str] = None
+     terms: Optional[List[str]] = None
+     translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
+
+
  class SonioxInputParams(BaseModel):
      """Real-time transcription settings.
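The three Pydantic models added above live in pipecat/services/soniox/stt.py (the Soniox file whose hash changes in the RECORD). They are plain data holders; a minimal sketch of building a version-2 context from them, with purely illustrative field values:

```python
# Illustrative values only; the fields mirror the models introduced in this hunk.
context = SonioxContextObject(
    general=[
        SonioxContextGeneralItem(key="domain", value="telecom support"),
        SonioxContextGeneralItem(key="caller_type", value="existing customer"),
    ],
    text="The agent helps callers with billing and SIM activation issues.",
    terms=["eSIM", "MNP", "prepaid recharge"],
    translation_terms=[
        SonioxContextTranslationTerm(source="recharge", target="top-up"),
    ],
)
```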
 
@@ -60,9 +87,9 @@ class SonioxInputParams(BaseModel):
          audio_format: Audio format to use for transcription.
          num_channels: Number of channels to use for transcription.
          language_hints: List of language hints to use for transcription.
-         context: Customization for transcription.
-         enable_non_final_tokens: Whether to enable non-final tokens. If false, only final tokens will be returned.
-         max_non_final_tokens_duration_ms: Maximum duration of non-final tokens.
+         context: Customization for transcription. String for models with context_version 1 and ContextObject for models with context_version 2.
+         enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
+         enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
          client_reference_id: Client reference ID to use for transcription.
      """
@@ -72,10 +99,10 @@ class SonioxInputParams(BaseModel):
      num_channels: Optional[int] = 1

      language_hints: Optional[List[Language]] = None
-     context: Optional[str] = None
+     context: Optional[SonioxContextObject | str] = None

-     enable_non_final_tokens: Optional[bool] = True
-     max_non_final_tokens_duration_ms: Optional[int] = None
+     enable_speaker_diarization: Optional[bool] = False
+     enable_language_identification: Optional[bool] = False

      client_reference_id: Optional[str] = None
@@ -173,6 +200,10 @@ class SonioxSTTService(STTService):
          # Either one or the other is required.
          enable_endpoint_detection = not self._vad_force_turn_endpoint

+         context = self._params.context
+         if isinstance(context, SonioxContextObject):
+             context = context.model_dump()
+
          # Send the initial configuration message.
          config = {
              "api_key": self._api_key,
@@ -182,9 +213,9 @@ class SonioxSTTService(STTService):
              "enable_endpoint_detection": enable_endpoint_detection,
              "sample_rate": self.sample_rate,
              "language_hints": _prepare_language_hints(self._params.language_hints),
-             "context": self._params.context,
-             "enable_non_final_tokens": self._params.enable_non_final_tokens,
-             "max_non_final_tokens_duration_ms": self._params.max_non_final_tokens_duration_ms,
+             "context": context,
+             "enable_speaker_diarization": self._params.enable_speaker_diarization,
+             "enable_language_identification": self._params.enable_language_identification,
              "client_reference_id": self._params.client_reference_id,
          }
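Together, these hunks accept either a plain string (context_version 1) or a SonioxContextObject (context_version 2), converting the latter to a dict with model_dump() before it is sent in the websocket config, and they replace the non-final-token options with diarization and language-identification flags. A sketch of enabling the new options, assuming the service keeps its usual api_key/params constructor:

```python
# Sketch only: assumes SonioxSTTService(api_key=..., params=...) as in other pipecat STT services.
params = SonioxInputParams(
    language_hints=[Language.EN, Language.HI],  # illustrative hints
    context=context,  # SonioxContextObject from the sketch above, or a plain string for context_version 1 models
    enable_speaker_diarization=True,  # tokens annotated with speaker IDs
    enable_language_identification=True,  # tokens annotated with language IDs
)
stt = SonioxSTTService(api_key="SONIOX_API_KEY", params=params)
```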
 
@@ -10,9 +10,17 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
  from urllib.parse import urlencode

  import httpx
+ import jwt
  from loguru import logger
  from pydantic import BaseModel, Field

+ try:
+     import redis.asyncio as redis
+     REDIS_AVAILABLE = True
+ except ImportError:
+     REDIS_AVAILABLE = False
+     redis = None
+
  from pipecat.frames.frames import (
      CancelFrame,
      EndFrame,
@@ -55,7 +63,9 @@ class VistaarLLMService(LLMService):
          Parameters:
              source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
              target_lang: Target language code for responses.
-             session_id: Session ID for maintaining conversation context.
+             session_id: Session ID for maintaining conversation context (also used for JWT caching).
+             pre_query_response_phrases: List of phrases to say while waiting for response.
+             phone_number: Phone number for JWT subject claim.
              extra: Additional model-specific parameters
          """
@@ -63,6 +73,7 @@ class VistaarLLMService(LLMService):
          target_lang: Optional[str] = Field(default="mr")
          session_id: Optional[str] = Field(default=None)
          pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
+         phone_number: Optional[str] = Field(default="UNKNOWN")
          extra: Optional[Dict[str, Any]] = Field(default_factory=dict)

      def __init__(
@@ -72,6 +83,9 @@ class VistaarLLMService(LLMService):
          params: Optional[InputParams] = None,
          timeout: float = 30.0,
          interim_timeout: float = 5.0,
+         redis_client: Optional[Any] = None,  # redis.Redis type
+         jwt_private_key: Optional[str] = None,
+         jwt_token_expiry: int = 3600,
          **kwargs,
      ):
          """Initialize Vistaar LLM service.
@@ -81,6 +95,9 @@ class VistaarLLMService(LLMService):
              params: Input parameters for model configuration and behavior.
              timeout: Request timeout in seconds. Defaults to 30.0 seconds.
              interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
+             redis_client: Optional Redis client for JWT token caching.
+             jwt_private_key: Optional RSA private key in PEM format for JWT signing.
+             jwt_token_expiry: JWT token expiry time in seconds. Defaults to 3600 (1 hour).
              **kwargs: Additional arguments passed to the parent LLMService.
          """
          super().__init__(**kwargs)
@@ -95,6 +112,16 @@ class VistaarLLMService(LLMService):
          self._extra = params.extra if isinstance(params.extra, dict) else {}
          self._timeout = timeout
          self._interim_timeout = interim_timeout
+         self._phone_number = params.phone_number
+
+         # JWT authentication setup
+         self._redis_client = redis_client
+         self._jwt_private_key = jwt_private_key
+         self._jwt_token_expiry = jwt_token_expiry
+         self._jwt_issuer = "voice-provider"
+
+         if self._jwt_private_key and not self._redis_client:
+             logger.warning("JWT private key provided but no Redis client for caching. JWT auth will regenerate tokens on each request.")

          # Create an async HTTP client
          self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
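These constructor changes wire optional Redis caching and RS256 JWT signing into VistaarLLMService (pipecat/services/vistaar/llm.py per the RECORD). A sketch of passing the new arguments; the Redis connection details and key path are illustrative, and the service's pre-existing arguments are elided:

```python
# Sketch only: existing constructor arguments (base URL, params, timeouts, ...) are unchanged and elided here.
import redis.asyncio as redis  # optional dependency, mirroring the guarded import above

redis_client = redis.Redis(host="localhost", port=6379)  # illustrative connection details

llm = VistaarLLMService(
    # ... existing arguments ...
    redis_client=redis_client,  # tokens cached under vistaar_jwt:<session_id>
    jwt_private_key=open("vistaar_rsa_private.pem").read(),  # RS256 signing key; path is illustrative
    jwt_token_expiry=3600,  # also used as the Redis TTL when caching
)
```

Without a Redis client, the warning added above fires and a fresh token is signed on every request.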
@@ -112,6 +139,53 @@ class VistaarLLMService(LLMService):
              f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
          )

+     async def _get_jwt_token(self) -> Optional[str]:
+         """Generate or retrieve a cached JWT token.
+
+         Returns:
+             JWT token string or None if JWT auth is not configured.
+         """
+         if not self._jwt_private_key:
+             return None
+
+         # Try to get from Redis cache if available
+         if self._redis_client and self._session_id:
+             redis_key = f"vistaar_jwt:{self._session_id}"
+             try:
+                 cached_token = await self._redis_client.get(redis_key)
+                 if cached_token:
+                     logger.debug(f"Retrieved JWT token from Redis cache for session_id: {self._session_id}")
+                     return cached_token.decode('utf-8') if isinstance(cached_token, bytes) else cached_token
+             except Exception as e:
+                 logger.warning(f"Redis cache retrieval failed: {e}. Generating new token.")
+
+         # Generate new token
+         current_time = int(time.time())
+         payload = {
+             "sub": self._phone_number,  # Subject identifier (phone number)
+             "iss": self._jwt_issuer,  # Issuer
+             "iat": current_time,  # Issued at timestamp
+             "exp": current_time + self._jwt_token_expiry  # Expiration timestamp
+         }
+
+         token = jwt.encode(payload, self._jwt_private_key, algorithm="RS256")
+         logger.info(f"Generated new JWT token for {self._phone_number}, expires in {self._jwt_token_expiry}s")
+
+         # Cache in Redis if available
+         if self._redis_client and self._session_id:
+             redis_key = f"vistaar_jwt:{self._session_id}"
+             try:
+                 await self._redis_client.setex(
+                     redis_key,
+                     self._jwt_token_expiry,
+                     token
+                 )
+                 logger.debug(f"Cached JWT token in Redis for session_id: {self._session_id} with {self._jwt_token_expiry}s TTL")
+             except Exception as e:
+                 logger.warning(f"Redis cache storage failed: {e}. Continuing without cache.")
+
+         return token
+
      async def _extract_messages_to_query(self, context: OpenAILLMContext) -> str:
          """Extract only the last user message from context.
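_get_jwt_token signs a token carrying sub, iss, iat, and exp claims and caches it for its lifetime. For reference, a service holding the matching public key could validate such a token with PyJWT along these lines (a sketch, not part of this package):

```python
import jwt  # PyJWT, now declared in the wheel's requirements

def verify_vistaar_token(token: str, public_key_pem: str) -> dict:
    """Check signature, expiry, and issuer; return the decoded claims."""
    return jwt.decode(
        token,
        public_key_pem,
        algorithms=["RS256"],  # must match the signing algorithm used by _get_jwt_token
        issuer="voice-provider",  # rejects tokens from a different issuer
    )
```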
 
@@ -259,9 +333,20 @@ class VistaarLLMService(LLMService):
          self._interim_in_progress = False
          self._interim_completion_event.clear()  # Reset the event for new request

+         # Prepare headers with JWT authentication if configured
+         headers = {}
+         try:
+             jwt_token = await self._get_jwt_token()
+             if jwt_token:
+                 headers["Authorization"] = f"Bearer {jwt_token}"
+                 logger.debug(f"Added JWT authentication header for session_id: {self._session_id}")
+         except Exception as e:
+             logger.error(f"Failed to generate JWT token: {e}")
+             raise
+
          try:
              # Use httpx to handle SSE streaming
-             async with self._client.stream("GET", url) as response:
+             async with self._client.stream("GET", url, headers=headers) as response:
                  self._current_response = response  # Store for potential cancellation
                  response.raise_for_status()