dv-pipecat-ai 0.0.85.dev837__py3-none-any.whl → 0.0.85.dev841__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.85.dev837.dist-info → dv_pipecat_ai-0.0.85.dev841.dist-info}/METADATA +2 -1
- {dv_pipecat_ai-0.0.85.dev837.dist-info → dv_pipecat_ai-0.0.85.dev841.dist-info}/RECORD +7 -7
- pipecat/services/soniox/stt.py +40 -9
- pipecat/services/vistaar/llm.py +87 -2
- {dv_pipecat_ai-0.0.85.dev837.dist-info → dv_pipecat_ai-0.0.85.dev841.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev837.dist-info → dv_pipecat_ai-0.0.85.dev841.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev837.dist-info → dv_pipecat_ai-0.0.85.dev841.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dv-pipecat-ai
|
|
3
|
-
Version: 0.0.85.dev837
|
|
3
|
+
Version: 0.0.85.dev841
|
|
4
4
|
Summary: An open source framework for voice (and multimodal) assistants
|
|
5
5
|
License-Expression: BSD-2-Clause
|
|
6
6
|
Project-URL: Source, https://github.com/pipecat-ai/pipecat
|
|
@@ -26,6 +26,7 @@ Requires-Dist: numpy<3,>=1.26.4
|
|
|
26
26
|
Requires-Dist: Pillow<12,>=11.1.0
|
|
27
27
|
Requires-Dist: protobuf~=5.29.3
|
|
28
28
|
Requires-Dist: pydantic<3,>=2.10.6
|
|
29
|
+
Requires-Dist: PyJWT<3,>=2.8.0
|
|
29
30
|
Requires-Dist: pyloudnorm~=0.1.1
|
|
30
31
|
Requires-Dist: resampy~=0.4.3
|
|
31
32
|
Requires-Dist: soxr~=0.5.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
dv_pipecat_ai-0.0.85.dev837.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
1
|
+
dv_pipecat_ai-0.0.85.dev841.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
2
2
|
pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
|
|
3
3
|
pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -329,7 +329,7 @@ pipecat/services/sarvam/tts.py,sha256=lrwfdC53kZ7f2QPgNRxzryISNkrJCvNtlZ-19-iXg9
|
|
|
329
329
|
pipecat/services/simli/__init__.py,sha256=cbDcqOaGsEgKbGYKpJ1Vv7LN4ZjOWA04sE84WW5vgQI,257
|
|
330
330
|
pipecat/services/simli/video.py,sha256=Zu2XLvl2Y6VHaWzT9wEdzW9d0EYoZyzYLxjQFyV8vho,8320
|
|
331
331
|
pipecat/services/soniox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
332
|
-
pipecat/services/soniox/stt.py,sha256=
|
|
332
|
+
pipecat/services/soniox/stt.py,sha256=Ndml6QvPQ1WZBvdGT3LSg-LLWwrZ8KlqW8wBBFsQrrM,16509
|
|
333
333
|
pipecat/services/speechmatics/__init__.py,sha256=Jgq1fqrZVkpWC21D79L1cn5Ub8PnYgnnCaqC5pOlbIc,89
|
|
334
334
|
pipecat/services/speechmatics/stt.py,sha256=GLGJzlMSeZ1WzTOMjhKXDl5JYkqGhnFTbP3o0ez0hSw,44696
|
|
335
335
|
pipecat/services/tavus/__init__.py,sha256=SNyyi2Xq6tXIihDG2Bwvmg6Srbd-uWd1RwG-NKWcPuI,257
|
|
@@ -339,7 +339,7 @@ pipecat/services/together/llm.py,sha256=VSayO-U6g9Ld0xK9CXRQPUsd5gWJKtiA8qDAyXgs
|
|
|
339
339
|
pipecat/services/ultravox/__init__.py,sha256=EoHCSXI2o0DFQslELgkhAGZtxDj63gZi-9ZEhXljaKE,259
|
|
340
340
|
pipecat/services/ultravox/stt.py,sha256=uCQm_-LbycXdXRV6IE1a6Mymis6tyww7V8PnPzAQtx8,16586
|
|
341
341
|
pipecat/services/vistaar/__init__.py,sha256=UFfSWFN5rbzl6NN-E_OH_MFaSYodZWNlenAU0wk-rAI,110
|
|
342
|
-
pipecat/services/vistaar/llm.py,sha256=
|
|
342
|
+
pipecat/services/vistaar/llm.py,sha256=PrJIPPBh6PSKMtGRd2nYu1aIzk2covbwLEuUbZvDAVM,23114
|
|
343
343
|
pipecat/services/whisper/__init__.py,sha256=smADmw0Fv98k7cGRuHTEcljKTO2WdZqLpJd0qsTCwH8,281
|
|
344
344
|
pipecat/services/whisper/base_stt.py,sha256=VhslESPnYIeVbmnQTzmlZPV35TH49duxYTvJe0epNnE,7850
|
|
345
345
|
pipecat/services/whisper/stt.py,sha256=9Qd56vWMzg3LtHikQnfgyMtl4odE6BCHDbpAn3HSWjw,17480
|
|
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
|
|
|
416
416
|
pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
|
|
417
417
|
pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
|
|
418
418
|
pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
|
|
419
|
-
dv_pipecat_ai-0.0.85.
|
|
420
|
-
dv_pipecat_ai-0.0.85.dev837.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
421
|
-
dv_pipecat_ai-0.0.85.dev837.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
422
|
-
dv_pipecat_ai-0.0.85.dev837.dist-info/RECORD,,
|
|
419
|
+
dv_pipecat_ai-0.0.85.dev841.dist-info/METADATA,sha256=xq4O-F0nWpeT1pXQ6uVPqT-eYvdnGrm_ktReRPPqrYo,32955
|
|
420
|
+
dv_pipecat_ai-0.0.85.dev841.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
421
|
+
dv_pipecat_ai-0.0.85.dev841.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
422
|
+
dv_pipecat_ai-0.0.85.dev841.dist-info/RECORD,,
|
pipecat/services/soniox/stt.py
CHANGED
|
@@ -49,6 +49,33 @@ END_TOKEN = "<end>"
|
|
|
49
49
|
FINALIZED_TOKEN = "<fin>"
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
class SonioxContextGeneralItem(BaseModel):
|
|
53
|
+
"""Represents a key-value pair for structured general context information."""
|
|
54
|
+
|
|
55
|
+
key: str
|
|
56
|
+
value: str
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class SonioxContextTranslationTerm(BaseModel):
|
|
60
|
+
"""Represents a custom translation mapping for ambiguous or domain-specific terms."""
|
|
61
|
+
|
|
62
|
+
source: str
|
|
63
|
+
target: str
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class SonioxContextObject(BaseModel):
|
|
67
|
+
"""Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.
|
|
68
|
+
|
|
69
|
+
Learn more about context in the documentation:
|
|
70
|
+
https://soniox.com/docs/stt/concepts/context
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
general: Optional[List[SonioxContextGeneralItem]] = None
|
|
74
|
+
text: Optional[str] = None
|
|
75
|
+
terms: Optional[List[str]] = None
|
|
76
|
+
translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
|
|
77
|
+
|
|
78
|
+
|
|
52
79
|
class SonioxInputParams(BaseModel):
|
|
53
80
|
"""Real-time transcription settings.
|
|
54
81
|
|
|
@@ -60,9 +87,9 @@ class SonioxInputParams(BaseModel):
|
|
|
60
87
|
audio_format: Audio format to use for transcription.
|
|
61
88
|
num_channels: Number of channels to use for transcription.
|
|
62
89
|
language_hints: List of language hints to use for transcription.
|
|
63
|
-
context: Customization for transcription.
|
|
64
|
-
|
|
65
|
-
|
|
90
|
+
context: Customization for transcription. String for models with context_version 1 and ContextObject for models with context_version 2.
|
|
91
|
+
enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
|
|
92
|
+
enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
|
|
66
93
|
client_reference_id: Client reference ID to use for transcription.
|
|
67
94
|
"""
|
|
68
95
|
|
|
@@ -72,10 +99,10 @@ class SonioxInputParams(BaseModel):
|
|
|
72
99
|
num_channels: Optional[int] = 1
|
|
73
100
|
|
|
74
101
|
language_hints: Optional[List[Language]] = None
|
|
75
|
-
context: Optional[str] = None
|
|
102
|
+
context: Optional[SonioxContextObject | str] = None
|
|
76
103
|
|
|
77
|
-
|
|
78
|
-
|
|
104
|
+
enable_speaker_diarization: Optional[bool] = False
|
|
105
|
+
enable_language_identification: Optional[bool] = False
|
|
79
106
|
|
|
80
107
|
client_reference_id: Optional[str] = None
|
|
81
108
|
|
|
@@ -173,6 +200,10 @@ class SonioxSTTService(STTService):
|
|
|
173
200
|
# Either one or the other is required.
|
|
174
201
|
enable_endpoint_detection = not self._vad_force_turn_endpoint
|
|
175
202
|
|
|
203
|
+
context = self._params.context
|
|
204
|
+
if isinstance(context, SonioxContextObject):
|
|
205
|
+
context = context.model_dump()
|
|
206
|
+
|
|
176
207
|
# Send the initial configuration message.
|
|
177
208
|
config = {
|
|
178
209
|
"api_key": self._api_key,
|
|
@@ -182,9 +213,9 @@ class SonioxSTTService(STTService):
|
|
|
182
213
|
"enable_endpoint_detection": enable_endpoint_detection,
|
|
183
214
|
"sample_rate": self.sample_rate,
|
|
184
215
|
"language_hints": _prepare_language_hints(self._params.language_hints),
|
|
185
|
-
"context":
|
|
186
|
-
"
|
|
187
|
-
"
|
|
216
|
+
"context": context,
|
|
217
|
+
"enable_speaker_diarization": self._params.enable_speaker_diarization,
|
|
218
|
+
"enable_language_identification": self._params.enable_language_identification,
|
|
188
219
|
"client_reference_id": self._params.client_reference_id,
|
|
189
220
|
}
|
|
190
221
|
|
pipecat/services/vistaar/llm.py
CHANGED
|
@@ -10,9 +10,17 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
|
10
10
|
from urllib.parse import urlencode
|
|
11
11
|
|
|
12
12
|
import httpx
|
|
13
|
+
import jwt
|
|
13
14
|
from loguru import logger
|
|
14
15
|
from pydantic import BaseModel, Field
|
|
15
16
|
|
|
17
|
+
try:
|
|
18
|
+
import redis.asyncio as redis
|
|
19
|
+
REDIS_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
REDIS_AVAILABLE = False
|
|
22
|
+
redis = None
|
|
23
|
+
|
|
16
24
|
from pipecat.frames.frames import (
|
|
17
25
|
CancelFrame,
|
|
18
26
|
EndFrame,
|
|
@@ -55,7 +63,9 @@ class VistaarLLMService(LLMService):
|
|
|
55
63
|
Parameters:
|
|
56
64
|
source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
|
|
57
65
|
target_lang: Target language code for responses.
|
|
58
|
-
session_id: Session ID for maintaining conversation context.
|
|
66
|
+
session_id: Session ID for maintaining conversation context (also used for JWT caching).
|
|
67
|
+
pre_query_response_phrases: List of phrases to say while waiting for response.
|
|
68
|
+
phone_number: Phone number for JWT subject claim.
|
|
59
69
|
extra: Additional model-specific parameters
|
|
60
70
|
"""
|
|
61
71
|
|
|
@@ -63,6 +73,7 @@ class VistaarLLMService(LLMService):
|
|
|
63
73
|
target_lang: Optional[str] = Field(default="mr")
|
|
64
74
|
session_id: Optional[str] = Field(default=None)
|
|
65
75
|
pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
|
|
76
|
+
phone_number: Optional[str] = Field(default="UNKNOWN")
|
|
66
77
|
extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
|
67
78
|
|
|
68
79
|
def __init__(
|
|
@@ -72,6 +83,9 @@ class VistaarLLMService(LLMService):
|
|
|
72
83
|
params: Optional[InputParams] = None,
|
|
73
84
|
timeout: float = 30.0,
|
|
74
85
|
interim_timeout: float = 5.0,
|
|
86
|
+
redis_client: Optional[Any] = None, # redis.Redis type
|
|
87
|
+
jwt_private_key: Optional[str] = None,
|
|
88
|
+
jwt_token_expiry: int = 3600,
|
|
75
89
|
**kwargs,
|
|
76
90
|
):
|
|
77
91
|
"""Initialize Vistaar LLM service.
|
|
@@ -81,6 +95,9 @@ class VistaarLLMService(LLMService):
|
|
|
81
95
|
params: Input parameters for model configuration and behavior.
|
|
82
96
|
timeout: Request timeout in seconds. Defaults to 30.0 seconds.
|
|
83
97
|
interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
|
|
98
|
+
redis_client: Optional Redis client for JWT token caching.
|
|
99
|
+
jwt_private_key: Optional RSA private key in PEM format for JWT signing.
|
|
100
|
+
jwt_token_expiry: JWT token expiry time in seconds. Defaults to 3600 (1 hour).
|
|
84
101
|
**kwargs: Additional arguments passed to the parent LLMService.
|
|
85
102
|
"""
|
|
86
103
|
super().__init__(**kwargs)
|
|
@@ -95,6 +112,16 @@ class VistaarLLMService(LLMService):
|
|
|
95
112
|
self._extra = params.extra if isinstance(params.extra, dict) else {}
|
|
96
113
|
self._timeout = timeout
|
|
97
114
|
self._interim_timeout = interim_timeout
|
|
115
|
+
self._phone_number = params.phone_number
|
|
116
|
+
|
|
117
|
+
# JWT authentication setup
|
|
118
|
+
self._redis_client = redis_client
|
|
119
|
+
self._jwt_private_key = jwt_private_key
|
|
120
|
+
self._jwt_token_expiry = jwt_token_expiry
|
|
121
|
+
self._jwt_issuer = "voice-provider"
|
|
122
|
+
|
|
123
|
+
if self._jwt_private_key and not self._redis_client:
|
|
124
|
+
logger.warning("JWT private key provided but no Redis client for caching. JWT auth will regenerate tokens on each request.")
|
|
98
125
|
|
|
99
126
|
# Create an async HTTP client
|
|
100
127
|
self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
|
|
@@ -112,6 +139,53 @@ class VistaarLLMService(LLMService):
|
|
|
112
139
|
f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
|
|
113
140
|
)
|
|
114
141
|
|
|
142
|
+
async def _get_jwt_token(self) -> Optional[str]:
|
|
143
|
+
"""Generate or retrieve a cached JWT token.
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
JWT token string or None if JWT auth is not configured.
|
|
147
|
+
"""
|
|
148
|
+
if not self._jwt_private_key:
|
|
149
|
+
return None
|
|
150
|
+
|
|
151
|
+
# Try to get from Redis cache if available
|
|
152
|
+
if self._redis_client and self._session_id:
|
|
153
|
+
redis_key = f"vistaar_jwt:{self._session_id}"
|
|
154
|
+
try:
|
|
155
|
+
cached_token = await self._redis_client.get(redis_key)
|
|
156
|
+
if cached_token:
|
|
157
|
+
logger.debug(f"Retrieved JWT token from Redis cache for session_id: {self._session_id}")
|
|
158
|
+
return cached_token.decode('utf-8') if isinstance(cached_token, bytes) else cached_token
|
|
159
|
+
except Exception as e:
|
|
160
|
+
logger.warning(f"Redis cache retrieval failed: {e}. Generating new token.")
|
|
161
|
+
|
|
162
|
+
# Generate new token
|
|
163
|
+
current_time = int(time.time())
|
|
164
|
+
payload = {
|
|
165
|
+
"sub": self._phone_number, # Subject identifier (phone number)
|
|
166
|
+
"iss": self._jwt_issuer, # Issuer
|
|
167
|
+
"iat": current_time, # Issued at timestamp
|
|
168
|
+
"exp": current_time + self._jwt_token_expiry # Expiration timestamp
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
token = jwt.encode(payload, self._jwt_private_key, algorithm="RS256")
|
|
172
|
+
logger.info(f"Generated new JWT token for {self._phone_number}, expires in {self._jwt_token_expiry}s")
|
|
173
|
+
|
|
174
|
+
# Cache in Redis if available
|
|
175
|
+
if self._redis_client and self._session_id:
|
|
176
|
+
redis_key = f"vistaar_jwt:{self._session_id}"
|
|
177
|
+
try:
|
|
178
|
+
await self._redis_client.setex(
|
|
179
|
+
redis_key,
|
|
180
|
+
self._jwt_token_expiry,
|
|
181
|
+
token
|
|
182
|
+
)
|
|
183
|
+
logger.debug(f"Cached JWT token in Redis for session_id: {self._session_id} with {self._jwt_token_expiry}s TTL")
|
|
184
|
+
except Exception as e:
|
|
185
|
+
logger.warning(f"Redis cache storage failed: {e}. Continuing without cache.")
|
|
186
|
+
|
|
187
|
+
return token
|
|
188
|
+
|
|
115
189
|
async def _extract_messages_to_query(self, context: OpenAILLMContext) -> str:
|
|
116
190
|
"""Extract only the last user message from context.
|
|
117
191
|
|
|
@@ -259,9 +333,20 @@ class VistaarLLMService(LLMService):
|
|
|
259
333
|
self._interim_in_progress = False
|
|
260
334
|
self._interim_completion_event.clear() # Reset the event for new request
|
|
261
335
|
|
|
336
|
+
# Prepare headers with JWT authentication if configured
|
|
337
|
+
headers = {}
|
|
338
|
+
try:
|
|
339
|
+
jwt_token = await self._get_jwt_token()
|
|
340
|
+
if jwt_token:
|
|
341
|
+
headers["Authorization"] = f"Bearer {jwt_token}"
|
|
342
|
+
logger.debug(f"Added JWT authentication header for session_id: {self._session_id}")
|
|
343
|
+
except Exception as e:
|
|
344
|
+
logger.error(f"Failed to generate JWT token: {e}")
|
|
345
|
+
raise
|
|
346
|
+
|
|
262
347
|
try:
|
|
263
348
|
# Use httpx to handle SSE streaming
|
|
264
|
-
async with self._client.stream("GET", url) as response:
|
|
349
|
+
async with self._client.stream("GET", url, headers=headers) as response:
|
|
265
350
|
self._current_response = response # Store for potential cancellation
|
|
266
351
|
response.raise_for_status()
|
|
267
352
|
|
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev837.dist-info → dv_pipecat_ai-0.0.85.dev841.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev837.dist-info → dv_pipecat_ai-0.0.85.dev841.dist-info}/top_level.txt
RENAMED
|
File without changes
|