dv-pipecat-ai 0.0.85.dev834__py3-none-any.whl → 0.0.85.dev840__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.85.dev834.dist-info → dv_pipecat_ai-0.0.85.dev840.dist-info}/METADATA +2 -1
- {dv_pipecat_ai-0.0.85.dev834.dist-info → dv_pipecat_ai-0.0.85.dev840.dist-info}/RECORD +8 -7
- pipecat/serializers/__init__.py +2 -0
- pipecat/serializers/vi.py +324 -0
- pipecat/services/vistaar/llm.py +87 -2
- {dv_pipecat_ai-0.0.85.dev834.dist-info → dv_pipecat_ai-0.0.85.dev840.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev834.dist-info → dv_pipecat_ai-0.0.85.dev840.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev834.dist-info → dv_pipecat_ai-0.0.85.dev840.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dv-pipecat-ai
|
|
3
|
-
Version: 0.0.85.dev834
|
|
3
|
+
Version: 0.0.85.dev840
|
|
4
4
|
Summary: An open source framework for voice (and multimodal) assistants
|
|
5
5
|
License-Expression: BSD-2-Clause
|
|
6
6
|
Project-URL: Source, https://github.com/pipecat-ai/pipecat
|
|
@@ -26,6 +26,7 @@ Requires-Dist: numpy<3,>=1.26.4
|
|
|
26
26
|
Requires-Dist: Pillow<12,>=11.1.0
|
|
27
27
|
Requires-Dist: protobuf~=5.29.3
|
|
28
28
|
Requires-Dist: pydantic<3,>=2.10.6
|
|
29
|
+
Requires-Dist: PyJWT<3,>=2.8.0
|
|
29
30
|
Requires-Dist: pyloudnorm~=0.1.1
|
|
30
31
|
Requires-Dist: resampy~=0.4.3
|
|
31
32
|
Requires-Dist: soxr~=0.5.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
dv_pipecat_ai-0.0.85.dev834.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
1
|
+
dv_pipecat_ai-0.0.85.dev840.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
2
2
|
pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
|
|
3
3
|
pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -153,7 +153,7 @@ pipecat/runner/livekit.py,sha256=in-2Io3FUZV-VcZZ-gQCx9L1WnKp5sHqmm7tDYlFNl4,458
|
|
|
153
153
|
pipecat/runner/run.py,sha256=McalzMoFYEJJRXyoD5PBAyUhHCdsEeeZJk8lBvplRck,30054
|
|
154
154
|
pipecat/runner/types.py,sha256=zHjbAiU17fG0ypLXCEzPu7bpDOutAg-4gE7TESvK8n0,1761
|
|
155
155
|
pipecat/runner/utils.py,sha256=Ve9rjRvbt1o8e9by0nIrCJzUDGcuJUeYYhkqycmgHXc,18682
|
|
156
|
-
pipecat/serializers/__init__.py,sha256=
|
|
156
|
+
pipecat/serializers/__init__.py,sha256=z0V5GflCoPt4k2Yqm4ivuzKDh9VsYYAgK2UXZTw10aU,863
|
|
157
157
|
pipecat/serializers/asterisk.py,sha256=QLJMXkU3DZ0sgFw3Vq2Zf8PHKkQQguL_v-l2Io4lZ_M,6729
|
|
158
158
|
pipecat/serializers/base_serializer.py,sha256=OyBUZccs2ZT9mfkBbq2tGsUJMvci6o-j90Cl1sicPaI,2030
|
|
159
159
|
pipecat/serializers/convox.py,sha256=fj9NkFTB74B9k8qWEuICQNGUQtEV0DusaHohkOqNLa8,11145
|
|
@@ -164,6 +164,7 @@ pipecat/serializers/plivo.py,sha256=ie6VUhZDTJ7KlAuJyHNeIeMtJ3ScDq_2js1SZtz7jLI,
|
|
|
164
164
|
pipecat/serializers/protobuf.py,sha256=L0jSqvgTdkfxsu6JWjYK8QSTVji9nhzmgRsEEbGU7xY,5223
|
|
165
165
|
pipecat/serializers/telnyx.py,sha256=eFkC7dExDFildYLR8DPvgfHbgXlCwdSPd1vc11yxyok,10847
|
|
166
166
|
pipecat/serializers/twilio.py,sha256=0emSzXVw8DU_N5RPruMekbBKku9Q429-0z1PMuYejSk,10823
|
|
167
|
+
pipecat/serializers/vi.py,sha256=Q7kMXvKM493RIuOUc99LKZWgVmvd8_owAzIK_oEktfw,11150
|
|
167
168
|
pipecat/services/__init__.py,sha256=8e3Ta-8_BOPozhDB3l0GJkNXs5PWhib6yqZQUof2Kvw,1209
|
|
168
169
|
pipecat/services/ai_service.py,sha256=yE386fm2Id-yD4fCNfkmEMtg0lTA7PB17n2x_A_jwTg,5896
|
|
169
170
|
pipecat/services/ai_services.py,sha256=_RrDWfM8adV17atzY9RxK0nXRVM5kbUkKrvN90GAWYM,795
|
|
@@ -338,7 +339,7 @@ pipecat/services/together/llm.py,sha256=VSayO-U6g9Ld0xK9CXRQPUsd5gWJKtiA8qDAyXgs
|
|
|
338
339
|
pipecat/services/ultravox/__init__.py,sha256=EoHCSXI2o0DFQslELgkhAGZtxDj63gZi-9ZEhXljaKE,259
|
|
339
340
|
pipecat/services/ultravox/stt.py,sha256=uCQm_-LbycXdXRV6IE1a6Mymis6tyww7V8PnPzAQtx8,16586
|
|
340
341
|
pipecat/services/vistaar/__init__.py,sha256=UFfSWFN5rbzl6NN-E_OH_MFaSYodZWNlenAU0wk-rAI,110
|
|
341
|
-
pipecat/services/vistaar/llm.py,sha256=
|
|
342
|
+
pipecat/services/vistaar/llm.py,sha256=PrJIPPBh6PSKMtGRd2nYu1aIzk2covbwLEuUbZvDAVM,23114
|
|
342
343
|
pipecat/services/whisper/__init__.py,sha256=smADmw0Fv98k7cGRuHTEcljKTO2WdZqLpJd0qsTCwH8,281
|
|
343
344
|
pipecat/services/whisper/base_stt.py,sha256=VhslESPnYIeVbmnQTzmlZPV35TH49duxYTvJe0epNnE,7850
|
|
344
345
|
pipecat/services/whisper/stt.py,sha256=9Qd56vWMzg3LtHikQnfgyMtl4odE6BCHDbpAn3HSWjw,17480
|
|
@@ -415,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
|
|
|
415
416
|
pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
|
|
416
417
|
pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
|
|
417
418
|
pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
|
|
418
|
-
dv_pipecat_ai-0.0.85.
|
|
419
|
-
dv_pipecat_ai-0.0.85.
|
|
420
|
-
dv_pipecat_ai-0.0.85.
|
|
421
|
-
dv_pipecat_ai-0.0.85.
|
|
419
|
+
dv_pipecat_ai-0.0.85.dev840.dist-info/METADATA,sha256=ALfdKasSbWLkqVlc0XSkl9lo6qzO7Wpior0WxVFzWZk,32955
|
|
420
|
+
dv_pipecat_ai-0.0.85.dev840.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
421
|
+
dv_pipecat_ai-0.0.85.dev840.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
422
|
+
dv_pipecat_ai-0.0.85.dev840.dist-info/RECORD,,
|
pipecat/serializers/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ from .exotel import ExotelFrameSerializer
|
|
|
5
5
|
from .plivo import PlivoFrameSerializer
|
|
6
6
|
from .telnyx import TelnyxFrameSerializer
|
|
7
7
|
from .twilio import TwilioFrameSerializer
|
|
8
|
+
from .vi import VIFrameSerializer
|
|
8
9
|
|
|
9
10
|
__all__ = [
|
|
10
11
|
"FrameSerializer",
|
|
@@ -15,6 +16,7 @@ __all__ = [
|
|
|
15
16
|
"PlivoFrameSerializer",
|
|
16
17
|
"TelnyxFrameSerializer",
|
|
17
18
|
"TwilioFrameSerializer",
|
|
19
|
+
"VIFrameSerializer",
|
|
18
20
|
]
|
|
19
21
|
|
|
20
22
|
# Optional imports
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Vodafone Idea (VI) WebSocket frame serializer for audio streaming and call management."""
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import json
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from pydantic import BaseModel
|
|
16
|
+
|
|
17
|
+
from pipecat.audio.utils import create_default_resampler
|
|
18
|
+
from pipecat.frames.frames import (
|
|
19
|
+
AudioRawFrame,
|
|
20
|
+
CancelFrame,
|
|
21
|
+
EndFrame,
|
|
22
|
+
Frame,
|
|
23
|
+
InputAudioRawFrame,
|
|
24
|
+
InputDTMFFrame,
|
|
25
|
+
KeypadEntry,
|
|
26
|
+
StartFrame,
|
|
27
|
+
StartInterruptionFrame,
|
|
28
|
+
TransportMessageFrame,
|
|
29
|
+
TransportMessageUrgentFrame,
|
|
30
|
+
)
|
|
31
|
+
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class VIFrameSerializer(FrameSerializer):
    """Serializer for Vodafone Idea (VI) WebSocket protocol.

    This serializer handles converting between Pipecat frames and VI's WebSocket
    protocol for bidirectional audio streaming. It supports audio conversion, DTMF events,
    and real-time communication with VI telephony systems.

    VI WebSocket protocol requirements:
    - PCM audio format at 8kHz sample rate
    - 16-bit Linear PCM encoding
    - Base64 encoded audio payloads
    - JSON message format for control and media events
    - Bitrate: 128 Kbps

    Events (VI → Endpoint):
    - connected: WebSocket connection established
    - start: Stream session started with call/stream IDs
    - media: Audio data in Base64-encoded PCM
    - dtmf: Keypad digit pressed
    - stop: Stream ended
    - mark: Audio playback checkpoint confirmation

    Events (Endpoint → VI):
    - media: Send audio back to VI
    - mark: Request acknowledgment for audio playback
    - clear: Clear queued audio (interruption)
    - exit: Terminate session gracefully
    """

    class InputParams(BaseModel):
        """Configuration parameters for VIFrameSerializer.

        Attributes:
            vi_sample_rate: Sample rate used by VI, defaults to 8000 Hz (telephony standard).
            sample_rate: Optional override for pipeline input sample rate.
            auto_hang_up: Whether to automatically terminate call on EndFrame.
        """

        vi_sample_rate: int = 8000
        sample_rate: Optional[int] = None
        auto_hang_up: bool = False

    def __init__(
        self,
        stream_id: str,
        call_id: Optional[str] = None,
        params: Optional[InputParams] = None,
    ):
        """Initialize the VIFrameSerializer.

        Args:
            stream_id: The VI stream identifier.
            call_id: The associated VI call identifier.
            params: Configuration parameters. Defaults to ``InputParams()``.
        """
        self._stream_id = stream_id
        self._call_id = call_id
        self._params = params or VIFrameSerializer.InputParams()

        self._vi_sample_rate = self._params.vi_sample_rate
        self._sample_rate = 0  # Pipeline input rate; filled in by setup()
        self._call_ended = False

        self._resampler = create_default_resampler()

    @property
    def type(self) -> FrameSerializerType:
        """Gets the serializer type.

        Returns:
            The serializer type as TEXT for JSON WebSocket messages.
        """
        return FrameSerializerType.TEXT

    async def setup(self, frame: StartFrame):
        """Sets up the serializer with pipeline configuration.

        Args:
            frame: The StartFrame containing pipeline configuration.
        """
        # An explicit override in the params wins over the pipeline's rate.
        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix."""
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    async def serialize(self, frame: Frame) -> str | bytes | None:
        """Serializes a Pipecat frame to VI WebSocket format.

        Handles conversion of various frame types to VI WebSocket messages.
        For EndFrame/CancelFrame, returns an exit event if auto_hang_up is
        enabled and the call has not already been marked as ended.

        Args:
            frame: The Pipecat frame to serialize.

        Returns:
            Serialized data as JSON string, or None if the frame isn't handled
            (or there is nothing to send).
        """
        if (
            self._params.auto_hang_up
            and not self._call_ended
            and isinstance(frame, (EndFrame, CancelFrame))
        ):
            self._call_ended = True
            # Return the exit event to terminate the VI session
            return await self._send_exit_event()

        if isinstance(frame, StartInterruptionFrame):
            # Clear/interrupt command for VI - clears queued audio
            message = {
                "event": "clear",
                "stream_id": self._stream_id,
                "call_id": self._call_id,
            }
            logger.debug(f"VI: Sending clear event for stream_id: {self._stream_id}")
            return json.dumps(message)

        if isinstance(frame, AudioRawFrame):
            if self._call_ended:
                logger.debug("VI SERIALIZE: Skipping audio - call has ended")
                return None

            # Resample the pipeline audio to the VI sample rate (8kHz by default)
            serialized_data = await self._resampler.resample(
                frame.audio, frame.sample_rate, self._vi_sample_rate
            )

            # Nothing came out of the resampler: do not emit a media event
            # carrying an empty payload.
            if serialized_data is None or len(serialized_data) == 0:
                return None

            # Encode as base64 for transmission
            payload = base64.b64encode(serialized_data).decode("ascii")

            # VI expects media event format with Base64-encoded PCM audio
            message = {
                "event": "media",
                "stream_id": self._stream_id,
                "media": {
                    "timestamp": self._utc_timestamp(),
                    "chunk": len(serialized_data),  # Chunk size in bytes
                    "payload": payload,
                },
            }
            return json.dumps(message)

        if isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
            # Pass through transport messages (for mark events, etc.)
            return json.dumps(frame.message)

        return None

    async def _send_exit_event(self):
        """Build the exit event that asks VI to terminate the session gracefully.

        Called from serialize() when auto_hang_up is enabled and an EndFrame or
        CancelFrame is received.

        Returns:
            The exit event as a JSON string, or None if it could not be encoded.
        """
        try:
            exit_event = {
                "event": "exit",
                "stream_id": self._stream_id,
                "call_id": self._call_id,
                "timestamp": self._utc_timestamp(),
            }

            logger.info(
                f"VI auto_hang_up: Sending exit event for stream_id: {self._stream_id}, call_id: {self._call_id}"
            )
            return json.dumps(exit_event)
        except Exception as e:
            # Best effort: a failure to build the exit event must not break the pipeline.
            logger.error(f"VI auto_hang_up: Failed to create exit event: {e}")
            return None

    async def _deserialize_media(self, message: dict) -> Frame | None:
        """Convert a VI ``media`` event into an InputAudioRawFrame (or None)."""
        media = message.get("media")
        payload_base64 = media.get("payload") if isinstance(media, dict) else None

        if not payload_base64:
            logger.warning("VI DESERIALIZE: No payload in VI media message")
            return None

        try:
            payload = base64.b64decode(payload_base64)
        except Exception as e:
            logger.error(f"VI DESERIALIZE: Error decoding VI audio payload: {e}")
            return None

        logger.debug(
            f"VI DESERIALIZE: Received audio from VI - {len(payload)} bytes at {self._vi_sample_rate}Hz"
        )

        # Convert from VI sample rate (8kHz) to pipeline sample rate
        deserialized_data = await self._resampler.resample(
            payload,
            self._vi_sample_rate,
            self._sample_rate,
        )

        return InputAudioRawFrame(
            audio=deserialized_data,
            num_channels=1,  # VI uses mono audio
            sample_rate=self._sample_rate,
        )

    def _deserialize_dtmf(self, message: dict) -> Frame | None:
        """Convert a VI ``dtmf`` event into an InputDTMFFrame (or None)."""
        dtmf_data = message.get("dtmf")
        digit = dtmf_data.get("digit") if isinstance(dtmf_data, dict) else None

        # `is None` rather than truthiness so a numeric 0 digit is not dropped.
        if digit is None or digit == "":
            return None

        try:
            logger.info(f"VI: Received DTMF digit: {digit}")
            return InputDTMFFrame(KeypadEntry(str(digit)))
        except ValueError:
            logger.warning(f"Invalid DTMF digit from VI: {digit}")
            return None

    async def deserialize(self, data: str | bytes) -> Frame | None:
        """Deserializes VI WebSocket data to Pipecat frames.

        Handles conversion of VI events to appropriate Pipecat frames:
        ``media`` becomes an InputAudioRawFrame, ``dtmf`` an InputDTMFFrame,
        ``exit``/``call_end``/``callEnd`` a CancelFrame; all other events are
        only logged.

        Args:
            data: The raw WebSocket data from VI.

        Returns:
            A Pipecat frame corresponding to the VI event, or None if unhandled
            or malformed.
        """
        try:
            message = json.loads(data)
        except (ValueError, TypeError):
            # ValueError covers JSONDecodeError and invalid UTF-8 in bytes input;
            # TypeError covers input that is not str/bytes at all.
            logger.error(f"Invalid JSON received from VI: {data}")
            return None

        if not isinstance(message, dict):
            # Valid JSON but not an object (e.g. a list or a bare string).
            logger.warning(
                f"VI DESERIALIZE: Ignoring non-object JSON message of type {type(message).__name__}"
            )
            return None

        # Log all incoming events for debugging and monitoring
        event = message.get("event")
        logger.debug(
            f"VI INCOMING EVENT: {event} - stream_id: {self._stream_id}, call_id: {self._call_id}"
        )

        if event == "media":
            return await self._deserialize_media(message)

        if event == "dtmf":
            return self._deserialize_dtmf(message)

        if event == "connected":
            logger.info(f"VI connection established: {message}")
            return None

        if event == "start":
            logger.info(f"VI stream started: {message}")
            return None

        if event == "stop":
            logger.info(f"VI stream stopped: {message}")
            # Don't end the call here, wait for explicit exit or call end
            return None

        if event == "mark":
            # Mark event - checkpoint confirming audio playback completion
            mark_data = message.get("mark")
            mark_name = mark_data.get("name", "unknown") if isinstance(mark_data, dict) else "unknown"
            logger.info(f"VI mark event received: {mark_name}")
            # Mark events are informational, no frame to return
            return None

        if event == "error":
            error_msg = message.get("error", "Unknown error")
            logger.error(f"VI error: {error_msg}")
            return None

        if event == "exit":
            logger.info("VI exit event received - terminating session")
            self._call_ended = True
            return CancelFrame()

        if event in ("call_end", "callEnd"):
            # Handle call end event (if VI sends this)
            logger.info("VI call end event received")
            self._call_ended = True
            return CancelFrame()

        logger.debug(f"VI UNHANDLED EVENT: {event}")
        return None
|
pipecat/services/vistaar/llm.py
CHANGED
|
@@ -10,9 +10,17 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
|
10
10
|
from urllib.parse import urlencode
|
|
11
11
|
|
|
12
12
|
import httpx
|
|
13
|
+
import jwt
|
|
13
14
|
from loguru import logger
|
|
14
15
|
from pydantic import BaseModel, Field
|
|
15
16
|
|
|
17
|
+
try:
|
|
18
|
+
import redis.asyncio as redis
|
|
19
|
+
REDIS_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
REDIS_AVAILABLE = False
|
|
22
|
+
redis = None
|
|
23
|
+
|
|
16
24
|
from pipecat.frames.frames import (
|
|
17
25
|
CancelFrame,
|
|
18
26
|
EndFrame,
|
|
@@ -55,7 +63,9 @@ class VistaarLLMService(LLMService):
|
|
|
55
63
|
Parameters:
|
|
56
64
|
source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
|
|
57
65
|
target_lang: Target language code for responses.
|
|
58
|
-
session_id: Session ID for maintaining conversation context.
|
|
66
|
+
session_id: Session ID for maintaining conversation context (also used for JWT caching).
|
|
67
|
+
pre_query_response_phrases: List of phrases to say while waiting for response.
|
|
68
|
+
phone_number: Phone number for JWT subject claim.
|
|
59
69
|
extra: Additional model-specific parameters
|
|
60
70
|
"""
|
|
61
71
|
|
|
@@ -63,6 +73,7 @@ class VistaarLLMService(LLMService):
|
|
|
63
73
|
target_lang: Optional[str] = Field(default="mr")
|
|
64
74
|
session_id: Optional[str] = Field(default=None)
|
|
65
75
|
pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
|
|
76
|
+
phone_number: Optional[str] = Field(default="UNKNOWN")
|
|
66
77
|
extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
|
67
78
|
|
|
68
79
|
def __init__(
|
|
@@ -72,6 +83,9 @@ class VistaarLLMService(LLMService):
|
|
|
72
83
|
params: Optional[InputParams] = None,
|
|
73
84
|
timeout: float = 30.0,
|
|
74
85
|
interim_timeout: float = 5.0,
|
|
86
|
+
redis_client: Optional[Any] = None, # redis.Redis type
|
|
87
|
+
jwt_private_key: Optional[str] = None,
|
|
88
|
+
jwt_token_expiry: int = 3600,
|
|
75
89
|
**kwargs,
|
|
76
90
|
):
|
|
77
91
|
"""Initialize Vistaar LLM service.
|
|
@@ -81,6 +95,9 @@ class VistaarLLMService(LLMService):
|
|
|
81
95
|
params: Input parameters for model configuration and behavior.
|
|
82
96
|
timeout: Request timeout in seconds. Defaults to 30.0 seconds.
|
|
83
97
|
interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
|
|
98
|
+
redis_client: Optional Redis client for JWT token caching.
|
|
99
|
+
jwt_private_key: Optional RSA private key in PEM format for JWT signing.
|
|
100
|
+
jwt_token_expiry: JWT token expiry time in seconds. Defaults to 3600 (1 hour).
|
|
84
101
|
**kwargs: Additional arguments passed to the parent LLMService.
|
|
85
102
|
"""
|
|
86
103
|
super().__init__(**kwargs)
|
|
@@ -95,6 +112,16 @@ class VistaarLLMService(LLMService):
|
|
|
95
112
|
self._extra = params.extra if isinstance(params.extra, dict) else {}
|
|
96
113
|
self._timeout = timeout
|
|
97
114
|
self._interim_timeout = interim_timeout
|
|
115
|
+
self._phone_number = params.phone_number
|
|
116
|
+
|
|
117
|
+
# JWT authentication setup
|
|
118
|
+
self._redis_client = redis_client
|
|
119
|
+
self._jwt_private_key = jwt_private_key
|
|
120
|
+
self._jwt_token_expiry = jwt_token_expiry
|
|
121
|
+
self._jwt_issuer = "voice-provider"
|
|
122
|
+
|
|
123
|
+
if self._jwt_private_key and not self._redis_client:
|
|
124
|
+
logger.warning("JWT private key provided but no Redis client for caching. JWT auth will regenerate tokens on each request.")
|
|
98
125
|
|
|
99
126
|
# Create an async HTTP client
|
|
100
127
|
self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
|
|
@@ -112,6 +139,53 @@ class VistaarLLMService(LLMService):
|
|
|
112
139
|
f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
|
|
113
140
|
)
|
|
114
141
|
|
|
142
|
+
async def _get_jwt_token(self) -> Optional[str]:
    """Generate or retrieve a cached JWT token.

    When both a Redis client and a session ID are configured, the token is
    looked up under ``vistaar_jwt:<session_id>`` first and stored there after
    generation. Redis failures are logged and never prevent a token from
    being returned.

    Returns:
        JWT token string or None if JWT auth is not configured.
    """
    if not self._jwt_private_key:
        return None

    # Cache entries expire this many seconds before the token itself does, so
    # a cache hit never hands out a token that is about to expire in flight.
    cache_safety_margin = 30

    use_cache = bool(self._redis_client and self._session_id)
    redis_key = f"vistaar_jwt:{self._session_id}"

    # Try to get from Redis cache if available
    if use_cache:
        try:
            cached_token = await self._redis_client.get(redis_key)
            if cached_token:
                logger.debug(f"Retrieved JWT token from Redis cache for session_id: {self._session_id}")
                # Redis clients return bytes unless configured to decode responses.
                return cached_token.decode("utf-8") if isinstance(cached_token, bytes) else cached_token
        except Exception as e:
            logger.warning(f"Redis cache retrieval failed: {e}. Generating new token.")

    # Generate new token
    current_time = int(time.time())
    payload = {
        "sub": self._phone_number,  # Subject identifier (phone number)
        "iss": self._jwt_issuer,  # Issuer
        "iat": current_time,  # Issued at timestamp
        "exp": current_time + self._jwt_token_expiry,  # Expiration timestamp
    }

    token = jwt.encode(payload, self._jwt_private_key, algorithm="RS256")
    logger.info(f"Generated new JWT token for {self._phone_number}, expires in {self._jwt_token_expiry}s")

    # Cache in Redis if available. Tokens whose lifetime does not exceed the
    # safety margin are not cached at all.
    cache_ttl = self._jwt_token_expiry - cache_safety_margin
    if use_cache and cache_ttl > 0:
        try:
            await self._redis_client.setex(redis_key, cache_ttl, token)
            logger.debug(f"Cached JWT token in Redis for session_id: {self._session_id} with {cache_ttl}s TTL")
        except Exception as e:
            logger.warning(f"Redis cache storage failed: {e}. Continuing without cache.")

    return token
|
|
188
|
+
|
|
115
189
|
async def _extract_messages_to_query(self, context: OpenAILLMContext) -> str:
|
|
116
190
|
"""Extract only the last user message from context.
|
|
117
191
|
|
|
@@ -259,9 +333,20 @@ class VistaarLLMService(LLMService):
|
|
|
259
333
|
self._interim_in_progress = False
|
|
260
334
|
self._interim_completion_event.clear() # Reset the event for new request
|
|
261
335
|
|
|
336
|
+
# Prepare headers with JWT authentication if configured
|
|
337
|
+
headers = {}
|
|
338
|
+
try:
|
|
339
|
+
jwt_token = await self._get_jwt_token()
|
|
340
|
+
if jwt_token:
|
|
341
|
+
headers["Authorization"] = f"Bearer {jwt_token}"
|
|
342
|
+
logger.debug(f"Added JWT authentication header for session_id: {self._session_id}")
|
|
343
|
+
except Exception as e:
|
|
344
|
+
logger.error(f"Failed to generate JWT token: {e}")
|
|
345
|
+
raise
|
|
346
|
+
|
|
262
347
|
try:
|
|
263
348
|
# Use httpx to handle SSE streaming
|
|
264
|
-
async with self._client.stream("GET", url) as response:
|
|
349
|
+
async with self._client.stream("GET", url, headers=headers) as response:
|
|
265
350
|
self._current_response = response # Store for potential cancellation
|
|
266
351
|
response.raise_for_status()
|
|
267
352
|
|
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev834.dist-info → dv_pipecat_ai-0.0.85.dev840.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev834.dist-info → dv_pipecat_ai-0.0.85.dev840.dist-info}/top_level.txt
RENAMED
|
File without changes
|