dv-pipecat-ai 0.0.82.dev776__py3-none-any.whl → 0.0.82.dev815__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dv-pipecat-ai
- Version: 0.0.82.dev776
+ Version: 0.0.82.dev815
  Summary: An open source framework for voice (and multimodal) assistants
  License-Expression: BSD-2-Clause
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -1,4 +1,4 @@
- dv_pipecat_ai-0.0.82.dev776.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
+ dv_pipecat_ai-0.0.82.dev815.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -46,7 +46,7 @@ pipecat/audio/vad/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
  pipecat/audio/vad/silero.py,sha256=r9UL8aEe-QoRMNDGWLUlgUYew93-QFojE9sIqLO0VYE,7792
  pipecat/audio/vad/vad_analyzer.py,sha256=XkZLEe4z7Ja0lGoYZst1HNYqt5qOwG-vjsk_w8chiNA,7430
  pipecat/audio/vad/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pipecat/audio/vad/data/silero_vad.onnx,sha256=JiOilT9v89LB5hdAxs23FoEzR5smff7xFKSjzFvdeI8,2327524
+ pipecat/audio/vad/data/silero_vad.onnx,sha256=WX0ws-wHZgjQWUd7sUz-_9-VG_XK43DTj2XTO7_oIAQ,2327524
  pipecat/clocks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  pipecat/clocks/base_clock.py,sha256=PuTmCtPKz5VG0VxhN5cyhbvduEBnfNPgA6GLAu1eSns,929
  pipecat/clocks/system_clock.py,sha256=ht6TdDAn0JVXEmhLdt5igcHMQOkKO4YHNuOjuKcxkUU,1315
@@ -123,6 +123,7 @@ pipecat/runner/run.py,sha256=BuVI9-cpnQHOBxymkPoqpGaSaZImWZKLeu1g0JsvS8E,18818
  pipecat/runner/types.py,sha256=iG9A1ox1ePXiEo2bWANsi6RxpGOb5n_Am5O3enbojBM,1599
  pipecat/runner/utils.py,sha256=cT4G46skiIuZexm-KJ9ltrtufcGxPCAk7HW95rCy3tA,17724
  pipecat/serializers/__init__.py,sha256=OV61GQX5ZVU7l7Dt7UTBdv2wUF7ZvtbCoXryo7nnoGY,734
+ pipecat/serializers/asterisk.py,sha256=rDb8qMYNGgHzRC_EyCgxB6p99d_1YXXoIm-YW7aHtAc,5236
  pipecat/serializers/base_serializer.py,sha256=OyBUZccs2ZT9mfkBbq2tGsUJMvci6o-j90Cl1sicPaI,2030
  pipecat/serializers/convox.py,sha256=MXCLhV6GMnoP8bI6-EVrObhrftEyTGOmzVeIU5ywmPo,9536
  pipecat/serializers/exotel.py,sha256=LB4wYoXDjPmtkydrZ0G4H4u-SXpQw9KjyRzBZCYloEE,5907
@@ -279,6 +280,8 @@ pipecat/services/together/__init__.py,sha256=hNMycJDDf3CLiL9WA9fwvMdYphyDWLv0Oab
  pipecat/services/together/llm.py,sha256=VSayO-U6g9Ld0xK9CXRQPUsd5gWJKtiA8qDAyXgsSkE,1958
  pipecat/services/ultravox/__init__.py,sha256=EoHCSXI2o0DFQslELgkhAGZtxDj63gZi-9ZEhXljaKE,259
  pipecat/services/ultravox/stt.py,sha256=uCQm_-LbycXdXRV6IE1a6Mymis6tyww7V8PnPzAQtx8,16586
+ pipecat/services/vistaar/__init__.py,sha256=UFfSWFN5rbzl6NN-E_OH_MFaSYodZWNlenAU0wk-rAI,110
+ pipecat/services/vistaar/llm.py,sha256=yXo6hQ_YscxYZZbQLGoXy5X1Pt-lMPpGQc5MrnjYqpI,17332
  pipecat/services/whisper/__init__.py,sha256=smADmw0Fv98k7cGRuHTEcljKTO2WdZqLpJd0qsTCwH8,281
  pipecat/services/whisper/base_stt.py,sha256=VhslESPnYIeVbmnQTzmlZPV35TH49duxYTvJe0epNnE,7850
  pipecat/services/whisper/stt.py,sha256=9Qd56vWMzg3LtHikQnfgyMtl4odE6BCHDbpAn3HSWjw,17480
@@ -334,7 +337,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=HwDCqLGijhYD3F8nxDuQmEw-YkRw0
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
- dv_pipecat_ai-0.0.82.dev776.dist-info/METADATA,sha256=VJAth6kEBgJT2SJHJ5KnMorgpHZvF6ZMg6Uqc65CL-Q,32457
- dv_pipecat_ai-0.0.82.dev776.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dv_pipecat_ai-0.0.82.dev776.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD,,
+ dv_pipecat_ai-0.0.82.dev815.dist-info/METADATA,sha256=L2lkaXtlnUH8zjhRFqwqSQS-0pfyy90Ym9bOFeAfchU,32457
+ dv_pipecat_ai-0.0.82.dev815.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dv_pipecat_ai-0.0.82.dev815.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
+ dv_pipecat_ai-0.0.82.dev815.dist-info/RECORD,,
Binary file pipecat/audio/vad/data/silero_vad.onnx differs
@@ -0,0 +1,129 @@
+ # pipecat/serializers/asterisk.py
+ import base64
+ import json
+ from typing import Literal, Optional
+
+ from pydantic import BaseModel
+
+ from pipecat.audio.utils import create_stream_resampler, pcm_to_ulaw, ulaw_to_pcm
+ from pipecat.frames.frames import (
+     AudioRawFrame,
+     CancelFrame,
+     EndFrame,
+     Frame,
+     InputAudioRawFrame,
+     StartFrame,
+     StartInterruptionFrame,
+     TransportMessageFrame,
+     TransportMessageUrgentFrame,
+ )
+ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
+
+
+ class AsteriskFrameSerializer(FrameSerializer):
+     class InputParams(BaseModel):
+         """Configuration parameters for AsteriskFrameSerializer.
+
+         Parameters:
+             telephony_encoding: The encoding used by the telephony system (e.g., "pcmu" for μ-law).
+             telephony_sample_rate: The sample rate used by the telephony system (e.g., 8000 Hz).
+             sample_rate: Optional override for the pipeline input sample rate.
+             auto_hang_up: Whether to automatically terminate the call on EndFrame.
+         """
+
+         # What the adapter/Asterisk is sending/expecting on the wire:
+         # "pcmu" -> μ-law @ 8k; "pcm16" -> signed 16-bit @ 8k
+         telephony_encoding: Literal["pcmu", "pcm16"] = "pcmu"
+         telephony_sample_rate: int = 8000
+         sample_rate: Optional[int] = None  # pipeline input rate
+         auto_hang_up: bool = False  # when True, serialize a "hangup" event on EndFrame/CancelFrame
+
+     def __init__(self, stream_id: str, params: Optional[InputParams] = None):
+         self._stream_id = stream_id
+         self._params = params or AsteriskFrameSerializer.InputParams()
+         self._tel_rate = self._params.telephony_sample_rate
+         self._sample_rate = 0
+         self._hangup_sent = False  # guards against sending "hangup" twice
+         self._in_resampler = create_stream_resampler()
+         self._out_resampler = create_stream_resampler()
+
+     @property
+     def type(self) -> FrameSerializerType:
+         return FrameSerializerType.TEXT  # we send/recv JSON strings
+
+     async def setup(self, frame: StartFrame):
+         self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
+
+     # Pipecat -> Adapter (play to caller)
+     async def serialize(self, frame: Frame) -> str | bytes | None:
+         # On pipeline end, ask the bridge to hang up
+         if (
+             self._params.auto_hang_up
+             and not self._hangup_sent
+             and isinstance(frame, (EndFrame, CancelFrame))
+         ):
+             self._hangup_sent = True
+             return json.dumps({"event": "hangup"})
+         if isinstance(frame, StartInterruptionFrame):
+             return json.dumps({"event": "clear", "streamId": self._stream_id})
+         if isinstance(frame, AudioRawFrame):
+             pcm = frame.audio
+             if self._params.telephony_encoding == "pcmu":
+                 ul = await pcm_to_ulaw(pcm, frame.sample_rate, self._tel_rate, self._out_resampler)
+                 if not ul:
+                     return None
+                 payload = base64.b64encode(ul).decode("utf-8")
+                 return json.dumps(
+                     {
+                         "event": "media",
+                         "encoding": "pcmu",
+                         "sampleRate": self._tel_rate,
+                         "payload": payload,
+                     }
+                 )
+             else:  # "pcm16"
+                 # Resample to 8k if needed; the data stays PCM16 bytes
+                 pcm8 = await self._out_resampler.resample(pcm, frame.sample_rate, self._tel_rate)
+                 if not pcm8:
+                     return None
+                 payload = base64.b64encode(pcm8).decode("utf-8")
+                 return json.dumps(
+                     {
+                         "event": "media",
+                         "encoding": "pcm16",
+                         "sampleRate": self._tel_rate,
+                         "payload": payload,
+                     }
+                 )
+         if isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
+             return json.dumps(frame.message)
+         return None
+
+     # Adapter -> Pipecat (audio from caller)
+     async def deserialize(self, data: str | bytes) -> Frame | None:
+         try:
+             msg = json.loads(data)
+         except Exception:
+             return None
+         if msg.get("event") == "media":
+             enc = msg.get("encoding")
+             sr = int(msg.get("sampleRate", self._tel_rate))
+             raw = base64.b64decode(msg.get("payload", ""))
+             if not raw:
+                 return None
+             if enc == "pcmu":
+                 pcm = await ulaw_to_pcm(raw, sr, self._sample_rate, self._in_resampler)
+             elif enc == "pcm16":
+                 # Resample if the pipeline rate != 8k
+                 pcm = await self._in_resampler.resample(raw, sr, self._sample_rate)
+             else:
+                 return None
+             if not pcm:
+                 return None
+             return InputAudioRawFrame(audio=pcm, num_channels=1, sample_rate=self._sample_rate)
+         elif msg.get("event") == "dtmf":
+             # Optional: map to InputDTMFFrame if needed
+             return None
+         elif msg.get("event") == "hangup":
+             # The bridge is hanging up; treat it as a cancellation.
+             return CancelFrame()
+         return None
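The new serializer speaks a small JSON protocol over a text websocket: "media" events carry base64-encoded audio (μ-law "pcmu" or 16-bit "pcm16"), while "clear" and "hangup" are control events. Below is a minimal round-trip sketch of that protocol, assuming a 16 kHz pipeline rate and that StartFrame accepts audio_in_sample_rate as in recent Pipecat releases; the stream id and payloads are made up for illustration:

```python
import asyncio
import base64
import json

from pipecat.frames.frames import OutputAudioRawFrame, StartFrame
from pipecat.serializers.asterisk import AsteriskFrameSerializer


async def main():
    serializer = AsteriskFrameSerializer(
        stream_id="demo-stream",  # hypothetical stream id
        params=AsteriskFrameSerializer.InputParams(telephony_encoding="pcm16"),
    )
    await serializer.setup(StartFrame(audio_in_sample_rate=16000))

    # Caller audio arriving from the Asterisk bridge: 20 ms of silence at 8 kHz.
    inbound = json.dumps({
        "event": "media",
        "encoding": "pcm16",
        "sampleRate": 8000,
        "payload": base64.b64encode(b"\x00\x00" * 160).decode(),
    })
    frame = await serializer.deserialize(inbound)
    print(type(frame).__name__, frame.sample_rate)  # InputAudioRawFrame 16000

    # Bot audio going back to the caller becomes a JSON "media" event at 8 kHz.
    out = await serializer.serialize(
        OutputAudioRawFrame(audio=b"\x00\x00" * 320, sample_rate=16000, num_channels=1)
    )
    print(json.loads(out)["event"], json.loads(out)["sampleRate"])  # media 8000


asyncio.run(main())
```

In practice the serializer would be handed to a websocket transport rather than driven by hand as above.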
@@ -0,0 +1,5 @@
+ """Vistaar AI service implementations."""
+
+ from .llm import VistaarLLMService
+
+ __all__ = ["VistaarLLMService"]
@@ -0,0 +1,431 @@
+ """Vistaar LLM Service implementation."""
+
+ import asyncio
+ import json
+ import uuid
+ from typing import Any, AsyncGenerator, Dict, Optional
+ from urllib.parse import urlencode
+
+ import httpx
+ from loguru import logger
+ from pydantic import BaseModel, Field
+
+ from pipecat.frames.frames import (
+     CancelFrame,
+     EndFrame,
+     Frame,
+     LLMFullResponseEndFrame,
+     LLMFullResponseStartFrame,
+     LLMMessagesFrame,
+     LLMTextFrame,
+     LLMUpdateSettingsFrame,
+     StartInterruptionFrame,
+ )
+ from pipecat.processors.aggregators.llm_response import (
+     LLMAssistantAggregatorParams,
+     LLMUserAggregatorParams,
+ )
+ from pipecat.processors.aggregators.openai_llm_context import (
+     OpenAILLMContext,
+     OpenAILLMContextFrame,
+ )
+ from pipecat.processors.frame_processor import FrameDirection
+ from pipecat.services.llm_service import LLMService
+ from pipecat.services.openai.llm import (
+     OpenAIAssistantContextAggregator,
+     OpenAIContextAggregatorPair,
+     OpenAIUserContextAggregator,
+ )
+
+
+ class VistaarLLMService(LLMService):
+     """A service for interacting with Vistaar's voice API using Server-Sent Events.
+
+     This service handles text generation through Vistaar's SSE endpoint, which
+     streams responses in real time. Vistaar maintains all conversation context
+     server-side via session_id, so we only send the latest user message.
+     """
+
+     class InputParams(BaseModel):
+         """Input parameters for Vistaar model configuration.
+
+         Parameters:
+             source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
+             target_lang: Target language code for responses.
+             session_id: Session ID for maintaining conversation context.
+             extra: Additional model-specific parameters.
+         """
+
+         source_lang: Optional[str] = Field(default="mr")
+         target_lang: Optional[str] = Field(default="mr")
+         session_id: Optional[str] = Field(default=None)
+         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
+
+     def __init__(
+         self,
+         *,
+         base_url: str = "https://vistaar.kenpath.ai/api",
+         params: Optional[InputParams] = None,
+         timeout: float = 30.0,
+         interim_timeout: float = 5.0,
+         interim_message: str = "एक क्षण थांबा, मी बघतो. ",
+         **kwargs,
+     ):
+         """Initialize the Vistaar LLM service.
+
+         Args:
+             base_url: The base URL for the Vistaar API. Defaults to "https://vistaar.kenpath.ai/api".
+             params: Input parameters for model configuration and behavior.
+             timeout: Request timeout in seconds. Defaults to 30.0.
+             interim_timeout: Time in seconds before sending the interim message. Defaults to 5.0.
+             interim_message: Message to send if the API takes longer than interim_timeout.
+                 Defaults to "एक क्षण थांबा, मी बघतो. " (Marathi for "Wait a moment, let me check.").
+             **kwargs: Additional arguments passed to the parent LLMService.
+         """
+         super().__init__(**kwargs)
+
+         params = params or VistaarLLMService.InputParams()
+
+         self._base_url = base_url.rstrip("/")
+         self._source_lang = params.source_lang
+         self._target_lang = params.target_lang
+         self._session_id = params.session_id or str(uuid.uuid4())
+         self._extra = params.extra if isinstance(params.extra, dict) else {}
+         self._timeout = timeout
+         self._interim_timeout = interim_timeout
+         self._interim_message = interim_message
+
+         # Create an async HTTP client. NOTE: TLS certificate verification is disabled.
+         self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
+
+         # Interruption handling state
+         self._current_response = None  # Track the current HTTP response stream
+         self._is_interrupted = False  # Track if the current generation was interrupted
+         self._partial_response = []  # Track what was actually sent before interruption
+         self._interim_sent = False  # Track if the interim message was sent
+         self._interim_task = None  # Track the interim message task
+
+         logger.info(
+             f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
+         )
+
+     async def _extract_messages_to_query(self, context: OpenAILLMContext) -> str:
+         """Extract only the last user message from the context.
+
+         Since Vistaar maintains context server-side via session_id,
+         we only need to send the most recent user message.
+
+         As a fallback for context synchronization, we can optionally include
+         information about interrupted responses.
+
+         Args:
+             context: The OpenAI LLM context containing messages.
+
+         Returns:
+             The last user message as a query string, optionally with context hints.
+         """
+         messages = context.get_messages()
+         query_parts = []
+
+         # Include interrupted response context as a hint (optional fallback strategy)
+         if hasattr(self, "_last_interrupted_response"):
+             interrupted_text = self._last_interrupted_response[:100]  # Limit length
+             query_parts.append(
+                 f"[Context: I was previously saying '{interrupted_text}...' when interrupted]"
+             )
+             # Clear the interrupted response after using it
+             delattr(self, "_last_interrupted_response")
+
+         # Find the last user message (iterate in reverse for efficiency)
+         for message in reversed(messages):
+             if message.get("role") == "user":
+                 content = message.get("content", "")
+
+                 # Handle content that might be a list (for multimodal messages)
+                 if isinstance(content, list):
+                     text_parts = [
+                         item.get("text", "") for item in content if item.get("type") == "text"
+                     ]
+                     content = " ".join(text_parts)
+
+                 if isinstance(content, str):
+                     query_parts.append(content.strip())
+                 break
+
+         # If no user message was found, return an empty string or just the context hint
+         return " ".join(query_parts) if query_parts else ""
+
+     async def _handle_interruption(self):
+         """Handle an interruption by cancelling the ongoing stream."""
+         logger.debug("Handling interruption for Vistaar LLM")
+
+         # Set the interruption flag
+         self._is_interrupted = True
+
+         # Cancel the interim message task if active
+         await self._cancel_interim_message_task(
+             "Cancelled interim message task - handling interruption"
+         )
+
+         # Cancel the ongoing HTTP response stream if active
+         if self._current_response:
+             try:
+                 await self._current_response.aclose()
+                 logger.debug("Closed active Vistaar response stream")
+             except Exception as e:
+                 logger.warning(f"Error closing Vistaar response stream: {e}")
+             finally:
+                 self._current_response = None
+
+         # Store the partial response for potential inclusion in the next query
+         if self._partial_response:
+             partial_text = "".join(self._partial_response)
+             logger.debug(f"Storing interrupted response: {partial_text[:100]}...")
+             # Store the interrupted response for the next query's context
+             self._last_interrupted_response = partial_text
+
+         # Clear the current partial response
+         self._partial_response = []
+
+     async def _send_interim_message(self):
+         """Send the interim message after the timeout."""
+         try:
+             await asyncio.sleep(self._interim_timeout)
+             if not self._is_interrupted and not self._interim_sent:
+                 logger.info(f"Sending interim message after {self._interim_timeout}s timeout")
+                 self._interim_sent = True
+                 await self.push_frame(LLMTextFrame(text=self._interim_message))
+         except asyncio.CancelledError:
+             logger.debug("Interim message task cancelled")
+         except Exception as e:
+             logger.error(f"Error sending interim message: {e}")
+
+     async def _stream_response(self, query: str) -> AsyncGenerator[str, None]:
+         """Stream a response from the Vistaar API using Server-Sent Events.
+
+         Args:
+             query: The user's query to send to the API.
+
+         Yields:
+             Text chunks from the streaming response.
+         """
+         # Prepare the query parameters
+         params = {
+             "query": query,
+             "session_id": self._session_id,
+             "source_lang": self._source_lang,
+             "target_lang": self._target_lang,
+         }
+
+         # Add any extra parameters
+         params.update(self._extra)
+
+         # Construct the full URL with query parameters
+         url = f"{self._base_url}/voice/?{urlencode(params)}"
+
+         logger.info(
+             f"Vistaar API request - URL: {self._base_url}/voice/, Session: {self._session_id}, Query: {query[:100]}..."
+         )
+         logger.debug(f"Full URL with params: {url}")
+
+         # Reset interruption state and the partial response for the new request
+         self._is_interrupted = False
+         self._partial_response = []
+         self._interim_sent = False
+
+         try:
+             # Use httpx to handle SSE streaming
+             async with self._client.stream("GET", url) as response:
+                 self._current_response = response  # Store for potential cancellation
+                 response.raise_for_status()
+
+                 # Process the SSE stream
+                 async for line in response.aiter_lines():
+                     # Check for interruption before processing each line
+                     if self._is_interrupted:
+                         logger.debug("Stream interrupted, stopping processing")
+                         break
+
+                     if not line:
+                         continue
+
+                     self._partial_response.append(line)  # Track what we're sending
+                     yield line
+
+         except httpx.HTTPStatusError as e:
+             logger.error(
+                 f"Vistaar HTTP error - Status: {e.response.status_code}, URL: {url}, Response: {e.response.text if hasattr(e.response, 'text') else 'N/A'}"
+             )
+             raise
+         except httpx.TimeoutException:
+             logger.error(f"Vistaar timeout error - URL: {url}, Timeout: {self._timeout}s")
+             raise
+         except Exception as e:
+             logger.error(
+                 f"Vistaar unexpected error - Type: {type(e).__name__}, Message: {str(e)}, URL: {url}"
+             )
+             raise
+         finally:
+             # Clean up the response reference
+             self._current_response = None
+
+     async def _process_context(self, context: OpenAILLMContext):
+         """Process the LLM context and generate a streaming response.
+
+         Args:
+             context: The OpenAI LLM context containing messages to process.
+         """
+         logger.info(f"Vistaar processing context - Session: {self._session_id}")
+         try:
+             # Extract the query from the context
+             query = await self._extract_messages_to_query(context)
+
+             if not query:
+                 logger.warning(
+                     f"Vistaar: No query extracted from context - Session: {self._session_id}"
+                 )
+                 return
+
+             logger.info(f"Vistaar extracted query: {query}")
+             logger.debug(f"Processing query: {query[:100]}...")
+
+             # Start the response
+             await self.push_frame(LLMFullResponseStartFrame())
+             await self.start_processing_metrics()
+             await self.start_ttfb_metrics()
+
+             # Start the interim message task
+             self._interim_task = self.create_task(
+                 self._send_interim_message(), "Vistaar LLM - _send_interim_message"
+             )
+
+             first_chunk = True
+             full_response = []
+
+             # Stream the response
+             async for text_chunk in self._stream_response(query):
+                 if first_chunk:
+                     await self.stop_ttfb_metrics()
+                     first_chunk = False
+                     # Cancel the interim message task since we got the first response
+                     await self._cancel_interim_message_task(
+                         "Cancelled interim message task - got first response"
+                     )
+
+                 # Push each text chunk as it arrives
+                 await self.push_frame(LLMTextFrame(text=text_chunk))
+                 full_response.append(text_chunk)
+
+             # No need to update the context - Vistaar maintains all context server-side.
+             # The response has already been sent via LLMTextFrame chunks.
+
+         except Exception as e:
+             logger.error(
+                 f"Vistaar context processing error - Session: {self._session_id}, Error: {type(e).__name__}: {str(e)}"
+             )
+             import traceback
+
+             logger.error(f"Vistaar traceback: {traceback.format_exc()}")
+             raise
+         finally:
+             # Clean up the interim message task
+             await self._cancel_interim_message_task(
+                 "Cancelled interim message task in finally block"
+             )
+             await self.stop_processing_metrics()
+             await self.push_frame(LLMFullResponseEndFrame())
+
+     async def process_frame(self, frame: Frame, direction: FrameDirection):
+         """Process frames for LLM completion requests.
+
+         Handles OpenAILLMContextFrame, LLMMessagesFrame, and LLMUpdateSettingsFrame
+         to trigger LLM completions and manage settings.
+
+         Args:
+             frame: The frame to process.
+             direction: The direction of frame processing.
+         """
+         await super().process_frame(frame, direction)
+         context = None
+         if isinstance(frame, (EndFrame, CancelFrame)):
+             await self._cancel_interim_message_task(
+                 f"Cancelled interim message task - received {type(frame).__name__}"
+             )
+             await self.push_frame(frame, direction)
+             return
+         elif isinstance(frame, StartInterruptionFrame):
+             await self._handle_interruption()
+             await self.push_frame(frame, direction)
+             return
+         elif isinstance(frame, OpenAILLMContextFrame):
+             context = frame.context
+         elif isinstance(frame, LLMMessagesFrame):
+             context = OpenAILLMContext.from_messages(frame.messages)
+         elif isinstance(frame, LLMUpdateSettingsFrame):
+             # Update settings if needed
+             settings = frame.settings
+             if "source_lang" in settings:
+                 self._source_lang = settings["source_lang"]
+             if "target_lang" in settings:
+                 self._target_lang = settings["target_lang"]
+             if "session_id" in settings:
+                 self._session_id = settings["session_id"]
+             logger.debug(f"Updated Vistaar settings: {settings}")
+         else:
+             await self.push_frame(frame, direction)
+
+         if context:
+             try:
+                 await self._process_context(context)
+             except httpx.TimeoutException:
+                 logger.error("Timeout while processing Vistaar request")
+                 await self._call_event_handler("on_completion_timeout")
+             except Exception as e:
+                 logger.error(f"Error processing Vistaar request: {e}")
+                 raise
+
+     def create_context_aggregator(
+         self,
+         context: OpenAILLMContext,
+         *,
+         user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(),
+         assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(),
+     ) -> OpenAIContextAggregatorPair:
+         """Create context aggregators for the Vistaar LLM.
+
+         Since Vistaar uses an OpenAI-compatible message format, we reuse OpenAI's
+         context aggregators directly, similar to how the Groq and Azure services work.
+
+         Args:
+             context: The LLM context to create aggregators for.
+             user_params: Parameters for user message aggregation.
+             assistant_params: Parameters for assistant message aggregation.
+
+         Returns:
+             OpenAIContextAggregatorPair: A pair of OpenAI context aggregators,
+             compatible with Vistaar's OpenAI-like message format.
+         """
+         context.set_llm_adapter(self.get_llm_adapter())
+         user = OpenAIUserContextAggregator(context, params=user_params)
+         assistant = OpenAIAssistantContextAggregator(context, params=assistant_params)
+         return OpenAIContextAggregatorPair(_user=user, _assistant=assistant)
+
+     async def close(self):
+         """Close the HTTP client when the service is shut down."""
+         await self._client.aclose()
+
+     def __del__(self):
+         """Best-effort cleanup: close the client if an event loop is still running."""
+         try:
+             asyncio.create_task(self._client.aclose())
+         except Exception:
+             pass
+
+     async def _cancel_interim_message_task(self, message: str = "Cancelled interim message task"):
+         """Cancel the pending interim message task, if any."""
+         if self._interim_task and not self._interim_task.done():
+             await self.cancel_task(self._interim_task)
+             self._interim_task = None
+             logger.debug(message)
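Because create_context_aggregator returns OpenAI-style aggregators, the service drops into a standard Pipecat pipeline between a user aggregator and an assistant aggregator. A hypothetical wiring sketch follows (Pipeline and OpenAILLMContext are Pipecat's public API; STT, TTS, and transport processors are omitted, and the sample message is illustrative):

```python
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.vistaar import VistaarLLMService

llm = VistaarLLMService(
    params=VistaarLLMService.InputParams(source_lang="mr", target_lang="mr"),
    timeout=30.0,
)

# Vistaar keeps conversation history server-side, keyed by session_id, so the
# local context only ever needs to supply the latest user turn.
context = OpenAILLMContext(messages=[{"role": "user", "content": "नमस्कार"}])
aggregators = llm.create_context_aggregator(context)

pipeline = Pipeline([
    aggregators.user(),       # aggregates user input into the context
    llm,                      # streams SSE lines as LLMTextFrame frames
    aggregators.assistant(),  # records the assistant's reply
])
```

Note that the SSE endpoint is queried with a plain GET on `{base_url}/voice/` and the interim-message timer is cancelled on the first streamed line, so interim_timeout bounds the time to first chunk, not the whole response.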