dv-pipecat-ai 0.0.85.dev818__py3-none-any.whl → 0.0.85.dev858__py3-none-any.whl
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/METADATA +2 -1
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/RECORD +32 -29
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +5 -1
- pipecat/frames/frames.py +34 -0
- pipecat/metrics/connection_metrics.py +45 -0
- pipecat/processors/aggregators/llm_response.py +25 -4
- pipecat/processors/dtmf_aggregator.py +17 -21
- pipecat/processors/frame_processor.py +51 -8
- pipecat/processors/metrics/frame_processor_metrics.py +108 -0
- pipecat/processors/transcript_processor.py +22 -1
- pipecat/serializers/__init__.py +2 -0
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +2 -2
- pipecat/serializers/custom.py +2 -2
- pipecat/serializers/vi.py +326 -0
- pipecat/services/cartesia/tts.py +75 -10
- pipecat/services/deepgram/stt.py +317 -17
- pipecat/services/elevenlabs/stt.py +487 -19
- pipecat/services/elevenlabs/tts.py +28 -4
- pipecat/services/google/llm.py +26 -11
- pipecat/services/openai/base_llm.py +79 -14
- pipecat/services/salesforce/llm.py +321 -86
- pipecat/services/sarvam/tts.py +0 -1
- pipecat/services/soniox/stt.py +45 -10
- pipecat/services/vistaar/llm.py +97 -6
- pipecat/transcriptions/language.py +50 -0
- pipecat/transports/base_input.py +15 -11
- pipecat/transports/base_output.py +29 -3
- pipecat/utils/redis.py +58 -0
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/top_level.txt +0 -0
pipecat/services/openai/base_llm.py

```diff
@@ -18,6 +18,7 @@ from openai import (
     APITimeoutError,
     AsyncOpenAI,
     AsyncStream,
+    BadRequestError,
     DefaultAsyncHttpxClient,
 )
 from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
@@ -32,6 +33,7 @@ from pipecat.frames.frames import (
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
+    WarmupLLMFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.llm_context import LLMContext
@@ -99,6 +101,7 @@ class BaseOpenAILLMService(LLMService):
         params: Optional[InputParams] = None,
         retry_timeout_secs: Optional[float] = 5.0,
         retry_on_timeout: Optional[bool] = False,
+        enable_warmup: bool = False,
         **kwargs,
     ):
         """Initialize the BaseOpenAILLMService.
@@ -113,6 +116,7 @@ class BaseOpenAILLMService(LLMService):
             params: Input parameters for model configuration and behavior.
             retry_timeout_secs: Request timeout in seconds. Defaults to 5.0 seconds.
             retry_on_timeout: Whether to retry the request once if it times out.
+            enable_warmup: Whether to enable LLM cache warmup. Defaults to False.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -132,6 +136,7 @@ class BaseOpenAILLMService(LLMService):
         }
         self._retry_timeout_secs = retry_timeout_secs
         self._retry_on_timeout = retry_on_timeout
+        self._enable_warmup = enable_warmup
         self.set_model_name(model)
         self._client = self.create_client(
             api_key=api_key,
@@ -200,20 +205,29 @@ class BaseOpenAILLMService(LLMService):
         """
         params = self.build_chat_completion_params(params_from_context)

-
-
-
-
-
-
-
-
-
+        await self.start_connection_metrics()
+
+        try:
+            if self._retry_on_timeout:
+                try:
+                    chunks = await asyncio.wait_for(
+                        self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
+                    )
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+                except (APITimeoutError, asyncio.TimeoutError):
+                    # Retry, this time without a timeout so we get a response
+                    logger.debug(f"{self}: Retrying chat completion due to timeout")
+                    chunks = await self._client.chat.completions.create(**params)
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+            else:
                 chunks = await self._client.chat.completions.create(**params)
+                await self.stop_connection_metrics(success=True, connection_type="http")
                 return chunks
-
-
-
+        except Exception as e:
+            await self.stop_connection_metrics(success=False, error=str(e), connection_type="http")
+            raise

     def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for chat completion request.
@@ -438,14 +452,19 @@ class BaseOpenAILLMService(LLMService):
         completions and manage settings.
 >>>>>>> dv-stage

-
+        Args:
             frame: The frame to process.
             direction: The direction of frame processing.
         """
         await super().process_frame(frame, direction)

         context = None
-        if isinstance(frame,
+        if isinstance(frame, WarmupLLMFrame):
+            # Handle warmup frame - prime cache without emitting response
+            # Run in background to avoid blocking the pipeline
+            asyncio.create_task(self._handle_warmup_frame(frame))
+            return  # Don't process further, warmup is silent
+        elif isinstance(frame, OpenAILLMContextFrame):
             # Handle OpenAI-specific context frames
             context = frame.context
         elif isinstance(frame, LLMContextFrame):
@@ -470,3 +489,49 @@ class BaseOpenAILLMService(LLMService):
         finally:
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
+
+    def _is_gpt5_model(self) -> bool:
+        """Check if the current model is a GPT-5 series model that requires max_completion_tokens."""
+        model = (self.model_name or "").lower()
+        return model.startswith("gpt-5")
+
+    async def _handle_warmup_frame(self, frame: WarmupLLMFrame):
+        """Handle WarmupLLMFrame to prime the LLM cache without emitting responses.
+
+        This method sends a minimal request to the LLM to warm up any provider-side
+        caches (like prompt caching). The response is discarded and no frames are emitted.
+
+        Args:
+            frame: WarmupLLMFrame containing the messages to cache.
+        """
+        # Skip warmup if disabled
+        if not self._enable_warmup:
+            self.logger.debug("LLM warmup is disabled, skipping")
+            return
+
+        try:
+            # Use the provided messages for warmup
+            messages: List[ChatCompletionMessageParam] = frame.messages  # type: ignore
+
+            # Make a non-streaming call to warm the cache
+            # We use a minimal token limit to reduce latency and cost
+            # GPT-5 series models require max_completion_tokens instead of max_tokens
+            warmup_params = {
+                "model": self.model_name,
+                "messages": messages,
+                "stream": False,
+            }
+
+            if self._is_gpt5_model():
+                warmup_params["max_completion_tokens"] = 10
+            else:
+                warmup_params["max_tokens"] = 10
+
+            await self._client.chat.completions.create(**warmup_params)
+
+            self.logger.info("LLM cache warmed successfully")
+            # Intentionally don't emit any frames - this is a silent warmup
+
+        except Exception as e:
+            self.logger.error(f"Failed to warm LLM cache: {e}")
+            # Don't propagate error - warmup failure shouldn't break the bot
```