dv-pipecat-ai 0.0.85.dev818__py3-none-any.whl → 0.0.85.dev858__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (32)
  1. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/METADATA +2 -1
  2. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/RECORD +32 -29
  3. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +5 -1
  4. pipecat/frames/frames.py +34 -0
  5. pipecat/metrics/connection_metrics.py +45 -0
  6. pipecat/processors/aggregators/llm_response.py +25 -4
  7. pipecat/processors/dtmf_aggregator.py +17 -21
  8. pipecat/processors/frame_processor.py +51 -8
  9. pipecat/processors/metrics/frame_processor_metrics.py +108 -0
  10. pipecat/processors/transcript_processor.py +22 -1
  11. pipecat/serializers/__init__.py +2 -0
  12. pipecat/serializers/asterisk.py +16 -2
  13. pipecat/serializers/convox.py +2 -2
  14. pipecat/serializers/custom.py +2 -2
  15. pipecat/serializers/vi.py +326 -0
  16. pipecat/services/cartesia/tts.py +75 -10
  17. pipecat/services/deepgram/stt.py +317 -17
  18. pipecat/services/elevenlabs/stt.py +487 -19
  19. pipecat/services/elevenlabs/tts.py +28 -4
  20. pipecat/services/google/llm.py +26 -11
  21. pipecat/services/openai/base_llm.py +79 -14
  22. pipecat/services/salesforce/llm.py +321 -86
  23. pipecat/services/sarvam/tts.py +0 -1
  24. pipecat/services/soniox/stt.py +45 -10
  25. pipecat/services/vistaar/llm.py +97 -6
  26. pipecat/transcriptions/language.py +50 -0
  27. pipecat/transports/base_input.py +15 -11
  28. pipecat/transports/base_output.py +29 -3
  29. pipecat/utils/redis.py +58 -0
  30. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/WHEEL +0 -0
  31. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/licenses/LICENSE +0 -0
  32. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/top_level.txt +0 -0
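To reproduce the file-level comparison above locally, the two wheels can be downloaded and their archive contents diffed. A minimal sketch in Python, assuming both wheel files (named per the header, with the normalized dv_pipecat_ai prefix) are already in the working directory:

import zipfile

OLD = "dv_pipecat_ai-0.0.85.dev818-py3-none-any.whl"
NEW = "dv_pipecat_ai-0.0.85.dev858-py3-none-any.whl"

def file_crcs(path):
    # A wheel is a zip archive; the per-entry CRC32 is enough to spot changed files.
    with zipfile.ZipFile(path) as whl:
        return {info.filename: info.CRC for info in whl.infolist()}

old, new = file_crcs(OLD), file_crcs(NEW)
for name in sorted(old.keys() | new.keys()):
    if old.get(name) != new.get(name):
        print(name)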
pipecat/services/openai/base_llm.py

@@ -18,6 +18,7 @@ from openai import (
     APITimeoutError,
     AsyncOpenAI,
     AsyncStream,
+    BadRequestError,
     DefaultAsyncHttpxClient,
 )
 from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
@@ -32,6 +33,7 @@ from pipecat.frames.frames import (
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
+    WarmupLLMFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.llm_context import LLMContext
@@ -99,6 +101,7 @@ class BaseOpenAILLMService(LLMService):
         params: Optional[InputParams] = None,
         retry_timeout_secs: Optional[float] = 5.0,
         retry_on_timeout: Optional[bool] = False,
+        enable_warmup: bool = False,
         **kwargs,
     ):
         """Initialize the BaseOpenAILLMService.
@@ -113,6 +116,7 @@ class BaseOpenAILLMService(LLMService):
             params: Input parameters for model configuration and behavior.
             retry_timeout_secs: Request timeout in seconds. Defaults to 5.0 seconds.
             retry_on_timeout: Whether to retry the request once if it times out.
+            enable_warmup: Whether to enable LLM cache warmup. Defaults to False.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -132,6 +136,7 @@ class BaseOpenAILLMService(LLMService):
         }
         self._retry_timeout_secs = retry_timeout_secs
         self._retry_on_timeout = retry_on_timeout
+        self._enable_warmup = enable_warmup
         self.set_model_name(model)
         self._client = self.create_client(
             api_key=api_key,
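For orientation, a minimal sketch of how the new enable_warmup flag would be passed when constructing the service. The OpenAILLMService import path and the other constructor arguments are assumptions based on pipecat's public API; only enable_warmup itself is introduced by this diff.

from pipecat.services.openai.llm import OpenAILLMService

# Sketch only: assumes OpenAILLMService subclasses BaseOpenAILLMService and
# forwards **kwargs, so enable_warmup reaches the __init__ shown above.
llm = OpenAILLMService(
    api_key="sk-...",     # placeholder credential
    model="gpt-4o",       # any supported model name
    enable_warmup=True,   # new in this release; defaults to False
)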
@@ -200,20 +205,29 @@ class BaseOpenAILLMService(LLMService):
         """
         params = self.build_chat_completion_params(params_from_context)
 
-        if self._retry_on_timeout:
-            try:
-                chunks = await asyncio.wait_for(
-                    self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
-                )
-                return chunks
-            except (APITimeoutError, asyncio.TimeoutError):
-                # Retry, this time without a timeout so we get a response
-                logger.debug(f"{self}: Retrying chat completion due to timeout")
+        await self.start_connection_metrics()
+
+        try:
+            if self._retry_on_timeout:
+                try:
+                    chunks = await asyncio.wait_for(
+                        self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
+                    )
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+                except (APITimeoutError, asyncio.TimeoutError):
+                    # Retry, this time without a timeout so we get a response
+                    logger.debug(f"{self}: Retrying chat completion due to timeout")
+                    chunks = await self._client.chat.completions.create(**params)
+                    await self.stop_connection_metrics(success=True, connection_type="http")
+                    return chunks
+            else:
                 chunks = await self._client.chat.completions.create(**params)
+                await self.stop_connection_metrics(success=True, connection_type="http")
                 return chunks
-        else:
-            chunks = await self._client.chat.completions.create(**params)
-            return chunks
+        except Exception as e:
+            await self.stop_connection_metrics(success=False, error=str(e), connection_type="http")
+            raise
 
     def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for chat completion request.
@@ -438,14 +452,19 @@ class BaseOpenAILLMService(LLMService):
         completions and manage settings.
         >>>>>>> dv-stage
 
-        Args:
+        Args:
             frame: The frame to process.
             direction: The direction of frame processing.
         """
         await super().process_frame(frame, direction)
 
         context = None
-        if isinstance(frame, OpenAILLMContextFrame):
+        if isinstance(frame, WarmupLLMFrame):
+            # Handle warmup frame - prime cache without emitting response
+            # Run in background to avoid blocking the pipeline
+            asyncio.create_task(self._handle_warmup_frame(frame))
+            return  # Don't process further, warmup is silent
+        elif isinstance(frame, OpenAILLMContextFrame):
             # Handle OpenAI-specific context frames
             context = frame.context
         elif isinstance(frame, LLMContextFrame):
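Given the new branch above, warmup is triggered by pushing a WarmupLLMFrame to the LLM service before the first real turn; the service schedules the request on a background task and swallows the frame. A sketch assuming the frame's constructor accepts the messages list that _handle_warmup_frame (next hunk) reads, and that the pipeline is driven by a standard PipelineTask:

from pipecat.frames.frames import WarmupLLMFrame

async def warm_llm_cache(task):
    # Queue a warmup frame on an existing PipelineTask before the conversation starts.
    # The message below is a placeholder; use the system prompt you want cached.
    await task.queue_frames(
        [WarmupLLMFrame(messages=[{"role": "system", "content": "You are a helpful assistant."}])]
    )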
@@ -470,3 +489,49 @@ class BaseOpenAILLMService(LLMService):
         finally:
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
+
+    def _is_gpt5_model(self) -> bool:
+        """Check if the current model is a GPT-5 series model that requires max_completion_tokens."""
+        model = (self.model_name or "").lower()
+        return model.startswith("gpt-5")
+
+    async def _handle_warmup_frame(self, frame: WarmupLLMFrame):
+        """Handle WarmupLLMFrame to prime the LLM cache without emitting responses.
+
+        This method sends a minimal request to the LLM to warm up any provider-side
+        caches (like prompt caching). The response is discarded and no frames are emitted.
+
+        Args:
+            frame: WarmupLLMFrame containing the messages to cache.
+        """
+        # Skip warmup if disabled
+        if not self._enable_warmup:
+            self.logger.debug("LLM warmup is disabled, skipping")
+            return
+
+        try:
+            # Use the provided messages for warmup
+            messages: List[ChatCompletionMessageParam] = frame.messages  # type: ignore
+
+            # Make a non-streaming call to warm the cache
+            # We use a minimal token limit to reduce latency and cost
+            # GPT-5 series models require max_completion_tokens instead of max_tokens
+            warmup_params = {
+                "model": self.model_name,
+                "messages": messages,
+                "stream": False,
+            }
+
+            if self._is_gpt5_model():
+                warmup_params["max_completion_tokens"] = 10
+            else:
+                warmup_params["max_tokens"] = 10
+
+            await self._client.chat.completions.create(**warmup_params)
+
+            self.logger.info("LLM cache warmed successfully")
+            # Intentionally don't emit any frames - this is a silent warmup
+
+        except Exception as e:
+            self.logger.error(f"Failed to warm LLM cache: {e}")
+            # Don't propagate error - warmup failure shouldn't break the bot
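WarmupLLMFrame itself is defined in pipecat/frames/frames.py (+34 lines in this release, not shown in this excerpt). A minimal sketch of what such a frame could look like, assuming it follows the dataclass style of pipecat's other control frames; the actual definition may differ:

from dataclasses import dataclass, field
from typing import Any, Dict, List

from pipecat.frames.frames import ControlFrame  # assumed base class


@dataclass
class WarmupLLMFrame(ControlFrame):
    """Assumed shape: carries the chat messages used to prime provider-side caches."""

    messages: List[Dict[str, Any]] = field(default_factory=list)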