dv-pipecat-ai 0.0.85.dev818__py3-none-any.whl → 0.0.85.dev858__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (32) hide show
  1. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/METADATA +2 -1
  2. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/RECORD +32 -29
  3. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +5 -1
  4. pipecat/frames/frames.py +34 -0
  5. pipecat/metrics/connection_metrics.py +45 -0
  6. pipecat/processors/aggregators/llm_response.py +25 -4
  7. pipecat/processors/dtmf_aggregator.py +17 -21
  8. pipecat/processors/frame_processor.py +51 -8
  9. pipecat/processors/metrics/frame_processor_metrics.py +108 -0
  10. pipecat/processors/transcript_processor.py +22 -1
  11. pipecat/serializers/__init__.py +2 -0
  12. pipecat/serializers/asterisk.py +16 -2
  13. pipecat/serializers/convox.py +2 -2
  14. pipecat/serializers/custom.py +2 -2
  15. pipecat/serializers/vi.py +326 -0
  16. pipecat/services/cartesia/tts.py +75 -10
  17. pipecat/services/deepgram/stt.py +317 -17
  18. pipecat/services/elevenlabs/stt.py +487 -19
  19. pipecat/services/elevenlabs/tts.py +28 -4
  20. pipecat/services/google/llm.py +26 -11
  21. pipecat/services/openai/base_llm.py +79 -14
  22. pipecat/services/salesforce/llm.py +321 -86
  23. pipecat/services/sarvam/tts.py +0 -1
  24. pipecat/services/soniox/stt.py +45 -10
  25. pipecat/services/vistaar/llm.py +97 -6
  26. pipecat/transcriptions/language.py +50 -0
  27. pipecat/transports/base_input.py +15 -11
  28. pipecat/transports/base_output.py +29 -3
  29. pipecat/utils/redis.py +58 -0
  30. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/WHEEL +0 -0
  31. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/licenses/LICENSE +0 -0
  32. {dv_pipecat_ai-0.0.85.dev818.dist-info → dv_pipecat_ai-0.0.85.dev858.dist-info}/top_level.txt +0 -0
@@ -49,6 +49,33 @@ END_TOKEN = "<end>"
49
49
  FINALIZED_TOKEN = "<fin>"
50
50
 
51
51
 
52
class SonioxContextGeneralItem(BaseModel):
    """Represents a key-value pair for structured general context information.

    Instances are used as the elements of ``SonioxContextObject.general``.
    """

    # Name of the context entry (required).
    key: str
    # Value associated with ``key`` (required).
    value: str
57
+
58
+
59
class SonioxContextTranslationTerm(BaseModel):
    """Represents a custom translation mapping for ambiguous or domain-specific terms.

    Instances are used as the elements of
    ``SonioxContextObject.translation_terms``.
    """

    # Term as it appears in the source text (required).
    source: str
    # Translation to use for ``source`` (required).
    target: str
64
+
65
+
66
class SonioxContextObject(BaseModel):
    """Context object for models with context_version 2, for Soniox stt-rt-v3-preview and higher.

    Every field is optional and defaults to ``None``.

    Learn more about context in the documentation:
    https://soniox.com/docs/stt/concepts/context
    """

    # Structured key-value context entries.
    general: Optional[List[SonioxContextGeneralItem]] = None
    # Context supplied as a single string.
    text: Optional[str] = None
    # List of term strings. NOTE(review): presumably vocabulary hints for
    # recognition; confirm against the Soniox context documentation.
    terms: Optional[List[str]] = None
    # Custom source-to-target translation mappings.
    translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
77
+
78
+
52
79
  class SonioxInputParams(BaseModel):
53
80
  """Real-time transcription settings.
54
81
 
@@ -60,9 +87,9 @@ class SonioxInputParams(BaseModel):
60
87
  audio_format: Audio format to use for transcription.
61
88
  num_channels: Number of channels to use for transcription.
62
89
  language_hints: List of language hints to use for transcription.
63
- context: Customization for transcription.
64
- enable_non_final_tokens: Whether to enable non-final tokens. If false, only final tokens will be returned.
65
- max_non_final_tokens_duration_ms: Maximum duration of non-final tokens.
90
+ context: Customization for transcription. String for models with context_version 1 and ContextObject for models with context_version 2.
91
+ enable_speaker_diarization: Whether to enable speaker diarization. Tokens are annotated with speaker IDs.
92
+ enable_language_identification: Whether to enable language identification. Tokens are annotated with language IDs.
66
93
  client_reference_id: Client reference ID to use for transcription.
67
94
  """
68
95
 
@@ -72,10 +99,10 @@ class SonioxInputParams(BaseModel):
72
99
  num_channels: Optional[int] = 1
73
100
 
74
101
  language_hints: Optional[List[Language]] = None
75
- context: Optional[str] = None
102
+ context: Optional[SonioxContextObject | str] = None
76
103
 
77
- enable_non_final_tokens: Optional[bool] = True
78
- max_non_final_tokens_duration_ms: Optional[int] = None
104
+ enable_speaker_diarization: Optional[bool] = False
105
+ enable_language_identification: Optional[bool] = False
79
106
 
80
107
  client_reference_id: Optional[str] = None
81
108
 
@@ -173,6 +200,10 @@ class SonioxSTTService(STTService):
173
200
  # Either one or the other is required.
174
201
  enable_endpoint_detection = not self._vad_force_turn_endpoint
175
202
 
203
+ context = self._params.context
204
+ if isinstance(context, SonioxContextObject):
205
+ context = context.model_dump()
206
+
176
207
  # Send the initial configuration message.
177
208
  config = {
178
209
  "api_key": self._api_key,
@@ -182,9 +213,9 @@ class SonioxSTTService(STTService):
182
213
  "enable_endpoint_detection": enable_endpoint_detection,
183
214
  "sample_rate": self.sample_rate,
184
215
  "language_hints": _prepare_language_hints(self._params.language_hints),
185
- "context": self._params.context,
186
- "enable_non_final_tokens": self._params.enable_non_final_tokens,
187
- "max_non_final_tokens_duration_ms": self._params.max_non_final_tokens_duration_ms,
216
+ "context": context,
217
+ "enable_speaker_diarization": self._params.enable_speaker_diarization,
218
+ "enable_language_identification": self._params.enable_language_identification,
188
219
  "client_reference_id": self._params.client_reference_id,
189
220
  }
190
221
 
@@ -210,6 +241,7 @@ class SonioxSTTService(STTService):
210
241
  if self._receive_task != asyncio.current_task():
211
242
  await self._receive_task
212
243
  self._receive_task = None
244
+ self.logger.debug("Disconnected from Soniox STT")
213
245
 
214
246
  async def stop(self, frame: EndFrame):
215
247
  """Stop the Soniox STT websocket connection.
@@ -351,7 +383,10 @@ class SonioxSTTService(STTService):
351
383
 
352
384
  if self._final_transcription_buffer or non_final_transcription:
353
385
  final_text = "".join(
354
- map(lambda token: token["text"], self._final_transcription_buffer)
386
+ map(
387
+ lambda token: token["text"],
388
+ self._final_transcription_buffer,
389
+ )
355
390
  )
356
391
  non_final_text = "".join(
357
392
  map(lambda token: token["text"], non_final_transcription)
@@ -10,19 +10,27 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
10
10
  from urllib.parse import urlencode
11
11
 
12
12
  import httpx
13
+ import jwt
13
14
  from loguru import logger
14
15
  from pydantic import BaseModel, Field
15
16
 
17
+ try:
18
+ import redis.asyncio as redis
19
+ REDIS_AVAILABLE = True
20
+ except ImportError:
21
+ REDIS_AVAILABLE = False
22
+ redis = None
23
+
16
24
  from pipecat.frames.frames import (
17
- EndFrame,
18
25
  CancelFrame,
26
+ EndFrame,
19
27
  Frame,
28
+ InterruptionFrame,
20
29
  LLMFullResponseEndFrame,
21
30
  LLMFullResponseStartFrame,
22
31
  LLMMessagesFrame,
23
32
  LLMTextFrame,
24
33
  LLMUpdateSettingsFrame,
25
- StartInterruptionFrame,
26
34
  )
27
35
  from pipecat.processors.aggregators.llm_response import (
28
36
  LLMAssistantAggregatorParams,
@@ -55,7 +63,9 @@ class VistaarLLMService(LLMService):
55
63
  Parameters:
56
64
  source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
57
65
  target_lang: Target language code for responses.
58
- session_id: Session ID for maintaining conversation context.
66
+ session_id: Session ID for maintaining conversation context (also used for JWT caching).
67
+ pre_query_response_phrases: List of phrases to say while waiting for response.
68
+ phone_number: Phone number for JWT subject claim.
59
69
  extra: Additional model-specific parameters
60
70
  """
61
71
 
@@ -63,6 +73,7 @@ class VistaarLLMService(LLMService):
63
73
  target_lang: Optional[str] = Field(default="mr")
64
74
  session_id: Optional[str] = Field(default=None)
65
75
  pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
76
+ phone_number: Optional[str] = Field(default="UNKNOWN")
66
77
  extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
67
78
 
68
79
  def __init__(
@@ -72,6 +83,9 @@ class VistaarLLMService(LLMService):
72
83
  params: Optional[InputParams] = None,
73
84
  timeout: float = 30.0,
74
85
  interim_timeout: float = 5.0,
86
+ redis_client: Optional[Any] = None, # redis.Redis type
87
+ jwt_private_key: Optional[str] = None,
88
+ jwt_token_expiry: int = 3600,
75
89
  **kwargs,
76
90
  ):
77
91
  """Initialize Vistaar LLM service.
@@ -81,6 +95,9 @@ class VistaarLLMService(LLMService):
81
95
  params: Input parameters for model configuration and behavior.
82
96
  timeout: Request timeout in seconds. Defaults to 30.0 seconds.
83
97
  interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
98
+ redis_client: Optional Redis client for JWT token caching.
99
+ jwt_private_key: Optional RSA private key in PEM format for JWT signing.
100
+ jwt_token_expiry: JWT token expiry time in seconds. Defaults to 3600 (1 hour).
84
101
  **kwargs: Additional arguments passed to the parent LLMService.
85
102
  """
86
103
  super().__init__(**kwargs)
@@ -95,6 +112,16 @@ class VistaarLLMService(LLMService):
95
112
  self._extra = params.extra if isinstance(params.extra, dict) else {}
96
113
  self._timeout = timeout
97
114
  self._interim_timeout = interim_timeout
115
+ self._phone_number = params.phone_number
116
+
117
+ # JWT authentication setup
118
+ self._redis_client = redis_client
119
+ self._jwt_private_key = jwt_private_key
120
+ self._jwt_token_expiry = jwt_token_expiry
121
+ self._jwt_issuer = "voice-provider"
122
+
123
+ if self._jwt_private_key and not self._redis_client:
124
+ logger.warning("JWT private key provided but no Redis client for caching. JWT auth will regenerate tokens on each request.")
98
125
 
99
126
  # Create an async HTTP client
100
127
  self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
@@ -112,6 +139,53 @@ class VistaarLLMService(LLMService):
112
139
  f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
113
140
  )
114
141
 
142
async def _get_jwt_token(self) -> Optional[str]:
    """Generate a JWT for Vistaar requests, or reuse one cached in Redis.

    The token is signed with RS256 using the configured private key. When a
    Redis client and a session id are both available, the token is cached
    under ``vistaar_jwt:<session_id>``. The cache entry is stored with a TTL
    slightly shorter than the token lifetime, so a cached token is never
    handed out at (or after) its own ``exp`` timestamp.

    Redis failures are logged and otherwise ignored: the method falls back
    to generating a fresh token and continues without caching.

    Returns:
        JWT token string or None if JWT auth is not configured.
    """
    if not self._jwt_private_key:
        return None

    # Caching needs both a client and a session id to key the entry on.
    cache_key = (
        f"vistaar_jwt:{self._session_id}"
        if self._redis_client and self._session_id
        else None
    )

    # Try to get from Redis cache if available
    if cache_key:
        try:
            cached_token = await self._redis_client.get(cache_key)
            if cached_token:
                logger.debug(
                    f"Retrieved JWT token from Redis cache for session_id: {self._session_id}"
                )
                # Clients created without decode_responses return bytes.
                if isinstance(cached_token, bytes):
                    return cached_token.decode("utf-8")
                return cached_token
        except Exception as e:
            logger.warning(f"Redis cache retrieval failed: {e}. Generating new token.")

    # Generate new token
    current_time = int(time.time())
    payload = {
        "sub": self._phone_number,  # Subject identifier (phone number)
        "iss": self._jwt_issuer,  # Issuer
        "iat": current_time,  # Issued at timestamp
        "exp": current_time + self._jwt_token_expiry,  # Expiration timestamp
    }

    token = jwt.encode(payload, self._jwt_private_key, algorithm="RS256")
    # NOTE(review): this logs the subscriber phone number at info level;
    # consider masking it if logs are not treated as sensitive data.
    logger.info(
        f"Generated new JWT token for {self._phone_number}, expires in {self._jwt_token_expiry}s"
    )

    # Cache in Redis if available
    if cache_key:
        # "exp" was fixed before signing, while the Redis TTL only starts
        # counting at SETEX time. Caching for the full lifetime would let the
        # entry outlive the token, so keep a margin (10% of the lifetime,
        # at least 1s and at most 60s).
        safety_margin = min(60, max(1, self._jwt_token_expiry // 10))
        cache_ttl = self._jwt_token_expiry - safety_margin
        if cache_ttl > 0:
            try:
                await self._redis_client.setex(cache_key, cache_ttl, token)
                logger.debug(
                    f"Cached JWT token in Redis for session_id: {self._session_id} with {cache_ttl}s TTL"
                )
            except Exception as e:
                logger.warning(f"Redis cache storage failed: {e}. Continuing without cache.")

    return token
188
+
115
189
  async def _extract_messages_to_query(self, context: OpenAILLMContext) -> str:
116
190
  """Extract only the last user message from context.
117
191
 
@@ -259,9 +333,23 @@ class VistaarLLMService(LLMService):
259
333
  self._interim_in_progress = False
260
334
  self._interim_completion_event.clear() # Reset the event for new request
261
335
 
336
+ # Prepare headers with JWT authentication if configured
337
+ headers = {}
338
+ try:
339
+ jwt_token = await self._get_jwt_token()
340
+ if jwt_token:
341
+ headers["Authorization"] = f"Bearer {jwt_token}"
342
+ logger.debug(f"Added JWT authentication header for session_id: {self._session_id}")
343
+ except Exception as e:
344
+ logger.error(f"Failed to generate JWT token: {e}")
345
+ raise
346
+
347
+ await self.start_connection_metrics()
348
+
262
349
  try:
263
350
  # Use httpx to handle SSE streaming
264
- async with self._client.stream("GET", url) as response:
351
+ async with self._client.stream("GET", url, headers=headers) as response:
352
+ await self.stop_connection_metrics(success=True, connection_type="http")
265
353
  self._current_response = response # Store for potential cancellation
266
354
  response.raise_for_status()
267
355
 
@@ -279,14 +367,17 @@ class VistaarLLMService(LLMService):
279
367
  yield line
280
368
 
281
369
  except httpx.HTTPStatusError as e:
370
+ await self.stop_connection_metrics(success=False, error=f"HTTP {e.response.status_code}", connection_type="http")
282
371
  logger.error(
283
372
  f"Vistaar HTTP error - Status: {e.response.status_code}, URL: {url}, Response: {e.response.text if hasattr(e.response, 'text') else 'N/A'}"
284
373
  )
285
374
  raise
286
375
  except httpx.TimeoutException as e:
376
+ await self.stop_connection_metrics(success=False, error="Timeout", connection_type="http")
287
377
  logger.error(f"Vistaar timeout error - URL: {url}, Timeout: {self._timeout}s")
288
378
  raise
289
379
  except Exception as e:
380
+ await self.stop_connection_metrics(success=False, error=str(e), connection_type="http")
290
381
  logger.error(
291
382
  f"Vistaar unexpected error - Type: {type(e).__name__}, Message: {str(e)}, URL: {url}"
292
383
  )
@@ -391,7 +482,7 @@ class VistaarLLMService(LLMService):
391
482
  )
392
483
  await self.push_frame(frame, direction)
393
484
  return
394
- elif isinstance(frame, StartInterruptionFrame):
485
+ elif isinstance(frame, InterruptionFrame):
395
486
  await self._handle_interruption()
396
487
  await self.push_frame(frame, direction)
397
488
  return
@@ -467,4 +558,4 @@ class VistaarLLMService(LLMService):
467
558
 
468
559
  def can_generate_metrics(self) -> bool:
469
560
  """Check if this service can generate processing metrics."""
470
- return True
561
+ return True
@@ -569,3 +569,53 @@ class Language(StrEnum):
569
569
  # Zulu
570
570
  ZU = "zu"
571
571
  ZU_ZA = "zu-ZA"
572
+
573
+
574
def resolve_language(
    language: Language, language_map: dict[Language, str], use_base_code: bool = True
) -> str:
    """Translate a Language enum member into the code a specific service expects.

    An entry in ``language_map`` always takes precedence. When the language
    is not in the map, the code is derived from the enum value itself. No
    warning is logged for this fallback.

    Args:
        language: The Language enum value to convert.
        language_map: Dictionary mapping Language enums to service language codes.
        use_base_code: If True, the fallback is the lower-cased part of the
            enum value before the first ``-`` (e.g., 'en' from 'en-US').
            If False, the fallback is the full enum value, unchanged.

    Returns:
        The resolved language code for the service.

    Examples::

        # Service expecting base codes (e.g., Cartesia)
        >>> LANGUAGE_MAP = {Language.EN: "en", Language.ES: "es"}
        >>> resolve_language(Language.EN_US, LANGUAGE_MAP, use_base_code=True)
        "en"

        # Service expecting full codes (e.g., AWS)
        >>> LANGUAGE_MAP = {Language.EN_US: "en-US", Language.ES_ES: "es-ES"}
        >>> resolve_language(Language.EN_GB, LANGUAGE_MAP, use_base_code=False)
        "en-GB"
    """
    # Verified mapping wins whenever one exists.
    verified_code = language_map.get(language)
    if verified_code is not None:
        return verified_code

    # Unverified language: derive the code from the enum value.
    raw_code = str(language.value)
    if not use_base_code:
        return raw_code

    # Keep only the part before the first hyphen (e.g., "en" from "en-US").
    return raw_code.split("-")[0].lower()
@@ -297,6 +297,17 @@ class BaseInputTransport(FrameProcessor):
297
297
  elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
298
298
  self.logger.debug("Emulating user stopped speaking")
299
299
  await self._handle_user_interruption(VADState.QUIET, emulated=True)
300
+ elif isinstance(frame, VADParamsUpdateFrame):
301
+ if self.vad_analyzer:
302
+ self.vad_analyzer.set_params(frame.params, self.logger)
303
+ speech_frame = SpeechControlParamsFrame(
304
+ vad_params=frame.params,
305
+ turn_params=self._params.turn_analyzer.params
306
+ if self._params.turn_analyzer
307
+ else None,
308
+ )
309
+ await self.push_frame(speech_frame)
310
+ await self.push_frame(frame, direction)
300
311
  # All other system frames
301
312
  elif isinstance(frame, SystemFrame):
302
313
  await self.push_frame(frame, direction)
@@ -309,16 +320,6 @@ class BaseInputTransport(FrameProcessor):
309
320
  elif isinstance(frame, StopFrame):
310
321
  await self.push_frame(frame, direction)
311
322
  await self.pause(frame)
312
- elif isinstance(frame, VADParamsUpdateFrame):
313
- if self.vad_analyzer:
314
- self.vad_analyzer.set_params(frame.params)
315
- speech_frame = SpeechControlParamsFrame(
316
- vad_params=frame.params,
317
- turn_params=self._params.turn_analyzer.params
318
- if self._params.turn_analyzer
319
- else None,
320
- )
321
- await self.push_frame(speech_frame)
322
323
  elif isinstance(frame, FilterUpdateSettingsFrame) and self._params.audio_in_filter:
323
324
  await self._params.audio_in_filter.process_frame(frame)
324
325
  # Other frames
@@ -444,7 +445,10 @@ class BaseInputTransport(FrameProcessor):
444
445
  await self._handle_user_interruption(VADState.QUIET)
445
446
 
446
447
  async def _run_turn_analyzer(
447
- self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState
448
+ self,
449
+ frame: InputAudioRawFrame,
450
+ vad_state: VADState,
451
+ previous_vad_state: VADState,
448
452
  ):
449
453
  """Run turn analysis on audio frame and handle results."""
450
454
  is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
@@ -50,6 +50,11 @@ from pipecat.utils.time import nanoseconds_to_seconds
50
50
 
51
51
  # TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35 but that creates issue for faster TTS.
52
52
  BOT_VAD_STOP_SECS = 0.30
53
+ # For the very first bot utterance (e.g., intro), we can safely
54
+ # detect end-of-speech sooner to improve responsiveness for the
55
+ # user’s first short reply. Keep conservative to avoid mid-utterance
56
+ # false stops when TTS streams quickly.
57
+ FIRST_BOT_VAD_STOP_SECS = 0.12
53
58
 
54
59
 
55
60
  class BaseOutputTransport(FrameProcessor):
@@ -84,6 +89,7 @@ class BaseOutputTransport(FrameProcessor):
84
89
  # us to send multiple streams at the same time if the transport allows
85
90
  # it.
86
91
  self._media_senders: Dict[Any, "BaseOutputTransport.MediaSender"] = {}
92
+ self._register_event_handler("on_output_terminated")
87
93
 
88
94
  @property
89
95
  def sample_rate(self) -> int:
@@ -301,10 +307,12 @@ class BaseOutputTransport(FrameProcessor):
301
307
  await self.start(frame)
302
308
  elif isinstance(frame, EndFrame):
303
309
  await self.stop(frame)
310
+ await self._call_event_handler("on_output_terminated", frame)
304
311
  # Keep pushing EndFrame down so all the pipeline stops nicely.
305
312
  await self.push_frame(frame, direction)
306
313
  elif isinstance(frame, CancelFrame):
307
314
  await self.cancel(frame)
315
+ await self._call_event_handler("on_output_terminated", frame)
308
316
  await self.push_frame(frame, direction)
309
317
  elif isinstance(frame, InterruptionFrame):
310
318
  await self.push_frame(frame, direction)
@@ -403,6 +411,9 @@ class BaseOutputTransport(FrameProcessor):
403
411
  self._bot_speaking_frame_period = 0.2
404
412
  # Last time the bot actually spoke.
405
413
  self._bot_speech_last_time = 0
414
+ # Before the first stop event, we use a shorter silence
415
+ # threshold to make the first turn more responsive.
416
+ self._first_stop_pending = True
406
417
 
407
418
  self._audio_task: Optional[asyncio.Task] = None
408
419
  self._video_task: Optional[asyncio.Task] = None
@@ -628,6 +639,10 @@ class BaseOutputTransport(FrameProcessor):
628
639
 
629
640
  self._bot_speaking = False
630
641
 
642
+ # Mark that the first stop has been completed so subsequent
643
+ # stops use the regular (longer) VAD stop threshold.
644
+ self._first_stop_pending = False
645
+
631
646
  # Clean audio buffer (there could be tiny left overs if not multiple
632
647
  # to our output chunk size).
633
648
  self._audio_buffer = bytearray()
@@ -687,9 +702,14 @@ class BaseOutputTransport(FrameProcessor):
687
702
  async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
688
703
  while True:
689
704
  try:
690
- frame = await asyncio.wait_for(
691
- self._audio_queue.get(), timeout=vad_stop_secs
705
+ # Use a shorter timeout only for the first bot stop to
706
+ # accelerate the initial turn handoff right after the intro.
707
+ timeout = (
708
+ FIRST_BOT_VAD_STOP_SECS
709
+ if getattr(self, "_first_stop_pending", True)
710
+ else BOT_VAD_STOP_SECS
692
711
  )
712
+ frame = await asyncio.wait_for(self._audio_queue.get(), timeout=timeout)
693
713
  yield frame
694
714
  self._audio_queue.task_done()
695
715
  except asyncio.TimeoutError:
@@ -710,7 +730,13 @@ class BaseOutputTransport(FrameProcessor):
710
730
  except asyncio.QueueEmpty:
711
731
  # Notify the bot stopped speaking upstream if necessary.
712
732
  diff_time = time.time() - last_frame_time
713
- if diff_time > vad_stop_secs:
733
+ # Use a shorter threshold for the first stop only.
734
+ current_stop_secs = (
735
+ FIRST_BOT_VAD_STOP_SECS
736
+ if getattr(self, "_first_stop_pending", True)
737
+ else BOT_VAD_STOP_SECS
738
+ )
739
+ if diff_time > current_stop_secs:
714
740
  await self._bot_stopped_speaking()
715
741
  # Generate an audio frame with only the mixer's part.
716
742
  frame = OutputAudioRawFrame(
pipecat/utils/redis.py ADDED
@@ -0,0 +1,58 @@
1
+ """Async Redis helper utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Optional, TYPE_CHECKING
6
+
7
+ from urllib.parse import urlparse
8
+
9
+ try:
10
+ import redis.asyncio as redis
11
+ except ImportError: # pragma: no cover - Redis is optional
12
+ redis = None
13
+
14
+
15
+ if TYPE_CHECKING: # pragma: no cover - typing aid
16
+ from redis.asyncio import Redis
17
+
18
+
19
def create_async_redis_client(
    url: Optional[str],
    *,
    decode_responses: bool = True,
    encoding: str = "utf-8",
    logger: Optional[Any] = None,
    **kwargs,
) -> Optional["Redis"]:
    """Build an async Redis client from a URL, or return None when that is not possible.

    None is returned when the ``redis`` package is not installed, when
    ``url`` is empty or is one of the unresolved placeholder strings
    ``"redis_url"`` / ``"REDIS_URL"``, or when client construction raises.

    Args:
        url: Redis connection URL.
        decode_responses: Whether to decode responses to str.
        encoding: Character encoding to use with decoded responses.
        logger: Optional logger supporting .warning() for diagnostics.
        **kwargs: Additional keyword arguments forwarded to Redis.from_url.
            These override ``decode_responses`` and ``encoding`` when the
            same keys are supplied.

    Returns:
        The client produced by ``Redis.from_url``, or None.
    """
    # Nothing to do without the optional dependency or a real URL.
    if redis is None or not url or url in {"redis_url", "REDIS_URL"}:
        return None

    options = {
        "decode_responses": decode_responses,
        "encoding": encoding,
        **kwargs,
    }

    # NOTE(review): for TLS URLs, certificate and hostname verification are
    # disabled unless the caller passes ssl_cert_reqs / ssl_check_hostname
    # explicitly. Confirm this default is acceptable for production use.
    if urlparse(url).scheme == "rediss":
        options.setdefault("ssl_cert_reqs", "none")
        options.setdefault("ssl_check_hostname", False)

    try:
        return redis.Redis.from_url(url, **options)
    except Exception as exc:  # pragma: no cover - best effort logging
        # Best effort: report the failure if a logger was supplied.
        if logger is not None:
            logger.warning(f"Failed to create Redis client: {exc}")
        return None