dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

Files changed (106)
  1. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/types.py +47 -0
  12. pipecat/audio/dtmf/utils.py +70 -0
  13. pipecat/audio/filters/aic_filter.py +199 -0
  14. pipecat/audio/utils.py +9 -7
  15. pipecat/extensions/ivr/__init__.py +0 -0
  16. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  17. pipecat/frames/frames.py +156 -43
  18. pipecat/pipeline/llm_switcher.py +76 -0
  19. pipecat/pipeline/parallel_pipeline.py +3 -3
  20. pipecat/pipeline/service_switcher.py +144 -0
  21. pipecat/pipeline/task.py +68 -28
  22. pipecat/pipeline/task_observer.py +10 -0
  23. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  24. pipecat/processors/aggregators/llm_context.py +277 -0
  25. pipecat/processors/aggregators/llm_response.py +48 -15
  26. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  27. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  28. pipecat/processors/dtmf_aggregator.py +0 -2
  29. pipecat/processors/filters/stt_mute_filter.py +0 -2
  30. pipecat/processors/frame_processor.py +18 -11
  31. pipecat/processors/frameworks/rtvi.py +17 -10
  32. pipecat/processors/metrics/sentry.py +2 -0
  33. pipecat/runner/daily.py +137 -36
  34. pipecat/runner/run.py +1 -1
  35. pipecat/runner/utils.py +7 -7
  36. pipecat/serializers/asterisk.py +20 -4
  37. pipecat/serializers/exotel.py +1 -1
  38. pipecat/serializers/plivo.py +1 -1
  39. pipecat/serializers/telnyx.py +1 -1
  40. pipecat/serializers/twilio.py +1 -1
  41. pipecat/services/__init__.py +2 -2
  42. pipecat/services/anthropic/llm.py +113 -28
  43. pipecat/services/asyncai/tts.py +4 -0
  44. pipecat/services/aws/llm.py +82 -8
  45. pipecat/services/aws/tts.py +0 -10
  46. pipecat/services/aws_nova_sonic/aws.py +5 -0
  47. pipecat/services/cartesia/tts.py +28 -16
  48. pipecat/services/cerebras/llm.py +15 -10
  49. pipecat/services/deepgram/stt.py +8 -0
  50. pipecat/services/deepseek/llm.py +13 -8
  51. pipecat/services/fireworks/llm.py +13 -8
  52. pipecat/services/fish/tts.py +8 -6
  53. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  54. pipecat/services/gladia/config.py +7 -1
  55. pipecat/services/gladia/stt.py +23 -15
  56. pipecat/services/google/llm.py +159 -59
  57. pipecat/services/google/llm_openai.py +18 -3
  58. pipecat/services/grok/llm.py +2 -1
  59. pipecat/services/llm_service.py +38 -3
  60. pipecat/services/mem0/memory.py +2 -1
  61. pipecat/services/mistral/llm.py +5 -6
  62. pipecat/services/nim/llm.py +2 -1
  63. pipecat/services/openai/base_llm.py +88 -26
  64. pipecat/services/openai/image.py +6 -1
  65. pipecat/services/openai_realtime_beta/openai.py +5 -2
  66. pipecat/services/openpipe/llm.py +6 -8
  67. pipecat/services/perplexity/llm.py +13 -8
  68. pipecat/services/playht/tts.py +9 -6
  69. pipecat/services/rime/tts.py +1 -1
  70. pipecat/services/sambanova/llm.py +18 -13
  71. pipecat/services/sarvam/tts.py +415 -10
  72. pipecat/services/speechmatics/stt.py +2 -2
  73. pipecat/services/tavus/video.py +1 -1
  74. pipecat/services/tts_service.py +15 -5
  75. pipecat/services/vistaar/llm.py +2 -5
  76. pipecat/transports/base_input.py +32 -19
  77. pipecat/transports/base_output.py +39 -5
  78. pipecat/transports/daily/__init__.py +0 -0
  79. pipecat/transports/daily/transport.py +2371 -0
  80. pipecat/transports/daily/utils.py +410 -0
  81. pipecat/transports/livekit/__init__.py +0 -0
  82. pipecat/transports/livekit/transport.py +1042 -0
  83. pipecat/transports/network/fastapi_websocket.py +12 -546
  84. pipecat/transports/network/small_webrtc.py +12 -922
  85. pipecat/transports/network/webrtc_connection.py +9 -595
  86. pipecat/transports/network/websocket_client.py +12 -481
  87. pipecat/transports/network/websocket_server.py +12 -487
  88. pipecat/transports/services/daily.py +9 -2334
  89. pipecat/transports/services/helpers/daily_rest.py +12 -396
  90. pipecat/transports/services/livekit.py +12 -975
  91. pipecat/transports/services/tavus.py +12 -757
  92. pipecat/transports/smallwebrtc/__init__.py +0 -0
  93. pipecat/transports/smallwebrtc/connection.py +612 -0
  94. pipecat/transports/smallwebrtc/transport.py +936 -0
  95. pipecat/transports/tavus/__init__.py +0 -0
  96. pipecat/transports/tavus/transport.py +770 -0
  97. pipecat/transports/websocket/__init__.py +0 -0
  98. pipecat/transports/websocket/client.py +494 -0
  99. pipecat/transports/websocket/fastapi.py +559 -0
  100. pipecat/transports/websocket/server.py +500 -0
  101. pipecat/transports/whatsapp/__init__.py +0 -0
  102. pipecat/transports/whatsapp/api.py +345 -0
  103. pipecat/transports/whatsapp/client.py +364 -0
  104. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
  105. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
  106. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
pipecat/services/anthropic/llm.py

@@ -24,13 +24,17 @@ from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.anthropic_adapter import AnthropicLLMAdapter
+from pipecat.adapters.services.anthropic_adapter import (
+    AnthropicLLMAdapter,
+    AnthropicLLMInvocationParams,
+)
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMEnablePromptCachingFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
@@ -41,6 +45,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -110,7 +115,12 @@ class AnthropicLLMService(LLMService):
         """Input parameters for Anthropic model inference.
 
         Parameters:
-            enable_prompt_caching_beta: Whether to enable beta prompt caching feature.
+            enable_prompt_caching: Whether to enable the prompt caching feature.
+            enable_prompt_caching_beta (deprecated): Whether to enable the beta prompt caching feature.
+
+                .. deprecated:: 0.0.84
+                    Use the `enable_prompt_caching` parameter instead.
+
             max_tokens: Maximum tokens to generate. Must be at least 1.
             temperature: Sampling temperature between 0.0 and 1.0.
             top_k: Top-k sampling parameter.
@@ -118,13 +128,26 @@ class AnthropicLLMService(LLMService):
             extra: Additional parameters to pass to the API.
         """
 
-        enable_prompt_caching_beta: Optional[bool] = False
+        enable_prompt_caching: Optional[bool] = None
+        enable_prompt_caching_beta: Optional[bool] = None
         max_tokens: Optional[int] = Field(default_factory=lambda: 4096, ge=1)
         temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
         top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
 
+        def model_post_init(self, __context):
+            """Post-initialization to handle deprecated parameters."""
+            if self.enable_prompt_caching_beta is not None:
+                import warnings
+
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "enable_prompt_caching_beta is deprecated. Use enable_prompt_caching instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
+
     def __init__(
         self,
         *,
@@ -157,7 +180,15 @@ class AnthropicLLMService(LLMService):
         self._retry_on_timeout = retry_on_timeout
         self._settings = {
             "max_tokens": params.max_tokens,
-            "enable_prompt_caching_beta": params.enable_prompt_caching_beta or False,
+            "enable_prompt_caching": (
+                params.enable_prompt_caching
+                if params.enable_prompt_caching is not None
+                else (
+                    params.enable_prompt_caching_beta
+                    if params.enable_prompt_caching_beta is not None
+                    else False
+                )
+            ),
             "temperature": params.temperature,
             "top_k": params.top_k,
             "top_p": params.top_p,
@@ -197,14 +228,39 @@ class AnthropicLLMService(LLMService):
         response = await api_call(**params)
         return response
 
-    @property
-    def enable_prompt_caching_beta(self) -> bool:
-        """Check if prompt caching beta feature is enabled.
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
 
         Returns:
-            True if prompt caching is enabled.
+            The LLM's response as a string, or None if no response is generated.
         """
-        return self._enable_prompt_caching_beta
+        messages = []
+        system = NOT_GIVEN
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            messages = params["messages"]
+            system = params["system"]
+        else:
+            context = AnthropicLLMContext.upgrade_to_anthropic(context)
+            messages = context.messages
+            system = getattr(context, "system", NOT_GIVEN)
+
+        # LLM completion
+        response = await self._client.messages.create(
+            model=self.model_name,
+            messages=messages,
+            system=system,
+            max_tokens=8192,
+            stream=False,
+        )
+
+        return response.content[0].text
 
     def create_context_aggregator(
         self,
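The new `run_inference()` method gives the service a way to run a single non-streaming completion outside the pipeline (e.g. for summaries). A rough usage sketch, assuming `LLMContext` can be seeded with a plain messages list (constructor details are not shown in this diff) and using a placeholder API key:

```python
import asyncio

from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.services.anthropic.llm import AnthropicLLMService


async def main():
    llm = AnthropicLLMService(api_key="YOUR_ANTHROPIC_API_KEY")  # placeholder key

    # Universal context path: the Anthropic adapter converts it into the
    # messages/system shape passed to the non-streaming messages.create() call.
    context = LLMContext(
        messages=[{"role": "user", "content": "Summarize this call in one sentence."}]
    )

    summary = await llm.run_inference(context)
    print(summary)


if __name__ == "__main__":
    asyncio.run(main())
```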
@@ -235,8 +291,31 @@ class AnthropicLLMService(LLMService):
         assistant = AnthropicAssistantContextAggregator(context, params=assistant_params)
         return AnthropicContextAggregatorPair(_user=user, _assistant=assistant)
 
+    def _get_llm_invocation_params(
+        self, context: OpenAILLMContext | LLMContext
+    ) -> AnthropicLLMInvocationParams:
+        # Universal LLMContext
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            return params
+
+        # Anthropic-specific context
+        messages = (
+            context.get_messages_with_cache_control_markers()
+            if self._settings["enable_prompt_caching"]
+            else context.messages
+        )
+        return AnthropicLLMInvocationParams(
+            system=context.system,
+            messages=messages,
+            tools=context.tools or [],
+        )
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         # Usage tracking. We track the usage reported by Anthropic in prompt_tokens and
         # completion_tokens. We also estimate the completion tokens from output text
         # and use that estimate if we are interrupted, because we almost certainly won't
@@ -252,24 +331,22 @@ class AnthropicLLMService(LLMService):
         await self.push_frame(LLMFullResponseStartFrame())
         await self.start_processing_metrics()
 
+        params_from_context = self._get_llm_invocation_params(context)
+
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            context_type_for_logging = "universal"
+            messages_for_logging = adapter.get_messages_for_logging(context)
+        else:
+            context_type_for_logging = "LLM-specific"
+            messages_for_logging = context.get_messages_for_logging()
         self.logger.debug(
-            f"{self}: Generating chat [{context.system}] | [{context.get_messages_for_logging()}]"
+            f"{self}: Generating chat from {context_type_for_logging} context [{params_from_context['system']}] | {messages_for_logging}"
         )
 
-        messages = context.messages
-        if self._settings["enable_prompt_caching_beta"]:
-            messages = context.get_messages_with_cache_control_markers()
-
-        api_call = self._client.messages.create
-        if self._settings["enable_prompt_caching_beta"]:
-            api_call = self._client.beta.prompt_caching.messages.create
-
         await self.start_ttfb_metrics()
 
         params = {
-            "tools": context.tools or [],
-            "system": context.system,
-            "messages": messages,
             "model": self.model_name,
             "max_tokens": self._settings["max_tokens"],
             "stream": True,
@@ -278,9 +355,12 @@ class AnthropicLLMService(LLMService):
             "top_p": self._settings["top_p"],
         }
 
+        # Messages, system, tools
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
 
-        response = await self._create_message_stream(api_call, params)
+        response = await self._create_message_stream(self._client.messages.create, params)
 
         await self.stop_ttfb_metrics()
 
@@ -363,7 +443,10 @@ class AnthropicLLMService(LLMService):
             prompt_tokens + cache_creation_input_tokens + cache_read_input_tokens
         )
         if total_input_tokens >= 1024:
-            context.turns_above_cache_threshold += 1
+            if hasattr(
+                context, "turns_above_cache_threshold"
+            ):  # LLMContext doesn't have this attribute
+                context.turns_above_cache_threshold += 1
 
         await self.run_function_calls(function_calls)
 
@@ -408,6 +491,8 @@ class AnthropicLLMService(LLMService):
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
            context: "AnthropicLLMContext" = AnthropicLLMContext.upgrade_to_anthropic(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
             context = AnthropicLLMContext.from_messages(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
@@ -420,7 +505,7 @@ class AnthropicLLMService(LLMService):
             await self._update_settings(frame.settings)
         elif isinstance(frame, LLMEnablePromptCachingFrame):
             self.logger.debug(f"Setting enable prompt caching to: [{frame.enable}]")
-            self._settings["enable_prompt_caching_beta"] = frame.enable
+            self._settings["enable_prompt_caching"] = frame.enable
         else:
             await self.push_frame(frame, direction)
 
@@ -889,13 +974,13 @@ class AnthropicLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Replaces image data with placeholder text for cleaner logs.
 
         Returns:
-            JSON string representation of messages for logging.
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -906,7 +991,7 @@ class AnthropicLLMContext(OpenAILLMContext):
                     if item["type"] == "image":
                         item["source"]["data"] = "..."
             msgs.append(msg)
-        return json.dumps(msgs)
+        return msgs
 
 
 class AnthropicUserContextAggregator(LLMUserContextAggregator):
pipecat/services/asyncai/tts.py

@@ -52,6 +52,10 @@ def language_to_async_language(language: Language) -> Optional[str]:
     """
     BASE_LANGUAGES = {
         Language.EN: "en",
+        Language.FR: "fr",
+        Language.ES: "es",
+        Language.DE: "de",
+        Language.IT: "it",
     }
 
     result = BASE_LANGUAGES.get(language)
pipecat/services/aws/llm.py

@@ -16,6 +16,7 @@ import base64
 import copy
 import io
 import json
+import os
 import re
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
@@ -31,6 +32,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -40,6 +42,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -553,11 +556,11 @@ class AWSBedrockLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-            JSON string representation of messages with image data redacted.
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -568,7 +571,7 @@ class AWSBedrockLLMContext(OpenAILLMContext):
                     if item.get("image"):
                         item["source"]["bytes"] = "..."
             msgs.append(msg)
-        return json.dumps(msgs)
+        return msgs
 
 
 class AWSBedrockUserContextAggregator(LLMUserContextAggregator):
@@ -759,10 +762,10 @@ class AWSBedrockLLMService(LLMService):
 
         # Store AWS session parameters for creating client in async context
         self._aws_params = {
-            "aws_access_key_id": aws_access_key,
-            "aws_secret_access_key": aws_secret_key,
-            "aws_session_token": aws_session_token,
-            "region_name": aws_region,
+            "aws_access_key_id": aws_access_key or os.getenv("AWS_ACCESS_KEY_ID"),
+            "aws_secret_access_key": aws_secret_key or os.getenv("AWS_SECRET_ACCESS_KEY"),
+            "aws_session_token": aws_session_token or os.getenv("AWS_SESSION_TOKEN"),
+            "region_name": aws_region or os.getenv("AWS_REGION", "us-east-1"),
             "config": client_config,
         }
 
@@ -789,6 +792,75 @@ class AWSBedrockLLMService(LLMService):
         """
         return True
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        try:
+            messages = []
+            system = []
+            if isinstance(context, LLMContext):
+                # Future code will be something like this:
+                # adapter = self.get_llm_adapter()
+                # params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params(context)
+                # messages = params["messages"]
+                # system = params["system_instruction"]  # [{"text": "system message"}]
+                raise NotImplementedError(
+                    "Universal LLMContext is not yet supported for AWS Bedrock."
+                )
+            else:
+                context = AWSBedrockLLMContext.upgrade_to_bedrock(context)
+                messages = context.messages
+                system = getattr(context, "system", None)  # [{"text": "system message"}]
+
+            # Determine if we're using Claude or Nova based on model ID
+            model_id = self.model_name
+
+            # Prepare request parameters
+            request_params = {
+                "modelId": model_id,
+                "messages": messages,
+                "inferenceConfig": {
+                    "maxTokens": 8192,
+                    "temperature": 0.7,
+                    "topP": 0.9,
+                },
+            }
+
+            if system:
+                request_params["system"] = system
+
+            async with self._aws_session.client(
+                service_name="bedrock-runtime", **self._aws_params
+            ) as client:
+                # Call Bedrock without streaming
+                response = await client.converse(**request_params)
+
+            # Extract the response text
+            if (
+                "output" in response
+                and "message" in response["output"]
+                and "content" in response["output"]["message"]
+            ):
+                content = response["output"]["message"]["content"]
+                if isinstance(content, list):
+                    for item in content:
+                        if item.get("text"):
+                            return item["text"]
+                elif isinstance(content, str):
+                    return content
+
+            return None
+
+        except Exception as e:
+            logger.error(f"Bedrock summary generation failed: {e}", exc_info=True)
+            return None
+
     async def _create_converse_stream(self, client, request_params):
         """Create converse stream with optional timeout and retry.
 
@@ -802,7 +874,7 @@ class AWSBedrockLLMService(LLMService):
         if self._retry_on_timeout:
             try:
                 response = await asyncio.wait_for(
-                    await client.converse_stream(**request_params), timeout=self._retry_timeout_secs
+                    client.converse_stream(**request_params), timeout=self._retry_timeout_secs
                 )
                 return response
             except (ReadTimeoutError, asyncio.TimeoutError) as e:
@@ -1044,6 +1116,8 @@ class AWSBedrockLLMService(LLMService):
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
             context = AWSBedrockLLMContext.upgrade_to_bedrock(frame.context)
+        if isinstance(frame, LLMContextFrame):
+            raise NotImplementedError("Universal LLMContext is not yet supported for AWS Bedrock.")
         elif isinstance(frame, LLMMessagesFrame):
            context = AWSBedrockLLMContext.from_messages(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
pipecat/services/aws/tts.py

@@ -185,16 +185,6 @@ class AWSPollyTTSService(TTSService):
             "region_name": region or os.getenv("AWS_REGION", "us-east-1"),
         }
 
-        # Validate that we have the required credentials
-        if (
-            not self._aws_params["aws_access_key_id"]
-            or not self._aws_params["aws_secret_access_key"]
-        ):
-            raise ValueError(
-                "AWS credentials not found. Please provide them either through constructor parameters "
-                "or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables."
-            )
-
         self._aws_session = aioboto3.Session()
         self._settings = {
             "engine": params.engine,
pipecat/services/aws_nova_sonic/aws.py

@@ -34,6 +34,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMTextFrame,
@@ -322,6 +323,10 @@ class AWSNovaSonicLLMService(LLMService):
 
         if isinstance(frame, OpenAILLMContextFrame):
             await self._handle_context(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            raise NotImplementedError(
+                "Universal LLMContext is not yet supported for AWS Nova Sonic."
+            )
         elif isinstance(frame, InputAudioRawFrame):
             await self._handle_input_audio_frame(frame)
         elif isinstance(frame, BotStoppedSpeakingFrame):
pipecat/services/cartesia/tts.py

@@ -10,7 +10,7 @@ import base64
 import json
 import uuid
 import warnings
-from typing import AsyncGenerator, List, Optional, Union
+from typing import AsyncGenerator, List, Literal, Optional, Union
 
 from loguru import logger
 from pydantic import BaseModel, Field
@@ -102,7 +102,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control (string or float).
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
                 .. deprecated:: 0.0.68
@@ -110,7 +110,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[Union[str, float]] = ""
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
        emotion: Optional[List[str]] = []
 
    def __init__(
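With `speed` now typed as a literal preset, callers pass one of `"slow"`, `"normal"`, or `"fast"` (or leave it unset) instead of a free-form string or float. A small sketch, assuming the usual `api_key`/`voice_id`/`params` constructor arguments; the key and voice ID are placeholders, not taken from this diff:

```python
from pipecat.services.cartesia.tts import CartesiaTTSService

tts = CartesiaTTSService(
    api_key="YOUR_CARTESIA_API_KEY",  # placeholder
    voice_id="YOUR_VOICE_ID",         # placeholder
    params=CartesiaTTSService.InputParams(speed="fast"),  # previously a str/float, e.g. ""
)
```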
@@ -272,11 +272,13 @@ class CartesiaTTSService(AudioContextWordTTSService):
             voice_config["id"] = self._voice_id
 
         if self._settings["emotion"]:
-            warnings.warn(
-                "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {}
             if self._settings["emotion"]:
                 voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"]
@@ -387,7 +389,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         await self._websocket.send(msg)
         self._context_id = None
 
-    async def _receive_messages(self):
+    async def _process_messages(self):
         async for message in self._get_websocket():
             msg = json.loads(message)
             if not msg or not self.audio_context_available(msg["context_id"]):
@@ -421,6 +423,14 @@ class CartesiaTTSService(AudioContextWordTTSService):
             else:
                 logger.error(f"{self} error, unknown message type: {msg}")
 
+    async def _receive_messages(self):
+        while True:
+            await self._process_messages()
+            # Cartesia times out after 5 minutes of innactivity (no keepalive
+            # mechanism is available). So, we try to reconnect.
+            logger.debug(f"{self} Cartesia connection was disconnected (timeout?), reconnecting")
+            await self._connect_websocket()
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Cartesia's streaming API.
@@ -472,7 +482,7 @@ class CartesiaHttpTTSService(TTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control (string or float).
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
                 .. deprecated:: 0.0.68
@@ -480,7 +490,7 @@ class CartesiaHttpTTSService(TTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[Union[str, float]] = ""
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = Field(default_factory=list)
 
         def __init__(
@@ -600,11 +610,13 @@ class CartesiaHttpTTSService(TTSService):
         voice_config = {"mode": "id", "id": self._voice_id}
 
         if self._settings["emotion"]:
-            warnings.warn(
-                "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]}
 
         await self.start_ttfb_metrics()
pipecat/services/cerebras/llm.py

@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -27,7 +26,7 @@ class CerebrasLLMService(OpenAILLMService):
         *,
         api_key: str,
         base_url: str = "https://api.cerebras.ai/v1",
-        model: str = "llama-3.3-70b",
+        model: str = "gpt-oss-120b",
         **kwargs,
     ):
         """Initialize the Cerebras LLM service.
@@ -35,7 +34,7 @@ class CerebrasLLMService(OpenAILLMService):
         Args:
             api_key: The API key for accessing Cerebras's API.
             base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
-            model: The model identifier to use. Defaults to "llama-3.3-70b".
+            model: The model identifier to use. Defaults to "gpt-oss-120b".
             **kwargs: Additional keyword arguments passed to OpenAILLMService.
         """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
@@ -54,25 +53,31 @@ class CerebrasLLMService(OpenAILLMService):
         logger.debug(f"Creating Cerebras client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Cerebras chat completion request.
 
         Cerebras supports a subset of OpenAI parameters, focusing on core
         completion settings without advanced features like frequency/presence penalties.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "seed": self._settings["seed"],
            "temperature": self._settings["temperature"],
            "top_p": self._settings["top_p"],
            "max_completion_tokens": self._settings["max_completion_tokens"],
        }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params
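The refactored `build_chat_completion_params()` now receives the adapter-derived `OpenAILLMInvocationParams` and folds it into the request with `params.update()`. A hypothetical illustration of that merge (the literal values below are made up for the example):

```python
# Adapter-derived portion: messages, tools, and tool_choice come from the context.
params_from_context = {
    "messages": [{"role": "user", "content": "Hello"}],
    "tools": [],
    "tool_choice": "none",
}

# Service-owned portion: model and sampling settings.
params = {
    "model": "gpt-oss-120b",
    "stream": True,
    "temperature": 0.7,
    "max_completion_tokens": 1024,
}

params.update(params_from_context)  # final kwargs for the chat completion request
print(params)
```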
pipecat/services/deepgram/stt.py

@@ -276,6 +276,14 @@ class DeepgramSTTService(STTService):
     async def _disconnect(self):
         if self._connection.is_connected:
             self.logger.debug("Disconnecting from Deepgram")
+            # Deepgram swallows asyncio.CancelledError internally which prevents
+            # proper cancellation propagation. This issue was found with
+            # parallel pipelines where `CancelFrame` was not awaited for to
+            # finish in all branches and it was pushed downstream reaching the
+            # end of the pipeline, which caused `cleanup()` to be called while
+            # Deepgram disconnection was still finishing and therefore
+            # preventing the task cancellation that occurs during `cleanup()`.
+            # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
             await self._connection.finish()
 
     async def start_metrics(self):