dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
@@ -24,13 +24,17 @@ from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.anthropic_adapter import
+from pipecat.adapters.services.anthropic_adapter import (
+    AnthropicLLMAdapter,
+    AnthropicLLMInvocationParams,
+)
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMEnablePromptCachingFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
@@ -41,6 +45,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -110,7 +115,12 @@ class AnthropicLLMService(LLMService):
         """Input parameters for Anthropic model inference.
 
         Parameters:
-
+            enable_prompt_caching: Whether to enable the prompt caching feature.
+            enable_prompt_caching_beta (deprecated): Whether to enable the beta prompt caching feature.
+
+                .. deprecated:: 0.0.84
+                    Use the `enable_prompt_caching` parameter instead.
+
             max_tokens: Maximum tokens to generate. Must be at least 1.
             temperature: Sampling temperature between 0.0 and 1.0.
             top_k: Top-k sampling parameter.
@@ -118,13 +128,26 @@ class AnthropicLLMService(LLMService):
             extra: Additional parameters to pass to the API.
         """
 
-
+        enable_prompt_caching: Optional[bool] = None
+        enable_prompt_caching_beta: Optional[bool] = None
         max_tokens: Optional[int] = Field(default_factory=lambda: 4096, ge=1)
         temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
         top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
 
+        def model_post_init(self, __context):
+            """Post-initialization to handle deprecated parameters."""
+            if self.enable_prompt_caching_beta is not None:
+                import warnings
+
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "enable_prompt_caching_beta is deprecated. Use enable_prompt_caching instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
+
     def __init__(
         self,
         *,
@@ -157,7 +180,15 @@ class AnthropicLLMService(LLMService):
         self._retry_on_timeout = retry_on_timeout
         self._settings = {
             "max_tokens": params.max_tokens,
-            "
+            "enable_prompt_caching": (
+                params.enable_prompt_caching
+                if params.enable_prompt_caching is not None
+                else (
+                    params.enable_prompt_caching_beta
+                    if params.enable_prompt_caching_beta is not None
+                    else False
+                )
+            ),
             "temperature": params.temperature,
             "top_k": params.top_k,
             "top_p": params.top_p,
@@ -197,14 +228,39 @@ class AnthropicLLMService(LLMService):
             response = await api_call(**params)
             return response
 
-
-
-
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
 
         Returns:
-
+            The LLM's response as a string, or None if no response is generated.
         """
-
+        messages = []
+        system = NOT_GIVEN
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            messages = params["messages"]
+            system = params["system"]
+        else:
+            context = AnthropicLLMContext.upgrade_to_anthropic(context)
+            messages = context.messages
+            system = getattr(context, "system", NOT_GIVEN)
+
+        # LLM completion
+        response = await self._client.messages.create(
+            model=self.model_name,
+            messages=messages,
+            system=system,
+            max_tokens=8192,
+            stream=False,
+        )
+
+        return response.content[0].text
 
     def create_context_aggregator(
         self,
@@ -235,8 +291,31 @@ class AnthropicLLMService(LLMService):
         assistant = AnthropicAssistantContextAggregator(context, params=assistant_params)
         return AnthropicContextAggregatorPair(_user=user, _assistant=assistant)
 
+    def _get_llm_invocation_params(
+        self, context: OpenAILLMContext | LLMContext
+    ) -> AnthropicLLMInvocationParams:
+        # Universal LLMContext
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            return params
+
+        # Anthropic-specific context
+        messages = (
+            context.get_messages_with_cache_control_markers()
+            if self._settings["enable_prompt_caching"]
+            else context.messages
+        )
+        return AnthropicLLMInvocationParams(
+            system=context.system,
+            messages=messages,
+            tools=context.tools or [],
+        )
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         # Usage tracking. We track the usage reported by Anthropic in prompt_tokens and
         # completion_tokens. We also estimate the completion tokens from output text
         # and use that estimate if we are interrupted, because we almost certainly won't
@@ -252,24 +331,22 @@ class AnthropicLLMService(LLMService):
         await self.push_frame(LLMFullResponseStartFrame())
         await self.start_processing_metrics()
 
+        params_from_context = self._get_llm_invocation_params(context)
+
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            context_type_for_logging = "universal"
+            messages_for_logging = adapter.get_messages_for_logging(context)
+        else:
+            context_type_for_logging = "LLM-specific"
+            messages_for_logging = context.get_messages_for_logging()
         self.logger.debug(
-            f"{self}: Generating chat [{
+            f"{self}: Generating chat from {context_type_for_logging} context [{params_from_context['system']}] | {messages_for_logging}"
         )
 
-        messages = context.messages
-        if self._settings["enable_prompt_caching_beta"]:
-            messages = context.get_messages_with_cache_control_markers()
-
-        api_call = self._client.messages.create
-        if self._settings["enable_prompt_caching_beta"]:
-            api_call = self._client.beta.prompt_caching.messages.create
-
         await self.start_ttfb_metrics()
 
         params = {
-            "tools": context.tools or [],
-            "system": context.system,
-            "messages": messages,
             "model": self.model_name,
             "max_tokens": self._settings["max_tokens"],
             "stream": True,
@@ -278,9 +355,12 @@ class AnthropicLLMService(LLMService):
             "top_p": self._settings["top_p"],
         }
 
+        # Messages, system, tools
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
 
-        response = await self._create_message_stream(
+        response = await self._create_message_stream(self._client.messages.create, params)
 
         await self.stop_ttfb_metrics()
 
@@ -363,7 +443,10 @@ class AnthropicLLMService(LLMService):
                 prompt_tokens + cache_creation_input_tokens + cache_read_input_tokens
             )
             if total_input_tokens >= 1024:
-
+                if hasattr(
+                    context, "turns_above_cache_threshold"
+                ):  # LLMContext doesn't have this attribute
+                    context.turns_above_cache_threshold += 1
 
         await self.run_function_calls(function_calls)
 
@@ -408,6 +491,8 @@ class AnthropicLLMService(LLMService):
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
            context: "AnthropicLLMContext" = AnthropicLLMContext.upgrade_to_anthropic(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
             context = AnthropicLLMContext.from_messages(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
@@ -420,7 +505,7 @@ class AnthropicLLMService(LLMService):
             await self._update_settings(frame.settings)
         elif isinstance(frame, LLMEnablePromptCachingFrame):
             self.logger.debug(f"Setting enable prompt caching to: [{frame.enable}]")
-            self._settings["
+            self._settings["enable_prompt_caching"] = frame.enable
         else:
             await self.push_frame(frame, direction)
 
@@ -889,13 +974,13 @@ class AnthropicLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Replaces image data with placeholder text for cleaner logs.
 
         Returns:
-
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -906,7 +991,7 @@ class AnthropicLLMContext(OpenAILLMContext):
                     if item["type"] == "image":
                         item["source"]["data"] = "..."
             msgs.append(msg)
-        return
+        return msgs
 
 
 class AnthropicUserContextAggregator(LLMUserContextAggregator):

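Note: the `run_inference` method added above gives `AnthropicLLMService` a one-shot, out-of-pipeline completion path that accepts either the new universal `LLMContext` or the legacy `OpenAILLMContext`. A minimal usage sketch (the API key and model name are placeholders, not taken from this diff):

import asyncio

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.anthropic.llm import AnthropicLLMService


async def main():
    # Placeholder credentials and model; run_inference also accepts the new LLMContext.
    llm = AnthropicLLMService(api_key="sk-ant-...", model="claude-3-5-sonnet-latest")
    context = OpenAILLMContext(messages=[{"role": "user", "content": "Say hello."}])
    reply = await llm.run_inference(context)  # response text, or None
    print(reply)


asyncio.run(main())
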
pipecat/services/asyncai/tts.py
CHANGED
@@ -52,6 +52,10 @@ def language_to_async_language(language: Language) -> Optional[str]:
     """
     BASE_LANGUAGES = {
         Language.EN: "en",
+        Language.FR: "fr",
+        Language.ES: "es",
+        Language.DE: "de",
+        Language.IT: "it",
     }
 
     result = BASE_LANGUAGES.get(language)
pipecat/services/aws/llm.py
CHANGED
@@ -16,6 +16,7 @@ import base64
 import copy
 import io
 import json
+import os
 import re
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
@@ -31,6 +32,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -40,6 +42,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -553,11 +556,11 @@ class AWSBedrockLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -568,7 +571,7 @@ class AWSBedrockLLMContext(OpenAILLMContext):
                     if item.get("image"):
                         item["source"]["bytes"] = "..."
             msgs.append(msg)
-        return
+        return msgs
 
 
 class AWSBedrockUserContextAggregator(LLMUserContextAggregator):
@@ -759,10 +762,10 @@ class AWSBedrockLLMService(LLMService):
 
         # Store AWS session parameters for creating client in async context
         self._aws_params = {
-            "aws_access_key_id": aws_access_key,
-            "aws_secret_access_key": aws_secret_key,
-            "aws_session_token": aws_session_token,
-            "region_name": aws_region,
+            "aws_access_key_id": aws_access_key or os.getenv("AWS_ACCESS_KEY_ID"),
+            "aws_secret_access_key": aws_secret_key or os.getenv("AWS_SECRET_ACCESS_KEY"),
+            "aws_session_token": aws_session_token or os.getenv("AWS_SESSION_TOKEN"),
+            "region_name": aws_region or os.getenv("AWS_REGION", "us-east-1"),
             "config": client_config,
         }
 
@@ -789,6 +792,75 @@
         """
         return True
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        try:
+            messages = []
+            system = []
+            if isinstance(context, LLMContext):
+                # Future code will be something like this:
+                # adapter = self.get_llm_adapter()
+                # params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params(context)
+                # messages = params["messages"]
+                # system = params["system_instruction"]  # [{"text": "system message"}]
+                raise NotImplementedError(
+                    "Universal LLMContext is not yet supported for AWS Bedrock."
+                )
+            else:
+                context = AWSBedrockLLMContext.upgrade_to_bedrock(context)
+                messages = context.messages
+                system = getattr(context, "system", None)  # [{"text": "system message"}]
+
+            # Determine if we're using Claude or Nova based on model ID
+            model_id = self.model_name
+
+            # Prepare request parameters
+            request_params = {
+                "modelId": model_id,
+                "messages": messages,
+                "inferenceConfig": {
+                    "maxTokens": 8192,
+                    "temperature": 0.7,
+                    "topP": 0.9,
+                },
+            }
+
+            if system:
+                request_params["system"] = system
+
+            async with self._aws_session.client(
+                service_name="bedrock-runtime", **self._aws_params
+            ) as client:
+                # Call Bedrock without streaming
+                response = await client.converse(**request_params)
+
+                # Extract the response text
+                if (
+                    "output" in response
+                    and "message" in response["output"]
+                    and "content" in response["output"]["message"]
+                ):
+                    content = response["output"]["message"]["content"]
+                    if isinstance(content, list):
+                        for item in content:
+                            if item.get("text"):
+                                return item["text"]
+                    elif isinstance(content, str):
+                        return content
+
+                return None
+
+        except Exception as e:
+            logger.error(f"Bedrock summary generation failed: {e}", exc_info=True)
+            return None
+
     async def _create_converse_stream(self, client, request_params):
         """Create converse stream with optional timeout and retry.
 
@@ -802,7 +874,7 @@ class AWSBedrockLLMService(LLMService):
         if self._retry_on_timeout:
             try:
                 response = await asyncio.wait_for(
-
+                    client.converse_stream(**request_params), timeout=self._retry_timeout_secs
                 )
                 return response
             except (ReadTimeoutError, asyncio.TimeoutError) as e:
@@ -1044,6 +1116,8 @@
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
             context = AWSBedrockLLMContext.upgrade_to_bedrock(frame.context)
+        if isinstance(frame, LLMContextFrame):
+            raise NotImplementedError("Universal LLMContext is not yet supported for AWS Bedrock.")
         elif isinstance(frame, LLMMessagesFrame):
             context = AWSBedrockLLMContext.from_messages(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):

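Because the stored session parameters above now fall back to environment variables, `AWSBedrockLLMService` can be constructed without passing credentials explicitly. A hedged sketch, assuming the standard AWS variables are exported and that the constructor's credential arguments default to None (the model ID below is a placeholder):

from pipecat.services.aws.llm import AWSBedrockLLMService

# Relies on AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY being set in the environment;
# AWS_REGION falls back to "us-east-1" per the change above.
llm = AWSBedrockLLMService(model="us.anthropic.claude-3-5-haiku-20241022-v1:0")
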
pipecat/services/aws/tts.py
CHANGED
@@ -185,16 +185,6 @@ class AWSPollyTTSService(TTSService):
             "region_name": region or os.getenv("AWS_REGION", "us-east-1"),
         }
 
-        # Validate that we have the required credentials
-        if (
-            not self._aws_params["aws_access_key_id"]
-            or not self._aws_params["aws_secret_access_key"]
-        ):
-            raise ValueError(
-                "AWS credentials not found. Please provide them either through constructor parameters "
-                "or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables."
-            )
-
         self._aws_session = aioboto3.Session()
         self._settings = {
             "engine": params.engine,

pipecat/services/aws_nova_sonic/aws.py
CHANGED
@@ -34,6 +34,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     InputAudioRawFrame,
     InterimTranscriptionFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMTextFrame,
@@ -322,6 +323,10 @@ class AWSNovaSonicLLMService(LLMService):
 
         if isinstance(frame, OpenAILLMContextFrame):
             await self._handle_context(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            raise NotImplementedError(
+                "Universal LLMContext is not yet supported for AWS Nova Sonic."
+            )
         elif isinstance(frame, InputAudioRawFrame):
             await self._handle_input_audio_frame(frame)
         elif isinstance(frame, BotStoppedSpeakingFrame):

pipecat/services/cartesia/tts.py
CHANGED
@@ -10,7 +10,7 @@ import base64
 import json
 import uuid
 import warnings
-from typing import AsyncGenerator, List, Optional, Union
+from typing import AsyncGenerator, List, Literal, Optional, Union
 
 from loguru import logger
 from pydantic import BaseModel, Field
@@ -102,7 +102,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
             .. deprecated:: 0.0.68
@@ -110,7 +110,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = []
 
     def __init__(
@@ -272,11 +272,13 @@ class CartesiaTTSService(AudioContextWordTTSService):
         voice_config["id"] = self._voice_id
 
         if self._settings["emotion"]:
-            warnings.
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {}
             if self._settings["emotion"]:
                 voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"]
@@ -387,7 +389,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
             await self._websocket.send(msg)
             self._context_id = None
 
-    async def
+    async def _process_messages(self):
         async for message in self._get_websocket():
             msg = json.loads(message)
             if not msg or not self.audio_context_available(msg["context_id"]):
@@ -421,6 +423,14 @@ class CartesiaTTSService(AudioContextWordTTSService):
             else:
                 logger.error(f"{self} error, unknown message type: {msg}")
 
+    async def _receive_messages(self):
+        while True:
+            await self._process_messages()
+            # Cartesia times out after 5 minutes of innactivity (no keepalive
+            # mechanism is available). So, we try to reconnect.
+            logger.debug(f"{self} Cartesia connection was disconnected (timeout?), reconnecting")
+            await self._connect_websocket()
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Cartesia's streaming API.
@@ -472,7 +482,7 @@ class CartesiaHttpTTSService(TTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
             .. deprecated:: 0.0.68
@@ -480,7 +490,7 @@ class CartesiaHttpTTSService(TTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
         emotion: Optional[List[str]] = Field(default_factory=list)
 
     def __init__(
@@ -600,11 +610,13 @@ class CartesiaHttpTTSService(TTSService):
         voice_config = {"mode": "id", "id": self._voice_id}
 
         if self._settings["emotion"]:
-            warnings.
-
-
-
-
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]}
 
         await self.start_ttfb_metrics()

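The `speed` setting in both Cartesia services is now typed as a literal of "slow", "normal", or "fast", so pydantic validation rejects other strings. A small illustration (note the surrounding docstring still marks these experimental controls as deprecated since 0.0.68):

from pydantic import ValidationError

from pipecat.services.cartesia.tts import CartesiaTTSService

CartesiaTTSService.InputParams(speed="fast")        # accepted
try:
    CartesiaTTSService.InputParams(speed="faster")  # not one of "slow" | "normal" | "fast"
except ValidationError as e:
    print(e)
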
pipecat/services/cerebras/llm.py
CHANGED
@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -27,7 +26,7 @@ class CerebrasLLMService(OpenAILLMService):
         *,
         api_key: str,
         base_url: str = "https://api.cerebras.ai/v1",
-        model: str = "
+        model: str = "gpt-oss-120b",
         **kwargs,
     ):
         """Initialize the Cerebras LLM service.
@@ -35,7 +34,7 @@ class CerebrasLLMService(OpenAILLMService):
         Args:
             api_key: The API key for accessing Cerebras's API.
             base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
-            model: The model identifier to use. Defaults to "
+            model: The model identifier to use. Defaults to "gpt-oss-120b".
             **kwargs: Additional keyword arguments passed to OpenAILLMService.
         """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
@@ -54,25 +53,31 @@ class CerebrasLLMService(OpenAILLMService):
         logger.debug(f"Creating Cerebras client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Cerebras chat completion request.
 
         Cerebras supports a subset of OpenAI parameters, focusing on core
         completion settings without advanced features like frequency/presence penalties.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "seed": self._settings["seed"],
             "temperature": self._settings["temperature"],
             "top_p": self._settings["top_p"],
             "max_completion_tokens": self._settings["max_completion_tokens"],
         }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params

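For reference, the constructor change above means a bare `CerebrasLLMService` now targets `gpt-oss-120b` unless a model is passed explicitly (the API key below is a placeholder):

from pipecat.services.cerebras.llm import CerebrasLLMService

# Defaults per this diff: base_url="https://api.cerebras.ai/v1", model="gpt-oss-120b".
llm = CerebrasLLMService(api_key="csk-...")
assert llm.model_name == "gpt-oss-120b"
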
pipecat/services/deepgram/stt.py
CHANGED
@@ -276,6 +276,14 @@ class DeepgramSTTService(STTService):
     async def _disconnect(self):
         if self._connection.is_connected:
             self.logger.debug("Disconnecting from Deepgram")
+            # Deepgram swallows asyncio.CancelledError internally which prevents
+            # proper cancellation propagation. This issue was found with
+            # parallel pipelines where `CancelFrame` was not awaited for to
+            # finish in all branches and it was pushed downstream reaching the
+            # end of the pipeline, which caused `cleanup()` to be called while
+            # Deepgram disconnection was still finishing and therefore
+            # preventing the task cancellation that occurs during `cleanup()`.
+            # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
             await self._connection.finish()
 
     async def start_metrics(self):