dv-pipecat-ai 0.0.75.dev883__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/RECORD +121 -80
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +145 -0
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/azure/llm.py +77 -1
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/elevenlabs/__init__.py +2 -0
- pipecat/services/elevenlabs/stt.py +351 -0
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +4 -4
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/top_level.txt +0 -0

pipecat/serializers/asterisk.py
ADDED
@@ -0,0 +1,145 @@
+# asterisk_ws_serializer.py
+import base64
+import json
+from typing import Literal, Optional
+
+from pydantic import BaseModel
+
+from pipecat.audio.utils import alaw_to_pcm, create_stream_resampler, pcm_to_alaw, pcm_to_ulaw, ulaw_to_pcm
+from pipecat.frames.frames import (
+    AudioRawFrame,
+    CancelFrame,
+    EndFrame,
+    Frame,
+    InputAudioRawFrame,
+    StartFrame,
+    StartInterruptionFrame,
+    TransportMessageFrame,
+    TransportMessageUrgentFrame,
+)
+from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
+
+
+class AsteriskFrameSerializer(FrameSerializer):
+    class InputParams(BaseModel):
+        """Configuration parameters for AsteriskFrameSerializer.
+
+        Parameters:
+            telephony_encoding: The encoding used by the telephony system (e.g., "pcmu" for μ-law).
+            telephony_sample_rate: The sample rate used by the telephony system (e.g., 8000 Hz).
+            sample_rate: Optional override for the pipeline input sample rate.
+            auto_hang_up: Whether to automatically terminate the call on EndFrame.
+        """
+
+        # What the ADAPTER/Asterisk is sending/expecting on the wire:
+        # "pcmu" -> μ-law @ 8k; "pcma" -> A-law @ 8k; "pcm16" -> signed 16-bit @ 8k
+        telephony_encoding: Literal["pcmu", "pcma", "pcm16"] = "pcmu"
+        telephony_sample_rate: int = 8000
+        sample_rate: Optional[int] = None  # pipeline input rate
+        auto_hang_up: bool = False  # no-op here; adapter handles hangup
+
+    def __init__(self, stream_id: str, params: Optional[InputParams] = None):
+        self._stream_id = stream_id
+        self._params = params or AsteriskFrameSerializer.InputParams()
+        self._tel_rate = self._params.telephony_sample_rate
+        self._sample_rate = 0
+        self._in_resampler = create_stream_resampler()
+        self._out_resampler = create_stream_resampler()
+        self._hangup_sent = False
+
+    @property
+    def type(self) -> FrameSerializerType:
+        return FrameSerializerType.TEXT  # we send/recv JSON strings
+
+    async def setup(self, frame: StartFrame):
+        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
+
+    # Pipecat -> Adapter (play to caller)
+    async def serialize(self, frame: Frame) -> str | bytes | None:
+        # On pipeline end, ask bridge to hang up
+        if (
+            self._params.auto_hang_up
+            and not self._hangup_sent
+            and isinstance(frame, (EndFrame, CancelFrame))
+        ):
+            self._hangup_sent = True
+            return json.dumps({"event": "hangup"})
+        if isinstance(frame, StartInterruptionFrame):
+            return json.dumps({"event": "clear", "streamId": self._stream_id})
+        if isinstance(frame, AudioRawFrame):
+            pcm = frame.audio
+            if self._params.telephony_encoding == "pcmu":
+                ul = await pcm_to_ulaw(pcm, frame.sample_rate, self._tel_rate, self._out_resampler)
+                if not ul:
+                    return None
+                payload = base64.b64encode(ul).decode("utf-8")
+                return json.dumps(
+                    {
+                        "event": "media",
+                        "encoding": "pcmu",
+                        "sampleRate": self._tel_rate,
+                        "payload": payload,
+                    }
+                )
+            elif self._params.telephony_encoding == "pcma":
+                al = await pcm_to_alaw(pcm, frame.sample_rate, self._tel_rate, self._out_resampler)
+                if not al:
+                    return None
+                payload = base64.b64encode(al).decode("utf-8")
+                return json.dumps(
+                    {
+                        "event": "media",
+                        "encoding": "pcma",
+                        "sampleRate": self._tel_rate,
+                        "payload": payload,
+                    }
+                )
+            else:  # "pcm16"
+                # resample to 8k if needed, but data stays PCM16 bytes
+                pcm8 = await self._out_resampler.resample(pcm, frame.sample_rate, self._tel_rate)
+                if not pcm8:
+                    return None
+                payload = base64.b64encode(pcm8).decode("utf-8")
+                return json.dumps(
+                    {
+                        "event": "media",
+                        "encoding": "pcm16",
+                        "sampleRate": self._tel_rate,
+                        "payload": payload,
+                    }
+                )
+        if isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
+            return json.dumps(frame.message)
+        return None
+
+    # Adapter -> Pipecat (audio from caller)
+    async def deserialize(self, data: str | bytes) -> Frame | None:
+        try:
+            msg = json.loads(data)
+        except Exception:
+            return None
+        if msg.get("event") == "media":
+            sr = int(msg.get("sampleRate", self._tel_rate))
+            raw = base64.b64decode(msg.get("payload", ""))
+            if not raw:
+                return None
+            # Use our configured telephony_encoding instead of trusting the message
+            if self._params.telephony_encoding == "pcmu":
+                pcm = await ulaw_to_pcm(raw, sr, self._sample_rate, self._in_resampler)
+            elif self._params.telephony_encoding == "pcma":
+                pcm = await alaw_to_pcm(raw, sr, self._sample_rate, self._in_resampler)
+            elif self._params.telephony_encoding == "pcm16":
+                # resample if pipeline rate != 8k
+                pcm = await self._in_resampler.resample(raw, sr, self._sample_rate)
+            else:
+                return None
+            if not pcm:
+                return None
+            return InputAudioRawFrame(audio=pcm, num_channels=1, sample_rate=self._sample_rate)
+        elif msg.get("event") == "dtmf":
+            # optional: map to InputDTMFFrame if you want
+            return None
+        elif msg.get("event") == "hangup":
+            # Bridge is hanging up; you can treat this as an EndFrame if you want.
+            return CancelFrame()
+        return None

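For reference, a minimal, hypothetical sketch of exercising the new serializer directly, to illustrate the JSON wire protocol it speaks ("clear", "media", and "hangup" events with base64 payloads). The stream id and silence payload are made up for illustration; in practice the serializer would be handed to a websocket transport rather than called by hand.

import asyncio
import json

from pipecat.frames.frames import AudioRawFrame, StartInterruptionFrame
from pipecat.serializers.asterisk import AsteriskFrameSerializer


async def demo():
    serializer = AsteriskFrameSerializer(
        stream_id="demo-stream",  # made-up stream id
        params=AsteriskFrameSerializer.InputParams(telephony_encoding="pcmu"),
    )

    # An interruption becomes a "clear" event so the bridge flushes queued audio.
    print(await serializer.serialize(StartInterruptionFrame()))

    # 20 ms of 8 kHz PCM silence becomes a base64-encoded "media" event in μ-law.
    silence = AudioRawFrame(audio=b"\x00\x00" * 160, sample_rate=8000, num_channels=1)
    media = json.loads(await serializer.serialize(silence))
    print(media["event"], media["encoding"], media["sampleRate"])


asyncio.run(demo())
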
pipecat/serializers/exotel.py
CHANGED
@@ -13,13 +13,13 @@ from typing import Optional
 from loguru import logger
 from pydantic import BaseModel
 
+from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.audio.utils import create_stream_resampler
 from pipecat.frames.frames import (
     AudioRawFrame,
     Frame,
     InputAudioRawFrame,
     InputDTMFFrame,
-    KeypadEntry,
     StartFrame,
     StartInterruptionFrame,
     TransportMessageFrame,

pipecat/serializers/plivo.py
CHANGED
@@ -13,6 +13,7 @@ from typing import Optional
 from loguru import logger
 from pydantic import BaseModel
 
+from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.audio.utils import create_stream_resampler, pcm_to_ulaw, ulaw_to_pcm
 from pipecat.frames.frames import (
     AudioRawFrame,
@@ -21,7 +22,6 @@ from pipecat.frames.frames import (
     Frame,
     InputAudioRawFrame,
     InputDTMFFrame,
-    KeypadEntry,
     StartFrame,
     StartInterruptionFrame,
     TransportMessageFrame,

pipecat/serializers/telnyx.py
CHANGED
@@ -14,6 +14,7 @@ import aiohttp
 from loguru import logger
 from pydantic import BaseModel
 
+from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.audio.utils import (
     alaw_to_pcm,
     create_stream_resampler,
@@ -28,7 +29,6 @@ from pipecat.frames.frames import (
     Frame,
     InputAudioRawFrame,
     InputDTMFFrame,
-    KeypadEntry,
     StartFrame,
     StartInterruptionFrame,
 )

pipecat/serializers/twilio.py
CHANGED
@@ -13,6 +13,7 @@ from typing import Optional
 from loguru import logger
 from pydantic import BaseModel
 
+from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.audio.utils import create_stream_resampler, pcm_to_ulaw, ulaw_to_pcm
 from pipecat.frames.frames import (
     AudioRawFrame,
@@ -21,7 +22,6 @@ from pipecat.frames.frames import (
     Frame,
     InputAudioRawFrame,
     InputDTMFFrame,
-    KeypadEntry,
     StartFrame,
     StartInterruptionFrame,
     TransportMessageFrame,

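All four telephony serializers above make the same change: KeypadEntry now comes from the new pipecat.audio.dtmf.types module rather than from pipecat.frames.frames. A hedged before/after sketch for downstream code, assuming the enum members themselves are unchanged:

# Old import path (pre-0.0.82 releases):
# from pipecat.frames.frames import InputDTMFFrame, KeypadEntry

# New import path:
from pipecat.audio.dtmf.types import KeypadEntry
from pipecat.frames.frames import InputDTMFFrame

frame = InputDTMFFrame(button=KeypadEntry.POUND)
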
pipecat/services/__init__.py
CHANGED
@@ -11,11 +11,11 @@ _warned_modules = set()
 
 
 def _warn_deprecated_access(globals: Dict[str, Any], attr, old: str, new: str):
-    import warnings
-
     # Only warn once per old->new module pair
     module_key = (old, new)
     if module_key not in _warned_modules:
+        import warnings
+
         with warnings.catch_warnings():
             warnings.simplefilter("always")
             warnings.warn(

pipecat/services/anthropic/llm.py
CHANGED
@@ -24,13 +24,17 @@ from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.anthropic_adapter import
+from pipecat.adapters.services.anthropic_adapter import (
+    AnthropicLLMAdapter,
+    AnthropicLLMInvocationParams,
+)
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMEnablePromptCachingFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
@@ -41,6 +45,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -110,7 +115,12 @@ class AnthropicLLMService(LLMService):
         """Input parameters for Anthropic model inference.
 
         Parameters:
-
+            enable_prompt_caching: Whether to enable the prompt caching feature.
+            enable_prompt_caching_beta (deprecated): Whether to enable the beta prompt caching feature.
+
+                .. deprecated:: 0.0.84
+                    Use the `enable_prompt_caching` parameter instead.
+
             max_tokens: Maximum tokens to generate. Must be at least 1.
             temperature: Sampling temperature between 0.0 and 1.0.
             top_k: Top-k sampling parameter.
@@ -118,13 +128,26 @@
             extra: Additional parameters to pass to the API.
         """
 
-
+        enable_prompt_caching: Optional[bool] = None
+        enable_prompt_caching_beta: Optional[bool] = None
         max_tokens: Optional[int] = Field(default_factory=lambda: 4096, ge=1)
         temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
         top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
 
+        def model_post_init(self, __context):
+            """Post-initialization to handle deprecated parameters."""
+            if self.enable_prompt_caching_beta is not None:
+                import warnings
+
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "enable_prompt_caching_beta is deprecated. Use enable_prompt_caching instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
+
     def __init__(
         self,
         *,
@@ -157,7 +180,15 @@ class AnthropicLLMService(LLMService):
         self._retry_on_timeout = retry_on_timeout
         self._settings = {
             "max_tokens": params.max_tokens,
-            "
+            "enable_prompt_caching": (
+                params.enable_prompt_caching
+                if params.enable_prompt_caching is not None
+                else (
+                    params.enable_prompt_caching_beta
+                    if params.enable_prompt_caching_beta is not None
+                    else False
+                )
+            ),
             "temperature": params.temperature,
             "top_k": params.top_k,
             "top_p": params.top_p,
@@ -197,14 +228,39 @@ class AnthropicLLMService(LLMService):
             response = await api_call(**params)
             return response
 
-
-
-
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
 
         Returns:
-
+            The LLM's response as a string, or None if no response is generated.
         """
-
+        messages = []
+        system = NOT_GIVEN
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            messages = params["messages"]
+            system = params["system"]
+        else:
+            context = AnthropicLLMContext.upgrade_to_anthropic(context)
+            messages = context.messages
+            system = getattr(context, "system", NOT_GIVEN)
+
+        # LLM completion
+        response = await self._client.messages.create(
+            model=self.model_name,
+            messages=messages,
+            system=system,
+            max_tokens=8192,
+            stream=False,
+        )
+
+        return response.content[0].text
 
     def create_context_aggregator(
         self,
@@ -235,8 +291,31 @@ class AnthropicLLMService(LLMService):
         assistant = AnthropicAssistantContextAggregator(context, params=assistant_params)
         return AnthropicContextAggregatorPair(_user=user, _assistant=assistant)
 
+    def _get_llm_invocation_params(
+        self, context: OpenAILLMContext | LLMContext
+    ) -> AnthropicLLMInvocationParams:
+        # Universal LLMContext
+        if isinstance(context, LLMContext):
+            adapter: AnthropicLLMAdapter = self.get_llm_adapter()
+            params = adapter.get_llm_invocation_params(
+                context, enable_prompt_caching=self._settings["enable_prompt_caching"]
+            )
+            return params
+
+        # Anthropic-specific context
+        messages = (
+            context.get_messages_with_cache_control_markers()
+            if self._settings["enable_prompt_caching"]
+            else context.messages
+        )
+        return AnthropicLLMInvocationParams(
+            system=context.system,
+            messages=messages,
+            tools=context.tools or [],
+        )
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         # Usage tracking. We track the usage reported by Anthropic in prompt_tokens and
         # completion_tokens. We also estimate the completion tokens from output text
         # and use that estimate if we are interrupted, because we almost certainly won't
@@ -252,24 +331,22 @@ class AnthropicLLMService(LLMService):
         await self.push_frame(LLMFullResponseStartFrame())
         await self.start_processing_metrics()
 
+        params_from_context = self._get_llm_invocation_params(context)
+
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            context_type_for_logging = "universal"
+            messages_for_logging = adapter.get_messages_for_logging(context)
+        else:
+            context_type_for_logging = "LLM-specific"
+            messages_for_logging = context.get_messages_for_logging()
         self.logger.debug(
-            f"{self}: Generating chat [{
+            f"{self}: Generating chat from {context_type_for_logging} context [{params_from_context['system']}] | {messages_for_logging}"
         )
 
-        messages = context.messages
-        if self._settings["enable_prompt_caching_beta"]:
-            messages = context.get_messages_with_cache_control_markers()
-
-        api_call = self._client.messages.create
-        if self._settings["enable_prompt_caching_beta"]:
-            api_call = self._client.beta.prompt_caching.messages.create
-
         await self.start_ttfb_metrics()
 
         params = {
-            "tools": context.tools or [],
-            "system": context.system,
-            "messages": messages,
             "model": self.model_name,
             "max_tokens": self._settings["max_tokens"],
             "stream": True,
@@ -278,9 +355,12 @@ class AnthropicLLMService(LLMService):
             "top_p": self._settings["top_p"],
         }
 
+        # Messages, system, tools
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
 
-        response = await self._create_message_stream(
+        response = await self._create_message_stream(self._client.messages.create, params)
 
         await self.stop_ttfb_metrics()
 
@@ -363,7 +443,10 @@ class AnthropicLLMService(LLMService):
                     prompt_tokens + cache_creation_input_tokens + cache_read_input_tokens
                 )
                 if total_input_tokens >= 1024:
-
+                    if hasattr(
+                        context, "turns_above_cache_threshold"
+                    ):  # LLMContext doesn't have this attribute
+                        context.turns_above_cache_threshold += 1
 
             await self.run_function_calls(function_calls)
 
@@ -408,6 +491,8 @@ class AnthropicLLMService(LLMService):
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
            context: "AnthropicLLMContext" = AnthropicLLMContext.upgrade_to_anthropic(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            context = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = AnthropicLLMContext.from_messages(frame.messages)
        elif isinstance(frame, VisionImageRawFrame):
@@ -420,7 +505,7 @@ class AnthropicLLMService(LLMService):
            await self._update_settings(frame.settings)
        elif isinstance(frame, LLMEnablePromptCachingFrame):
            self.logger.debug(f"Setting enable prompt caching to: [{frame.enable}]")
-            self._settings["
+            self._settings["enable_prompt_caching"] = frame.enable
        else:
            await self.push_frame(frame, direction)
 
@@ -889,13 +974,13 @@ class AnthropicLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Replaces image data with placeholder text for cleaner logs.
 
         Returns:
-
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -906,7 +991,7 @@ class AnthropicLLMContext(OpenAILLMContext):
                     if item["type"] == "image":
                        item["source"]["data"] = "..."
            msgs.append(msg)
-        return
+        return msgs
 
 
 class AnthropicUserContextAggregator(LLMUserContextAggregator):

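The Anthropic changes add a run_inference() helper for one-shot, out-of-band completions and replace the enable_prompt_caching_beta setting with enable_prompt_caching. A minimal sketch of the new call path, assuming the service constructor keeps its existing api_key/model/params arguments; the API key and model id are placeholders:

import asyncio

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.anthropic.llm import AnthropicLLMService


async def summarize_out_of_band():
    llm = AnthropicLLMService(
        api_key="sk-ant-...",  # placeholder
        model="claude-sonnet-4-20250514",  # placeholder model id
        params=AnthropicLLMService.InputParams(enable_prompt_caching=True),
    )
    # run_inference() performs a single non-streaming completion outside the
    # pipeline; an OpenAI-style context is upgraded to Anthropic format internally.
    context = OpenAILLMContext(
        messages=[{"role": "user", "content": "Summarize this call in one sentence."}]
    )
    print(await llm.run_inference(context))


asyncio.run(summarize_out_of_band())
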
pipecat/services/asyncai/tts.py
CHANGED
@@ -52,6 +52,10 @@ def language_to_async_language(language: Language) -> Optional[str]:
     """
     BASE_LANGUAGES = {
         Language.EN: "en",
+        Language.FR: "fr",
+        Language.ES: "es",
+        Language.DE: "de",
+        Language.IT: "it",
     }
 
     result = BASE_LANGUAGES.get(language)

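With the entries added above, the async.ai language lookup now covers French, Spanish, German, and Italian in addition to English. A quick sketch, assuming the function returns the base mapping unchanged for these languages:

from pipecat.services.asyncai.tts import language_to_async_language
from pipecat.transcriptions.language import Language

print(language_to_async_language(Language.FR))  # expected: "fr"
print(language_to_async_language(Language.IT))  # expected: "it"
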
pipecat/services/aws/llm.py
CHANGED
@@ -16,6 +16,7 @@ import base64
 import copy
 import io
 import json
+import os
 import re
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
@@ -31,6 +32,7 @@ from pipecat.frames.frames import (
     FunctionCallFromLLM,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -40,6 +42,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMAssistantContextAggregator,
@@ -553,11 +556,11 @@ class AWSBedrockLLMContext(OpenAILLMContext):
             messages.insert(0, {"role": "system", "content": self.system})
         return messages
 
-    def get_messages_for_logging(self) -> str:
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -568,7 +571,7 @@ class AWSBedrockLLMContext(OpenAILLMContext):
                     if item.get("image"):
                        item["source"]["bytes"] = "..."
            msgs.append(msg)
-        return
+        return msgs
 
 
 class AWSBedrockUserContextAggregator(LLMUserContextAggregator):
@@ -759,10 +762,10 @@ class AWSBedrockLLMService(LLMService):
 
         # Store AWS session parameters for creating client in async context
         self._aws_params = {
-            "aws_access_key_id": aws_access_key,
-            "aws_secret_access_key": aws_secret_key,
-            "aws_session_token": aws_session_token,
-            "region_name": aws_region,
+            "aws_access_key_id": aws_access_key or os.getenv("AWS_ACCESS_KEY_ID"),
+            "aws_secret_access_key": aws_secret_key or os.getenv("AWS_SECRET_ACCESS_KEY"),
+            "aws_session_token": aws_session_token or os.getenv("AWS_SESSION_TOKEN"),
+            "region_name": aws_region or os.getenv("AWS_REGION", "us-east-1"),
             "config": client_config,
         }
 
@@ -789,6 +792,75 @@ class AWSBedrockLLMService(LLMService):
         """
         return True
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        try:
+            messages = []
+            system = []
+            if isinstance(context, LLMContext):
+                # Future code will be something like this:
+                # adapter = self.get_llm_adapter()
+                # params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params(context)
+                # messages = params["messages"]
+                # system = params["system_instruction"]  # [{"text": "system message"}]
+                raise NotImplementedError(
+                    "Universal LLMContext is not yet supported for AWS Bedrock."
+                )
+            else:
+                context = AWSBedrockLLMContext.upgrade_to_bedrock(context)
+                messages = context.messages
+                system = getattr(context, "system", None)  # [{"text": "system message"}]
+
+            # Determine if we're using Claude or Nova based on model ID
+            model_id = self.model_name
+
+            # Prepare request parameters
+            request_params = {
+                "modelId": model_id,
+                "messages": messages,
+                "inferenceConfig": {
+                    "maxTokens": 8192,
+                    "temperature": 0.7,
+                    "topP": 0.9,
+                },
+            }
+
+            if system:
+                request_params["system"] = system
+
+            async with self._aws_session.client(
+                service_name="bedrock-runtime", **self._aws_params
+            ) as client:
+                # Call Bedrock without streaming
+                response = await client.converse(**request_params)
+
+            # Extract the response text
+            if (
+                "output" in response
+                and "message" in response["output"]
+                and "content" in response["output"]["message"]
+            ):
+                content = response["output"]["message"]["content"]
+                if isinstance(content, list):
+                    for item in content:
+                        if item.get("text"):
+                            return item["text"]
+                elif isinstance(content, str):
+                    return content
+
+            return None
+
+        except Exception as e:
+            logger.error(f"Bedrock summary generation failed: {e}", exc_info=True)
+            return None
+
     async def _create_converse_stream(self, client, request_params):
         """Create converse stream with optional timeout and retry.
 
@@ -802,7 +874,7 @@ class AWSBedrockLLMService(LLMService):
         if self._retry_on_timeout:
             try:
                 response = await asyncio.wait_for(
-
+                    client.converse_stream(**request_params), timeout=self._retry_timeout_secs
                 )
                 return response
             except (ReadTimeoutError, asyncio.TimeoutError) as e:
@@ -1044,6 +1116,8 @@ class AWSBedrockLLMService(LLMService):
         context = None
         if isinstance(frame, OpenAILLMContextFrame):
             context = AWSBedrockLLMContext.upgrade_to_bedrock(frame.context)
+        if isinstance(frame, LLMContextFrame):
+            raise NotImplementedError("Universal LLMContext is not yet supported for AWS Bedrock.")
         elif isinstance(frame, LLMMessagesFrame):
             context = AWSBedrockLLMContext.from_messages(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):

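With the credential fallbacks above, AWSBedrockLLMService can now be constructed without passing keys explicitly; omitted parameters fall back to AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, and AWS_REGION (defaulting to us-east-1). A hedged sketch, assuming the constructor keeps its existing keyword names; the model id is a placeholder:

import os

from pipecat.services.aws.llm import AWSBedrockLLMService

# Credentials and region come from the environment rather than the constructor.
os.environ.setdefault("AWS_REGION", "us-east-1")

llm = AWSBedrockLLMService(
    model="us.anthropic.claude-3-5-haiku-20241022-v1:0",  # placeholder Bedrock model id
    # aws_access_key / aws_secret_key / aws_session_token omitted: resolved from env vars
)
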
pipecat/services/aws/tts.py
CHANGED
@@ -185,16 +185,6 @@ class AWSPollyTTSService(TTSService):
             "region_name": region or os.getenv("AWS_REGION", "us-east-1"),
         }
 
-        # Validate that we have the required credentials
-        if (
-            not self._aws_params["aws_access_key_id"]
-            or not self._aws_params["aws_secret_access_key"]
-        ):
-            raise ValueError(
-                "AWS credentials not found. Please provide them either through constructor parameters "
-                "or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables."
-            )
-
         self._aws_session = aioboto3.Session()
         self._settings = {
             "engine": params.engine,

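Removing this explicit check means AWSPollyTTSService no longer raises at construction when no keys are passed; missing values are left for the aioboto3 session to resolve through the usual botocore credential chain (environment variables, shared config files, or instance/role credentials). A hedged sketch, assuming the existing region and voice_id keyword names:

from pipecat.services.aws.tts import AWSPollyTTSService

# No explicit keys: botocore's default credential chain is used when the
# aioboto3 session creates the Polly client.
tts = AWSPollyTTSService(region="us-east-1", voice_id="Joanna")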