dv-pipecat-ai 0.0.85.dev7__py3-none-any.whl → 0.0.85.dev699__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/METADATA +78 -117
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/RECORD +158 -122
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +120 -87
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +12 -4
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +85 -24
- pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_response.py +6 -7
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/filters/stt_mute_filter.py +2 -0
- pipecat/processors/frame_processor.py +103 -17
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +209 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +4 -4
- pipecat/processors/user_idle_processor.py +3 -6
- pipecat/runner/run.py +270 -50
- pipecat/runner/types.py +2 -0
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +6 -9
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/asyncai/tts.py +2 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +122 -97
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +367 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1155 -0
- pipecat/services/aws/stt.py +1 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +13 -355
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/tts.py +2 -2
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +636 -0
- pipecat/services/elevenlabs/__init__.py +2 -1
- pipecat/services/elevenlabs/stt.py +254 -276
- pipecat/services/elevenlabs/tts.py +5 -5
- pipecat/services/fish/tts.py +2 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +2 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +2 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +16 -8
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/playht/tts.py +31 -4
- pipecat/services/rime/tts.py +3 -4
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +465 -0
- pipecat/services/sarvam/tts.py +2 -6
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +1 -7
- pipecat/services/stt_service.py +34 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +9 -9
- pipecat/services/vision_service.py +7 -6
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +17 -42
- pipecat/transports/base_output.py +42 -26
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +98 -19
- pipecat/transports/smallwebrtc/request_handler.py +204 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/string.py +12 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/salesforce/llm.py
ADDED
@@ -0,0 +1,465 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Salesforce Agent API LLM service implementation."""
+
+import asyncio
+import json
+import os
+import time
+from typing import Any, Dict, List, Optional, AsyncGenerator
+from dataclasses import dataclass
+
+import httpx
+from loguru import logger
+
+from pipecat.frames.frames import (
+    Frame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    LLMMessagesFrame,
+    LLMTextFrame,
+    LLMUpdateSettingsFrame,
+)
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
+    OpenAILLMContextFrame,
+)
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.llm_service import LLMService
+from pipecat.services.openai.llm import (
+    OpenAIAssistantContextAggregator,
+    OpenAIContextAggregatorPair,
+    OpenAIUserContextAggregator,
+)
+from pipecat.processors.aggregators.llm_response import (
+    LLMAssistantAggregatorParams,
+    LLMUserAggregatorParams,
+)
+from env_config import api_config
+
+
+@dataclass
+class SalesforceSessionInfo:
+    """Information about an active Salesforce Agent session."""
+
+    session_id: str
+    agent_id: str
+    created_at: float
+    last_used: float
+
+
+class SalesforceAgentLLMService(LLMService):
+    """Salesforce Agent API LLM service implementation.
+
+    This service integrates with Salesforce Agent API to provide conversational
+    AI capabilities using Salesforce's Agentforce platform. It follows the same
+    pattern as Vistaar LLM service for proper frame processing.
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str = "salesforce-agent",
+        session_timeout_secs: float = 3600.0,
+        **kwargs,
+    ):
+        """Initialize Salesforce Agent LLM service.
+
+        Reads configuration from environment variables:
+        - SALESFORCE_AGENT_ID: The Salesforce agent ID to interact with
+        - SALESFORCE_API_KEY: OAuth access token (optional, will use client credentials if not provided)
+        - SALESFORCE_ORG_DOMAIN: Salesforce org domain (e.g., https://myorg.my.salesforce.com)
+        - SALESFORCE_CLIENT_ID: Connected app client ID for OAuth
+        - SALESFORCE_CLIENT_SECRET: Connected app client secret for OAuth
+        - SALESFORCE_API_HOST: Salesforce API host (default: https://api.salesforce.com)
+
+        Args:
+            model: The model name (defaults to "salesforce-agent").
+            session_timeout_secs: Session timeout in seconds (default: 1 hour).
+            **kwargs: Additional arguments passed to parent LLMService.
+        """
+        # Initialize parent LLM service
+        super().__init__(**kwargs)
+
+        self._agent_id = api_config.SALESFORCE_AGENT_ID
+        self._api_key = api_config.SALESFORCE_API_KEY
+        self._org_domain = api_config.SALESFORCE_ORG_DOMAIN
+        self._client_id = api_config.SALESFORCE_CLIENT_ID
+        self._client_secret = api_config.SALESFORCE_CLIENT_SECRET
+        self._api_host = api_config.SALESFORCE_API_HOST
+
+
+        # Validate required environment variables
+        required_vars = {
+            "SALESFORCE_AGENT_ID": self._agent_id,
+            "SALESFORCE_ORG_DOMAIN": self._org_domain,
+            "SALESFORCE_CLIENT_ID": self._client_id,
+            "SALESFORCE_CLIENT_SECRET": self._client_secret,
+        }
+
+        missing_vars = [var for var, value in required_vars.items() if not value]
+        if missing_vars:
+            raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
+
+        logger.info(f"Salesforce LLM initialized - Agent ID: {self._agent_id}")
+
+        self._session_timeout_secs = session_timeout_secs
+
+        # Session management
+        self._sessions: Dict[str, SalesforceSessionInfo] = {}
+        self._current_session_id: Optional[str] = None
+
+        # HTTP client for API calls
+        self._http_client = httpx.AsyncClient(timeout=30.0)
+
+
+    async def __aenter__(self):
+        """Async context manager entry."""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        await self._cleanup_sessions()
+        await self._http_client.aclose()
+
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics."""
+        return True
+
+    async def _get_access_token(self) -> str:
+        """Get OAuth access token using client credentials."""
+        if self._api_key and not self._api_key.startswith("Bearer"):
+            return self._api_key
+
+        token_url = f"{self._org_domain}/services/oauth2/token"
+        data = {
+            "grant_type": "client_credentials",
+            "client_id": self._client_id,
+            "client_secret": self._client_secret,
+        }
+
+        try:
+            response = await self._http_client.post(token_url, data=data)
+            response.raise_for_status()
+            token_data = response.json()
+            return token_data["access_token"]
+        except Exception as e:
+            logger.error(f"Failed to get access token: {e}")
+            raise
+
+    async def _create_session(self) -> str:
+        """Create a new Salesforce Agent session."""
+        access_token = await self._get_access_token()
+        session_url = f"{self._api_host}/einstein/ai-agent/v1/agents/{self._agent_id}/sessions"
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+        }
+
+        external_session_key = f"pipecat-{int(time.time())}-{id(self)}"
+
+        payload = {
+            "externalSessionKey": external_session_key,
+            "instanceConfig": {"endpoint": self._org_domain},
+            "tz": "America/Los_Angeles",
+            "variables": [{"name": "$Context.EndUserLanguage", "type": "Text", "value": "en_US"}],
+            "featureSupport": "Streaming",
+            "streamingCapabilities": {"chunkTypes": ["Text"]},
+            "bypassUser": True,
+        }
+
+        try:
+            response = await self._http_client.post(session_url, headers=headers, json=payload)
+            response.raise_for_status()
+            session_data = response.json()
+            session_id = session_data["sessionId"]
+
+            # Store session info
+            current_time = time.time()
+            self._sessions[session_id] = SalesforceSessionInfo(
+                session_id=session_id,
+                agent_id=self._agent_id,
+                created_at=current_time,
+                last_used=current_time,
+            )
+
+            logger.debug(f"Created Salesforce Agent session: {session_id}")
+            return session_id
+
+        except Exception as e:
+            logger.error(f"Failed to create Salesforce Agent session: {e}")
+            raise
+
+    async def _get_or_create_session(self) -> str:
+        """Get existing session or create a new one."""
+        current_time = time.time()
+
+        # Check if current session is still valid
+        if self._current_session_id and self._current_session_id in self._sessions:
+            session = self._sessions[self._current_session_id]
+            if current_time - session.last_used < self._session_timeout_secs:
+                session.last_used = current_time
+                return self._current_session_id
+            else:
+                # Session expired, remove it
+                del self._sessions[self._current_session_id]
+                self._current_session_id = None
+
+        # Create new session
+        self._current_session_id = await self._create_session()
+        return self._current_session_id
+
+    async def _cleanup_sessions(self):
+        """Clean up expired sessions."""
+        current_time = time.time()
+        expired_sessions = []
+
+        for session_id, session in self._sessions.items():
+            if current_time - session.last_used > self._session_timeout_secs:
+                expired_sessions.append(session_id)
+
+        for session_id in expired_sessions:
+            try:
+                # End the session via API
+                access_token = await self._get_access_token()
+                url = f"{self._api_host}/einstein/ai-agent/v1/sessions/{session_id}"
+                headers = {
+                    "Authorization": f"Bearer {access_token}",
+                    "x-session-end-reason": "UserRequest",
+                }
+                await self._http_client.delete(url, headers=headers)
+            except Exception as e:
+                logger.warning(f"Failed to end session {session_id}: {e}")
+            finally:
+                del self._sessions[session_id]
+                if self._current_session_id == session_id:
+                    self._current_session_id = None
+
+    def _extract_user_message(self, context: OpenAILLMContext) -> str:
+        """Extract the last user message from context.
+
+        Similar to Vistaar pattern - extract only the most recent user message.
+
+        Args:
+            context: The OpenAI LLM context containing messages.
+
+        Returns:
+            The last user message as a string.
+        """
+        messages = context.get_messages()
+
+        # Find the last user message (iterate in reverse for efficiency)
+        for message in reversed(messages):
+            if message.get("role") == "user":
+                content = message.get("content", "")
+
+                # Handle content that might be a list (for multimodal messages)
+                if isinstance(content, list):
+                    text_parts = [
+                        item.get("text", "") for item in content if item.get("type") == "text"
+                    ]
+                    content = " ".join(text_parts)
+
+                if isinstance(content, str):
+                    return content.strip()
+
+        return ""
+
+    def _generate_sequence_id(self) -> int:
+        """Generate a sequence ID for the message."""
+        return int(time.time() * 1000) % 2147483647  # Keep within int32 range
+
+    async def _stream_salesforce_response(self, session_id: str, user_message: str) -> AsyncGenerator[str, None]:
+        """Stream response from Salesforce Agent API."""
+        access_token = await self._get_access_token()
+        url = f"{self._api_host}/einstein/ai-agent/v1/sessions/{session_id}/messages/stream"
+
+        headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+            "Accept": "text/event-stream",
+        }
+
+        message_data = {
+            "message": {
+                "sequenceId": self._generate_sequence_id(),
+                "type": "Text",
+                "text": user_message
+            },
+            "variables": [
+                {
+                    "name": "$Context.EndUserLanguage",
+                    "type": "Text",
+                    "value": "en_US"
+                }
+            ]
+        }
+
+        try:
+            logger.info(f"🌐 Salesforce API request: {user_message[:50]}...")
+            async with self._http_client.stream("POST", url, headers=headers, json=message_data) as response:
+                response.raise_for_status()
+
+                async for line in response.aiter_lines():
+                    if not line:
+                        continue
+
+                    # Parse SSE format
+                    if line.startswith("data: "):
+                        try:
+                            data = json.loads(line[6:])
+                            message = data.get("message", {})
+                            message_type = message.get("type")
+
+                            if message_type == "TextChunk":
+                                content = message.get("text", "") or message.get("message", "")
+                                if content:
+                                    yield content
+                            elif message_type == "EndOfTurn":
+                                logger.info("🏁 Salesforce response complete")
+                                break
+                            elif message_type == "Inform":
+                                # Skip INFORM events to avoid duplication
+                                continue
+
+                        except json.JSONDecodeError as e:
+                            logger.warning(f"JSON decode error: {e}, line: {line}")
+                            continue
+
+        except Exception as e:
+            logger.error(f"Failed to stream from Salesforce Agent API: {e}")
+            raise
+
+    async def _process_context(self, context: OpenAILLMContext):
+        """Process the LLM context and generate streaming response.
+
+        Following Vistaar pattern for simple, direct processing.
+
+        Args:
+            context: The OpenAI LLM context containing messages to process.
+        """
+        logger.info(f"🔄 Salesforce processing context with {len(context.get_messages())} messages")
+
+        # Extract user message from context first
+        user_message = self._extract_user_message(context)
+
+        if not user_message:
+            logger.warning("Salesforce: No user message found in context")
+            return
+
+        try:
+            logger.info(f"🎯 Salesforce extracted query: {user_message}")
+
+            # Start response
+            await self.push_frame(LLMFullResponseStartFrame())
+            await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
+            await self.start_processing_metrics()
+            await self.start_ttfb_metrics()
+
+            # Get or create session
+            session_id = await self._get_or_create_session()
+
+            first_chunk = True
+
+            # Stream the response
+            async for text_chunk in self._stream_salesforce_response(session_id, user_message):
+                if first_chunk:
+                    await self.stop_ttfb_metrics()
+                    first_chunk = False
+
+                # Push each text chunk as it arrives
+                await self.push_frame(LLMTextFrame(text=text_chunk))
+
+        except Exception as e:
+            logger.error(f"Salesforce context processing error: {type(e).__name__}: {str(e)}")
+            import traceback
+            logger.error(f"Salesforce traceback: {traceback.format_exc()}")
+            raise
+        finally:
+            await self.stop_processing_metrics()
+            await self.push_frame(LLMFullResponseEndFrame())
+            await self.push_frame(LLMFullResponseEndFrame(), FrameDirection.UPSTREAM)
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames for LLM completion requests.
+
+        Following the exact Vistaar pattern - call super() for non-context frames only.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame processing.
+        """
+        context = None
+        if isinstance(frame, OpenAILLMContextFrame):
+            context = frame.context
+            logger.info(f"🔍 Received OpenAILLMContextFrame with {len(context.get_messages())} messages")
+        elif isinstance(frame, LLMMessagesFrame):
+            context = OpenAILLMContext.from_messages(frame.messages)
+            logger.info(f"🔍 Received LLMMessagesFrame with {len(frame.messages)} messages")
+        elif isinstance(frame, LLMUpdateSettingsFrame):
+            # Call super for settings frames and update settings
+            await super().process_frame(frame, direction)
+            settings = frame.settings
+            logger.debug(f"Updated Salesforce settings: {settings}")
+        else:
+            # For non-context frames, call super and push them downstream
+            await super().process_frame(frame, direction)
+            await self.push_frame(frame, direction)
+
+        if context:
+            try:
+                await self._process_context(context)
+            except httpx.TimeoutException:
+                logger.error("Timeout while processing Salesforce request")
+                await self._call_event_handler("on_completion_timeout")
+            except Exception as e:
+                logger.error(f"Error processing Salesforce request: {e}")
+                raise
+
+    def create_context_aggregator(
+        self,
+        context: OpenAILLMContext,
+        *,
+        user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(),
+        assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(),
+    ) -> OpenAIContextAggregatorPair:
+        """Create context aggregators for Salesforce LLM.
+
+        Since Salesforce uses OpenAI-compatible message format, we reuse OpenAI's
+        context aggregators directly, similar to how Vistaar works.
+
+        Args:
+            context: The LLM context to create aggregators for.
+            user_params: Parameters for user message aggregation.
+            assistant_params: Parameters for assistant message aggregation.
+
+        Returns:
+            OpenAIContextAggregatorPair: A pair of OpenAI context aggregators,
+            compatible with Salesforce's OpenAI-like message format.
+        """
+        context.set_llm_adapter(self.get_llm_adapter())
+        user = OpenAIUserContextAggregator(context, params=user_params)
+        assistant = OpenAIAssistantContextAggregator(context, params=assistant_params)
+        return OpenAIContextAggregatorPair(_user=user, _assistant=assistant)
+
+    def get_llm_adapter(self):
+        """Get the LLM adapter for this service."""
+        from pipecat.adapters.services.open_ai_adapter import OpenAILLMAdapter
+        return OpenAILLMAdapter()
+
+    async def close(self):
+        """Close the HTTP client when the service is destroyed."""
+        await self._cleanup_sessions()
+        await self._http_client.aclose()
+
+    def __del__(self):
+        """Ensure the client is closed on deletion."""
+        try:
+            asyncio.create_task(self._http_client.aclose())
+        except:
+            pass
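For orientation, a minimal wiring sketch for this new service follows. It is not part of the diff: build_pipeline and its transport, stt, and tts arguments are hypothetical placeholders, and env_config.api_config must already carry the SALESFORCE_* settings the constructor validates.

# Hypothetical wiring sketch, not part of this release. Assumes concrete
# transport/STT/TTS objects are supplied by the caller and that
# env_config.api_config holds the SALESFORCE_* settings listed above.
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.salesforce.llm import SalesforceAgentLLMService


def build_pipeline(transport, stt, tts) -> Pipeline:
    llm = SalesforceAgentLLMService(session_timeout_secs=1800.0)
    context = OpenAILLMContext()
    aggregators = llm.create_context_aggregator(context)
    return Pipeline(
        [
            transport.input(),
            stt,
            aggregators.user(),  # aggregates final transcriptions into the context
            llm,  # streams Salesforce Agent replies as LLMTextFrames
            tts,
            transport.output(),
            aggregators.assistant(),  # records bot output back into the context
        ]
    )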
pipecat/services/sarvam/tts.py
CHANGED
@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     StartFrame,
     StartInterruptionFrame,
@@ -455,7 +456,7 @@ class SarvamTTSService(InterruptibleTTSService):
             direction: The direction to push the frame.
         """
         await super().push_frame(frame, direction)
-        if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
+        if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
            self._started = False

    async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -632,11 +633,6 @@ class SarvamTTSService(InterruptibleTTSService):
        """
        logger.debug(f"Generating TTS: [{text}]")

-        # Validate text input
-        if not text or not isinstance(text, str) or not text.strip():
-            logger.warning(f"Invalid text input for Sarvam TTS run_tts: {repr(text)}")
-            return
-
        try:
            if not self._websocket or self._websocket.state is State.CLOSED:
                await self._connect()
pipecat/services/simli/video.py
CHANGED
@@ -15,8 +15,8 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InterruptionFrame,
     OutputImageRawFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStoppedFrame,
     UserStartedSpeakingFrame,
@@ -179,7 +179,7 @@ class SimliVideoService(FrameProcessor):
             return
         elif isinstance(frame, (EndFrame, CancelFrame)):
             await self._stop()
-        elif isinstance(frame, (StartInterruptionFrame, UserStartedSpeakingFrame)):
+        elif isinstance(frame, (InterruptionFrame, UserStartedSpeakingFrame)):
             if not self._previously_interrupted:
                 await self._simli_client.clearBuffer()
             self._previously_interrupted = self._is_trinity_avatar
pipecat/services/speechmatics/stt.py
CHANGED
@@ -19,7 +19,6 @@ from loguru import logger
 from pydantic import BaseModel
 
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     CancelFrame,
     EndFrame,
     ErrorFrame,
@@ -749,14 +748,13 @@ class SpeechmaticsSTTService(STTService):
             return
 
         # Frames to send
-        upstream_frames: list[Frame] = []
         downstream_frames: list[Frame] = []
 
         # If VAD is enabled, then send a speaking frame
         if self._params.enable_vad and not self._is_speaking:
             logger.debug("User started speaking")
             self._is_speaking = True
-
+            await self.push_interruption_task_frame_and_wait()
             downstream_frames += [UserStartedSpeakingFrame()]
 
         # If final, then re-parse into TranscriptionFrame
@@ -794,10 +792,6 @@ class SpeechmaticsSTTService(STTService):
             self._is_speaking = False
             downstream_frames += [UserStoppedSpeakingFrame()]
 
-        # Send UPSTREAM frames
-        for frame in upstream_frames:
-            await self.push_frame(frame, FrameDirection.UPSTREAM)
-
        # Send the DOWNSTREAM frames
        for frame in downstream_frames:
            await self.push_frame(frame, FrameDirection.DOWNSTREAM)
pipecat/services/stt_service.py
CHANGED
@@ -16,6 +16,7 @@ from loguru import logger
 from pipecat.frames.frames import (
     AudioRawFrame,
     BotStoppedSpeakingFrame,
+    ErrorFrame,
     Frame,
     StartFrame,
     STTMuteFrame,
@@ -25,6 +26,7 @@ from pipecat.frames.frames import (
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
+from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
 
 
@@ -298,3 +300,35 @@ class SegmentedSTTService(STTService):
         if not self._user_speaking and len(self._audio_buffer) > self._audio_buffer_size_1s:
             discarded = len(self._audio_buffer) - self._audio_buffer_size_1s
             self._audio_buffer = self._audio_buffer[discarded:]
+
+
+class WebsocketSTTService(STTService, WebsocketService):
+    """Base class for websocket-based STT services.
+
+    Combines STT functionality with websocket connectivity, providing automatic
+    error handling and reconnection capabilities.
+
+    Event handlers:
+        on_connection_error: Called when a websocket connection error occurs.
+
+    Example::
+
+        @stt.event_handler("on_connection_error")
+        async def on_connection_error(stt: STTService, error: str):
+            logger.error(f"STT connection error: {error}")
+    """
+
+    def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
+        """Initialize the Websocket STT service.
+
+        Args:
+            reconnect_on_error: Whether to automatically reconnect on websocket errors.
+            **kwargs: Additional arguments passed to parent classes.
+        """
+        STTService.__init__(self, **kwargs)
+        WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
+        self._register_event_handler("on_connection_error")
+
+    async def _report_error(self, error: ErrorFrame):
+        await self._call_event_handler("on_connection_error", error.error)
+        await self.push_error(error)
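To illustrate the subclassing pattern this new base class is built for (the provider name below is hypothetical, not part of the diff), a concrete service routes keyword arguments through the combined initializer and inherits the on_connection_error reporting:

# Hypothetical subclass sketch; MyProviderSTTService is illustrative only.
class MyProviderSTTService(WebsocketSTTService):
    def __init__(self, *, api_key: str, **kwargs):
        # Resolves to WebsocketSTTService.__init__, which initializes both
        # the STTService and WebsocketService parents.
        super().__init__(**kwargs)
        self._api_key = api_key


stt = MyProviderSTTService(api_key="...", reconnect_on_error=True)


@stt.event_handler("on_connection_error")
async def on_connection_error(stt, error: str):
    logger.error(f"STT connection error: {error}")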
pipecat/services/tavus/video.py
CHANGED
@@ -23,12 +23,12 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InterruptionFrame,
     OutputAudioRawFrame,
     OutputImageRawFrame,
     OutputTransportReadyFrame,
     SpeechOutputAudioRawFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
 )
@@ -222,7 +222,7 @@ class TavusVideoService(AIService):
         """
         await super().process_frame(frame, direction)
 
-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
             await self._handle_interruptions()
             await self.push_frame(frame, direction)
         elif isinstance(frame, TTSAudioRawFrame):