dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/vision_service.py
CHANGED
|
@@ -14,7 +14,8 @@ visual content.
|
|
|
14
14
|
from abc import abstractmethod
|
|
15
15
|
from typing import AsyncGenerator
|
|
16
16
|
|
|
17
|
-
from pipecat.frames.frames import Frame,
|
|
17
|
+
from pipecat.frames.frames import Frame, LLMContextFrame
|
|
18
|
+
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
18
19
|
from pipecat.processors.frame_processor import FrameDirection
|
|
19
20
|
from pipecat.services.ai_service import AIService
|
|
20
21
|
|
|
@@ -37,15 +38,15 @@ class VisionService(AIService):
|
|
|
37
38
|
self._describe_text = None
|
|
38
39
|
|
|
39
40
|
@abstractmethod
|
|
40
|
-
async def run_vision(self,
|
|
41
|
-
"""Process
|
|
41
|
+
async def run_vision(self, context: LLMContext) -> AsyncGenerator[Frame, None]:
|
|
42
|
+
"""Process the latest image in the context and generate results.
|
|
42
43
|
|
|
43
44
|
This method must be implemented by subclasses to provide actual computer
|
|
44
45
|
vision functionality such as image description, object detection, or
|
|
45
46
|
visual question answering.
|
|
46
47
|
|
|
47
48
|
Args:
|
|
48
|
-
|
|
49
|
+
context: The context to process, containing image data.
|
|
49
50
|
|
|
50
51
|
Yields:
|
|
51
52
|
Frame: Frames containing the vision analysis results, typically TextFrame
|
|
@@ -65,9 +66,9 @@ class VisionService(AIService):
|
|
|
65
66
|
"""
|
|
66
67
|
await super().process_frame(frame, direction)
|
|
67
68
|
|
|
68
|
-
if isinstance(frame,
|
|
69
|
+
if isinstance(frame, LLMContextFrame):
|
|
69
70
|
await self.start_processing_metrics()
|
|
70
|
-
await self.process_generator(self.run_vision(frame))
|
|
71
|
+
await self.process_generator(self.run_vision(frame.context))
|
|
71
72
|
await self.stop_processing_metrics()
|
|
72
73
|
else:
|
|
73
74
|
await self.push_frame(frame, direction)
|
pipecat/services/vistaar/llm.py
CHANGED
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
|
+
import random
|
|
5
6
|
import time
|
|
6
7
|
import uuid
|
|
7
8
|
from dataclasses import dataclass
|
|
8
|
-
from typing import Any, AsyncGenerator, Dict, Optional
|
|
9
|
+
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
9
10
|
from urllib.parse import urlencode
|
|
10
11
|
|
|
11
12
|
import httpx
|
|
@@ -13,13 +14,15 @@ from loguru import logger
|
|
|
13
14
|
from pydantic import BaseModel, Field
|
|
14
15
|
|
|
15
16
|
from pipecat.frames.frames import (
|
|
17
|
+
CancelFrame,
|
|
18
|
+
EndFrame,
|
|
16
19
|
Frame,
|
|
20
|
+
InterruptionFrame,
|
|
17
21
|
LLMFullResponseEndFrame,
|
|
18
22
|
LLMFullResponseStartFrame,
|
|
19
23
|
LLMMessagesFrame,
|
|
20
24
|
LLMTextFrame,
|
|
21
25
|
LLMUpdateSettingsFrame,
|
|
22
|
-
StartInterruptionFrame,
|
|
23
26
|
)
|
|
24
27
|
from pipecat.processors.aggregators.llm_response import (
|
|
25
28
|
LLMAssistantAggregatorParams,
|
|
@@ -53,12 +56,13 @@ class VistaarLLMService(LLMService):
|
|
|
53
56
|
source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
|
|
54
57
|
target_lang: Target language code for responses.
|
|
55
58
|
session_id: Session ID for maintaining conversation context.
|
|
56
|
-
extra: Additional model-specific parameters
|
|
59
|
+
extra: Additional model-specific parameters
|
|
57
60
|
"""
|
|
58
61
|
|
|
59
62
|
source_lang: Optional[str] = Field(default="mr")
|
|
60
63
|
target_lang: Optional[str] = Field(default="mr")
|
|
61
64
|
session_id: Optional[str] = Field(default=None)
|
|
65
|
+
pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
|
|
62
66
|
extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
|
63
67
|
|
|
64
68
|
def __init__(
|
|
@@ -68,7 +72,6 @@ class VistaarLLMService(LLMService):
|
|
|
68
72
|
params: Optional[InputParams] = None,
|
|
69
73
|
timeout: float = 30.0,
|
|
70
74
|
interim_timeout: float = 5.0,
|
|
71
|
-
interim_message: str = "एक क्षण थांबा, मी बघतो. ",
|
|
72
75
|
**kwargs,
|
|
73
76
|
):
|
|
74
77
|
"""Initialize Vistaar LLM service.
|
|
@@ -77,8 +80,7 @@ class VistaarLLMService(LLMService):
|
|
|
77
80
|
base_url: The base URL for Vistaar API. Defaults to "https://vistaar.kenpath.ai/api".
|
|
78
81
|
params: Input parameters for model configuration and behavior.
|
|
79
82
|
timeout: Request timeout in seconds. Defaults to 30.0 seconds.
|
|
80
|
-
interim_timeout: Time in seconds before sending interim message. Defaults to
|
|
81
|
-
interim_message: Message to send if API takes longer than interim_timeout. Defaults to "एक क्षण थांबा, मी बघतो. ".
|
|
83
|
+
interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
|
|
82
84
|
**kwargs: Additional arguments passed to the parent LLMService.
|
|
83
85
|
"""
|
|
84
86
|
super().__init__(**kwargs)
|
|
@@ -89,10 +91,10 @@ class VistaarLLMService(LLMService):
|
|
|
89
91
|
self._source_lang = params.source_lang
|
|
90
92
|
self._target_lang = params.target_lang
|
|
91
93
|
self._session_id = params.session_id or str(uuid.uuid4())
|
|
94
|
+
self._pre_query_response_phrases = params.pre_query_response_phrases or []
|
|
92
95
|
self._extra = params.extra if isinstance(params.extra, dict) else {}
|
|
93
96
|
self._timeout = timeout
|
|
94
97
|
self._interim_timeout = interim_timeout
|
|
95
|
-
self._interim_message = interim_message
|
|
96
98
|
|
|
97
99
|
# Create an async HTTP client
|
|
98
100
|
self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
|
|
@@ -103,6 +105,8 @@ class VistaarLLMService(LLMService):
|
|
|
103
105
|
self._partial_response = [] # Track what was actually sent before interruption
|
|
104
106
|
self._interim_sent = False # Track if interim message was sent
|
|
105
107
|
self._interim_task = None # Track interim message task
|
|
108
|
+
self._interim_completion_event = asyncio.Event() # Track interim message completion
|
|
109
|
+
self._interim_in_progress = False # Track if interim message is being spoken
|
|
106
110
|
|
|
107
111
|
logger.info(
|
|
108
112
|
f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
|
|
@@ -161,6 +165,10 @@ class VistaarLLMService(LLMService):
|
|
|
161
165
|
# Set interruption flag
|
|
162
166
|
self._is_interrupted = True
|
|
163
167
|
|
|
168
|
+
# Reset interim state on interruption
|
|
169
|
+
self._interim_in_progress = False
|
|
170
|
+
self._interim_completion_event.set() # Unblock any waiting LLM responses
|
|
171
|
+
|
|
164
172
|
# Cancel interim message task if active
|
|
165
173
|
await self._cancel_interim_message_task(
|
|
166
174
|
"Cancelled interim message task - handling interruption"
|
|
@@ -193,11 +201,28 @@ class VistaarLLMService(LLMService):
|
|
|
193
201
|
if not self._is_interrupted and not self._interim_sent:
|
|
194
202
|
logger.info(f"Sending interim message after {self._interim_timeout}s timeout")
|
|
195
203
|
self._interim_sent = True
|
|
196
|
-
|
|
204
|
+
self._interim_in_progress = True
|
|
205
|
+
|
|
206
|
+
# Use random selection from pre_query_response_phrases if available, otherwise fallback to default
|
|
207
|
+
if self._pre_query_response_phrases:
|
|
208
|
+
message = random.choice(self._pre_query_response_phrases)
|
|
209
|
+
else:
|
|
210
|
+
message = "एक क्षण थांबा, मी बघतो. "
|
|
211
|
+
|
|
212
|
+
await self.push_frame(LLMTextFrame(text=message))
|
|
213
|
+
|
|
214
|
+
# Wait for estimated TTS duration before marking as complete
|
|
215
|
+
estimated_tts_duration = max(2.0, len(message) * 0.08) # ~80ms per character
|
|
216
|
+
logger.info(f"Waiting {estimated_tts_duration:.2f}s for interim TTS completion")
|
|
217
|
+
await asyncio.sleep(estimated_tts_duration)
|
|
197
218
|
except asyncio.CancelledError:
|
|
198
219
|
logger.debug("Interim message task cancelled")
|
|
199
220
|
except Exception as e:
|
|
200
221
|
logger.error(f"Error sending interim message: {e}")
|
|
222
|
+
finally:
|
|
223
|
+
# Signal that interim message handling is complete
|
|
224
|
+
self._interim_completion_event.set()
|
|
225
|
+
self._interim_in_progress = False
|
|
201
226
|
|
|
202
227
|
async def _stream_response(self, query: str) -> AsyncGenerator[str, None]:
|
|
203
228
|
"""Stream response from Vistaar API using Server-Sent Events.
|
|
@@ -231,6 +256,8 @@ class VistaarLLMService(LLMService):
|
|
|
231
256
|
self._is_interrupted = False
|
|
232
257
|
self._partial_response = []
|
|
233
258
|
self._interim_sent = False
|
|
259
|
+
self._interim_in_progress = False
|
|
260
|
+
self._interim_completion_event.clear() # Reset the event for new request
|
|
234
261
|
|
|
235
262
|
try:
|
|
236
263
|
# Use httpx to handle SSE streaming
|
|
@@ -291,6 +318,7 @@ class VistaarLLMService(LLMService):
|
|
|
291
318
|
|
|
292
319
|
# Start response
|
|
293
320
|
await self.push_frame(LLMFullResponseStartFrame())
|
|
321
|
+
await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
|
|
294
322
|
await self.start_processing_metrics()
|
|
295
323
|
await self.start_ttfb_metrics()
|
|
296
324
|
|
|
@@ -307,6 +335,15 @@ class VistaarLLMService(LLMService):
|
|
|
307
335
|
if first_chunk:
|
|
308
336
|
await self.stop_ttfb_metrics()
|
|
309
337
|
first_chunk = False
|
|
338
|
+
|
|
339
|
+
# Wait for interim message to complete if it was sent and is in progress
|
|
340
|
+
if self._interim_sent:
|
|
341
|
+
logger.debug(
|
|
342
|
+
"Waiting for interim message completion before sending LLM response"
|
|
343
|
+
)
|
|
344
|
+
await self._interim_completion_event.wait()
|
|
345
|
+
logger.debug("Interim message completed, proceeding with LLM response")
|
|
346
|
+
|
|
310
347
|
# Cancel interim message task since we got first response
|
|
311
348
|
await self._cancel_interim_message_task(
|
|
312
349
|
"Cancelled interim message task - got first response"
|
|
@@ -334,6 +371,7 @@ class VistaarLLMService(LLMService):
|
|
|
334
371
|
)
|
|
335
372
|
await self.stop_processing_metrics()
|
|
336
373
|
await self.push_frame(LLMFullResponseEndFrame())
|
|
374
|
+
await self.push_frame(LLMFullResponseEndFrame(), FrameDirection.UPSTREAM)
|
|
337
375
|
|
|
338
376
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
339
377
|
"""Process frames for LLM completion requests.
|
|
@@ -353,7 +391,7 @@ class VistaarLLMService(LLMService):
|
|
|
353
391
|
)
|
|
354
392
|
await self.push_frame(frame, direction)
|
|
355
393
|
return
|
|
356
|
-
elif isinstance(frame, StartInterruptionFrame):
|
|
394
|
+
elif isinstance(frame, InterruptionFrame):
|
|
357
395
|
await self._handle_interruption()
|
|
358
396
|
await self.push_frame(frame, direction)
|
|
359
397
|
return
|
|
@@ -426,3 +464,7 @@ class VistaarLLMService(LLMService):
|
|
|
426
464
|
await self.cancel_task(self._interim_task)
|
|
427
465
|
self._interim_task = None
|
|
428
466
|
logger.debug(message)
|
|
467
|
+
|
|
468
|
+
def can_generate_metrics(self) -> bool:
|
|
469
|
+
"""Check if this service can generate processing metrics."""
|
|
470
|
+
return True
|
pipecat/tests/utils.py
CHANGED
|
@@ -128,7 +128,7 @@ async def run_test(
|
|
|
128
128
|
expected_up_frames: Optional[Sequence[type]] = None,
|
|
129
129
|
ignore_start: bool = True,
|
|
130
130
|
observers: Optional[List[BaseObserver]] = None,
|
|
131
|
-
|
|
131
|
+
pipeline_params: Optional[PipelineParams] = None,
|
|
132
132
|
send_end_frame: bool = True,
|
|
133
133
|
) -> Tuple[Sequence[Frame], Sequence[Frame]]:
|
|
134
134
|
"""Run a test pipeline with the specified processor and validate frame flow.
|
|
@@ -144,7 +144,7 @@ async def run_test(
|
|
|
144
144
|
expected_up_frames: Expected frame types flowing upstream (optional).
|
|
145
145
|
ignore_start: Whether to ignore StartFrames in frame validation.
|
|
146
146
|
observers: Optional list of observers to attach to the pipeline.
|
|
147
|
-
|
|
147
|
+
pipeline_params: Optional pipeline parameters.
|
|
148
148
|
send_end_frame: Whether to send an EndFrame at the end of the test.
|
|
149
149
|
|
|
150
150
|
Returns:
|
|
@@ -154,7 +154,7 @@ async def run_test(
|
|
|
154
154
|
AssertionError: If the received frames don't match the expected frame types.
|
|
155
155
|
"""
|
|
156
156
|
observers = observers or []
|
|
157
|
-
|
|
157
|
+
pipeline_params = pipeline_params or PipelineParams()
|
|
158
158
|
|
|
159
159
|
received_up = asyncio.Queue()
|
|
160
160
|
received_down = asyncio.Queue()
|
|
@@ -173,7 +173,7 @@ async def run_test(
|
|
|
173
173
|
|
|
174
174
|
task = PipelineTask(
|
|
175
175
|
pipeline,
|
|
176
|
-
params=
|
|
176
|
+
params=pipeline_params,
|
|
177
177
|
observers=observers,
|
|
178
178
|
cancel_on_idle_timeout=False,
|
|
179
179
|
)
|
pipecat/transcriptions/language.py
CHANGED
|
@@ -68,6 +68,9 @@ class Language(StrEnum):
|
|
|
68
68
|
AS = "as"
|
|
69
69
|
AS_IN = "as-IN"
|
|
70
70
|
|
|
71
|
+
# Asturian
|
|
72
|
+
AST = "ast"
|
|
73
|
+
|
|
71
74
|
# Azerbaijani
|
|
72
75
|
AZ = "az"
|
|
73
76
|
AZ_AZ = "az-AZ"
|
|
@@ -101,6 +104,9 @@ class Language(StrEnum):
|
|
|
101
104
|
CA = "ca"
|
|
102
105
|
CA_ES = "ca-ES"
|
|
103
106
|
|
|
107
|
+
# Cebuano
|
|
108
|
+
CEB = "ceb"
|
|
109
|
+
|
|
104
110
|
# Mandarin Chinese
|
|
105
111
|
CMN = "cmn"
|
|
106
112
|
CMN_CN = "cmn-CN"
|
|
@@ -185,6 +191,9 @@ class Language(StrEnum):
|
|
|
185
191
|
FA = "fa"
|
|
186
192
|
FA_IR = "fa-IR"
|
|
187
193
|
|
|
194
|
+
# Fulah
|
|
195
|
+
FF = "ff"
|
|
196
|
+
|
|
188
197
|
# Finnish
|
|
189
198
|
FI = "fi"
|
|
190
199
|
FI_FI = "fi-FI"
|
|
@@ -251,6 +260,9 @@ class Language(StrEnum):
|
|
|
251
260
|
ID = "id"
|
|
252
261
|
ID_ID = "id-ID"
|
|
253
262
|
|
|
263
|
+
# Igbo
|
|
264
|
+
IG = "ig"
|
|
265
|
+
|
|
254
266
|
# Icelandic
|
|
255
267
|
IS = "is"
|
|
256
268
|
IS_IS = "is-IS"
|
|
@@ -279,6 +291,9 @@ class Language(StrEnum):
|
|
|
279
291
|
KA = "ka"
|
|
280
292
|
KA_GE = "ka-GE"
|
|
281
293
|
|
|
294
|
+
# Kabuverdianu
|
|
295
|
+
KEA = "kea"
|
|
296
|
+
|
|
282
297
|
# Kazakh
|
|
283
298
|
KK = "kk"
|
|
284
299
|
KK_KZ = "kk-KZ"
|
|
@@ -295,6 +310,13 @@ class Language(StrEnum):
|
|
|
295
310
|
KO = "ko"
|
|
296
311
|
KO_KR = "ko-KR"
|
|
297
312
|
|
|
313
|
+
# Kurdish
|
|
314
|
+
KU = "ku"
|
|
315
|
+
|
|
316
|
+
# Kyrgyz
|
|
317
|
+
KY = "ky"
|
|
318
|
+
KY_KG = "ky-KG"
|
|
319
|
+
|
|
298
320
|
# Latin
|
|
299
321
|
LA = "la"
|
|
300
322
|
|
|
@@ -312,6 +334,12 @@ class Language(StrEnum):
|
|
|
312
334
|
LT = "lt"
|
|
313
335
|
LT_LT = "lt-LT"
|
|
314
336
|
|
|
337
|
+
# Ganda
|
|
338
|
+
LG = "lg"
|
|
339
|
+
|
|
340
|
+
# Luo
|
|
341
|
+
LUO = "luo"
|
|
342
|
+
|
|
315
343
|
# Latvian
|
|
316
344
|
LV = "lv"
|
|
317
345
|
LV_LV = "lv-LV"
|
|
@@ -366,6 +394,12 @@ class Language(StrEnum):
|
|
|
366
394
|
NL_BE = "nl-BE"
|
|
367
395
|
NL_NL = "nl-NL"
|
|
368
396
|
|
|
397
|
+
# Northern Sotho
|
|
398
|
+
NSO = "nso"
|
|
399
|
+
|
|
400
|
+
# Chichewa
|
|
401
|
+
NY = "ny"
|
|
402
|
+
|
|
369
403
|
# Occitan
|
|
370
404
|
OC = "oc"
|
|
371
405
|
|
|
@@ -484,6 +518,9 @@ class Language(StrEnum):
|
|
|
484
518
|
UK = "uk"
|
|
485
519
|
UK_UA = "uk-UA"
|
|
486
520
|
|
|
521
|
+
# Umbundu
|
|
522
|
+
UMB = "umb"
|
|
523
|
+
|
|
487
524
|
# Urdu
|
|
488
525
|
UR = "ur"
|
|
489
526
|
UR_IN = "ur-IN"
|
|
@@ -497,6 +534,9 @@ class Language(StrEnum):
|
|
|
497
534
|
VI = "vi"
|
|
498
535
|
VI_VN = "vi-VN"
|
|
499
536
|
|
|
537
|
+
# Wolof
|
|
538
|
+
WO = "wo"
|
|
539
|
+
|
|
500
540
|
# Wu Chinese
|
|
501
541
|
WUU = "wuu"
|
|
502
542
|
WUU_CN = "wuu-CN"
|
|
@@ -507,7 +547,7 @@ class Language(StrEnum):
|
|
|
507
547
|
# Yoruba
|
|
508
548
|
YO = "yo"
|
|
509
549
|
|
|
510
|
-
# Yue Chinese
|
|
550
|
+
# Yue Chinese (Cantonese)
|
|
511
551
|
YUE = "yue"
|
|
512
552
|
YUE_CN = "yue-CN"
|
|
513
553
|
|
pipecat/transports/base_input.py
CHANGED
|
@@ -11,7 +11,6 @@ input processing, including VAD, turn analysis, and interruption management.
|
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
import asyncio
|
|
14
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
15
14
|
from typing import Optional
|
|
16
15
|
|
|
17
16
|
from loguru import logger
|
|
@@ -22,7 +21,6 @@ from pipecat.audio.turn.base_turn_analyzer import (
|
|
|
22
21
|
)
|
|
23
22
|
from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADState
|
|
24
23
|
from pipecat.frames.frames import (
|
|
25
|
-
BotInterruptionFrame,
|
|
26
24
|
BotStartedSpeakingFrame,
|
|
27
25
|
BotStoppedSpeakingFrame,
|
|
28
26
|
CancelFrame,
|
|
@@ -36,7 +34,6 @@ from pipecat.frames.frames import (
|
|
|
36
34
|
MetricsFrame,
|
|
37
35
|
SpeechControlParamsFrame,
|
|
38
36
|
StartFrame,
|
|
39
|
-
StartInterruptionFrame,
|
|
40
37
|
StopFrame,
|
|
41
38
|
SystemFrame,
|
|
42
39
|
UserSpeakingFrame,
|
|
@@ -81,10 +78,6 @@ class BaseInputTransport(FrameProcessor):
|
|
|
81
78
|
# Track user speaking state for interruption logic
|
|
82
79
|
self._user_speaking = False
|
|
83
80
|
|
|
84
|
-
# We read audio from a single queue one at a time and we then run VAD in
|
|
85
|
-
# a thread. Therefore, only one thread should be necessary.
|
|
86
|
-
self._executor = ThreadPoolExecutor(max_workers=1)
|
|
87
|
-
|
|
88
81
|
# Task to process incoming audio (VAD) and push audio frames downstream
|
|
89
82
|
# if passthrough is enabled.
|
|
90
83
|
self._audio_task = None
|
|
@@ -239,6 +232,9 @@ class BaseInputTransport(FrameProcessor):
|
|
|
239
232
|
"""
|
|
240
233
|
# Cancel and wait for the audio input task to finish.
|
|
241
234
|
await self._cancel_audio_task()
|
|
235
|
+
# Stop audio filter.
|
|
236
|
+
if self._params.audio_in_filter:
|
|
237
|
+
await self._params.audio_in_filter.stop()
|
|
242
238
|
|
|
243
239
|
async def set_transport_ready(self, frame: StartFrame):
|
|
244
240
|
"""Called when the transport is ready to stream.
|
|
@@ -289,8 +285,6 @@ class BaseInputTransport(FrameProcessor):
|
|
|
289
285
|
elif isinstance(frame, CancelFrame):
|
|
290
286
|
await self.cancel(frame)
|
|
291
287
|
await self.push_frame(frame, direction)
|
|
292
|
-
elif isinstance(frame, BotInterruptionFrame):
|
|
293
|
-
await self._handle_bot_interruption(frame)
|
|
294
288
|
elif isinstance(frame, BotStartedSpeakingFrame):
|
|
295
289
|
await self._handle_bot_started_speaking(frame)
|
|
296
290
|
await self.push_frame(frame, direction)
|
|
@@ -298,15 +292,14 @@ class BaseInputTransport(FrameProcessor):
|
|
|
298
292
|
await self._handle_bot_stopped_speaking(frame)
|
|
299
293
|
await self.push_frame(frame, direction)
|
|
300
294
|
elif isinstance(frame, EmulateUserStartedSpeakingFrame):
|
|
301
|
-
logger.debug("Emulating user started speaking")
|
|
295
|
+
self.logger.debug("Emulating user started speaking")
|
|
302
296
|
await self._handle_user_interruption(VADState.SPEAKING, emulated=True)
|
|
303
297
|
elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
|
|
304
|
-
logger.debug("Emulating user stopped speaking")
|
|
298
|
+
self.logger.debug("Emulating user stopped speaking")
|
|
305
299
|
await self._handle_user_interruption(VADState.QUIET, emulated=True)
|
|
306
|
-
# All other system frames
|
|
307
300
|
elif isinstance(frame, VADParamsUpdateFrame):
|
|
308
301
|
if self.vad_analyzer:
|
|
309
|
-
self.vad_analyzer.set_params(frame.params
|
|
302
|
+
self.vad_analyzer.set_params(frame.params)
|
|
310
303
|
speech_frame = SpeechControlParamsFrame(
|
|
311
304
|
vad_params=frame.params,
|
|
312
305
|
turn_params=self._params.turn_analyzer.params
|
|
@@ -314,6 +307,8 @@ class BaseInputTransport(FrameProcessor):
|
|
|
314
307
|
else None,
|
|
315
308
|
)
|
|
316
309
|
await self.push_frame(speech_frame)
|
|
310
|
+
await self.push_frame(frame, direction)
|
|
311
|
+
# All other system frames
|
|
317
312
|
elif isinstance(frame, SystemFrame):
|
|
318
313
|
await self.push_frame(frame, direction)
|
|
319
314
|
# Control frames
|
|
@@ -335,13 +330,6 @@ class BaseInputTransport(FrameProcessor):
|
|
|
335
330
|
# Handle interruptions
|
|
336
331
|
#
|
|
337
332
|
|
|
338
|
-
async def _handle_bot_interruption(self, frame: BotInterruptionFrame):
|
|
339
|
-
"""Handle bot interruption frames."""
|
|
340
|
-
self.logger.debug("Bot interruption")
|
|
341
|
-
if self.interruptions_allowed:
|
|
342
|
-
await self._start_interruption()
|
|
343
|
-
await self.push_frame(StartInterruptionFrame())
|
|
344
|
-
|
|
345
333
|
async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
|
|
346
334
|
"""Handle user interruption events based on speaking state."""
|
|
347
335
|
if vad_state == VADState.SPEAKING:
|
|
@@ -353,7 +341,7 @@ class BaseInputTransport(FrameProcessor):
|
|
|
353
341
|
await self.push_frame(downstream_frame)
|
|
354
342
|
await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
|
|
355
343
|
|
|
356
|
-
# Only push StartInterruptionFrame if:
|
|
344
|
+
# Only push InterruptionFrame if:
|
|
357
345
|
# 1. No interruption config is set, OR
|
|
358
346
|
# 2. Interruption config is set but bot is not speaking
|
|
359
347
|
should_push_immediate_interruption = (
|
|
@@ -362,13 +350,9 @@ class BaseInputTransport(FrameProcessor):
|
|
|
362
350
|
|
|
363
351
|
# Make sure we notify about interruptions quickly out-of-band.
|
|
364
352
|
if should_push_immediate_interruption and self.interruptions_allowed:
|
|
365
|
-
await self._start_interruption()
|
|
366
|
-
# Push an out-of-band frame (i.e. not using the ordered push
|
|
367
|
-
# frame task) to stop everything, specially at the output
|
|
368
|
-
# transport.
|
|
369
|
-
await self.push_frame(StartInterruptionFrame())
|
|
353
|
+
await self.push_interruption_task_frame_and_wait()
|
|
370
354
|
elif self.interruption_strategies and self._bot_speaking:
|
|
371
|
-
logger.debug(
|
|
355
|
+
self.logger.debug(
|
|
372
356
|
"User started speaking while bot is speaking with interruption config - "
|
|
373
357
|
"deferring interruption to aggregator"
|
|
374
358
|
)
|
|
@@ -381,9 +365,6 @@ class BaseInputTransport(FrameProcessor):
|
|
|
381
365
|
await self.push_frame(downstream_frame)
|
|
382
366
|
await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
|
|
383
367
|
|
|
384
|
-
if self.interruptions_allowed:
|
|
385
|
-
await self._stop_interruption()
|
|
386
|
-
|
|
387
368
|
#
|
|
388
369
|
# Handle bot speaking state
|
|
389
370
|
#
|
|
@@ -416,9 +397,7 @@ class BaseInputTransport(FrameProcessor):
|
|
|
416
397
|
"""Analyze audio frame for voice activity."""
|
|
417
398
|
state = VADState.QUIET
|
|
418
399
|
if self.vad_analyzer:
|
|
419
|
-
state = await self.get_event_loop().run_in_executor(
|
|
420
|
-
self._executor, self.vad_analyzer.analyze_audio, audio_frame.audio
|
|
421
|
-
)
|
|
400
|
+
state = await self.vad_analyzer.analyze_audio(audio_frame.audio)
|
|
422
401
|
return state
|
|
423
402
|
|
|
424
403
|
async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState:
|
|
@@ -511,7 +490,7 @@ class BaseInputTransport(FrameProcessor):
|
|
|
511
490
|
self._audio_in_queue.task_done()
|
|
512
491
|
except asyncio.TimeoutError:
|
|
513
492
|
if self._user_speaking:
|
|
514
|
-
logger.warning(
|
|
493
|
+
self.logger.warning(
|
|
515
494
|
"Forcing user stopped speaking due to timeout receiving audio frame!"
|
|
516
495
|
)
|
|
517
496
|
vad_state = VADState.QUIET
|