dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
pipecat/pipeline/task.py
CHANGED

@@ -32,15 +32,11 @@ from pipecat.frames.frames import (
     Frame,
     HeartbeatFrame,
     InputAudioRawFrame,
-    InterimTranscriptionFrame,
-    LLMFullResponseEndFrame,
     MetricsFrame,
     StartFrame,
     StopFrame,
     StopTaskFrame,
-    TranscriptionFrame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
+    UserSpeakingFrame,
 )
 from pipecat.metrics.metrics import ProcessingMetricsData, TTFBMetricsData
 from pipecat.observers.base_observer import BaseObserver

@@ -53,8 +49,12 @@ from pipecat.utils.asyncio.task_manager import BaseTaskManager, TaskManager, Tas
 from pipecat.utils.tracing.setup import is_tracing_available
 from pipecat.utils.tracing.turn_trace_observer import TurnTraceObserver
 
-
-
+HEARTBEAT_SECS = 1.0
+HEARTBEAT_MONITOR_SECS = HEARTBEAT_SECS * 10
+
+IDLE_TIMEOUT_SECS = 300
+
+CANCEL_TIMEOUT_SECS = 20.0
 
 
 class PipelineParams(BaseModel):

@@ -91,7 +91,7 @@ class PipelineParams(BaseModel):
     enable_heartbeats: bool = False
     enable_metrics: bool = False
     enable_usage_metrics: bool = False
-    heartbeats_period_secs: float = 1.0
+    heartbeats_period_secs: float = HEARTBEAT_SECS
     interruption_strategies: List[BaseInterruptionStrategy] = Field(default_factory=list)
     observers: List[BaseObserver] = Field(default_factory=list)
     report_only_initial_ttfb: bool = False

@@ -135,20 +135,14 @@ class PipelineTask(BasePipelineTask):
         params: Optional[PipelineParams] = None,
         additional_span_attributes: Optional[dict] = None,
         cancel_on_idle_timeout: bool = True,
+        cancel_timeout_secs: float = CANCEL_TIMEOUT_SECS,
         check_dangling_tasks: bool = True,
         clock: Optional[BaseClock] = None,
         conversation_id: Optional[str] = None,
         enable_tracing: bool = False,
         enable_turn_tracking: bool = True,
-        idle_timeout_frames: Tuple[Type[Frame], ...] = (
-            BotSpeakingFrame,
-            InterimTranscriptionFrame,
-            LLMFullResponseEndFrame,
-            TranscriptionFrame,
-            UserStartedSpeakingFrame,
-            UserStoppedSpeakingFrame,
-        ),
-        idle_timeout_secs: Optional[float] = 300,
+        idle_timeout_frames: Tuple[Type[Frame], ...] = (BotSpeakingFrame, UserSpeakingFrame),
+        idle_timeout_secs: Optional[float] = IDLE_TIMEOUT_SECS,
         observers: Optional[List[BaseObserver]] = None,
         task_manager: Optional[BaseTaskManager] = None,
     ):

@@ -161,6 +155,8 @@ class PipelineTask(BasePipelineTask):
                 OpenTelemetry conversation span attributes.
             cancel_on_idle_timeout: Whether the pipeline task should be cancelled if
                 the idle timeout is reached.
+            cancel_timeout_secs: Timeout (in seconds) to wait for cancellation to happen
+                cleanly.
             check_dangling_tasks: Whether to check for processors' tasks finishing properly.
             clock: Clock implementation for timing operations.
             conversation_id: Optional custom ID for the conversation.

@@ -178,6 +174,7 @@ class PipelineTask(BasePipelineTask):
         self._params = params or PipelineParams()
         self._additional_span_attributes = additional_span_attributes or {}
         self._cancel_on_idle_timeout = cancel_on_idle_timeout
+        self._cancel_timeout_secs = cancel_timeout_secs
         self._check_dangling_tasks = check_dangling_tasks
         self._clock = clock or SystemClock()
         self._conversation_id = conversation_id

@@ -228,8 +225,13 @@ class PipelineTask(BasePipelineTask):
         # idle.
         self._idle_queue = asyncio.Queue()
         self._idle_monitor_task: Optional[asyncio.Task] = None
+
+        # This event is used to indicate the StartFrame has been received at the
+        # end of the pipeline.
+        self._pipeline_start_event = asyncio.Event()
+
         # This event is used to indicate a finalize frame (e.g. EndFrame,
-        # StopFrame) has been received
+        # StopFrame) has been received at the end of the pipeline.
         self._pipeline_end_event = asyncio.Event()
 
         # This is the final pipeline. It is composed of a source processor,

@@ -394,12 +396,13 @@
         # `PipelineRunner` which will call `PipelineTask.cancel()` and
         # therefore becoming case (2).
         if self._finished or self._cancelled:
-            logger.debug(f"Pipeline task {self}
+            logger.debug(f"Pipeline task {self} is finishing cleanup...")
             await self._cancel_tasks()
             await self._cleanup(cleanup_pipeline)
             if self._check_dangling_tasks:
                 self._print_dangling_tasks()
             self._finished = True
+        logger.debug(f"Pipeline task {self} has finished")
 
     async def queue_frame(self, frame: Frame):
         """Queue a single frame to be pushed down the pipeline.

@@ -427,12 +430,13 @@
         if not self._cancelled:
             logger.debug(f"Canceling pipeline task {self}", call_id=self._conversation_id)
             self._cancelled = True
+            cancel_frame = CancelFrame()
             # Make sure everything is cleaned up downstream. This is sent
             # out-of-band from the main streaming task which is what we want since
             # we want to cancel right away.
-            await self._pipeline.queue_frame(CancelFrame())
-            # Wait for CancelFrame to make it
-            await self._wait_for_pipeline_end()
+            await self._pipeline.queue_frame(cancel_frame)
+            # Wait for CancelFrame to make it through the pipeline.
+            await self._wait_for_pipeline_end(cancel_frame)
             # Only cancel the push task, we don't want to be able to process any
             # other frame after cancel. Everything else will be cancelled in
             # run().

@@ -506,9 +510,37 @@
             data.append(ProcessingMetricsData(processor=p.name, value=0.0))
         return MetricsFrame(data=data)
 
-    async def _wait_for_pipeline_end(self):
-        """Wait for the
-        await self._pipeline_end_event.wait()
+    async def _wait_for_pipeline_start(self, frame: Frame):
+        """Wait for the specified start frame to reach the end of the pipeline."""
+        logger.debug(f"{self}: Starting. Waiting for {frame} to reach the end of the pipeline...")
+        await self._pipeline_start_event.wait()
+        self._pipeline_start_event.clear()
+        logger.debug(f"{self}: {frame} reached the end of the pipeline, pipeline is now ready.")
+
+    async def _wait_for_pipeline_end(self, frame: Frame):
+        """Wait for the specified frame to reach the end of the pipeline."""
+
+        async def wait_for_cancel():
+            try:
+                await asyncio.wait_for(
+                    self._pipeline_end_event.wait(), timeout=self._cancel_timeout_secs
+                )
+                logger.debug(f"{self}: {frame} reached the end of the pipeline.")
+            except asyncio.TimeoutError:
+                logger.warning(
+                    f"{self}: timeout waiting for {frame} to reach the end of the pipeline (being blocked somewhere?)."
+                )
+            finally:
+                await self._call_event_handler("on_pipeline_cancelled", frame)
+
+        logger.debug(f"{self}: Closing. Waiting for {frame} to reach the end of the pipeline...")
+
+        if isinstance(frame, CancelFrame):
+            await wait_for_cancel()
+        else:
+            await self._pipeline_end_event.wait()
+            logger.debug(f"{self}: {frame} reached the end of the pipeline, pipeline is closing.")
+
         self._pipeline_end_event.clear()
 
     async def _setup(self, params: PipelineTaskParams):

@@ -528,6 +560,10 @@ class PipelineTask(BasePipelineTask):
         # Cleanup base object.
         await self.cleanup()
 
+        # Cleanup observers.
+        if self._observer:
+            await self._observer.cleanup()
+
         # End conversation tracing if it's active - this will also close any active turn span
         if self._enable_tracing and hasattr(self, "_turn_trace_observer"):
             self._turn_trace_observer.end_conversation_tracing()

@@ -560,6 +596,9 @@ class PipelineTask(BasePipelineTask):
         start_frame.metadata = self._params.start_metadata
         await self._pipeline.queue_frame(start_frame)
 
+        # Wait for the pipeline to be started before pushing any other frame.
+        await self._wait_for_pipeline_start(start_frame)
+
         if self._params.enable_metrics and self._params.send_initial_empty_metrics:
             await self._pipeline.queue_frame(self._initial_metrics_frame())
 

@@ -569,7 +608,7 @@ class PipelineTask(BasePipelineTask):
             frame = await self._push_queue.get()
             await self._pipeline.queue_frame(frame)
             if isinstance(frame, (CancelFrame, EndFrame, StopFrame)):
-                await self._wait_for_pipeline_end()
+                await self._wait_for_pipeline_end(frame)
             running = not isinstance(frame, (CancelFrame, EndFrame, StopFrame))
             cleanup_pipeline = not isinstance(frame, StopFrame)
             self._push_queue.task_done()

@@ -626,6 +665,8 @@ class PipelineTask(BasePipelineTask):
             # Start heartbeat tasks now that StartFrame has been processed
             # by all processors in the pipeline
             self._maybe_start_heartbeat_tasks()
+
+            self._pipeline_start_event.set()
         elif isinstance(frame, EndFrame):
             await self._call_event_handler("on_pipeline_ended", frame)
             self._pipeline_end_event.set()

@@ -633,7 +674,6 @@ class PipelineTask(BasePipelineTask):
             await self._call_event_handler("on_pipeline_stopped", frame)
             self._pipeline_end_event.set()
         elif isinstance(frame, CancelFrame):
-            await self._call_event_handler("on_pipeline_cancelled", frame)
             self._pipeline_end_event.set()
         elif isinstance(frame, HeartbeatFrame):
             await self._heartbeat_queue.put(frame)

@@ -655,7 +695,7 @@ class PipelineTask(BasePipelineTask):
         the time that a heartbeat frame takes to processes, that is how long it
         takes for the heartbeat frame to traverse all the pipeline.
         """
-        wait_time =
+        wait_time = HEARTBEAT_MONITOR_SECS
         while True:
             try:
                 frame = await asyncio.wait_for(self._heartbeat_queue.get(), timeout=wait_time)
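The task.py changes above add a cancel_timeout_secs constructor argument (backed by the new CANCEL_TIMEOUT_SECS constant), replace the literal heartbeat and idle-timeout defaults with module-level constants, and narrow the default idle_timeout_frames to (BotSpeakingFrame, UserSpeakingFrame). A minimal sketch of how a caller might pick these up; the pipeline construction is omitted and the values shown simply restate the new defaults:

from pipecat.pipeline.task import PipelineParams, PipelineTask

def make_task(pipeline):
    # `pipeline` is an already-assembled pipecat Pipeline (construction omitted).
    return PipelineTask(
        pipeline,
        params=PipelineParams(enable_heartbeats=True),  # heartbeats_period_secs now defaults to HEARTBEAT_SECS (1.0)
        cancel_timeout_secs=20.0,  # new: how long cancel() waits for the CancelFrame to reach the end of the pipeline
        idle_timeout_secs=300,  # now sourced from IDLE_TIMEOUT_SECS
        # idle_timeout_frames defaults to (BotSpeakingFrame, UserSpeakingFrame)
    )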
pipecat/pipeline/task_observer.py
CHANGED

@@ -119,6 +119,16 @@ class TaskObserver(BaseObserver):
         for proxy in self._proxies.values():
             await self._task_manager.cancel_task(proxy.task)
 
+    async def cleanup(self):
+        """Cleanup all proxy observers."""
+        await super().cleanup()
+
+        if not self._proxies:
+            return
+
+        for proxy in self._proxies:
+            await proxy.cleanup()
+
     async def on_process_frame(self, data: FramePushed):
         """Queue frame data for all managed observers.
 
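With TaskObserver.cleanup() in place, PipelineTask teardown (see the "# Cleanup observers." hunk in task.py above) now propagates cleanup to every attached observer. A hedged sketch of an observer that releases resources at that point, assuming the rest of the BaseObserver interface is implemented as usual:

from pipecat.observers.base_observer import BaseObserver, FramePushed

class ClosingObserver(BaseObserver):
    # Hypothetical observer, for illustration only.
    async def on_push_frame(self, data: FramePushed):
        pass  # observe frames as usual

    async def cleanup(self):
        await super().cleanup()
        # Release files, sockets, etc.; called once during task teardown.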
pipecat/processors/aggregators/dtmf_aggregator.py
CHANGED

@@ -14,13 +14,13 @@ for downstream processing by LLM context aggregators.
 import asyncio
 from typing import Optional
 
+from pipecat.audio.dtmf.types import KeypadEntry
 from pipecat.frames.frames import (
     BotInterruptionFrame,
     CancelFrame,
     EndFrame,
     Frame,
     InputDTMFFrame,
-    KeypadEntry,
     StartFrame,
     TranscriptionFrame,
 )

@@ -103,7 +103,7 @@ class DTMFAggregator(FrameProcessor):
         digit_value = frame.button.value
         self._aggregation += digit_value
 
-        # For first digit, schedule interruption
+        # For first digit, schedule interruption.
         if is_first_digit:
             await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
 
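The DTMFAggregator change is just an import move: KeypadEntry now lives in the new pipecat/audio/dtmf/types.py module instead of pipecat.frames.frames. Downstream code would update accordingly:

# Before (0.0.82.dev815):
# from pipecat.frames.frames import KeypadEntry

# After (0.0.82.dev857):
from pipecat.audio.dtmf.types import KeypadEntry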
pipecat/processors/aggregators/llm_context.py
ADDED

@@ -0,0 +1,277 @@
+#
+# Copyright (c) 2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Universal LLM context management for LLM services in Pipecat.
+
+Context contents are represented in a universal format (based on OpenAI)
+that supports a union of known Pipecat LLM service functionality.
+
+Whenever an LLM service needs to access context, it does a just-in-time
+translation from this universal context into whatever format it needs, using a
+service-specific adapter.
+"""
+
+import base64
+import io
+from dataclasses import dataclass
+from typing import Any, List, Optional, TypeAlias, Union
+
+from loguru import logger
+from openai._types import NOT_GIVEN as OPEN_AI_NOT_GIVEN
+from openai._types import NotGiven as OpenAINotGiven
+from openai.types.chat import (
+    ChatCompletionMessageParam,
+    ChatCompletionToolChoiceOptionParam,
+)
+from PIL import Image
+
+from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.frames.frames import AudioRawFrame
+
+# "Re-export" types from OpenAI that we're using as universal context types.
+# NOTE: if universal message types need to someday diverge from OpenAI's, we
+# should consider managing our own definitions. But we should do so carefully,
+# as the OpenAI messages are somewhat of a standard and we want to continue
+# supporting them.
+LLMStandardMessage = ChatCompletionMessageParam
+LLMContextToolChoice = ChatCompletionToolChoiceOptionParam
+NOT_GIVEN = OPEN_AI_NOT_GIVEN
+NotGiven = OpenAINotGiven
+
+
+@dataclass
+class LLMSpecificMessage:
+    """A container for a context message that is specific to a particular LLM service.
+
+    Enables the use of service-specific message types while maintaining
+    compatibility with the universal LLM context format.
+    """
+
+    llm: str
+    message: Any
+
+
+LLMContextMessage: TypeAlias = Union[LLMStandardMessage, LLMSpecificMessage]
+
+
+class LLMContext:
+    """Manages conversation context for LLM interactions.
+
+    Handles message history, tool definitions, tool choices, and multimedia
+    content for LLM conversations. Provides methods for message manipulation,
+    and content formatting.
+    """
+
+    def __init__(
+        self,
+        messages: Optional[List[LLMContextMessage]] = None,
+        tools: ToolsSchema | NotGiven = NOT_GIVEN,
+        tool_choice: LLMContextToolChoice | NotGiven = NOT_GIVEN,
+    ):
+        """Initialize the LLM context.
+
+        Args:
+            messages: Initial list of conversation messages.
+            tools: Available tools for the LLM to use.
+            tool_choice: Tool selection strategy for the LLM.
+        """
+        self._messages: List[LLMContextMessage] = messages if messages else []
+        self._tools: ToolsSchema | NotGiven = LLMContext._normalize_and_validate_tools(tools)
+        self._tool_choice: LLMContextToolChoice | NotGiven = tool_choice
+
+    def get_messages(self, llm_specific_filter: Optional[str] = None) -> List[LLMContextMessage]:
+        """Get the current messages list.
+
+        Args:
+            llm_specific_filter: Optional filter to return LLM-specific
+                messages for the given LLM, in addition to the standard
+                messages. If messages end up being filtered, an error will be
+                logged.
+
+        Returns:
+            List of conversation messages.
+        """
+        if llm_specific_filter is None:
+            return self._messages
+        filtered_messages = [
+            msg
+            for msg in self._messages
+            if not isinstance(msg, LLMSpecificMessage) or msg.llm == llm_specific_filter
+        ]
+        if len(filtered_messages) < len(self._messages):
+            logger.error(
+                f"Attempted to use incompatible LLMSpecificMessages with LLM '{llm_specific_filter}'."
+            )
+        return filtered_messages
+
+    @property
+    def tools(self) -> ToolsSchema | NotGiven:
+        """Get the tools list.
+
+        Returns:
+            Tools list.
+        """
+        return self._tools
+
+    @property
+    def tool_choice(self) -> LLMContextToolChoice | NotGiven:
+        """Get the current tool choice setting.
+
+        Returns:
+            The tool choice configuration.
+        """
+        return self._tool_choice
+
+    def add_message(self, message: LLMContextMessage):
+        """Add a single message to the context.
+
+        Args:
+            message: The message to add to the conversation history.
+        """
+        self._messages.append(message)
+
+    def add_messages(self, messages: List[LLMContextMessage]):
+        """Add multiple messages to the context.
+
+        Args:
+            messages: List of messages to add to the conversation history.
+        """
+        self._messages.extend(messages)
+
+    def set_messages(self, messages: List[LLMContextMessage]):
+        """Replace all messages in the context.
+
+        Args:
+            messages: New list of messages to replace the current history.
+        """
+        self._messages[:] = messages
+
+    def set_tools(self, tools: ToolsSchema | NotGiven = NOT_GIVEN):
+        """Set the available tools for the LLM.
+
+        Args:
+            tools: A ToolsSchema or NOT_GIVEN to disable tools.
+        """
+        self._tools = LLMContext._normalize_and_validate_tools(tools)
+
+    def set_tool_choice(self, tool_choice: LLMContextToolChoice | NotGiven):
+        """Set the tool choice configuration.
+
+        Args:
+            tool_choice: Tool selection strategy for the LLM.
+        """
+        self._tool_choice = tool_choice
+
+    def add_image_frame_message(
+        self, *, format: str, size: tuple[int, int], image: bytes, text: str = None
+    ):
+        """Add a message containing an image frame.
+
+        Args:
+            format: Image format (e.g., 'RGB', 'RGBA').
+            size: Image dimensions as (width, height) tuple.
+            image: Raw image bytes.
+            text: Optional text to include with the image.
+        """
+        buffer = io.BytesIO()
+        Image.frombytes(format, size, image).save(buffer, format="JPEG")
+        encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+        content = []
+        if text:
+            content.append({"type": "text", "text": text})
+        content.append(
+            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
+        )
+        self.add_message({"role": "user", "content": content})
+
+    def add_audio_frames_message(
+        self, *, audio_frames: list[AudioRawFrame], text: str = "Audio follows"
+    ):
+        """Add a message containing audio frames.
+
+        Args:
+            audio_frames: List of audio frame objects to include.
+            text: Optional text to include with the audio.
+        """
+        if not audio_frames:
+            return
+
+        sample_rate = audio_frames[0].sample_rate
+        num_channels = audio_frames[0].num_channels
+
+        content = []
+        content.append({"type": "text", "text": text})
+        data = b"".join(frame.audio for frame in audio_frames)
+        data = bytes(
+            self._create_wav_header(
+                sample_rate,
+                num_channels,
+                16,
+                len(data),
+            )
+            + data
+        )
+        encoded_audio = base64.b64encode(data).decode("utf-8")
+        content.append(
+            {
+                "type": "input_audio",
+                "input_audio": {"data": encoded_audio, "format": "wav"},
+            }
+        )
+        self.add_message({"role": "user", "content": content})
+
+    def _create_wav_header(self, sample_rate, num_channels, bits_per_sample, data_size):
+        """Create a WAV file header for audio data.
+
+        Args:
+            sample_rate: Audio sample rate in Hz.
+            num_channels: Number of audio channels.
+            bits_per_sample: Bits per audio sample.
+            data_size: Size of audio data in bytes.
+
+        Returns:
+            WAV header as a bytearray.
+        """
+        # RIFF chunk descriptor
+        header = bytearray()
+        header.extend(b"RIFF")  # ChunkID
+        header.extend((data_size + 36).to_bytes(4, "little"))  # ChunkSize: total size - 8
+        header.extend(b"WAVE")  # Format
+        # "fmt " sub-chunk
+        header.extend(b"fmt ")  # Subchunk1ID
+        header.extend((16).to_bytes(4, "little"))  # Subchunk1Size (16 for PCM)
+        header.extend((1).to_bytes(2, "little"))  # AudioFormat (1 for PCM)
+        header.extend(num_channels.to_bytes(2, "little"))  # NumChannels
+        header.extend(sample_rate.to_bytes(4, "little"))  # SampleRate
+        # Calculate byte rate and block align
+        byte_rate = sample_rate * num_channels * (bits_per_sample // 8)
+        block_align = num_channels * (bits_per_sample // 8)
+        header.extend(byte_rate.to_bytes(4, "little"))  # ByteRate
+        header.extend(block_align.to_bytes(2, "little"))  # BlockAlign
+        header.extend(bits_per_sample.to_bytes(2, "little"))  # BitsPerSample
+        # "data" sub-chunk
+        header.extend(b"data")  # Subchunk2ID
+        header.extend(data_size.to_bytes(4, "little"))  # Subchunk2Size
+        return header
+
+    @staticmethod
+    def _normalize_and_validate_tools(tools: ToolsSchema | NotGiven) -> ToolsSchema | NotGiven:
+        """Normalize and validate the given tools.
+
+        Raises:
+            TypeError: If tools are not a ToolsSchema or NotGiven.
+        """
+        if isinstance(tools, ToolsSchema):
+            if not tools.standard_tools and not tools.custom_tools:
+                return NOT_GIVEN
+            return tools
+        elif tools is NOT_GIVEN:
+            return NOT_GIVEN
+        else:
+            raise TypeError(
+                f"In LLMContext, tools must be a ToolsSchema object or NOT_GIVEN. Got type: {type(tools)}",
+            )
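A short usage sketch for the new LLMContext; the message contents and the "anthropic" filter key are illustrative only:

from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage

context = LLMContext(messages=[{"role": "system", "content": "You are a helpful assistant."}])
context.add_message({"role": "user", "content": "Hello!"})

# A message that only a hypothetical "anthropic" adapter should see.
context.add_message(
    LLMSpecificMessage(llm="anthropic", message={"role": "user", "content": "provider-specific payload"})
)

# Returns the standard messages plus the "anthropic"-specific one; specific
# messages belonging to any other LLM would be dropped (and an error logged).
messages = context.get_messages(llm_specific_filter="anthropic")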
pipecat/processors/aggregators/llm_response.py
CHANGED

@@ -13,7 +13,6 @@ LLM processing, and text-to-speech components in conversational AI pipelines.
 
 import asyncio
 import time
-import warnings
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Literal, Optional, Set

@@ -43,6 +42,7 @@ from pipecat.frames.frames import (
     LLMMessagesAppendFrame,
     LLMMessagesFrame,
     LLMMessagesUpdateFrame,
+    LLMRunFrame,
     LLMSetToolChoiceFrame,
     LLMSetToolsFrame,
     LLMTextFrame,

@@ -320,9 +320,24 @@ class LLMContextResponseAggregator(BaseLLMResponseAggregator):
     def get_context_frame(self) -> OpenAILLMContextFrame:
         """Create a context frame with the current context.
 
+        .. deprecated:: 0.0.82
+            This method is deprecated and will be removed in a future version.
+
         Returns:
-            OpenAILLMContextFrame containing the current context.
+            LLMContextFrame containing the current context.
         """
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "get_context_frame() is deprecated and will be removed in a future version. To trigger an LLM response, use LLMRunFrame instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+        return self._get_context_frame()
+
+    def _get_context_frame(self) -> OpenAILLMContextFrame:
         return OpenAILLMContextFrame(context=self._context)
 
     async def push_context_frame(self, direction: FrameDirection = FrameDirection.DOWNSTREAM):

@@ -331,7 +346,7 @@ class LLMContextResponseAggregator(BaseLLMResponseAggregator):
         Args:
             direction: The direction to push the frame (upstream or downstream).
         """
-        frame = self.get_context_frame()
+        frame = self._get_context_frame()
         await self.push_frame(frame, direction)
 
     def add_messages(self, messages):

@@ -491,6 +506,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
             await self._handle_transcription(frame)
         elif isinstance(frame, InterimTranscriptionFrame):
             await self._handle_interim_transcription(frame)
+        elif isinstance(frame, LLMRunFrame):
+            await self._handle_llm_run(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
             await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):

@@ -583,6 +600,9 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
     async def _cancel(self, frame: CancelFrame):
         await self._cancel_aggregation_task()
 
+    async def _handle_llm_run(self, frame: LLMRunFrame):
+        await self.push_context_frame()
+
     async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
         self.add_messages(frame.messages)
         if frame.run_llm:

@@ -897,6 +917,8 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
             await self._handle_llm_end(frame)
         elif isinstance(frame, TextFrame):
             await self._handle_text(frame)
+        elif isinstance(frame, LLMRunFrame):
+            await self._handle_llm_run(frame)
         elif isinstance(frame, LLMMessagesAppendFrame):
             await self._handle_llm_messages_append(frame)
         elif isinstance(frame, LLMMessagesUpdateFrame):

@@ -939,6 +961,9 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
         timestamp_frame = OpenAILLMContextAssistantTimestampFrame(timestamp=time_now_iso8601())
         await self.push_frame(timestamp_frame)
 
+    async def _handle_llm_run(self, frame: LLMRunFrame):
+        await self.push_context_frame(FrameDirection.UPSTREAM)
+
     async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
         self.add_messages(frame.messages)
         if frame.run_llm:

@@ -1083,12 +1108,16 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
             params: Configuration parameters for aggregation behavior.
             **kwargs: Additional arguments passed to parent class.
         """
-        warnings
-
-
-
-
-
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "LLMUserResponseAggregator is deprecated and will be removed in a future version. "
+                "Use LLMUserContextAggregator or another LLM-specific subclass instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
     async def _process_aggregation(self):

@@ -1126,12 +1155,16 @@ class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
             params: Configuration parameters for aggregation behavior.
             **kwargs: Additional arguments passed to parent class.
         """
-        warnings
-
-
-
-
-
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "LLMAssistantResponseAggregator is deprecated and will be removed in a future version. "
+                "Use LLMAssistantContextAggregator or another LLM-specific subclass instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
 
     async def push_aggregation(self):