dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
|
@@ -26,6 +26,7 @@ from pydantic import BaseModel, Field
|
|
|
26
26
|
|
|
27
27
|
from pipecat.adapters.services.anthropic_adapter import AnthropicLLMAdapter
|
|
28
28
|
from pipecat.frames.frames import (
|
|
29
|
+
ErrorFrame,
|
|
29
30
|
Frame,
|
|
30
31
|
FunctionCallCancelFrame,
|
|
31
32
|
FunctionCallInProgressFrame,
|
|
@@ -52,11 +53,10 @@ from pipecat.processors.aggregators.openai_llm_context import (
|
|
|
52
53
|
)
|
|
53
54
|
from pipecat.processors.frame_processor import FrameDirection
|
|
54
55
|
from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
|
|
55
|
-
from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
|
|
56
56
|
from pipecat.utils.tracing.service_decorators import traced_llm
|
|
57
57
|
|
|
58
58
|
try:
|
|
59
|
-
from anthropic import NOT_GIVEN, AsyncAnthropic, NotGiven
|
|
59
|
+
from anthropic import NOT_GIVEN, APITimeoutError, AsyncAnthropic, NotGiven
|
|
60
60
|
except ModuleNotFoundError as e:
|
|
61
61
|
logger.error(f"Exception: {e}")
|
|
62
62
|
logger.error("In order to use Anthropic, you need to `pip install pipecat-ai[anthropic]`.")
|
|
@@ -101,13 +101,6 @@ class AnthropicLLMService(LLMService):
|
|
|
101
101
|
Provides inference capabilities with Claude models including support for
|
|
102
102
|
function calling, vision processing, streaming responses, and prompt caching.
|
|
103
103
|
Can use custom clients like AsyncAnthropicBedrock and AsyncAnthropicVertex.
|
|
104
|
-
|
|
105
|
-
Args:
|
|
106
|
-
api_key: Anthropic API key for authentication.
|
|
107
|
-
model: Model name to use. Defaults to "claude-sonnet-4-20250514".
|
|
108
|
-
params: Optional model parameters for inference.
|
|
109
|
-
client: Optional custom Anthropic client instance.
|
|
110
|
-
**kwargs: Additional arguments passed to parent LLMService.
|
|
111
104
|
"""
|
|
112
105
|
|
|
113
106
|
# Overriding the default adapter to use the Anthropic one.
|
|
@@ -139,14 +132,29 @@ class AnthropicLLMService(LLMService):
|
|
|
139
132
|
model: str = "claude-sonnet-4-20250514",
|
|
140
133
|
params: Optional[InputParams] = None,
|
|
141
134
|
client=None,
|
|
135
|
+
retry_timeout_secs: Optional[float] = 5.0,
|
|
136
|
+
retry_on_timeout: Optional[bool] = False,
|
|
142
137
|
**kwargs,
|
|
143
138
|
):
|
|
139
|
+
"""Initialize the Anthropic LLM service.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
api_key: Anthropic API key for authentication.
|
|
143
|
+
model: Model name to use. Defaults to "claude-sonnet-4-20250514".
|
|
144
|
+
params: Optional model parameters for inference.
|
|
145
|
+
client: Optional custom Anthropic client instance.
|
|
146
|
+
retry_timeout_secs: Request timeout in seconds for retry logic.
|
|
147
|
+
retry_on_timeout: Whether to retry the request once if it times out.
|
|
148
|
+
**kwargs: Additional arguments passed to parent LLMService.
|
|
149
|
+
"""
|
|
144
150
|
super().__init__(**kwargs)
|
|
145
151
|
params = params or AnthropicLLMService.InputParams()
|
|
146
152
|
self._client = client or AsyncAnthropic(
|
|
147
153
|
api_key=api_key
|
|
148
154
|
) # if the client is provided, use it and remove it, otherwise create a new one
|
|
149
155
|
self.set_model_name(model)
|
|
156
|
+
self._retry_timeout_secs = retry_timeout_secs
|
|
157
|
+
self._retry_on_timeout = retry_on_timeout
|
|
150
158
|
self._settings = {
|
|
151
159
|
"max_tokens": params.max_tokens,
|
|
152
160
|
"enable_prompt_caching_beta": params.enable_prompt_caching_beta or False,
|
|
@@ -164,6 +172,31 @@ class AnthropicLLMService(LLMService):
|
|
|
164
172
|
"""
|
|
165
173
|
return True
|
|
166
174
|
|
|
175
|
+
async def _create_message_stream(self, api_call, params):
|
|
176
|
+
"""Create message stream with optional timeout and retry.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
api_call: The Anthropic API method to call.
|
|
180
|
+
params: Parameters for the API call.
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
Async stream of message events.
|
|
184
|
+
"""
|
|
185
|
+
if self._retry_on_timeout:
|
|
186
|
+
try:
|
|
187
|
+
response = await asyncio.wait_for(
|
|
188
|
+
api_call(**params), timeout=self._retry_timeout_secs
|
|
189
|
+
)
|
|
190
|
+
return response
|
|
191
|
+
except (APITimeoutError, asyncio.TimeoutError):
|
|
192
|
+
# Retry, this time without a timeout so we get a response
|
|
193
|
+
logger.debug(f"{self}: Retrying message creation due to timeout")
|
|
194
|
+
response = await api_call(**params)
|
|
195
|
+
return response
|
|
196
|
+
else:
|
|
197
|
+
response = await api_call(**params)
|
|
198
|
+
return response
|
|
199
|
+
|
|
167
200
|
@property
|
|
168
201
|
def enable_prompt_caching_beta(self) -> bool:
|
|
169
202
|
"""Check if prompt caching beta feature is enabled.
|
|
@@ -247,7 +280,7 @@ class AnthropicLLMService(LLMService):
|
|
|
247
280
|
|
|
248
281
|
params.update(self._settings["extra"])
|
|
249
282
|
|
|
250
|
-
response = await api_call(**params)
|
|
283
|
+
response = await self._create_message_stream(api_call, params)
|
|
251
284
|
|
|
252
285
|
await self.stop_ttfb_metrics()
|
|
253
286
|
|
|
@@ -256,7 +289,7 @@ class AnthropicLLMService(LLMService):
|
|
|
256
289
|
json_accumulator = ""
|
|
257
290
|
|
|
258
291
|
function_calls = []
|
|
259
|
-
async for event in WatchdogAsyncIterator(response, manager=self.task_manager):
|
|
292
|
+
async for event in response:
|
|
260
293
|
# Aggregate streaming content, create frames, trigger events
|
|
261
294
|
|
|
262
295
|
if event.type == "content_block_delta":
|
|
@@ -344,6 +377,7 @@ class AnthropicLLMService(LLMService):
|
|
|
344
377
|
await self._call_event_handler("on_completion_timeout")
|
|
345
378
|
except Exception as e:
|
|
346
379
|
self.logger.exception(f"{self} exception: {e}")
|
|
380
|
+
await self.push_error(ErrorFrame(f"{e}"))
|
|
347
381
|
finally:
|
|
348
382
|
await self.stop_processing_metrics()
|
|
349
383
|
await self.push_frame(LLMFullResponseEndFrame())
|
|
@@ -425,12 +459,6 @@ class AnthropicLLMContext(OpenAILLMContext):
|
|
|
425
459
|
Extends OpenAILLMContext to handle Anthropic-specific features like
|
|
426
460
|
system messages, prompt caching, and message format conversions.
|
|
427
461
|
Manages conversation state and message history formatting.
|
|
428
|
-
|
|
429
|
-
Args:
|
|
430
|
-
messages: Initial list of conversation messages.
|
|
431
|
-
tools: Available function calling tools.
|
|
432
|
-
tool_choice: Tool selection preference.
|
|
433
|
-
system: System message content.
|
|
434
462
|
"""
|
|
435
463
|
|
|
436
464
|
def __init__(
|
|
@@ -441,15 +469,25 @@ class AnthropicLLMContext(OpenAILLMContext):
|
|
|
441
469
|
*,
|
|
442
470
|
system: Union[str, NotGiven] = NOT_GIVEN,
|
|
443
471
|
):
|
|
472
|
+
"""Initialize the Anthropic LLM context.
|
|
473
|
+
|
|
474
|
+
Args:
|
|
475
|
+
messages: Initial list of conversation messages.
|
|
476
|
+
tools: Available function calling tools.
|
|
477
|
+
tool_choice: Tool selection preference.
|
|
478
|
+
system: System message content.
|
|
479
|
+
"""
|
|
444
480
|
super().__init__(messages=messages, tools=tools, tool_choice=tool_choice)
|
|
481
|
+
self.__setup_local()
|
|
482
|
+
self.system = system
|
|
445
483
|
|
|
484
|
+
def __setup_local(self):
|
|
446
485
|
# For beta prompt caching. This is a counter that tracks the number of turns
|
|
447
486
|
# we've seen above the cache threshold. We reset this when we reset the
|
|
448
487
|
# messages list. We only care about this number being 0, 1, or 2. But
|
|
449
488
|
# it's easiest just to treat it as a counter.
|
|
450
489
|
self.turns_above_cache_threshold = 0
|
|
451
|
-
|
|
452
|
-
self.system = system
|
|
490
|
+
return
|
|
453
491
|
|
|
454
492
|
@staticmethod
|
|
455
493
|
def upgrade_to_anthropic(obj: OpenAILLMContext) -> "AnthropicLLMContext":
|
|
@@ -466,6 +504,7 @@ class AnthropicLLMContext(OpenAILLMContext):
|
|
|
466
504
|
logger.debug(f"Upgrading to Anthropic: {obj}")
|
|
467
505
|
if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AnthropicLLMContext):
|
|
468
506
|
obj.__class__ = AnthropicLLMContext
|
|
507
|
+
obj.__setup_local()
|
|
469
508
|
obj._restructure_from_openai_messages()
|
|
470
509
|
return obj
|
|
471
510
|
|
|
@@ -534,20 +573,37 @@ class AnthropicLLMContext(OpenAILLMContext):
|
|
|
534
573
|
Handles text content and function calls for both user and assistant messages.
|
|
535
574
|
|
|
536
575
|
Args:
|
|
537
|
-
obj: Message in Anthropic format
|
|
538
|
-
{
|
|
539
|
-
"role": "user/assistant",
|
|
540
|
-
"content": str | [{"type": "text/tool_use/tool_result", ...}]
|
|
541
|
-
}
|
|
576
|
+
obj: Message in Anthropic format.
|
|
542
577
|
|
|
543
578
|
Returns:
|
|
544
|
-
List of messages in standard format
|
|
545
|
-
|
|
579
|
+
List of messages in standard format.
|
|
580
|
+
|
|
581
|
+
Examples:
|
|
582
|
+
Input Anthropic format::
|
|
583
|
+
|
|
546
584
|
{
|
|
547
|
-
"role": "
|
|
548
|
-
"content": [
|
|
585
|
+
"role": "assistant",
|
|
586
|
+
"content": [
|
|
587
|
+
{"type": "text", "text": "Hello"},
|
|
588
|
+
{"type": "tool_use", "id": "123", "name": "search", "input": {"q": "test"}}
|
|
589
|
+
]
|
|
549
590
|
}
|
|
550
|
-
|
|
591
|
+
|
|
592
|
+
Output standard format::
|
|
593
|
+
|
|
594
|
+
[
|
|
595
|
+
{"role": "assistant", "content": [{"type": "text", "text": "Hello"}]},
|
|
596
|
+
{
|
|
597
|
+
"role": "assistant",
|
|
598
|
+
"tool_calls": [
|
|
599
|
+
{
|
|
600
|
+
"type": "function",
|
|
601
|
+
"id": "123",
|
|
602
|
+
"function": {"name": "search", "arguments": '{"q": "test"}'}
|
|
603
|
+
}
|
|
604
|
+
]
|
|
605
|
+
}
|
|
606
|
+
]
|
|
551
607
|
"""
|
|
552
608
|
# todo: image format (?)
|
|
553
609
|
# tool_use
|
|
@@ -609,23 +665,37 @@ class AnthropicLLMContext(OpenAILLMContext):
|
|
|
609
665
|
Empty text content is converted to "(empty)".
|
|
610
666
|
|
|
611
667
|
Args:
|
|
612
|
-
message: Message in standard format
|
|
668
|
+
message: Message in standard format.
|
|
669
|
+
|
|
670
|
+
Returns:
|
|
671
|
+
Message in Anthropic format.
|
|
672
|
+
|
|
673
|
+
Examples:
|
|
674
|
+
Input standard format::
|
|
675
|
+
|
|
613
676
|
{
|
|
614
|
-
"role": "
|
|
615
|
-
"
|
|
616
|
-
|
|
677
|
+
"role": "assistant",
|
|
678
|
+
"tool_calls": [
|
|
679
|
+
{
|
|
680
|
+
"id": "123",
|
|
681
|
+
"function": {"name": "search", "arguments": '{"q": "test"}'}
|
|
682
|
+
}
|
|
683
|
+
]
|
|
617
684
|
}
|
|
618
685
|
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
686
|
+
Output Anthropic format::
|
|
687
|
+
|
|
688
|
+
{
|
|
689
|
+
"role": "assistant",
|
|
690
|
+
"content": [
|
|
691
|
+
{
|
|
692
|
+
"type": "tool_use",
|
|
693
|
+
"id": "123",
|
|
694
|
+
"name": "search",
|
|
695
|
+
"input": {"q": "test"}
|
|
696
|
+
}
|
|
697
|
+
]
|
|
698
|
+
}
|
|
629
699
|
"""
|
|
630
700
|
# todo: image messages (?)
|
|
631
701
|
if message["role"] == "tool":
|
|
@@ -1,10 +1,30 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""AssemblyAI WebSocket API message models and connection parameters.
|
|
8
|
+
|
|
9
|
+
This module defines Pydantic models for handling AssemblyAI's real-time
|
|
10
|
+
transcription WebSocket messages and connection configuration.
|
|
11
|
+
"""
|
|
12
|
+
|
|
1
13
|
from typing import List, Literal, Optional
|
|
2
14
|
|
|
3
15
|
from pydantic import BaseModel, Field
|
|
4
16
|
|
|
5
17
|
|
|
6
18
|
class Word(BaseModel):
|
|
7
|
-
"""Represents a single word in a transcription with timing and confidence.
|
|
19
|
+
"""Represents a single word in a transcription with timing and confidence.
|
|
20
|
+
|
|
21
|
+
Parameters:
|
|
22
|
+
start: Start time of the word in milliseconds.
|
|
23
|
+
end: End time of the word in milliseconds.
|
|
24
|
+
text: The transcribed word text.
|
|
25
|
+
confidence: Confidence score for the word (0.0 to 1.0).
|
|
26
|
+
word_is_final: Whether this word is finalized and won't change.
|
|
27
|
+
"""
|
|
8
28
|
|
|
9
29
|
start: int
|
|
10
30
|
end: int
|
|
@@ -14,13 +34,23 @@ class Word(BaseModel):
|
|
|
14
34
|
|
|
15
35
|
|
|
16
36
|
class BaseMessage(BaseModel):
|
|
17
|
-
"""Base class for all AssemblyAI WebSocket messages.
|
|
37
|
+
"""Base class for all AssemblyAI WebSocket messages.
|
|
38
|
+
|
|
39
|
+
Parameters:
|
|
40
|
+
type: The message type identifier.
|
|
41
|
+
"""
|
|
18
42
|
|
|
19
43
|
type: str
|
|
20
44
|
|
|
21
45
|
|
|
22
46
|
class BeginMessage(BaseMessage):
|
|
23
|
-
"""Message sent when a new session begins.
|
|
47
|
+
"""Message sent when a new session begins.
|
|
48
|
+
|
|
49
|
+
Parameters:
|
|
50
|
+
type: Always "Begin" for this message type.
|
|
51
|
+
id: Unique session identifier.
|
|
52
|
+
expires_at: Unix timestamp when the session expires.
|
|
53
|
+
"""
|
|
24
54
|
|
|
25
55
|
type: Literal["Begin"] = "Begin"
|
|
26
56
|
id: str
|
|
@@ -28,7 +58,17 @@ class BeginMessage(BaseMessage):
|
|
|
28
58
|
|
|
29
59
|
|
|
30
60
|
class TurnMessage(BaseMessage):
|
|
31
|
-
"""Message containing transcription data for a turn of speech.
|
|
61
|
+
"""Message containing transcription data for a turn of speech.
|
|
62
|
+
|
|
63
|
+
Parameters:
|
|
64
|
+
type: Always "Turn" for this message type.
|
|
65
|
+
turn_order: Sequential number of this turn in the session.
|
|
66
|
+
turn_is_formatted: Whether the transcript has been formatted.
|
|
67
|
+
end_of_turn: Whether this marks the end of a speaking turn.
|
|
68
|
+
transcript: The transcribed text for this turn.
|
|
69
|
+
end_of_turn_confidence: Confidence score for end-of-turn detection.
|
|
70
|
+
words: List of individual words with timing and confidence data.
|
|
71
|
+
"""
|
|
32
72
|
|
|
33
73
|
type: Literal["Turn"] = "Turn"
|
|
34
74
|
turn_order: int
|
|
@@ -40,7 +80,13 @@ class TurnMessage(BaseMessage):
|
|
|
40
80
|
|
|
41
81
|
|
|
42
82
|
class TerminationMessage(BaseMessage):
|
|
43
|
-
"""Message sent when the session is terminated.
|
|
83
|
+
"""Message sent when the session is terminated.
|
|
84
|
+
|
|
85
|
+
Parameters:
|
|
86
|
+
type: Always "Termination" for this message type.
|
|
87
|
+
audio_duration_seconds: Total duration of audio processed.
|
|
88
|
+
session_duration_seconds: Total duration of the session.
|
|
89
|
+
"""
|
|
44
90
|
|
|
45
91
|
type: Literal["Termination"] = "Termination"
|
|
46
92
|
audio_duration_seconds: float
|
|
@@ -52,6 +98,18 @@ AnyMessage = BeginMessage | TurnMessage | TerminationMessage
|
|
|
52
98
|
|
|
53
99
|
|
|
54
100
|
class AssemblyAIConnectionParams(BaseModel):
|
|
101
|
+
"""Configuration parameters for AssemblyAI WebSocket connection.
|
|
102
|
+
|
|
103
|
+
Parameters:
|
|
104
|
+
sample_rate: Audio sample rate in Hz. Defaults to 16000.
|
|
105
|
+
encoding: Audio encoding format. Defaults to "pcm_s16le".
|
|
106
|
+
formatted_finals: Whether to enable transcript formatting. Defaults to True.
|
|
107
|
+
word_finalization_max_wait_time: Maximum time to wait for word finalization in milliseconds.
|
|
108
|
+
end_of_turn_confidence_threshold: Confidence threshold for end-of-turn detection.
|
|
109
|
+
min_end_of_turn_silence_when_confident: Minimum silence duration when confident about end-of-turn.
|
|
110
|
+
max_turn_silence: Maximum silence duration before forcing end-of-turn.
|
|
111
|
+
"""
|
|
112
|
+
|
|
55
113
|
sample_rate: int = 16000
|
|
56
114
|
encoding: Literal["pcm_s16le", "pcm_mulaw"] = "pcm_s16le"
|
|
57
115
|
formatted_finals: bool = True
|
|
@@ -4,6 +4,12 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""AssemblyAI speech-to-text service implementation.
|
|
8
|
+
|
|
9
|
+
This module provides integration with AssemblyAI's real-time speech-to-text
|
|
10
|
+
WebSocket API for streaming audio transcription.
|
|
11
|
+
"""
|
|
12
|
+
|
|
7
13
|
import asyncio
|
|
8
14
|
import json
|
|
9
15
|
from typing import Any, AsyncGenerator, Dict
|
|
@@ -38,6 +44,7 @@ from .models import (
|
|
|
38
44
|
|
|
39
45
|
try:
|
|
40
46
|
import websockets
|
|
47
|
+
from websockets.asyncio.client import connect as websocket_connect
|
|
41
48
|
except ModuleNotFoundError as e:
|
|
42
49
|
logger.error(f"Exception: {e}")
|
|
43
50
|
logger.error('In order to use AssemblyAI, you need to `pip install "pipecat-ai[assemblyai]"`.')
|
|
@@ -45,6 +52,13 @@ except ModuleNotFoundError as e:
|
|
|
45
52
|
|
|
46
53
|
|
|
47
54
|
class AssemblyAISTTService(STTService):
|
|
55
|
+
"""AssemblyAI real-time speech-to-text service.
|
|
56
|
+
|
|
57
|
+
Provides real-time speech transcription using AssemblyAI's WebSocket API.
|
|
58
|
+
Supports both interim and final transcriptions with configurable parameters
|
|
59
|
+
for audio processing and connection management.
|
|
60
|
+
"""
|
|
61
|
+
|
|
48
62
|
def __init__(
|
|
49
63
|
self,
|
|
50
64
|
*,
|
|
@@ -55,6 +69,16 @@ class AssemblyAISTTService(STTService):
|
|
|
55
69
|
vad_force_turn_endpoint: bool = True,
|
|
56
70
|
**kwargs,
|
|
57
71
|
):
|
|
72
|
+
"""Initialize the AssemblyAI STT service.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
api_key: AssemblyAI API key for authentication.
|
|
76
|
+
language: Language code for transcription. Defaults to English (Language.EN).
|
|
77
|
+
api_endpoint_base_url: WebSocket endpoint URL. Defaults to AssemblyAI's streaming endpoint.
|
|
78
|
+
connection_params: Connection configuration parameters. Defaults to AssemblyAIConnectionParams().
|
|
79
|
+
vad_force_turn_endpoint: Whether to force turn endpoint on VAD stop. Defaults to True.
|
|
80
|
+
**kwargs: Additional arguments passed to parent STTService class.
|
|
81
|
+
"""
|
|
58
82
|
self._api_key = api_key
|
|
59
83
|
self._language = language
|
|
60
84
|
self._api_endpoint_base_url = api_endpoint_base_url
|
|
@@ -75,22 +99,50 @@ class AssemblyAISTTService(STTService):
|
|
|
75
99
|
self._chunk_size_bytes = 0
|
|
76
100
|
|
|
77
101
|
def can_generate_metrics(self) -> bool:
|
|
102
|
+
"""Check if the service can generate metrics.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
True if metrics generation is supported.
|
|
106
|
+
"""
|
|
78
107
|
return True
|
|
79
108
|
|
|
80
109
|
async def start(self, frame: StartFrame):
|
|
110
|
+
"""Start the speech-to-text service.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
frame: Start frame to begin processing.
|
|
114
|
+
"""
|
|
81
115
|
await super().start(frame)
|
|
82
116
|
self._chunk_size_bytes = int(self._chunk_size_ms * self._sample_rate * 2 / 1000)
|
|
83
117
|
await self._connect()
|
|
84
118
|
|
|
85
119
|
async def stop(self, frame: EndFrame):
|
|
120
|
+
"""Stop the speech-to-text service.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
frame: End frame to stop processing.
|
|
124
|
+
"""
|
|
86
125
|
await super().stop(frame)
|
|
87
126
|
await self._disconnect()
|
|
88
127
|
|
|
89
128
|
async def cancel(self, frame: CancelFrame):
|
|
129
|
+
"""Cancel the speech-to-text service.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
frame: Cancel frame to abort processing.
|
|
133
|
+
"""
|
|
90
134
|
await super().cancel(frame)
|
|
91
135
|
await self._disconnect()
|
|
92
136
|
|
|
93
137
|
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
|
|
138
|
+
"""Process audio data for speech-to-text conversion.
|
|
139
|
+
|
|
140
|
+
Args:
|
|
141
|
+
audio: Raw audio bytes to process.
|
|
142
|
+
|
|
143
|
+
Yields:
|
|
144
|
+
None (processing handled via WebSocket messages).
|
|
145
|
+
"""
|
|
94
146
|
self._audio_buffer.extend(audio)
|
|
95
147
|
|
|
96
148
|
while len(self._audio_buffer) >= self._chunk_size_bytes:
|
|
@@ -101,6 +153,12 @@ class AssemblyAISTTService(STTService):
|
|
|
101
153
|
yield None
|
|
102
154
|
|
|
103
155
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
156
|
+
"""Process frames for VAD and metrics handling.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
frame: Frame to process.
|
|
160
|
+
direction: Direction of frame processing.
|
|
161
|
+
"""
|
|
104
162
|
await super().process_frame(frame, direction)
|
|
105
163
|
if isinstance(frame, UserStartedSpeakingFrame):
|
|
106
164
|
await self.start_ttfb_metrics()
|
|
@@ -133,9 +191,9 @@ class AssemblyAISTTService(STTService):
|
|
|
133
191
|
"Authorization": self._api_key,
|
|
134
192
|
"User-Agent": f"AssemblyAI/1.0 (integration=Pipecat/{pipecat_version})",
|
|
135
193
|
}
|
|
136
|
-
self._websocket = await websockets.connect(
|
|
194
|
+
self._websocket = await websocket_connect(
|
|
137
195
|
ws_url,
|
|
138
|
-
|
|
196
|
+
additional_headers=headers,
|
|
139
197
|
)
|
|
140
198
|
self._connected = True
|
|
141
199
|
self._receive_task = self.create_task(self._receive_task_handler())
|
|
@@ -161,10 +219,7 @@ class AssemblyAISTTService(STTService):
|
|
|
161
219
|
await self._websocket.send(json.dumps({"type": "Terminate"}))
|
|
162
220
|
|
|
163
221
|
try:
|
|
164
|
-
await asyncio.wait_for(
|
|
165
|
-
self._termination_event.wait(),
|
|
166
|
-
timeout=5.0,
|
|
167
|
-
)
|
|
222
|
+
await asyncio.wait_for(self._termination_event.wait(), timeout=5.0)
|
|
168
223
|
except asyncio.TimeoutError:
|
|
169
224
|
logger.warning("Timed out waiting for termination message from server")
|
|
170
225
|
|
|
@@ -189,11 +244,9 @@ class AssemblyAISTTService(STTService):
|
|
|
189
244
|
try:
|
|
190
245
|
while self._connected:
|
|
191
246
|
try:
|
|
192
|
-
message = await
|
|
247
|
+
message = await self._websocket.recv()
|
|
193
248
|
data = json.loads(message)
|
|
194
249
|
await self._handle_message(data)
|
|
195
|
-
except asyncio.TimeoutError:
|
|
196
|
-
self.reset_watchdog()
|
|
197
250
|
except websockets.exceptions.ConnectionClosedOK:
|
|
198
251
|
break
|
|
199
252
|
except Exception as e:
|
|
@@ -254,7 +307,7 @@ class AssemblyAISTTService(STTService):
|
|
|
254
307
|
await self.push_frame(
|
|
255
308
|
TranscriptionFrame(
|
|
256
309
|
message.transcript,
|
|
257
|
-
|
|
310
|
+
self._user_id,
|
|
258
311
|
time_now_iso8601(),
|
|
259
312
|
self._language,
|
|
260
313
|
message,
|
|
@@ -266,7 +319,7 @@ class AssemblyAISTTService(STTService):
|
|
|
266
319
|
await self.push_frame(
|
|
267
320
|
InterimTranscriptionFrame(
|
|
268
321
|
message.transcript,
|
|
269
|
-
|
|
322
|
+
self._user_id,
|
|
270
323
|
time_now_iso8601(),
|
|
271
324
|
self._language,
|
|
272
325
|
message,
|
|
File without changes
|