dv-pipecat-ai 0.0.75.dev883__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/RECORD +121 -80
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +145 -0
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/azure/llm.py +77 -1
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/elevenlabs/__init__.py +2 -0
- pipecat/services/elevenlabs/stt.py +351 -0
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +4 -4
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""ElevenLabs speech-to-text service implementation."""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
from typing import AsyncGenerator, Optional
|
|
11
|
+
|
|
12
|
+
from loguru import logger
|
|
13
|
+
|
|
14
|
+
from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
|
|
15
|
+
from pipecat.services.stt_service import SegmentedSTTService
|
|
16
|
+
from pipecat.transcriptions.language import Language
|
|
17
|
+
from pipecat.utils.time import time_now_iso8601
|
|
18
|
+
from pipecat.utils.tracing.service_decorators import traced_stt
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
from elevenlabs.client import ElevenLabs
|
|
22
|
+
except ModuleNotFoundError as e:
|
|
23
|
+
logger.error(f"Exception: {e}")
|
|
24
|
+
logger.error("In order to use ElevenLabs, you need to `pip install pipecat-ai[elevenlabs]`.")
|
|
25
|
+
raise Exception(f"Missing module: {e}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def language_to_elevenlabs_language(language: Language) -> Optional[str]:
    """Translate a pipecat Language into the language code sent to ElevenLabs.

    Every regional variant listed below shares the code of its base language.

    Args:
        language: The pipecat Language enum member to translate.

    Returns:
        The ElevenLabs language code for ``language``, or None when the
        language has no entry in the table.
    """
    # Grouped by target code: each code lists every Language member that maps to it.
    variants_by_code = {
        "eng": (
            Language.EN,
            Language.EN_US,
            Language.EN_GB,
            Language.EN_AU,
            Language.EN_CA,
            Language.EN_IN,
            Language.EN_IE,
            Language.EN_NZ,
            Language.EN_ZA,
            Language.EN_SG,
            Language.EN_HK,
            Language.EN_PH,
            Language.EN_KE,
            Language.EN_NG,
            Language.EN_TZ,
        ),
        "spa": (
            Language.ES,
            Language.ES_ES,
            Language.ES_MX,
            Language.ES_AR,
            Language.ES_CO,
            Language.ES_CL,
            Language.ES_VE,
            Language.ES_PE,
            Language.ES_EC,
            Language.ES_GT,
            Language.ES_CU,
            Language.ES_BO,
            Language.ES_DO,
            Language.ES_HN,
            Language.ES_PY,
            Language.ES_SV,
            Language.ES_NI,
            Language.ES_CR,
            Language.ES_PA,
            Language.ES_UY,
            Language.ES_PR,
            Language.ES_US,
            Language.ES_GQ,
        ),
        "fra": (
            Language.FR,
            Language.FR_FR,
            Language.FR_CA,
            Language.FR_BE,
            Language.FR_CH,
        ),
        "deu": (Language.DE, Language.DE_DE, Language.DE_AT, Language.DE_CH),
        "ita": (Language.IT, Language.IT_IT),
        "por": (Language.PT, Language.PT_PT, Language.PT_BR),
        "hin": (Language.HI, Language.HI_IN),
        "ara": (
            Language.AR,
            Language.AR_SA,
            Language.AR_EG,
            Language.AR_AE,
            Language.AR_BH,
            Language.AR_DZ,
            Language.AR_IQ,
            Language.AR_JO,
            Language.AR_KW,
            Language.AR_LB,
            Language.AR_LY,
            Language.AR_MA,
            Language.AR_OM,
            Language.AR_QA,
            Language.AR_SY,
            Language.AR_TN,
            Language.AR_YE,
        ),
        "jpn": (Language.JA, Language.JA_JP),
        "kor": (Language.KO, Language.KO_KR),
        "cmn": (Language.ZH, Language.ZH_CN, Language.ZH_TW, Language.ZH_HK),
        "rus": (Language.RU, Language.RU_RU),
        "nld": (Language.NL, Language.NL_NL, Language.NL_BE),
        "pol": (Language.PL, Language.PL_PL),
        "tur": (Language.TR, Language.TR_TR),
        "swe": (Language.SV, Language.SV_SE),
        "nor": (Language.NO, Language.NB, Language.NN),
        "dan": (Language.DA, Language.DA_DK),
        "fin": (Language.FI, Language.FI_FI),
        "ces": (Language.CS, Language.CS_CZ),
        "hun": (Language.HU, Language.HU_HU),
        "ell": (Language.EL, Language.EL_GR),
        "heb": (Language.HE, Language.HE_IL),
        "tha": (Language.TH, Language.TH_TH),
        "vie": (Language.VI, Language.VI_VN),
        "ind": (Language.ID, Language.ID_ID),
        "msa": (Language.MS, Language.MS_MY),
        "ukr": (Language.UK, Language.UK_UA),
        "bul": (Language.BG, Language.BG_BG),
        "hrv": (Language.HR, Language.HR_HR),
        "slk": (Language.SK, Language.SK_SK),
        "slv": (Language.SL, Language.SL_SI),
        "est": (Language.ET, Language.ET_EE),
        "lav": (Language.LV, Language.LV_LV),
        "lit": (Language.LT, Language.LT_LT),
    }

    # Invert the grouping into a flat Language -> code table for the lookup.
    code_for_language = {
        variant: code for code, variants in variants_by_code.items() for variant in variants
    }
    return code_for_language.get(language)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class ElevenlabsSTTService(SegmentedSTTService):
    """ElevenLabs speech-to-text service using Scribe v1 model.

    This service uses ElevenLabs' batch STT API to transcribe audio segments.
    It extends SegmentedSTTService to handle VAD-based audio segmentation.

    Args:
        api_key: ElevenLabs API key for authentication.
        model_id: Model to use for transcription (default: "scribe_v1").
        language: Default language for transcription.
        tag_audio_events: Whether to tag audio events like laughter (default: False).
        sample_rate: Audio sample rate passed on to SegmentedSTTService
            (default: None).
        diarize: Whether to enable speaker diarization (default: False).
        **kwargs: Additional arguments passed to SegmentedSTTService.
    """

    def __init__(
        self,
        *,
        api_key: str,
        model_id: str = "scribe_v1",
        language: Language = Language.EN,
        tag_audio_events: bool = False,
        sample_rate: Optional[int] = None,
        diarize: bool = False,
        **kwargs,
    ):
        # sample_rate is a named parameter here, so it is not part of **kwargs;
        # forward it explicitly or the caller's value never reaches the base class.
        super().__init__(sample_rate=sample_rate, **kwargs)

        self._client = ElevenLabs(api_key=api_key)
        self._model_id = model_id
        self._tag_audio_events = tag_audio_events
        self._diarize = diarize

        self._settings = {
            "language": language,
            "model_id": self._model_id,
            "tag_audio_events": self._tag_audio_events,
            "diarize": self._diarize,
        }
        self.set_model_name(model_id)

    def can_generate_metrics(self) -> bool:
        """Check if this service can generate processing metrics.

        Returns:
            True, as ElevenLabs service supports metrics generation.
        """
        return True

    def language_to_service_language(self, language: Language) -> Optional[str]:
        """Convert from pipecat Language to ElevenLabs language code.

        Args:
            language: The Language enum value to convert.

        Returns:
            str or None: The corresponding ElevenLabs language code, or None if not supported.
        """
        return language_to_elevenlabs_language(language)

    async def set_language(self, language: Language):
        """Set the language for transcription.

        The new value is read by the next ``run_stt`` call.

        Args:
            language: The Language enum value to use for transcription.
        """
        self.logger.info(f"Switching STT language to: [{language}]")
        self._settings["language"] = language

    @traced_stt
    async def _handle_transcription(
        self, transcript: str, is_final: bool, language: Optional[Language] = None
    ):
        """Handle a transcription result with tracing.

        The body is intentionally empty; the method exists so the
        ``traced_stt`` decorator is invoked for each transcription.
        """
        pass

    async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
        """Transcribe the provided audio using ElevenLabs STT.

        Args:
            audio: Audio data (WAV format) to transcribe.

        Yields:
            Frame: TranscriptionFrame containing the transcribed text or ErrorFrame on failure.
        """
        try:
            await self.start_processing_metrics()
            await self.start_ttfb_metrics()

            language = self._settings["language"]

            # Prepare API parameters
            params = {
                "file": audio,
                "model_id": self._model_id,
                "tag_audio_events": self._tag_audio_events,
                "diarize": self._diarize,
            }

            # Languages without a mapping are sent without a language_code.
            elevenlabs_lang = self.language_to_service_language(language)
            if elevenlabs_lang:
                params["language_code"] = elevenlabs_lang

            # The SDK call is synchronous; run it in a worker thread so the
            # event loop is not blocked while waiting for the response.
            transcription = await asyncio.to_thread(self._client.speech_to_text.convert, **params)

            await self.stop_ttfb_metrics()

            # Treat a missing response, a missing/empty text field and
            # whitespace-only text the same way: nothing to emit.
            raw_text = getattr(transcription, "text", None) if transcription else None
            transcript_text = raw_text.strip() if raw_text else ""

            if transcript_text:
                # The language_code reported in the response is not mapped back
                # to a pipecat Language; the configured language is reported.
                response_language = language

                # Handle transcription with tracing
                await self._handle_transcription(transcript_text, True, response_language)

                self.logger.debug(f"ElevenLabs transcription: [{transcript_text}]")

                yield TranscriptionFrame(
                    text=transcript_text,
                    user_id="",
                    timestamp=time_now_iso8601(),
                    language=response_language,
                    result=transcription,
                )

            await self.stop_processing_metrics()

        except Exception as e:
            # Report the failure downstream as a frame instead of raising.
            self.logger.error(f"ElevenLabs STT error: {e}")
            await self.stop_all_metrics()
            yield ErrorFrame(f"ElevenLabs STT error: {str(e)}")
|
|
@@ -9,9 +9,8 @@
|
|
|
9
9
|
from typing import List
|
|
10
10
|
|
|
11
11
|
from loguru import logger
|
|
12
|
-
from openai.types.chat import ChatCompletionMessageParam
|
|
13
12
|
|
|
14
|
-
from pipecat.
|
|
13
|
+
from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
|
|
15
14
|
from pipecat.services.openai.llm import OpenAILLMService
|
|
16
15
|
|
|
17
16
|
|
|
@@ -54,20 +53,23 @@ class FireworksLLMService(OpenAILLMService):
|
|
|
54
53
|
logger.debug(f"Creating Fireworks client with api {base_url}")
|
|
55
54
|
return super().create_client(api_key, base_url, **kwargs)
|
|
56
55
|
|
|
57
|
-
def build_chat_completion_params(
|
|
58
|
-
self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
|
|
59
|
-
) -> dict:
|
|
56
|
+
def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
|
|
60
57
|
"""Build parameters for Fireworks chat completion request.
|
|
61
58
|
|
|
62
59
|
Fireworks doesn't support some OpenAI parameters like seed, max_completion_tokens,
|
|
63
60
|
and stream_options.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
params_from_context: Parameters, derived from the LLM context, to
|
|
64
|
+
use for the chat completion. Contains messages, tools, and tool
|
|
65
|
+
choice.
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
Dictionary of parameters for the chat completion request.
|
|
64
69
|
"""
|
|
65
70
|
params = {
|
|
66
71
|
"model": self.model_name,
|
|
67
72
|
"stream": True,
|
|
68
|
-
"messages": messages,
|
|
69
|
-
"tools": context.tools,
|
|
70
|
-
"tool_choice": context.tool_choice,
|
|
71
73
|
"frequency_penalty": self._settings["frequency_penalty"],
|
|
72
74
|
"presence_penalty": self._settings["presence_penalty"],
|
|
73
75
|
"temperature": self._settings["temperature"],
|
|
@@ -75,5 +77,8 @@ class FireworksLLMService(OpenAILLMService):
|
|
|
75
77
|
"max_tokens": self._settings["max_tokens"],
|
|
76
78
|
}
|
|
77
79
|
|
|
80
|
+
# Messages, tools, tool_choice
|
|
81
|
+
params.update(params_from_context)
|
|
82
|
+
|
|
78
83
|
params.update(self._settings["extra"])
|
|
79
84
|
return params
|
pipecat/services/fish/tts.py
CHANGED
|
@@ -120,12 +120,14 @@ class FishAudioTTSService(InterruptibleTTSService):
|
|
|
120
120
|
if model:
|
|
121
121
|
import warnings
|
|
122
122
|
|
|
123
|
-
warnings.
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
123
|
+
with warnings.catch_warnings():
|
|
124
|
+
warnings.simplefilter("always")
|
|
125
|
+
warnings.warn(
|
|
126
|
+
"Parameter 'model' is deprecated and will be removed in a future version. "
|
|
127
|
+
"Use 'reference_id' instead.",
|
|
128
|
+
DeprecationWarning,
|
|
129
|
+
stacklevel=2,
|
|
130
|
+
)
|
|
129
131
|
reference_id = model
|
|
130
132
|
|
|
131
133
|
self._api_key = api_key
|
|
@@ -33,6 +33,7 @@ from pipecat.frames.frames import (
|
|
|
33
33
|
InputAudioRawFrame,
|
|
34
34
|
InputImageRawFrame,
|
|
35
35
|
InputTextRawFrame,
|
|
36
|
+
LLMContextFrame,
|
|
36
37
|
LLMFullResponseEndFrame,
|
|
37
38
|
LLMFullResponseStartFrame,
|
|
38
39
|
LLMMessagesAppendFrame,
|
|
@@ -738,6 +739,10 @@ class GeminiMultimodalLiveLLMService(LLMService):
|
|
|
738
739
|
# Support just one tool call per context frame for now
|
|
739
740
|
tool_result_message = context.messages[-1]
|
|
740
741
|
await self._tool_result(tool_result_message)
|
|
742
|
+
elif isinstance(frame, LLMContextFrame):
|
|
743
|
+
raise NotImplementedError(
|
|
744
|
+
"Universal LLMContext is not yet supported for Gemini Multimodal Live."
|
|
745
|
+
)
|
|
741
746
|
elif isinstance(frame, InputTextRawFrame):
|
|
742
747
|
await self._send_user_text(frame.text)
|
|
743
748
|
await self.push_frame(frame, direction)
|
|
@@ -29,9 +29,11 @@ class PreProcessingConfig(BaseModel):
|
|
|
29
29
|
"""Configuration for audio pre-processing options.
|
|
30
30
|
|
|
31
31
|
Parameters:
|
|
32
|
+
audio_enhancer: Apply pre-processing to the audio stream to enhance quality
|
|
32
33
|
speech_threshold: Sensitivity for speech detection (0-1)
|
|
33
34
|
"""
|
|
34
35
|
|
|
36
|
+
audio_enhancer: Optional[bool] = None
|
|
35
37
|
speech_threshold: Optional[float] = None
|
|
36
38
|
|
|
37
39
|
|
|
@@ -41,10 +43,14 @@ class CustomVocabularyItem(BaseModel):
|
|
|
41
43
|
Parameters:
|
|
42
44
|
value: The vocabulary word or phrase
|
|
43
45
|
intensity: The bias intensity for this vocabulary item (0-1)
|
|
46
|
+
pronunciations: The pronunciations used in the transcription.
|
|
47
|
+
language: Specify the language in which it will be pronounced when sound comparison occurs. Default to transcription language.
|
|
44
48
|
"""
|
|
45
49
|
|
|
46
50
|
value: str
|
|
47
51
|
intensity: float
|
|
52
|
+
pronunciations: Optional[List[str]] = None
|
|
53
|
+
language: Optional[str] = None
|
|
48
54
|
|
|
49
55
|
|
|
50
56
|
class CustomVocabularyConfig(BaseModel):
|
|
@@ -170,7 +176,7 @@ class GladiaInputParams(BaseModel):
|
|
|
170
176
|
channels: Optional[int] = 1
|
|
171
177
|
custom_metadata: Optional[Dict[str, Any]] = None
|
|
172
178
|
endpointing: Optional[float] = None
|
|
173
|
-
maximum_duration_without_endpointing: Optional[int] =
|
|
179
|
+
maximum_duration_without_endpointing: Optional[int] = 5
|
|
174
180
|
language: Optional[Language] = None # Deprecated
|
|
175
181
|
language_config: Optional[LanguageConfig] = None
|
|
176
182
|
pre_processing: Optional[PreProcessingConfig] = None
|
pipecat/services/gladia/stt.py
CHANGED
|
@@ -14,11 +14,12 @@ import asyncio
|
|
|
14
14
|
import base64
|
|
15
15
|
import json
|
|
16
16
|
import warnings
|
|
17
|
-
from typing import Any, AsyncGenerator, Dict,
|
|
17
|
+
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional
|
|
18
18
|
|
|
19
19
|
import aiohttp
|
|
20
20
|
from loguru import logger
|
|
21
21
|
|
|
22
|
+
from pipecat import __version__ as pipecat_version
|
|
22
23
|
from pipecat.frames.frames import (
|
|
23
24
|
CancelFrame,
|
|
24
25
|
EndFrame,
|
|
@@ -179,12 +180,16 @@ class _InputParamsDescriptor:
|
|
|
179
180
|
"""Descriptor for backward compatibility with deprecation warning."""
|
|
180
181
|
|
|
181
182
|
def __get__(self, obj, objtype=None):
|
|
182
|
-
warnings
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
183
|
+
import warnings
|
|
184
|
+
|
|
185
|
+
with warnings.catch_warnings():
|
|
186
|
+
warnings.simplefilter("always")
|
|
187
|
+
warnings.warn(
|
|
188
|
+
"GladiaSTTService.InputParams is deprecated and will be removed in a future version. "
|
|
189
|
+
"Import and use GladiaInputParams directly instead.",
|
|
190
|
+
DeprecationWarning,
|
|
191
|
+
stacklevel=2,
|
|
192
|
+
)
|
|
188
193
|
return GladiaInputParams
|
|
189
194
|
|
|
190
195
|
|
|
@@ -241,12 +246,14 @@ class GladiaSTTService(STTService):
|
|
|
241
246
|
|
|
242
247
|
# Warn about deprecated language parameter if it's used
|
|
243
248
|
if params.language is not None:
|
|
244
|
-
warnings.
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
249
|
+
with warnings.catch_warnings():
|
|
250
|
+
warnings.simplefilter("always")
|
|
251
|
+
warnings.warn(
|
|
252
|
+
"The 'language' parameter is deprecated and will be removed in a future version. "
|
|
253
|
+
"Use 'language_config' instead.",
|
|
254
|
+
DeprecationWarning,
|
|
255
|
+
stacklevel=2,
|
|
256
|
+
)
|
|
250
257
|
|
|
251
258
|
self._api_key = api_key
|
|
252
259
|
self._region = region
|
|
@@ -322,8 +329,8 @@ class GladiaSTTService(STTService):
|
|
|
322
329
|
}
|
|
323
330
|
|
|
324
331
|
# Add custom_metadata if provided
|
|
325
|
-
|
|
326
|
-
|
|
332
|
+
settings["custom_metadata"] = dict(self._params.custom_metadata or {})
|
|
333
|
+
settings["custom_metadata"]["pipecat"] = pipecat_version
|
|
327
334
|
|
|
328
335
|
# Add endpointing parameters if provided
|
|
329
336
|
if self._params.endpointing is not None:
|
|
@@ -449,6 +456,7 @@ class GladiaSTTService(STTService):
|
|
|
449
456
|
response = await self._setup_gladia(settings)
|
|
450
457
|
self._session_url = response["url"]
|
|
451
458
|
self._reconnection_attempts = 0
|
|
459
|
+
logger.info(f"Session URL : {self._session_url}")
|
|
452
460
|
|
|
453
461
|
# Connect with automatic reconnection
|
|
454
462
|
async with websocket_connect(self._session_url) as websocket:
|