dv-pipecat-ai 0.0.75.dev887__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl

This diff reflects the changes between publicly available package versions as released to their respective public registries, and is provided for informational purposes only.

This version of dv-pipecat-ai has been flagged as a potentially problematic release.

Files changed (121):
  1. {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/RECORD +121 -81
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  22. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  23. pipecat/audio/dtmf/types.py +47 -0
  24. pipecat/audio/dtmf/utils.py +70 -0
  25. pipecat/audio/filters/aic_filter.py +199 -0
  26. pipecat/audio/utils.py +9 -7
  27. pipecat/extensions/ivr/__init__.py +0 -0
  28. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  29. pipecat/frames/frames.py +156 -43
  30. pipecat/pipeline/llm_switcher.py +76 -0
  31. pipecat/pipeline/parallel_pipeline.py +3 -3
  32. pipecat/pipeline/service_switcher.py +144 -0
  33. pipecat/pipeline/task.py +68 -28
  34. pipecat/pipeline/task_observer.py +10 -0
  35. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  36. pipecat/processors/aggregators/llm_context.py +277 -0
  37. pipecat/processors/aggregators/llm_response.py +48 -15
  38. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  39. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  40. pipecat/processors/dtmf_aggregator.py +0 -2
  41. pipecat/processors/filters/stt_mute_filter.py +0 -2
  42. pipecat/processors/frame_processor.py +18 -11
  43. pipecat/processors/frameworks/rtvi.py +17 -10
  44. pipecat/processors/metrics/sentry.py +2 -0
  45. pipecat/runner/daily.py +137 -36
  46. pipecat/runner/run.py +1 -1
  47. pipecat/runner/utils.py +7 -7
  48. pipecat/serializers/asterisk.py +20 -4
  49. pipecat/serializers/exotel.py +1 -1
  50. pipecat/serializers/plivo.py +1 -1
  51. pipecat/serializers/telnyx.py +1 -1
  52. pipecat/serializers/twilio.py +1 -1
  53. pipecat/services/__init__.py +2 -2
  54. pipecat/services/anthropic/llm.py +113 -28
  55. pipecat/services/asyncai/tts.py +4 -0
  56. pipecat/services/aws/llm.py +82 -8
  57. pipecat/services/aws/tts.py +0 -10
  58. pipecat/services/aws_nova_sonic/aws.py +5 -0
  59. pipecat/services/azure/llm.py +77 -1
  60. pipecat/services/cartesia/tts.py +28 -16
  61. pipecat/services/cerebras/llm.py +15 -10
  62. pipecat/services/deepgram/stt.py +8 -0
  63. pipecat/services/deepseek/llm.py +13 -8
  64. pipecat/services/elevenlabs/__init__.py +2 -0
  65. pipecat/services/elevenlabs/stt.py +351 -0
  66. pipecat/services/fireworks/llm.py +13 -8
  67. pipecat/services/fish/tts.py +8 -6
  68. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  69. pipecat/services/gladia/config.py +7 -1
  70. pipecat/services/gladia/stt.py +23 -15
  71. pipecat/services/google/llm.py +159 -59
  72. pipecat/services/google/llm_openai.py +18 -3
  73. pipecat/services/grok/llm.py +2 -1
  74. pipecat/services/llm_service.py +38 -3
  75. pipecat/services/mem0/memory.py +2 -1
  76. pipecat/services/mistral/llm.py +5 -6
  77. pipecat/services/nim/llm.py +2 -1
  78. pipecat/services/openai/base_llm.py +88 -26
  79. pipecat/services/openai/image.py +6 -1
  80. pipecat/services/openai_realtime_beta/openai.py +5 -2
  81. pipecat/services/openpipe/llm.py +6 -8
  82. pipecat/services/perplexity/llm.py +13 -8
  83. pipecat/services/playht/tts.py +9 -6
  84. pipecat/services/rime/tts.py +1 -1
  85. pipecat/services/sambanova/llm.py +18 -13
  86. pipecat/services/sarvam/tts.py +415 -10
  87. pipecat/services/speechmatics/stt.py +4 -4
  88. pipecat/services/tavus/video.py +1 -1
  89. pipecat/services/tts_service.py +15 -5
  90. pipecat/services/vistaar/llm.py +2 -5
  91. pipecat/transports/base_input.py +32 -19
  92. pipecat/transports/base_output.py +39 -5
  93. pipecat/transports/daily/__init__.py +0 -0
  94. pipecat/transports/daily/transport.py +2371 -0
  95. pipecat/transports/daily/utils.py +410 -0
  96. pipecat/transports/livekit/__init__.py +0 -0
  97. pipecat/transports/livekit/transport.py +1042 -0
  98. pipecat/transports/network/fastapi_websocket.py +12 -546
  99. pipecat/transports/network/small_webrtc.py +12 -922
  100. pipecat/transports/network/webrtc_connection.py +9 -595
  101. pipecat/transports/network/websocket_client.py +12 -481
  102. pipecat/transports/network/websocket_server.py +12 -487
  103. pipecat/transports/services/daily.py +9 -2334
  104. pipecat/transports/services/helpers/daily_rest.py +12 -396
  105. pipecat/transports/services/livekit.py +12 -975
  106. pipecat/transports/services/tavus.py +12 -757
  107. pipecat/transports/smallwebrtc/__init__.py +0 -0
  108. pipecat/transports/smallwebrtc/connection.py +612 -0
  109. pipecat/transports/smallwebrtc/transport.py +936 -0
  110. pipecat/transports/tavus/__init__.py +0 -0
  111. pipecat/transports/tavus/transport.py +770 -0
  112. pipecat/transports/websocket/__init__.py +0 -0
  113. pipecat/transports/websocket/client.py +494 -0
  114. pipecat/transports/websocket/fastapi.py +559 -0
  115. pipecat/transports/websocket/server.py +500 -0
  116. pipecat/transports/whatsapp/__init__.py +0 -0
  117. pipecat/transports/whatsapp/api.py +345 -0
  118. pipecat/transports/whatsapp/client.py +364 -0
  119. {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/WHEEL +0 -0
  120. {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/licenses/LICENSE +0 -0
  121. {dv_pipecat_ai-0.0.75.dev887.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/top_level.txt +0 -0

pipecat/services/cartesia/tts.py
@@ -10,7 +10,7 @@ import base64
 import json
 import uuid
 import warnings
-from typing import AsyncGenerator, List, Optional, Union
+from typing import AsyncGenerator, List, Literal, Optional, Union
 
 from loguru import logger
 from pydantic import BaseModel, Field
@@ -102,7 +102,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control (string or float).
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
         .. deprecated:: 0.0.68
@@ -110,7 +110,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[Union[str, float]] = ""
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
        emotion: Optional[List[str]] = []
 
    def __init__(
@@ -272,11 +272,13 @@ class CartesiaTTSService(AudioContextWordTTSService):
             voice_config["id"] = self._voice_id
 
         if self._settings["emotion"]:
-            warnings.warn(
-                "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in __experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {}
             if self._settings["emotion"]:
                 voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"]
@@ -387,7 +389,7 @@
         await self._websocket.send(msg)
         self._context_id = None
 
-    async def _receive_messages(self):
+    async def _process_messages(self):
         async for message in self._get_websocket():
             msg = json.loads(message)
             if not msg or not self.audio_context_available(msg["context_id"]):
@@ -421,6 +423,14 @@
             else:
                 logger.error(f"{self} error, unknown message type: {msg}")
 
+    async def _receive_messages(self):
+        while True:
+            await self._process_messages()
+            # Cartesia times out after 5 minutes of inactivity (no keepalive
+            # mechanism is available). So, we try to reconnect.
+            logger.debug(f"{self} Cartesia connection was disconnected (timeout?), reconnecting")
+            await self._connect_websocket()
+
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         """Generate speech from text using Cartesia's streaming API.
@@ -472,7 +482,7 @@ class CartesiaHttpTTSService(TTSService):
 
         Parameters:
             language: Language to use for synthesis.
-            speed: Voice speed control (string or float).
+            speed: Voice speed control.
             emotion: List of emotion controls.
 
         .. deprecated:: 0.0.68
@@ -480,7 +490,7 @@ class CartesiaHttpTTSService(TTSService):
         """
 
         language: Optional[Language] = Language.EN
-        speed: Optional[Union[str, float]] = ""
+        speed: Optional[Literal["slow", "normal", "fast"]] = None
        emotion: Optional[List[str]] = Field(default_factory=list)
 
    def __init__(
@@ -600,11 +610,13 @@ class CartesiaHttpTTSService(TTSService):
         voice_config = {"mode": "id", "id": self._voice_id}
 
         if self._settings["emotion"]:
-            warnings.warn(
-                "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The 'emotion' parameter in voice.__experimental_controls is deprecated and will be removed in a future version.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]}
 
         await self.start_ttfb_metrics()

pipecat/services/cerebras/llm.py
@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -27,7 +26,7 @@ class CerebrasLLMService(OpenAILLMService):
         *,
         api_key: str,
         base_url: str = "https://api.cerebras.ai/v1",
-        model: str = "llama-3.3-70b",
+        model: str = "gpt-oss-120b",
         **kwargs,
     ):
         """Initialize the Cerebras LLM service.
@@ -35,7 +34,7 @@ class CerebrasLLMService(OpenAILLMService):
         Args:
             api_key: The API key for accessing Cerebras's API.
             base_url: The base URL for Cerebras API. Defaults to "https://api.cerebras.ai/v1".
-            model: The model identifier to use. Defaults to "llama-3.3-70b".
+            model: The model identifier to use. Defaults to "gpt-oss-120b".
             **kwargs: Additional keyword arguments passed to OpenAILLMService.
         """
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
@@ -54,25 +53,31 @@ class CerebrasLLMService(OpenAILLMService):
         logger.debug(f"Creating Cerebras client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Cerebras chat completion request.
 
         Cerebras supports a subset of OpenAI parameters, focusing on core
         completion settings without advanced features like frequency/presence penalties.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "seed": self._settings["seed"],
             "temperature": self._settings["temperature"],
             "top_p": self._settings["top_p"],
             "max_completion_tokens": self._settings["max_completion_tokens"],
         }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params
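
Note: this refactor (mirrored in the DeepSeek and Fireworks services below) replaces the (context, messages) argument pair with a single OpenAILLMInvocationParams mapping that carries messages, tools, and tool_choice, which subclasses fold in with dict.update. A hedged sketch of the merge; the literal values are placeholders, not defaults:

    # Shape of the context-derived parameters (inferred from the diff).
    params_from_context = {
        "messages": [{"role": "user", "content": "Hello!"}],
        "tools": [],           # OpenAI-style tool definitions, if any
        "tool_choice": "none",
    }

    params = {
        "model": "gpt-oss-120b",
        "stream": True,
        "temperature": 0.7,
    }
    params.update(params_from_context)  # context-derived keys win on overlap
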

pipecat/services/deepgram/stt.py
@@ -276,6 +276,14 @@ class DeepgramSTTService(STTService):
     async def _disconnect(self):
         if self._connection.is_connected:
             self.logger.debug("Disconnecting from Deepgram")
+            # Deepgram swallows asyncio.CancelledError internally, which
+            # prevents proper cancellation propagation. This was found with
+            # parallel pipelines: `CancelFrame` was not awaited to finish in
+            # all branches and was pushed downstream to the end of the
+            # pipeline, so `cleanup()` was called while the Deepgram
+            # disconnection was still finishing, which prevented the task
+            # cancellation that occurs during `cleanup()`.
+            # GH issue: https://github.com/deepgram/deepgram-python-sdk/issues/570
             await self._connection.finish()
 
     async def start_metrics(self):
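
Note: the comment documents a cancellation-propagation hazard: a library call that catches asyncio.CancelledError internally can eat a cancellation aimed at the caller. A generic, hedged sketch of one defensive pattern (this is not what the service does; it currently just documents the upstream issue):

    import asyncio

    async def await_without_eating_cancel(coro):
        # Run the suspect call as its own task and shield it: if the caller
        # is cancelled while awaiting, CancelledError is raised here and
        # propagates correctly, while the library call keeps running to
        # completion in the background instead of swallowing the cancel.
        task = asyncio.create_task(coro)
        await asyncio.shield(task)
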

pipecat/services/deepseek/llm.py
@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -54,19 +53,22 @@ class DeepSeekLLMService(OpenAILLMService):
         logger.debug(f"Creating DeepSeek client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def _build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def _build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for DeepSeek chat completion request.
 
         DeepSeek doesn't support some OpenAI parameters like seed and max_completion_tokens.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "stream_options": {"include_usage": True},
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
@@ -75,5 +77,8 @@ class DeepSeekLLMService(OpenAILLMService):
             "max_tokens": self._settings["max_tokens"],
         }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params

pipecat/services/elevenlabs/__init__.py
@@ -9,5 +9,7 @@ import sys
 from pipecat.services import DeprecatedModuleProxy
 
 from .tts import *
+from .stt import *
+# Old
 
 sys.modules[__name__] = DeprecatedModuleProxy(globals(), "elevenlabs", "elevenlabs.tts")

pipecat/services/elevenlabs/stt.py (new file)
@@ -0,0 +1,351 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""ElevenLabs speech-to-text service implementation."""
+
+import asyncio
+from typing import AsyncGenerator, Optional
+
+from loguru import logger
+
+from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
+from pipecat.services.stt_service import SegmentedSTTService
+from pipecat.transcriptions.language import Language
+from pipecat.utils.time import time_now_iso8601
+from pipecat.utils.tracing.service_decorators import traced_stt
+
+try:
+    from elevenlabs.client import ElevenLabs
+except ModuleNotFoundError as e:
+    logger.error(f"Exception: {e}")
+    logger.error("In order to use ElevenLabs, you need to `pip install pipecat-ai[elevenlabs]`.")
+    raise Exception(f"Missing module: {e}")
+
+
+def language_to_elevenlabs_language(language: Language) -> Optional[str]:
+    """Maps pipecat Language enum to ElevenLabs language codes.
+
+    Args:
+        language: A Language enum value representing the input language.
+
+    Returns:
+        str or None: The corresponding ElevenLabs language code, or None if not supported.
+    """
+    language_map = {
+        # English
+        Language.EN: "eng",
+        Language.EN_US: "eng",
+        Language.EN_GB: "eng",
+        Language.EN_AU: "eng",
+        Language.EN_CA: "eng",
+        Language.EN_IN: "eng",
+        Language.EN_IE: "eng",
+        Language.EN_NZ: "eng",
+        Language.EN_ZA: "eng",
+        Language.EN_SG: "eng",
+        Language.EN_HK: "eng",
+        Language.EN_PH: "eng",
+        Language.EN_KE: "eng",
+        Language.EN_NG: "eng",
+        Language.EN_TZ: "eng",
+        # Spanish
+        Language.ES: "spa",
+        Language.ES_ES: "spa",
+        Language.ES_MX: "spa",
+        Language.ES_AR: "spa",
+        Language.ES_CO: "spa",
+        Language.ES_CL: "spa",
+        Language.ES_VE: "spa",
+        Language.ES_PE: "spa",
+        Language.ES_EC: "spa",
+        Language.ES_GT: "spa",
+        Language.ES_CU: "spa",
+        Language.ES_BO: "spa",
+        Language.ES_DO: "spa",
+        Language.ES_HN: "spa",
+        Language.ES_PY: "spa",
+        Language.ES_SV: "spa",
+        Language.ES_NI: "spa",
+        Language.ES_CR: "spa",
+        Language.ES_PA: "spa",
+        Language.ES_UY: "spa",
+        Language.ES_PR: "spa",
+        Language.ES_US: "spa",
+        Language.ES_GQ: "spa",
+        # French
+        Language.FR: "fra",
+        Language.FR_FR: "fra",
+        Language.FR_CA: "fra",
+        Language.FR_BE: "fra",
+        Language.FR_CH: "fra",
+        # German
+        Language.DE: "deu",
+        Language.DE_DE: "deu",
+        Language.DE_AT: "deu",
+        Language.DE_CH: "deu",
+        # Italian
+        Language.IT: "ita",
+        Language.IT_IT: "ita",
+        # Portuguese
+        Language.PT: "por",
+        Language.PT_PT: "por",
+        Language.PT_BR: "por",
+        # Hindi
+        Language.HI: "hin",
+        Language.HI_IN: "hin",
+        # Arabic
+        Language.AR: "ara",
+        Language.AR_SA: "ara",
+        Language.AR_EG: "ara",
+        Language.AR_AE: "ara",
+        Language.AR_BH: "ara",
+        Language.AR_DZ: "ara",
+        Language.AR_IQ: "ara",
+        Language.AR_JO: "ara",
+        Language.AR_KW: "ara",
+        Language.AR_LB: "ara",
+        Language.AR_LY: "ara",
+        Language.AR_MA: "ara",
+        Language.AR_OM: "ara",
+        Language.AR_QA: "ara",
+        Language.AR_SY: "ara",
+        Language.AR_TN: "ara",
+        Language.AR_YE: "ara",
+        # Japanese
+        Language.JA: "jpn",
+        Language.JA_JP: "jpn",
+        # Korean
+        Language.KO: "kor",
+        Language.KO_KR: "kor",
+        # Chinese
+        Language.ZH: "cmn",
+        Language.ZH_CN: "cmn",
+        Language.ZH_TW: "cmn",
+        Language.ZH_HK: "cmn",
+        # Russian
+        Language.RU: "rus",
+        Language.RU_RU: "rus",
+        # Dutch
+        Language.NL: "nld",
+        Language.NL_NL: "nld",
+        Language.NL_BE: "nld",
+        # Polish
+        Language.PL: "pol",
+        Language.PL_PL: "pol",
+        # Turkish
+        Language.TR: "tur",
+        Language.TR_TR: "tur",
+        # Swedish
+        Language.SV: "swe",
+        Language.SV_SE: "swe",
+        # Norwegian
+        Language.NO: "nor",
+        Language.NB: "nor",
+        Language.NN: "nor",
+        # Danish
+        Language.DA: "dan",
+        Language.DA_DK: "dan",
+        # Finnish
+        Language.FI: "fin",
+        Language.FI_FI: "fin",
+        # Czech
+        Language.CS: "ces",
+        Language.CS_CZ: "ces",
+        # Hungarian
+        Language.HU: "hun",
+        Language.HU_HU: "hun",
+        # Greek
+        Language.EL: "ell",
+        Language.EL_GR: "ell",
+        # Hebrew
+        Language.HE: "heb",
+        Language.HE_IL: "heb",
+        # Thai
+        Language.TH: "tha",
+        Language.TH_TH: "tha",
+        # Vietnamese
+        Language.VI: "vie",
+        Language.VI_VN: "vie",
+        # Indonesian
+        Language.ID: "ind",
+        Language.ID_ID: "ind",
+        # Malay
+        Language.MS: "msa",
+        Language.MS_MY: "msa",
+        # Ukrainian
+        Language.UK: "ukr",
+        Language.UK_UA: "ukr",
+        # Bulgarian
+        Language.BG: "bul",
+        Language.BG_BG: "bul",
+        # Croatian
+        Language.HR: "hrv",
+        Language.HR_HR: "hrv",
+        # Slovak
+        Language.SK: "slk",
+        Language.SK_SK: "slk",
+        # Slovenian
+        Language.SL: "slv",
+        Language.SL_SI: "slv",
+        # Estonian
+        Language.ET: "est",
+        Language.ET_EE: "est",
+        # Latvian
+        Language.LV: "lav",
+        Language.LV_LV: "lav",
+        # Lithuanian
+        Language.LT: "lit",
+        Language.LT_LT: "lit",
+    }
+    return language_map.get(language)
+
+
+class ElevenlabsSTTService(SegmentedSTTService):
+    """ElevenLabs speech-to-text service using Scribe v1 model.
+
+    This service uses ElevenLabs' batch STT API to transcribe audio segments.
+    It extends SegmentedSTTService to handle VAD-based audio segmentation.
+
+    Args:
+        api_key: ElevenLabs API key for authentication.
+        model_id: Model to use for transcription (default: "scribe_v1").
+        language: Default language for transcription.
+        tag_audio_events: Whether to tag audio events like laughter (default: False).
+        diarize: Whether to enable speaker diarization (default: False).
+        **kwargs: Additional arguments passed to SegmentedSTTService.
+    """
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        model_id: str = "scribe_v1",
+        language: Language = Language.EN,
+        tag_audio_events: bool = False,
+        sample_rate: Optional[int] = None,
+        diarize: bool = False,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+
+        self._client = ElevenLabs(api_key=api_key)
+        self._model_id = model_id
+        self._tag_audio_events = tag_audio_events
+        self._diarize = diarize
+
+        self._settings = {
+            "language": language,
+            "model_id": self._model_id,
+            "tag_audio_events": self._tag_audio_events,
+            "diarize": self._diarize,
+        }
+        self.set_model_name(model_id)
+
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as ElevenLabs service supports metrics generation.
+        """
+        return True
+
+    def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert from pipecat Language to ElevenLabs language code.
+
+        Args:
+            language: The Language enum value to convert.
+
+        Returns:
+            str or None: The corresponding ElevenLabs language code, or None if not supported.
+        """
+        return language_to_elevenlabs_language(language)
+
+    async def set_language(self, language: Language):
+        """Set the language for transcription.
+
+        Args:
+            language: The Language enum value to use for transcription.
+        """
+        self.logger.info(f"Switching STT language to: [{language}]")
+        self._settings["language"] = language
+
+    @traced_stt
+    async def _handle_transcription(
+        self, transcript: str, is_final: bool, language: Optional[Language] = None
+    ):
+        """Handle a transcription result with tracing."""
+        pass
+
+    async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
+        """Transcribe the provided audio using ElevenLabs STT.
+
+        Args:
+            audio: Audio data (WAV format) to transcribe.
+
+        Yields:
+            Frame: TranscriptionFrame containing the transcribed text or ErrorFrame on failure.
+        """
+        try:
+            await self.start_processing_metrics()
+            await self.start_ttfb_metrics()
+
+            # Get language code for ElevenLabs API
+            language = self._settings["language"]
+            elevenlabs_lang = self.language_to_service_language(language)
+
+            # Prepare API parameters
+            params = {
+                "file": audio,
+                "model_id": self._model_id,
+                "tag_audio_events": self._tag_audio_events,
+                "diarize": self._diarize,
+            }
+
+            # Add language if specified
+            if elevenlabs_lang:
+                params["language_code"] = elevenlabs_lang
+
+            # Call ElevenLabs STT API in thread pool to avoid blocking
+            transcription = await asyncio.to_thread(self._client.speech_to_text.convert, **params)
+
+            await self.stop_ttfb_metrics()
+
+            # Process transcription result
+            if transcription and hasattr(transcription, "text") and transcription.text:
+                transcript_text = transcription.text.strip()
+
+                if transcript_text:
+                    # Determine language if available from response
+                    response_language = language
+                    if hasattr(transcription, "language_code") and transcription.language_code:
+                        # Try to map back from ElevenLabs language code to pipecat Language
+                        try:
+                            # This is a simplified mapping - you might want to create a reverse map
+                            response_language = language  # For now, keep the original
+                        except ValueError:
+                            self.logger.warning(
+                                f"Unknown language detected: {transcription.language_code}"
+                            )
+
+                    # Handle transcription with tracing
+                    await self._handle_transcription(transcript_text, True, response_language)
+
+                    self.logger.debug(f"ElevenLabs transcription: [{transcript_text}]")
+
+                    yield TranscriptionFrame(
+                        text=transcript_text,
+                        user_id="",
+                        timestamp=time_now_iso8601(),
+                        language=response_language,
+                        result=transcription,
+                    )
+
+            await self.stop_processing_metrics()
+
+        except Exception as e:
+            self.logger.error(f"ElevenLabs STT error: {e}")
+            await self.stop_all_metrics()
+            yield ErrorFrame(f"ElevenLabs STT error: {str(e)}")
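
Note: for orientation, a hedged usage sketch of the new service; the constructor arguments come from the class above, and the API key value is a placeholder:

    from pipecat.services.elevenlabs.stt import ElevenlabsSTTService
    from pipecat.transcriptions.language import Language

    stt = ElevenlabsSTTService(
        api_key="YOUR_ELEVENLABS_API_KEY",  # placeholder
        model_id="scribe_v1",
        language=Language.EN,
        tag_audio_events=False,
        diarize=False,
    )
    # As a SegmentedSTTService, it transcribes VAD-segmented audio and
    # yields TranscriptionFrame objects from run_stt().
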

pipecat/services/fireworks/llm.py
@@ -9,9 +9,8 @@
 from typing import List
 
 from loguru import logger
-from openai.types.chat import ChatCompletionMessageParam
 
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.services.openai.llm import OpenAILLMService
 
 
@@ -54,20 +53,23 @@ class FireworksLLMService(OpenAILLMService):
         logger.debug(f"Creating Fireworks client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Fireworks chat completion request.
 
         Fireworks doesn't support some OpenAI parameters like seed, max_completion_tokens,
         and stream_options.
+
+        Args:
+            params_from_context: Parameters, derived from the LLM context, to
+                use for the chat completion. Contains messages, tools, and tool
+                choice.
+
+        Returns:
+            Dictionary of parameters for the chat completion request.
         """
         params = {
             "model": self.model_name,
             "stream": True,
-            "messages": messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
             "frequency_penalty": self._settings["frequency_penalty"],
             "presence_penalty": self._settings["presence_penalty"],
             "temperature": self._settings["temperature"],
@@ -75,5 +77,8 @@ class FireworksLLMService(OpenAILLMService):
             "max_tokens": self._settings["max_tokens"],
         }
 
+        # Messages, tools, tool_choice
+        params.update(params_from_context)
+
         params.update(self._settings["extra"])
         return params

pipecat/services/fish/tts.py
@@ -120,12 +120,14 @@ class FishAudioTTSService(InterruptibleTTSService):
         if model:
             import warnings
 
-            warnings.warn(
-                "Parameter 'model' is deprecated and will be removed in a future version. "
-                "Use 'reference_id' instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "Parameter 'model' is deprecated and will be removed in a future version. "
+                    "Use 'reference_id' instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
             reference_id = model
 
         self._api_key = api_key

pipecat/services/gemini_multimodal_live/gemini.py
@@ -33,6 +33,7 @@ from pipecat.frames.frames import (
     InputAudioRawFrame,
     InputImageRawFrame,
     InputTextRawFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesAppendFrame,
@@ -738,6 +739,10 @@ class GeminiMultimodalLiveLLMService(LLMService):
             # Support just one tool call per context frame for now
             tool_result_message = context.messages[-1]
             await self._tool_result(tool_result_message)
+        elif isinstance(frame, LLMContextFrame):
+            raise NotImplementedError(
+                "Universal LLMContext is not yet supported for Gemini Multimodal Live."
+            )
         elif isinstance(frame, InputTextRawFrame):
             await self._send_user_text(frame.text)
             await self.push_frame(frame, direction)