dv-pipecat-ai 0.0.85.dev825__py3-none-any.whl → 0.0.85.dev831__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.85.dev825.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/METADATA +1 -1
- {dv_pipecat_ai-0.0.85.dev825.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/RECORD +10 -10
- pipecat/frames/frames.py +1 -0
- pipecat/processors/aggregators/llm_response.py +12 -1
- pipecat/processors/transcript_processor.py +2 -1
- pipecat/services/cartesia/tts.py +48 -10
- pipecat/services/salesforce/llm.py +64 -59
- {dv_pipecat_ai-0.0.85.dev825.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev825.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev825.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
dv_pipecat_ai-0.0.85.
|
|
1
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
2
2
|
pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
|
|
3
3
|
pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -79,7 +79,7 @@ pipecat/extensions/voicemail/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
79
79
|
pipecat/extensions/voicemail/voicemail_detector.py,sha256=JxmU2752iWP_1_GmzZReNESUTFAeyEa4XBPL20_C208,30004
|
|
80
80
|
pipecat/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
81
|
pipecat/frames/frames.proto,sha256=JXZm3VXLR8zMOUcOuhVoe2mhM3MQIQGMJXLopdJO_5Y,839
|
|
82
|
-
pipecat/frames/frames.py,sha256=
|
|
82
|
+
pipecat/frames/frames.py,sha256=vuYtmyK1QSU2AWx2c_pFQhcmpXqSTnfqAXF6DXKzTG8,49605
|
|
83
83
|
pipecat/frames/protobufs/frames_pb2.py,sha256=VHgGV_W7qQ4sfQK6RHb5_DggLm3PiSYMr6aBZ8_p1cQ,2590
|
|
84
84
|
pipecat/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
85
85
|
pipecat/metrics/metrics.py,sha256=bdZNciEtLTtA-xgoKDz2RJAy6fKrXkTwz3pryVHzc2M,2713
|
|
@@ -113,7 +113,7 @@ pipecat/processors/idle_frame_processor.py,sha256=z8AuhGap61lA5K35P6XCaOpn4kkmK_
|
|
|
113
113
|
pipecat/processors/logger.py,sha256=8xa4KKekXQIETlQR7zoGnwUpLNo8CeDVm7YjyXePN-w,2385
|
|
114
114
|
pipecat/processors/producer_processor.py,sha256=iIIOHZd77APvUGP7JqFbznAHUnCULcq_qYiSEjwXHcc,3265
|
|
115
115
|
pipecat/processors/text_transformer.py,sha256=LnfWJYzntJhZhrQ1lgSSY4D4VbHtrQJgrC227M69ZYU,1718
|
|
116
|
-
pipecat/processors/transcript_processor.py,sha256=
|
|
116
|
+
pipecat/processors/transcript_processor.py,sha256=TbMSeZCxXgQIdYodXuMSwLp6BvXTy7vQB9YiQZfPxc0,12488
|
|
117
117
|
pipecat/processors/two_stage_user_idle_processor.py,sha256=uf2aZh_lfW-eMxmFogP3R4taAJ1yXOSqjKsR7oXtD0Y,2938
|
|
118
118
|
pipecat/processors/user_idle_processor.py,sha256=Dl-Kcg0B4JZqWXXiyGuvYszGimbu2oKOyOJC92R9_hE,9140
|
|
119
119
|
pipecat/processors/aggregators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -122,7 +122,7 @@ pipecat/processors/aggregators/gated.py,sha256=tii0sRrBkRW6y9Xq5iTWPnqlOEejU4VqP
|
|
|
122
122
|
pipecat/processors/aggregators/gated_llm_context.py,sha256=CPv6sMA8irD1zZ3fU1gSv6D7qcPvCA0MdpFhBtJ_ekI,3007
|
|
123
123
|
pipecat/processors/aggregators/gated_open_ai_llm_context.py,sha256=DgqmdPj1u3fP_SVmxtfP7NjHqnyhN_RVVTDfmjbkxAs,361
|
|
124
124
|
pipecat/processors/aggregators/llm_context.py,sha256=wNbZA0Vt0FzNc5cu06xiv1z7DIClIlfqR1ZD8EusbVw,11085
|
|
125
|
-
pipecat/processors/aggregators/llm_response.py,sha256=
|
|
125
|
+
pipecat/processors/aggregators/llm_response.py,sha256=V6wBTzfUGLJfMuI34fkf5VTR0I66AWIW8btxKI8_3IM,48795
|
|
126
126
|
pipecat/processors/aggregators/llm_response_universal.py,sha256=5PqmpATpekD8BVWyBExZgatKHsNbZem8M-A7_VwTbiQ,34334
|
|
127
127
|
pipecat/processors/aggregators/openai_llm_context.py,sha256=cC8DXdVPERRN04i0i-1Ys6kusvnbMALeH-Z8Pu5K684,12999
|
|
128
128
|
pipecat/processors/aggregators/sentence.py,sha256=E7e3knfQl6HEGpYMKPklF1aO_gOn-rr7SnynErwfkQk,2235
|
|
@@ -205,7 +205,7 @@ pipecat/services/azure/realtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
205
205
|
pipecat/services/azure/realtime/llm.py,sha256=MnDiw-YJP3kll1gbkta4z4vsWfWZ5oBprZCinMP9O0M,2385
|
|
206
206
|
pipecat/services/cartesia/__init__.py,sha256=vzh0jBnfPwWdxFfV-tu0x1HFoOTgr9s91GYmD-CJUtY,284
|
|
207
207
|
pipecat/services/cartesia/stt.py,sha256=00k9gQYo_xPKb-RRJ-RNV4LPFw-7xXiFU7ACFLYttWY,12388
|
|
208
|
-
pipecat/services/cartesia/tts.py,sha256=
|
|
208
|
+
pipecat/services/cartesia/tts.py,sha256=Fh6hm5AUj2rNX8J4UOjHA7uAPIGcie1Dyxv5WBvV1OY,26279
|
|
209
209
|
pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
|
|
210
210
|
pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
|
|
211
211
|
pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
|
|
@@ -318,7 +318,7 @@ pipecat/services/riva/__init__.py,sha256=rObSsj504O_TMXhPBg_ymqKslZBhovlR-A0aaRZ
|
|
|
318
318
|
pipecat/services/riva/stt.py,sha256=bAss4dimx8eideaSPmPHM15_rSV3tfXNf13o5n1mfv4,25146
|
|
319
319
|
pipecat/services/riva/tts.py,sha256=idbqx3I2NlWCXtrIFsjEaYapxA3BLIA14ai3aMBh-2w,8158
|
|
320
320
|
pipecat/services/salesforce/__init__.py,sha256=OFvYbcvCadYhcKdBAVLj3ZUXVXQ1HyVyhgxIFf6_Thg,173
|
|
321
|
-
pipecat/services/salesforce/llm.py,sha256=
|
|
321
|
+
pipecat/services/salesforce/llm.py,sha256=2wh4U5mBElj5Olze2L0jC6V-UjFDC8ZEXlAKu0ORTwI,27825
|
|
322
322
|
pipecat/services/sambanova/__init__.py,sha256=oTXExLic-qTcsfsiWmssf3Elclf3IIWoN41_2IpoF18,128
|
|
323
323
|
pipecat/services/sambanova/llm.py,sha256=5XVfPLEk__W8ykFqLdV95ZUhlGGkAaJwmbciLdZYtTc,8976
|
|
324
324
|
pipecat/services/sambanova/stt.py,sha256=ZZgEZ7WQjLFHbCko-3LNTtVajjtfUvbtVLtFcaNadVQ,2536
|
|
@@ -415,7 +415,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
|
|
|
415
415
|
pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
|
|
416
416
|
pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
|
|
417
417
|
pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
|
|
418
|
-
dv_pipecat_ai-0.0.85.
|
|
419
|
-
dv_pipecat_ai-0.0.85.
|
|
420
|
-
dv_pipecat_ai-0.0.85.
|
|
421
|
-
dv_pipecat_ai-0.0.85.
|
|
418
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/METADATA,sha256=5ahEs864DAPIEEiiv7-7Oa-vRhRN1Ede341NuqED3Sw,32924
|
|
419
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
420
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
421
|
+
dv_pipecat_ai-0.0.85.dev831.dist-info/RECORD,,
|
pipecat/frames/frames.py
CHANGED
|
@@ -451,6 +451,7 @@ class TranscriptionMessage:
|
|
|
451
451
|
content: The message content/text.
|
|
452
452
|
user_id: Optional identifier for the user.
|
|
453
453
|
timestamp: Optional timestamp when the message was created.
|
|
454
|
+
message_id: Optional unique identifier for tracking and dropping messages.
|
|
454
455
|
"""
|
|
455
456
|
|
|
456
457
|
role: Literal["user", "assistant"]
|
|
@@ -551,7 +551,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
|
|
551
551
|
await self._process_aggregation()
|
|
552
552
|
else:
|
|
553
553
|
self.logger.debug("Interruption conditions not met - not pushing aggregation")
|
|
554
|
-
# Don't process aggregation,
|
|
554
|
+
# Don't process aggregation, discard pending transcriptions and reset
|
|
555
|
+
await self._discard_pending_transcriptions("interruption_conditions_not_met")
|
|
555
556
|
await self.reset()
|
|
556
557
|
else:
|
|
557
558
|
if trigger_interruption:
|
|
@@ -628,8 +629,17 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
|
|
628
629
|
for s in self.interruption_strategies:
|
|
629
630
|
await s.append_audio(frame.audio, frame.sample_rate)
|
|
630
631
|
|
|
632
|
+
async def _discard_pending_transcriptions(self, reason: str):
|
|
633
|
+
"""Notify upstream processors that pending transcripts should be dropped."""
|
|
634
|
+
if self._pending_transcription_ids:
|
|
635
|
+
drop_frame = TranscriptDropFrame(transcript_ids=list(self._pending_transcription_ids))
|
|
636
|
+
await self.push_frame(drop_frame, FrameDirection.UPSTREAM)
|
|
637
|
+
self._pending_transcription_ids.clear()
|
|
638
|
+
|
|
631
639
|
async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame):
|
|
632
640
|
if len(self._aggregation) > 0:
|
|
641
|
+
self.logger.debug(f"Dropping {self._aggregation}")
|
|
642
|
+
self._aggregation = ""
|
|
633
643
|
await self._discard_pending_transcriptions("user_started_speaking")
|
|
634
644
|
self._latest_final_transcript = ""
|
|
635
645
|
self._last_user_speaking_time = time.time()
|
|
@@ -805,6 +815,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
|
|
805
815
|
if self._bot_speaking and not self._params.enable_emulated_vad_interruptions:
|
|
806
816
|
# If emulated VAD interruptions are disabled and bot is speaking, ignore
|
|
807
817
|
logger.debug("Ignoring user speaking emulation, bot is speaking.")
|
|
818
|
+
await self._discard_pending_transcriptions("emulated_vad_ignored")
|
|
808
819
|
await self.reset()
|
|
809
820
|
else:
|
|
810
821
|
# Either bot is not speaking, or emulated VAD interruptions are enabled
|
|
@@ -64,12 +64,13 @@ class BaseTranscriptProcessor(FrameProcessor):
|
|
|
64
64
|
if not frame.transcript_ids:
|
|
65
65
|
return
|
|
66
66
|
|
|
67
|
+
await self._call_event_handler("on_transcript_drop", frame)
|
|
68
|
+
|
|
67
69
|
drop_ids = set(frame.transcript_ids)
|
|
68
70
|
if drop_ids:
|
|
69
71
|
self._processed_messages = [
|
|
70
72
|
msg for msg in self._processed_messages if msg.message_id not in drop_ids
|
|
71
73
|
]
|
|
72
|
-
await self._call_event_handler("on_transcript_drop", frame)
|
|
73
74
|
|
|
74
75
|
|
|
75
76
|
class UserTranscriptProcessor(BaseTranscriptProcessor):
|
pipecat/services/cartesia/tts.py
CHANGED
|
@@ -15,7 +15,6 @@ from typing import AsyncGenerator, List, Literal, Optional, Union
|
|
|
15
15
|
from loguru import logger
|
|
16
16
|
from pydantic import BaseModel, Field
|
|
17
17
|
|
|
18
|
-
|
|
19
18
|
from pipecat.frames.frames import (
|
|
20
19
|
CancelFrame,
|
|
21
20
|
EndFrame,
|
|
@@ -49,6 +48,26 @@ except ModuleNotFoundError as e:
|
|
|
49
48
|
raise Exception(f"Missing module: {e}")
|
|
50
49
|
|
|
51
50
|
|
|
51
|
+
class GenerationConfig(BaseModel):
|
|
52
|
+
"""Configuration for Cartesia Sonic-3 generation parameters.
|
|
53
|
+
|
|
54
|
+
Sonic-3 interprets these parameters as guidance to ensure natural speech.
|
|
55
|
+
Test against your content for best results.
|
|
56
|
+
|
|
57
|
+
Parameters:
|
|
58
|
+
volume: Volume multiplier for generated speech. Valid range: [0.5, 2.0]. Default is 1.0.
|
|
59
|
+
speed: Speed multiplier for generated speech. Valid range: [0.6, 1.5]. Default is 1.0.
|
|
60
|
+
emotion: Single emotion string to guide the emotional tone. Examples include neutral,
|
|
61
|
+
angry, excited, content, sad, scared. Over 60 emotions are supported. For best
|
|
62
|
+
results, use with recommended voices: Leo, Jace, Kyle, Gavin, Maya, Tessa, Dana,
|
|
63
|
+
and Marian.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
volume: Optional[float] = None
|
|
67
|
+
speed: Optional[float] = None
|
|
68
|
+
emotion: Optional[str] = None
|
|
69
|
+
|
|
70
|
+
|
|
52
71
|
def language_to_cartesia_language(language: Language) -> Optional[str]:
|
|
53
72
|
"""Convert a Language enum to Cartesia language code.
|
|
54
73
|
|
|
@@ -102,16 +121,20 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
102
121
|
|
|
103
122
|
Parameters:
|
|
104
123
|
language: Language to use for synthesis.
|
|
105
|
-
speed: Voice speed control.
|
|
106
|
-
emotion: List of emotion controls.
|
|
124
|
+
speed: Voice speed control for non-Sonic-3 models (literal values).
|
|
125
|
+
emotion: List of emotion controls for non-Sonic-3 models.
|
|
107
126
|
|
|
108
127
|
.. deprecated:: 0.0.68
|
|
109
128
|
The `emotion` parameter is deprecated and will be removed in a future version.
|
|
129
|
+
|
|
130
|
+
generation_config: Generation configuration for Sonic-3 models. Includes volume,
|
|
131
|
+
speed (numeric), and emotion (string) parameters.
|
|
110
132
|
"""
|
|
111
133
|
|
|
112
134
|
language: Optional[Language] = Language.EN
|
|
113
135
|
speed: Optional[Literal["slow", "normal", "fast"]] = None
|
|
114
136
|
emotion: Optional[List[str]] = []
|
|
137
|
+
generation_config: Optional[GenerationConfig] = None
|
|
115
138
|
|
|
116
139
|
def __init__(
|
|
117
140
|
self,
|
|
@@ -120,7 +143,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
120
143
|
voice_id: str,
|
|
121
144
|
cartesia_version: str = "2025-04-16",
|
|
122
145
|
url: str = "wss://api.cartesia.ai/tts/websocket",
|
|
123
|
-
model: str = "sonic-
|
|
146
|
+
model: str = "sonic-3",
|
|
124
147
|
sample_rate: Optional[int] = None,
|
|
125
148
|
encoding: str = "pcm_s16le",
|
|
126
149
|
container: str = "raw",
|
|
@@ -136,7 +159,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
136
159
|
voice_id: ID of the voice to use for synthesis.
|
|
137
160
|
cartesia_version: API version string for Cartesia service.
|
|
138
161
|
url: WebSocket URL for Cartesia TTS API.
|
|
139
|
-
model: TTS model to use (e.g., "sonic-
|
|
162
|
+
model: TTS model to use (e.g., "sonic-3").
|
|
140
163
|
sample_rate: Audio sample rate. If None, uses default.
|
|
141
164
|
encoding: Audio encoding format.
|
|
142
165
|
container: Audio container format.
|
|
@@ -180,6 +203,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
180
203
|
else "en",
|
|
181
204
|
"speed": params.speed,
|
|
182
205
|
"emotion": params.emotion,
|
|
206
|
+
"generation_config": params.generation_config,
|
|
183
207
|
}
|
|
184
208
|
self.set_model_name(model)
|
|
185
209
|
self.set_voice(voice_id)
|
|
@@ -298,6 +322,11 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
298
322
|
if self._settings["speed"]:
|
|
299
323
|
msg["speed"] = self._settings["speed"]
|
|
300
324
|
|
|
325
|
+
if self._settings["generation_config"]:
|
|
326
|
+
msg["generation_config"] = self._settings["generation_config"].model_dump(
|
|
327
|
+
exclude_none=True
|
|
328
|
+
)
|
|
329
|
+
|
|
301
330
|
return json.dumps(msg)
|
|
302
331
|
|
|
303
332
|
async def start(self, frame: StartFrame):
|
|
@@ -419,7 +448,6 @@ class CartesiaTTSService(AudioContextWordTTSService):
|
|
|
419
448
|
logger.error(f"{self} error: {msg}")
|
|
420
449
|
await self.push_frame(TTSStoppedFrame())
|
|
421
450
|
await self.stop_all_metrics()
|
|
422
|
-
|
|
423
451
|
await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
|
|
424
452
|
self._context_id = None
|
|
425
453
|
else:
|
|
@@ -484,23 +512,27 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
484
512
|
|
|
485
513
|
Parameters:
|
|
486
514
|
language: Language to use for synthesis.
|
|
487
|
-
speed: Voice speed control.
|
|
488
|
-
emotion: List of emotion controls.
|
|
515
|
+
speed: Voice speed control for non-Sonic-3 models (literal values).
|
|
516
|
+
emotion: List of emotion controls for non-Sonic-3 models.
|
|
489
517
|
|
|
490
518
|
.. deprecated:: 0.0.68
|
|
491
519
|
The `emotion` parameter is deprecated and will be removed in a future version.
|
|
520
|
+
|
|
521
|
+
generation_config: Generation configuration for Sonic-3 models. Includes volume,
|
|
522
|
+
speed (numeric), and emotion (string) parameters.
|
|
492
523
|
"""
|
|
493
524
|
|
|
494
525
|
language: Optional[Language] = Language.EN
|
|
495
526
|
speed: Optional[Literal["slow", "normal", "fast"]] = None
|
|
496
527
|
emotion: Optional[List[str]] = Field(default_factory=list)
|
|
528
|
+
generation_config: Optional[GenerationConfig] = None
|
|
497
529
|
|
|
498
530
|
def __init__(
|
|
499
531
|
self,
|
|
500
532
|
*,
|
|
501
533
|
api_key: str,
|
|
502
534
|
voice_id: str,
|
|
503
|
-
model: str = "sonic-
|
|
535
|
+
model: str = "sonic-3",
|
|
504
536
|
base_url: str = "https://api.cartesia.ai",
|
|
505
537
|
cartesia_version: str = "2024-11-13",
|
|
506
538
|
sample_rate: Optional[int] = None,
|
|
@@ -514,7 +546,7 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
514
546
|
Args:
|
|
515
547
|
api_key: Cartesia API key for authentication.
|
|
516
548
|
voice_id: ID of the voice to use for synthesis.
|
|
517
|
-
model: TTS model to use (e.g., "sonic-
|
|
549
|
+
model: TTS model to use (e.g., "sonic-3").
|
|
518
550
|
base_url: Base URL for Cartesia HTTP API.
|
|
519
551
|
cartesia_version: API version string for Cartesia service.
|
|
520
552
|
sample_rate: Audio sample rate. If None, uses default.
|
|
@@ -541,6 +573,7 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
541
573
|
else "en",
|
|
542
574
|
"speed": params.speed,
|
|
543
575
|
"emotion": params.emotion,
|
|
576
|
+
"generation_config": params.generation_config,
|
|
544
577
|
}
|
|
545
578
|
self.set_voice(voice_id)
|
|
546
579
|
self.set_model_name(model)
|
|
@@ -634,6 +667,11 @@ class CartesiaHttpTTSService(TTSService):
|
|
|
634
667
|
if self._settings["speed"]:
|
|
635
668
|
payload["speed"] = self._settings["speed"]
|
|
636
669
|
|
|
670
|
+
if self._settings["generation_config"]:
|
|
671
|
+
payload["generation_config"] = self._settings["generation_config"].model_dump(
|
|
672
|
+
exclude_none=True
|
|
673
|
+
)
|
|
674
|
+
|
|
637
675
|
yield TTSStartedFrame()
|
|
638
676
|
|
|
639
677
|
session = await self._client._get_session()
|
|
@@ -13,6 +13,7 @@ from dataclasses import dataclass
|
|
|
13
13
|
from typing import AsyncGenerator, Dict, Optional
|
|
14
14
|
|
|
15
15
|
import httpx
|
|
16
|
+
from env_config import api_config
|
|
16
17
|
from loguru import logger
|
|
17
18
|
|
|
18
19
|
from pipecat.frames.frames import (
|
|
@@ -23,6 +24,10 @@ from pipecat.frames.frames import (
|
|
|
23
24
|
LLMTextFrame,
|
|
24
25
|
LLMUpdateSettingsFrame,
|
|
25
26
|
)
|
|
27
|
+
from pipecat.processors.aggregators.llm_response import (
|
|
28
|
+
LLMAssistantAggregatorParams,
|
|
29
|
+
LLMUserAggregatorParams,
|
|
30
|
+
)
|
|
26
31
|
from pipecat.processors.aggregators.openai_llm_context import (
|
|
27
32
|
OpenAILLMContext,
|
|
28
33
|
OpenAILLMContextFrame,
|
|
@@ -34,11 +39,6 @@ from pipecat.services.openai.llm import (
|
|
|
34
39
|
OpenAIContextAggregatorPair,
|
|
35
40
|
OpenAIUserContextAggregator,
|
|
36
41
|
)
|
|
37
|
-
from pipecat.processors.aggregators.llm_response import (
|
|
38
|
-
LLMAssistantAggregatorParams,
|
|
39
|
-
LLMUserAggregatorParams,
|
|
40
|
-
)
|
|
41
|
-
from env_config import api_config
|
|
42
42
|
from pipecat.utils.redis import create_async_redis_client
|
|
43
43
|
|
|
44
44
|
|
|
@@ -96,12 +96,11 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
96
96
|
# Initialize parent LLM service
|
|
97
97
|
super().__init__(**kwargs)
|
|
98
98
|
self._agent_id = agent_id
|
|
99
|
-
self._org_domain = org_domain
|
|
99
|
+
self._org_domain = org_domain
|
|
100
100
|
self._client_id = client_id
|
|
101
101
|
self._client_secret = client_secret
|
|
102
102
|
self._api_host = api_host
|
|
103
103
|
|
|
104
|
-
|
|
105
104
|
# Validate required environment variables
|
|
106
105
|
required_vars = {
|
|
107
106
|
"SALESFORCE_AGENT_ID": self._agent_id,
|
|
@@ -145,7 +144,6 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
145
144
|
)
|
|
146
145
|
|
|
147
146
|
self._schedule_session_warmup()
|
|
148
|
-
|
|
149
147
|
|
|
150
148
|
async def __aenter__(self):
|
|
151
149
|
"""Async context manager entry."""
|
|
@@ -237,7 +235,7 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
237
235
|
return
|
|
238
236
|
|
|
239
237
|
ttl_seconds = 3600 # Default fallback
|
|
240
|
-
|
|
238
|
+
|
|
241
239
|
# Try to get expiration from expires_in parameter first
|
|
242
240
|
if expires_in is not None:
|
|
243
241
|
try:
|
|
@@ -246,7 +244,7 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
246
244
|
except (TypeError, ValueError):
|
|
247
245
|
logger.debug("Unable to parse expires_in parameter")
|
|
248
246
|
expires_in = None
|
|
249
|
-
|
|
247
|
+
|
|
250
248
|
# If no expires_in available, use default TTL
|
|
251
249
|
if expires_in is None:
|
|
252
250
|
logger.debug("No expiration info found, using default TTL")
|
|
@@ -271,7 +269,7 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
271
269
|
|
|
272
270
|
async def _get_access_token(self, *, force_refresh: bool = False) -> str:
|
|
273
271
|
"""Get OAuth access token using client credentials.
|
|
274
|
-
|
|
272
|
+
|
|
275
273
|
Args:
|
|
276
274
|
force_refresh: If True, skip cache and fetch fresh token from Salesforce.
|
|
277
275
|
"""
|
|
@@ -301,15 +299,15 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
301
299
|
|
|
302
300
|
async def _make_authenticated_request(self, method: str, url: str, **kwargs):
|
|
303
301
|
"""Make an authenticated HTTP request with automatic token refresh on auth errors.
|
|
304
|
-
|
|
302
|
+
|
|
305
303
|
Args:
|
|
306
304
|
method: HTTP method (GET, POST, DELETE, etc.)
|
|
307
305
|
url: Request URL
|
|
308
306
|
**kwargs: Additional arguments passed to httpx request
|
|
309
|
-
|
|
307
|
+
|
|
310
308
|
Returns:
|
|
311
309
|
httpx.Response: The HTTP response
|
|
312
|
-
|
|
310
|
+
|
|
313
311
|
Raises:
|
|
314
312
|
Exception: If request fails after token refresh attempt
|
|
315
313
|
"""
|
|
@@ -318,7 +316,7 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
318
316
|
headers = kwargs.get("headers", {})
|
|
319
317
|
headers["Authorization"] = f"Bearer {access_token}"
|
|
320
318
|
kwargs["headers"] = headers
|
|
321
|
-
|
|
319
|
+
|
|
322
320
|
try:
|
|
323
321
|
response = await self._http_client.request(method, url, **kwargs)
|
|
324
322
|
response.raise_for_status()
|
|
@@ -326,14 +324,16 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
326
324
|
except httpx.HTTPStatusError as e:
|
|
327
325
|
# If authentication error, clear cache and retry with fresh token
|
|
328
326
|
if e.response.status_code in (401, 403):
|
|
329
|
-
logger.warning(
|
|
327
|
+
logger.warning(
|
|
328
|
+
f"Salesforce authentication error ({e.response.status_code}), refreshing token"
|
|
329
|
+
)
|
|
330
330
|
await self._clear_cached_access_token()
|
|
331
|
-
|
|
331
|
+
|
|
332
332
|
# Retry with fresh token
|
|
333
333
|
fresh_token = await self._get_access_token(force_refresh=True)
|
|
334
334
|
headers["Authorization"] = f"Bearer {fresh_token}"
|
|
335
335
|
kwargs["headers"] = headers
|
|
336
|
-
|
|
336
|
+
|
|
337
337
|
response = await self._http_client.request(method, url, **kwargs)
|
|
338
338
|
response.raise_for_status()
|
|
339
339
|
return response
|
|
@@ -359,9 +359,7 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
359
359
|
|
|
360
360
|
try:
|
|
361
361
|
response = await self._make_authenticated_request(
|
|
362
|
-
"POST", session_url,
|
|
363
|
-
headers={"Content-Type": "application/json"},
|
|
364
|
-
json=payload
|
|
362
|
+
"POST", session_url, headers={"Content-Type": "application/json"}, json=payload
|
|
365
363
|
)
|
|
366
364
|
session_data = response.json()
|
|
367
365
|
session_id = session_data["sessionId"]
|
|
@@ -419,8 +417,7 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
419
417
|
# End the session via API
|
|
420
418
|
url = f"{self._api_host}/einstein/ai-agent/v1/sessions/{session_id}"
|
|
421
419
|
await self._make_authenticated_request(
|
|
422
|
-
"DELETE", url,
|
|
423
|
-
headers={"x-session-end-reason": "UserRequest"}
|
|
420
|
+
"DELETE", url, headers={"x-session-end-reason": "UserRequest"}
|
|
424
421
|
)
|
|
425
422
|
except Exception as e:
|
|
426
423
|
logger.warning(f"Failed to end session {session_id}: {e}")
|
|
@@ -431,32 +428,32 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
431
428
|
|
|
432
429
|
def _extract_user_message(self, context: OpenAILLMContext) -> str:
|
|
433
430
|
"""Extract the last user message from context.
|
|
434
|
-
|
|
431
|
+
|
|
435
432
|
Similar to Vistaar pattern - extract only the most recent user message.
|
|
436
|
-
|
|
433
|
+
|
|
437
434
|
Args:
|
|
438
435
|
context: The OpenAI LLM context containing messages.
|
|
439
|
-
|
|
436
|
+
|
|
440
437
|
Returns:
|
|
441
438
|
The last user message as a string.
|
|
442
439
|
"""
|
|
443
440
|
messages = context.get_messages()
|
|
444
|
-
|
|
441
|
+
|
|
445
442
|
# Find the last user message (iterate in reverse for efficiency)
|
|
446
443
|
for message in reversed(messages):
|
|
447
444
|
if message.get("role") == "user":
|
|
448
445
|
content = message.get("content", "")
|
|
449
|
-
|
|
446
|
+
|
|
450
447
|
# Handle content that might be a list (for multimodal messages)
|
|
451
448
|
if isinstance(content, list):
|
|
452
449
|
text_parts = [
|
|
453
450
|
item.get("text", "") for item in content if item.get("type") == "text"
|
|
454
451
|
]
|
|
455
452
|
content = " ".join(text_parts)
|
|
456
|
-
|
|
453
|
+
|
|
457
454
|
if isinstance(content, str):
|
|
458
455
|
return content.strip()
|
|
459
|
-
|
|
456
|
+
|
|
460
457
|
return ""
|
|
461
458
|
|
|
462
459
|
def _generate_sequence_id(self) -> int:
|
|
@@ -464,7 +461,9 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
464
461
|
self._sequence_counter += 1
|
|
465
462
|
return self._sequence_counter
|
|
466
463
|
|
|
467
|
-
async def _stream_salesforce_response(
|
|
464
|
+
async def _stream_salesforce_response(
|
|
465
|
+
self, session_id: str, user_message: str
|
|
466
|
+
) -> AsyncGenerator[str, None]:
|
|
468
467
|
"""Stream response from Salesforce Agent API."""
|
|
469
468
|
url = f"{self._api_host}/einstein/ai-agent/v1/sessions/{session_id}/messages/stream"
|
|
470
469
|
|
|
@@ -472,15 +471,9 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
472
471
|
"message": {
|
|
473
472
|
"sequenceId": self._generate_sequence_id(),
|
|
474
473
|
"type": "Text",
|
|
475
|
-
"text": user_message
|
|
474
|
+
"text": user_message,
|
|
476
475
|
},
|
|
477
|
-
"variables": [
|
|
478
|
-
{
|
|
479
|
-
"name": "$Context.EndUserLanguage",
|
|
480
|
-
"type": "Text",
|
|
481
|
-
"value": "en_US"
|
|
482
|
-
}
|
|
483
|
-
]
|
|
476
|
+
"variables": [{"name": "$Context.EndUserLanguage", "type": "Text", "value": "en_US"}],
|
|
484
477
|
}
|
|
485
478
|
|
|
486
479
|
# First attempt with current token
|
|
@@ -493,9 +486,11 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
493
486
|
|
|
494
487
|
try:
|
|
495
488
|
logger.info(f"🌐 Salesforce API request: {user_message[:50]}...")
|
|
496
|
-
async with self._http_client.stream(
|
|
489
|
+
async with self._http_client.stream(
|
|
490
|
+
"POST", url, headers=headers, json=message_data
|
|
491
|
+
) as response:
|
|
497
492
|
response.raise_for_status()
|
|
498
|
-
|
|
493
|
+
|
|
499
494
|
async for line in response.aiter_lines():
|
|
500
495
|
if not line:
|
|
501
496
|
continue
|
|
@@ -525,17 +520,23 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
525
520
|
except httpx.HTTPStatusError as e:
|
|
526
521
|
# If authentication error, retry with fresh token
|
|
527
522
|
if e.response.status_code in (401, 403):
|
|
528
|
-
logger.warning(
|
|
523
|
+
logger.warning(
|
|
524
|
+
f"Salesforce streaming authentication error ({e.response.status_code}), refreshing token"
|
|
525
|
+
)
|
|
529
526
|
await self._clear_cached_access_token()
|
|
530
|
-
|
|
527
|
+
|
|
531
528
|
# Retry with fresh token
|
|
532
529
|
fresh_token = await self._get_access_token(force_refresh=True)
|
|
533
530
|
headers["Authorization"] = f"Bearer {fresh_token}"
|
|
534
|
-
|
|
535
|
-
logger.info(
|
|
536
|
-
|
|
531
|
+
|
|
532
|
+
logger.info(
|
|
533
|
+
f"🔄 Retrying Salesforce stream with fresh token: {user_message[:50]}..."
|
|
534
|
+
)
|
|
535
|
+
async with self._http_client.stream(
|
|
536
|
+
"POST", url, headers=headers, json=message_data
|
|
537
|
+
) as response:
|
|
537
538
|
response.raise_for_status()
|
|
538
|
-
|
|
539
|
+
|
|
539
540
|
async for line in response.aiter_lines():
|
|
540
541
|
if not line:
|
|
541
542
|
continue
|
|
@@ -576,40 +577,41 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
576
577
|
context: The OpenAI LLM context containing messages to process.
|
|
577
578
|
"""
|
|
578
579
|
logger.info(f"🔄 Salesforce processing context with {len(context.get_messages())} messages")
|
|
579
|
-
|
|
580
|
+
|
|
580
581
|
# Extract user message from context first
|
|
581
582
|
user_message = self._extract_user_message(context)
|
|
582
|
-
|
|
583
|
+
|
|
583
584
|
if not user_message:
|
|
584
585
|
logger.warning("Salesforce: No user message found in context")
|
|
585
586
|
return
|
|
586
|
-
|
|
587
|
+
|
|
587
588
|
try:
|
|
588
589
|
logger.info(f"🎯 Salesforce extracted query: {user_message}")
|
|
589
|
-
|
|
590
|
-
# Start response
|
|
590
|
+
|
|
591
|
+
# Start response
|
|
591
592
|
await self.push_frame(LLMFullResponseStartFrame())
|
|
592
|
-
await self.push_frame(LLMFullResponseStartFrame(),FrameDirection.UPSTREAM)
|
|
593
|
+
await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
|
|
593
594
|
await self.start_processing_metrics()
|
|
594
595
|
await self.start_ttfb_metrics()
|
|
595
|
-
|
|
596
|
+
|
|
596
597
|
# Get or create session
|
|
597
598
|
session_id = await self._get_or_create_session()
|
|
598
|
-
|
|
599
|
+
|
|
599
600
|
first_chunk = True
|
|
600
|
-
|
|
601
|
+
|
|
601
602
|
# Stream the response
|
|
602
603
|
async for text_chunk in self._stream_salesforce_response(session_id, user_message):
|
|
603
604
|
if first_chunk:
|
|
604
605
|
await self.stop_ttfb_metrics()
|
|
605
606
|
first_chunk = False
|
|
606
|
-
|
|
607
|
+
|
|
607
608
|
# Push each text chunk as it arrives
|
|
608
609
|
await self.push_frame(LLMTextFrame(text=text_chunk))
|
|
609
|
-
|
|
610
|
+
|
|
610
611
|
except Exception as e:
|
|
611
612
|
logger.error(f"Salesforce context processing error: {type(e).__name__}: {str(e)}")
|
|
612
613
|
import traceback
|
|
614
|
+
|
|
613
615
|
logger.error(f"Salesforce traceback: {traceback.format_exc()}")
|
|
614
616
|
raise
|
|
615
617
|
finally:
|
|
@@ -627,7 +629,9 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
627
629
|
context = None
|
|
628
630
|
if isinstance(frame, OpenAILLMContextFrame):
|
|
629
631
|
context = frame.context
|
|
630
|
-
logger.info(
|
|
632
|
+
logger.info(
|
|
633
|
+
f"🔍 Received OpenAILLMContextFrame with {len(context.get_messages())} messages"
|
|
634
|
+
)
|
|
631
635
|
elif isinstance(frame, LLMMessagesFrame):
|
|
632
636
|
context = OpenAILLMContext.from_messages(frame.messages)
|
|
633
637
|
logger.info(f"🔍 Received LLMMessagesFrame with {len(frame.messages)} messages")
|
|
@@ -680,6 +684,7 @@ class SalesforceAgentLLMService(LLMService):
|
|
|
680
684
|
def get_llm_adapter(self):
|
|
681
685
|
"""Get the LLM adapter for this service."""
|
|
682
686
|
from pipecat.adapters.services.open_ai_adapter import OpenAILLMAdapter
|
|
687
|
+
|
|
683
688
|
return OpenAILLMAdapter()
|
|
684
689
|
|
|
685
690
|
async def close(self):
|
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev825.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev825.dist-info → dv_pipecat_ai-0.0.85.dev831.dist-info}/top_level.txt
RENAMED
|
File without changes
|