dv-pipecat-ai 0.0.82.dev881__py3-none-any.whl → 0.0.85.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/METADATA +2 -1
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/RECORD +24 -22
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/frames/frames.py +49 -0
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +22 -29
- pipecat/processors/aggregators/llm_response.py +2 -0
- pipecat/processors/dtmf_aggregator.py +175 -74
- pipecat/processors/filters/stt_mute_filter.py +15 -0
- pipecat/processors/user_idle_processor.py +32 -5
- pipecat/serializers/__init__.py +3 -1
- pipecat/serializers/convox.py +40 -3
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/plivo.py +4 -1
- pipecat/services/elevenlabs/stt.py +18 -8
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +13 -1
- pipecat/services/speechmatics/stt.py +16 -0
- pipecat/services/vistaar/llm.py +45 -7
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/top_level.txt +0 -0
pipecat/services/vistaar/llm.py
CHANGED
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
|
+
import random
|
|
5
6
|
import time
|
|
6
7
|
import uuid
|
|
7
8
|
from dataclasses import dataclass
|
|
8
|
-
from typing import Any, AsyncGenerator, Dict, Optional
|
|
9
|
+
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
9
10
|
from urllib.parse import urlencode
|
|
10
11
|
|
|
11
12
|
import httpx
|
|
@@ -13,6 +14,8 @@ from loguru import logger
|
|
|
13
14
|
from pydantic import BaseModel, Field
|
|
14
15
|
|
|
15
16
|
from pipecat.frames.frames import (
|
|
17
|
+
EndFrame,
|
|
18
|
+
CancelFrame,
|
|
16
19
|
Frame,
|
|
17
20
|
LLMFullResponseEndFrame,
|
|
18
21
|
LLMFullResponseStartFrame,
|
|
@@ -53,12 +56,13 @@ class VistaarLLMService(LLMService):
|
|
|
53
56
|
source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
|
|
54
57
|
target_lang: Target language code for responses.
|
|
55
58
|
session_id: Session ID for maintaining conversation context.
|
|
56
|
-
extra: Additional model-specific parameters
|
|
59
|
+
extra: Additional model-specific parameters
|
|
57
60
|
"""
|
|
58
61
|
|
|
59
62
|
source_lang: Optional[str] = Field(default="mr")
|
|
60
63
|
target_lang: Optional[str] = Field(default="mr")
|
|
61
64
|
session_id: Optional[str] = Field(default=None)
|
|
65
|
+
pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
|
|
62
66
|
extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
|
63
67
|
|
|
64
68
|
def __init__(
|
|
@@ -68,7 +72,6 @@ class VistaarLLMService(LLMService):
|
|
|
68
72
|
params: Optional[InputParams] = None,
|
|
69
73
|
timeout: float = 30.0,
|
|
70
74
|
interim_timeout: float = 5.0,
|
|
71
|
-
interim_message: str = "एक क्षण थांबा, मी बघतो. ",
|
|
72
75
|
**kwargs,
|
|
73
76
|
):
|
|
74
77
|
"""Initialize Vistaar LLM service.
|
|
@@ -77,8 +80,7 @@ class VistaarLLMService(LLMService):
|
|
|
77
80
|
base_url: The base URL for Vistaar API. Defaults to "https://vistaar.kenpath.ai/api".
|
|
78
81
|
params: Input parameters for model configuration and behavior.
|
|
79
82
|
timeout: Request timeout in seconds. Defaults to 30.0 seconds.
|
|
80
|
-
interim_timeout: Time in seconds before sending interim message. Defaults to
|
|
81
|
-
interim_message: Message to send if API takes longer than interim_timeout. Defaults to "एक क्षण थांबा, मी बघतो. ".
|
|
83
|
+
interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
|
|
82
84
|
**kwargs: Additional arguments passed to the parent LLMService.
|
|
83
85
|
"""
|
|
84
86
|
super().__init__(**kwargs)
|
|
@@ -89,10 +91,10 @@ class VistaarLLMService(LLMService):
|
|
|
89
91
|
self._source_lang = params.source_lang
|
|
90
92
|
self._target_lang = params.target_lang
|
|
91
93
|
self._session_id = params.session_id or str(uuid.uuid4())
|
|
94
|
+
self._pre_query_response_phrases = params.pre_query_response_phrases or []
|
|
92
95
|
self._extra = params.extra if isinstance(params.extra, dict) else {}
|
|
93
96
|
self._timeout = timeout
|
|
94
97
|
self._interim_timeout = interim_timeout
|
|
95
|
-
self._interim_message = interim_message
|
|
96
98
|
|
|
97
99
|
# Create an async HTTP client
|
|
98
100
|
self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
|
|
@@ -103,6 +105,8 @@ class VistaarLLMService(LLMService):
|
|
|
103
105
|
self._partial_response = [] # Track what was actually sent before interruption
|
|
104
106
|
self._interim_sent = False # Track if interim message was sent
|
|
105
107
|
self._interim_task = None # Track interim message task
|
|
108
|
+
self._interim_completion_event = asyncio.Event() # Track interim message completion
|
|
109
|
+
self._interim_in_progress = False # Track if interim message is being spoken
|
|
106
110
|
|
|
107
111
|
logger.info(
|
|
108
112
|
f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
|
|
@@ -161,6 +165,10 @@ class VistaarLLMService(LLMService):
|
|
|
161
165
|
# Set interruption flag
|
|
162
166
|
self._is_interrupted = True
|
|
163
167
|
|
|
168
|
+
# Reset interim state on interruption
|
|
169
|
+
self._interim_in_progress = False
|
|
170
|
+
self._interim_completion_event.set() # Unblock any waiting LLM responses
|
|
171
|
+
|
|
164
172
|
# Cancel interim message task if active
|
|
165
173
|
await self._cancel_interim_message_task(
|
|
166
174
|
"Cancelled interim message task - handling interruption"
|
|
@@ -193,11 +201,28 @@ class VistaarLLMService(LLMService):
|
|
|
193
201
|
if not self._is_interrupted and not self._interim_sent:
|
|
194
202
|
logger.info(f"Sending interim message after {self._interim_timeout}s timeout")
|
|
195
203
|
self._interim_sent = True
|
|
196
|
-
|
|
204
|
+
self._interim_in_progress = True
|
|
205
|
+
|
|
206
|
+
# Use random selection from pre_query_response_phrases if available, otherwise fallback to default
|
|
207
|
+
if self._pre_query_response_phrases:
|
|
208
|
+
message = random.choice(self._pre_query_response_phrases)
|
|
209
|
+
else:
|
|
210
|
+
message = "एक क्षण थांबा, मी बघतो. "
|
|
211
|
+
|
|
212
|
+
await self.push_frame(LLMTextFrame(text=message))
|
|
213
|
+
|
|
214
|
+
# Wait for estimated TTS duration before marking as complete
|
|
215
|
+
estimated_tts_duration = max(2.0, len(message) * 0.08) # ~80ms per character
|
|
216
|
+
logger.info(f"Waiting {estimated_tts_duration:.2f}s for interim TTS completion")
|
|
217
|
+
await asyncio.sleep(estimated_tts_duration)
|
|
197
218
|
except asyncio.CancelledError:
|
|
198
219
|
logger.debug("Interim message task cancelled")
|
|
199
220
|
except Exception as e:
|
|
200
221
|
logger.error(f"Error sending interim message: {e}")
|
|
222
|
+
finally:
|
|
223
|
+
# Signal that interim message handling is complete
|
|
224
|
+
self._interim_completion_event.set()
|
|
225
|
+
self._interim_in_progress = False
|
|
201
226
|
|
|
202
227
|
async def _stream_response(self, query: str) -> AsyncGenerator[str, None]:
|
|
203
228
|
"""Stream response from Vistaar API using Server-Sent Events.
|
|
@@ -231,6 +256,8 @@ class VistaarLLMService(LLMService):
|
|
|
231
256
|
self._is_interrupted = False
|
|
232
257
|
self._partial_response = []
|
|
233
258
|
self._interim_sent = False
|
|
259
|
+
self._interim_in_progress = False
|
|
260
|
+
self._interim_completion_event.clear() # Reset the event for new request
|
|
234
261
|
|
|
235
262
|
try:
|
|
236
263
|
# Use httpx to handle SSE streaming
|
|
@@ -291,6 +318,7 @@ class VistaarLLMService(LLMService):
|
|
|
291
318
|
|
|
292
319
|
# Start response
|
|
293
320
|
await self.push_frame(LLMFullResponseStartFrame())
|
|
321
|
+
await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
|
|
294
322
|
await self.start_processing_metrics()
|
|
295
323
|
await self.start_ttfb_metrics()
|
|
296
324
|
|
|
@@ -307,6 +335,15 @@ class VistaarLLMService(LLMService):
|
|
|
307
335
|
if first_chunk:
|
|
308
336
|
await self.stop_ttfb_metrics()
|
|
309
337
|
first_chunk = False
|
|
338
|
+
|
|
339
|
+
# Wait for interim message to complete if it was sent and is in progress
|
|
340
|
+
if self._interim_sent:
|
|
341
|
+
logger.debug(
|
|
342
|
+
"Waiting for interim message completion before sending LLM response"
|
|
343
|
+
)
|
|
344
|
+
await self._interim_completion_event.wait()
|
|
345
|
+
logger.debug("Interim message completed, proceeding with LLM response")
|
|
346
|
+
|
|
310
347
|
# Cancel interim message task since we got first response
|
|
311
348
|
await self._cancel_interim_message_task(
|
|
312
349
|
"Cancelled interim message task - got first response"
|
|
@@ -334,6 +371,7 @@ class VistaarLLMService(LLMService):
|
|
|
334
371
|
)
|
|
335
372
|
await self.stop_processing_metrics()
|
|
336
373
|
await self.push_frame(LLMFullResponseEndFrame())
|
|
374
|
+
await self.push_frame(LLMFullResponseEndFrame(), FrameDirection.UPSTREAM)
|
|
337
375
|
|
|
338
376
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
339
377
|
"""Process frames for LLM completion requests.
|
pipecat/serializers/genesys.py
DELETED
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
import base64
|
|
2
|
-
import json
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel
|
|
6
|
-
|
|
7
|
-
from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
|
|
8
|
-
from pipecat.frames.frames import (
|
|
9
|
-
AudioRawFrame,
|
|
10
|
-
Frame,
|
|
11
|
-
InputAudioRawFrame,
|
|
12
|
-
InputDTMFFrame,
|
|
13
|
-
KeypadEntry,
|
|
14
|
-
StartFrame,
|
|
15
|
-
StartInterruptionFrame,
|
|
16
|
-
TransportMessageFrame,
|
|
17
|
-
TransportMessageUrgentFrame,
|
|
18
|
-
)
|
|
19
|
-
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class GenesysFrameSerializer(FrameSerializer):
|
|
23
|
-
class InputParams(BaseModel):
|
|
24
|
-
genesys_sample_rate: int = 8000 # Default Genesys rate (8kHz)
|
|
25
|
-
sample_rate: Optional[int] = None # Pipeline input rate
|
|
26
|
-
|
|
27
|
-
def __init__(self, session_id: str, params: InputParams = InputParams()):
|
|
28
|
-
self._session_id = session_id
|
|
29
|
-
self._params = params
|
|
30
|
-
self._genesys_sample_rate = self._params.genesys_sample_rate
|
|
31
|
-
self._sample_rate = 0 # Pipeline input rate
|
|
32
|
-
self._resampler = create_default_resampler()
|
|
33
|
-
self._seq = 1 # Sequence number for outgoing messages
|
|
34
|
-
|
|
35
|
-
@property
|
|
36
|
-
def type(self) -> FrameSerializerType:
|
|
37
|
-
return FrameSerializerType.TEXT
|
|
38
|
-
|
|
39
|
-
async def setup(self, frame: StartFrame):
|
|
40
|
-
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
|
|
41
|
-
|
|
42
|
-
async def serialize(self, frame: Frame) -> str | bytes | None:
|
|
43
|
-
if isinstance(frame, StartInterruptionFrame):
|
|
44
|
-
answer = {
|
|
45
|
-
"version": "2",
|
|
46
|
-
"type": "clearAudio", # Or appropriate event for interruption
|
|
47
|
-
"seq": self._seq,
|
|
48
|
-
"id": self._session_id,
|
|
49
|
-
}
|
|
50
|
-
self._seq += 1
|
|
51
|
-
return json.dumps(answer)
|
|
52
|
-
elif isinstance(frame, AudioRawFrame):
|
|
53
|
-
data = frame.audio
|
|
54
|
-
# Convert PCM to 8kHz μ-law for Genesys
|
|
55
|
-
serialized_data = await pcm_to_ulaw(
|
|
56
|
-
data, frame.sample_rate, self._genesys_sample_rate, self._resampler
|
|
57
|
-
)
|
|
58
|
-
payload = base64.b64encode(serialized_data).decode("utf-8")
|
|
59
|
-
answer = {
|
|
60
|
-
"version": "2",
|
|
61
|
-
"type": "audio",
|
|
62
|
-
"seq": self._seq,
|
|
63
|
-
"id": self._session_id,
|
|
64
|
-
"media": {
|
|
65
|
-
"payload": payload,
|
|
66
|
-
"format": "PCMU",
|
|
67
|
-
"rate": self._genesys_sample_rate,
|
|
68
|
-
},
|
|
69
|
-
}
|
|
70
|
-
self._seq += 1
|
|
71
|
-
return json.dumps(answer)
|
|
72
|
-
elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
|
73
|
-
return json.dumps(frame.message)
|
|
74
|
-
|
|
75
|
-
async def deserialize(self, data: str | bytes) -> Frame | None:
|
|
76
|
-
message = json.loads(data)
|
|
77
|
-
if message.get("type") == "audio":
|
|
78
|
-
payload_base64 = message["media"]["payload"]
|
|
79
|
-
payload = base64.b64decode(payload_base64)
|
|
80
|
-
# Convert Genesys 8kHz μ-law to PCM at pipeline input rate
|
|
81
|
-
deserialized_data = await ulaw_to_pcm(
|
|
82
|
-
payload, self._genesys_sample_rate, self._sample_rate, self._resampler
|
|
83
|
-
)
|
|
84
|
-
audio_frame = InputAudioRawFrame(
|
|
85
|
-
audio=deserialized_data, num_channels=1, sample_rate=self._sample_rate
|
|
86
|
-
)
|
|
87
|
-
return audio_frame
|
|
88
|
-
elif message.get("type") == "dtmf":
|
|
89
|
-
digit = message.get("dtmf", {}).get("digit")
|
|
90
|
-
try:
|
|
91
|
-
return InputDTMFFrame(KeypadEntry(digit))
|
|
92
|
-
except ValueError:
|
|
93
|
-
return None
|
|
94
|
-
else:
|
|
95
|
-
return None
|
pipecat/services/google/test-google-chirp.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
from pipecat.frames.frames import TTSAudioRawFrame
|
|
5
|
-
from pipecat.services.google.tts import GoogleTTSService
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
async def test_chirp_tts():
|
|
9
|
-
# Get credentials from environment variable
|
|
10
|
-
credentials_path = (
|
|
11
|
-
"/Users/kalicharanvemuru/Documents/Code/pipecat/examples/ringg-chatbot/creds.json"
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
if not credentials_path or not os.path.exists(credentials_path):
|
|
15
|
-
raise ValueError(
|
|
16
|
-
"Please set GOOGLE_APPLICATION_CREDENTIALS environment variable to your service account key file"
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
# Initialize the TTS service with Chirp voice
|
|
20
|
-
tts = GoogleTTSService(
|
|
21
|
-
credentials_path=credentials_path,
|
|
22
|
-
voice_id="en-US-Chirp3-HD-Charon", # Using Chirp3 HD Charon voice
|
|
23
|
-
sample_rate=24000,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
# Test text
|
|
27
|
-
test_text = "Hello, this is a test of the Google TTS service with Chirp voice."
|
|
28
|
-
|
|
29
|
-
print(f"Testing TTS with text: {test_text}")
|
|
30
|
-
|
|
31
|
-
# Generate speech
|
|
32
|
-
try:
|
|
33
|
-
async for frame in tts.run_tts(test_text):
|
|
34
|
-
if isinstance(frame, TTSAudioRawFrame):
|
|
35
|
-
print(f"Received audio chunk of size: {len(frame.audio)} bytes")
|
|
36
|
-
else:
|
|
37
|
-
print(f"Received frame: {frame.__class__.__name__}")
|
|
38
|
-
|
|
39
|
-
print("TTS generation completed successfully!")
|
|
40
|
-
except Exception as e:
|
|
41
|
-
print(f"Error during TTS generation: {str(e)}")
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
if __name__ == "__main__":
|
|
45
|
-
asyncio.run(test_chirp_tts())
|
|
File without changes
|
{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|