dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (106)
  1. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/types.py +47 -0
  12. pipecat/audio/dtmf/utils.py +70 -0
  13. pipecat/audio/filters/aic_filter.py +199 -0
  14. pipecat/audio/utils.py +9 -7
  15. pipecat/extensions/ivr/__init__.py +0 -0
  16. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  17. pipecat/frames/frames.py +156 -43
  18. pipecat/pipeline/llm_switcher.py +76 -0
  19. pipecat/pipeline/parallel_pipeline.py +3 -3
  20. pipecat/pipeline/service_switcher.py +144 -0
  21. pipecat/pipeline/task.py +68 -28
  22. pipecat/pipeline/task_observer.py +10 -0
  23. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  24. pipecat/processors/aggregators/llm_context.py +277 -0
  25. pipecat/processors/aggregators/llm_response.py +48 -15
  26. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  27. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  28. pipecat/processors/dtmf_aggregator.py +0 -2
  29. pipecat/processors/filters/stt_mute_filter.py +0 -2
  30. pipecat/processors/frame_processor.py +18 -11
  31. pipecat/processors/frameworks/rtvi.py +17 -10
  32. pipecat/processors/metrics/sentry.py +2 -0
  33. pipecat/runner/daily.py +137 -36
  34. pipecat/runner/run.py +1 -1
  35. pipecat/runner/utils.py +7 -7
  36. pipecat/serializers/asterisk.py +20 -4
  37. pipecat/serializers/exotel.py +1 -1
  38. pipecat/serializers/plivo.py +1 -1
  39. pipecat/serializers/telnyx.py +1 -1
  40. pipecat/serializers/twilio.py +1 -1
  41. pipecat/services/__init__.py +2 -2
  42. pipecat/services/anthropic/llm.py +113 -28
  43. pipecat/services/asyncai/tts.py +4 -0
  44. pipecat/services/aws/llm.py +82 -8
  45. pipecat/services/aws/tts.py +0 -10
  46. pipecat/services/aws_nova_sonic/aws.py +5 -0
  47. pipecat/services/cartesia/tts.py +28 -16
  48. pipecat/services/cerebras/llm.py +15 -10
  49. pipecat/services/deepgram/stt.py +8 -0
  50. pipecat/services/deepseek/llm.py +13 -8
  51. pipecat/services/fireworks/llm.py +13 -8
  52. pipecat/services/fish/tts.py +8 -6
  53. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  54. pipecat/services/gladia/config.py +7 -1
  55. pipecat/services/gladia/stt.py +23 -15
  56. pipecat/services/google/llm.py +159 -59
  57. pipecat/services/google/llm_openai.py +18 -3
  58. pipecat/services/grok/llm.py +2 -1
  59. pipecat/services/llm_service.py +38 -3
  60. pipecat/services/mem0/memory.py +2 -1
  61. pipecat/services/mistral/llm.py +5 -6
  62. pipecat/services/nim/llm.py +2 -1
  63. pipecat/services/openai/base_llm.py +88 -26
  64. pipecat/services/openai/image.py +6 -1
  65. pipecat/services/openai_realtime_beta/openai.py +5 -2
  66. pipecat/services/openpipe/llm.py +6 -8
  67. pipecat/services/perplexity/llm.py +13 -8
  68. pipecat/services/playht/tts.py +9 -6
  69. pipecat/services/rime/tts.py +1 -1
  70. pipecat/services/sambanova/llm.py +18 -13
  71. pipecat/services/sarvam/tts.py +415 -10
  72. pipecat/services/speechmatics/stt.py +2 -2
  73. pipecat/services/tavus/video.py +1 -1
  74. pipecat/services/tts_service.py +15 -5
  75. pipecat/services/vistaar/llm.py +2 -5
  76. pipecat/transports/base_input.py +32 -19
  77. pipecat/transports/base_output.py +39 -5
  78. pipecat/transports/daily/__init__.py +0 -0
  79. pipecat/transports/daily/transport.py +2371 -0
  80. pipecat/transports/daily/utils.py +410 -0
  81. pipecat/transports/livekit/__init__.py +0 -0
  82. pipecat/transports/livekit/transport.py +1042 -0
  83. pipecat/transports/network/fastapi_websocket.py +12 -546
  84. pipecat/transports/network/small_webrtc.py +12 -922
  85. pipecat/transports/network/webrtc_connection.py +9 -595
  86. pipecat/transports/network/websocket_client.py +12 -481
  87. pipecat/transports/network/websocket_server.py +12 -487
  88. pipecat/transports/services/daily.py +9 -2334
  89. pipecat/transports/services/helpers/daily_rest.py +12 -396
  90. pipecat/transports/services/livekit.py +12 -975
  91. pipecat/transports/services/tavus.py +12 -757
  92. pipecat/transports/smallwebrtc/__init__.py +0 -0
  93. pipecat/transports/smallwebrtc/connection.py +612 -0
  94. pipecat/transports/smallwebrtc/transport.py +936 -0
  95. pipecat/transports/tavus/__init__.py +0 -0
  96. pipecat/transports/tavus/transport.py +770 -0
  97. pipecat/transports/websocket/__init__.py +0 -0
  98. pipecat/transports/websocket/client.py +494 -0
  99. pipecat/transports/websocket/fastapi.py +559 -0
  100. pipecat/transports/websocket/server.py +500 -0
  101. pipecat/transports/whatsapp/__init__.py +0 -0
  102. pipecat/transports/whatsapp/api.py +345 -0
  103. pipecat/transports/whatsapp/client.py +364 -0
  104. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
  105. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
  106. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
@@ -6,25 +6,40 @@
6
6
 
7
7
  """Sarvam AI text-to-speech service implementation."""
8
8
 
9
+ import asyncio
9
10
  import base64
10
- from typing import AsyncGenerator, Optional
11
+ import json
12
+ from typing import Any, AsyncGenerator, Mapping, Optional
11
13
 
12
14
  import aiohttp
13
15
  from loguru import logger
14
16
  from pydantic import BaseModel, Field
15
17
 
16
18
  from pipecat.frames.frames import (
19
+ CancelFrame,
20
+ EndFrame,
17
21
  ErrorFrame,
18
22
  Frame,
23
+ LLMFullResponseEndFrame,
19
24
  StartFrame,
25
+ StartInterruptionFrame,
20
26
  TTSAudioRawFrame,
21
27
  TTSStartedFrame,
22
28
  TTSStoppedFrame,
23
29
  )
24
- from pipecat.services.tts_service import TTSService
30
+ from pipecat.processors.frame_processor import FrameDirection
31
+ from pipecat.services.tts_service import InterruptibleTTSService, TTSService
25
32
  from pipecat.transcriptions.language import Language
26
33
  from pipecat.utils.tracing.service_decorators import traced_tts
27
34
 
35
+ try:
36
+ from websockets.asyncio.client import connect as websocket_connect
37
+ from websockets.protocol import State
38
+ except ModuleNotFoundError as e:
39
+ logger.error(f"Exception: {e}")
40
+ logger.error("In order to use Sarvam, you need to `pip install pipecat-ai[sarvam]`.")
41
+ raise Exception(f"Missing module: {e}")
42
+
28
43
 
29
44
  def language_to_sarvam_language(language: Language) -> Optional[str]:
30
45
  """Convert Pipecat Language enum to Sarvam AI language codes.
@@ -52,7 +67,7 @@ def language_to_sarvam_language(language: Language) -> Optional[str]:
52
67
  return LANGUAGE_MAP.get(language)
53
68
 
54
69
 
55
- class SarvamTTSService(TTSService):
70
+ class SarvamHttpTTSService(TTSService):
56
71
  """Text-to-Speech service using Sarvam AI's API.
57
72
 
58
73
  Converts text to speech using Sarvam AI's TTS models with support for multiple
@@ -95,9 +110,9 @@ class SarvamTTSService(TTSService):
95
110
  self,
96
111
  *,
97
112
  api_key: str,
113
+ aiohttp_session: aiohttp.ClientSession,
98
114
  voice_id: str = "anushka",
99
115
  model: str = "bulbul:v2",
100
- aiohttp_session: aiohttp.ClientSession,
101
116
  base_url: str = "https://api.sarvam.ai",
102
117
  sample_rate: Optional[int] = None,
103
118
  params: Optional[InputParams] = None,
@@ -107,9 +122,9 @@ class SarvamTTSService(TTSService):
107
122
 
108
123
  Args:
109
124
  api_key: Sarvam AI API subscription key.
125
+ aiohttp_session: Shared aiohttp session for making requests.
110
126
  voice_id: Speaker voice ID (e.g., "anushka", "meera"). Defaults to "anushka".
111
127
  model: TTS model to use ("bulbul:v1" or "bulbul:v2"). Defaults to "bulbul:v2".
112
- aiohttp_session: Shared aiohttp session for making requests.
113
128
  base_url: Sarvam AI API base URL. Defaults to "https://api.sarvam.ai".
114
129
  sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000). If None, uses default.
115
130
  params: Additional voice and preprocessing parameters. If None, uses defaults.
@@ -117,16 +132,16 @@ class SarvamTTSService(TTSService):
117
132
  """
118
133
  super().__init__(sample_rate=sample_rate, **kwargs)
119
134
 
120
- params = params or SarvamTTSService.InputParams()
135
+ params = params or SarvamHttpTTSService.InputParams()
121
136
 
122
137
  self._api_key = api_key
123
138
  self._base_url = base_url
124
139
  self._session = aiohttp_session
125
140
 
126
141
  self._settings = {
127
- "language": self.language_to_service_language(params.language)
128
- if params.language
129
- else "en-IN",
142
+ "language": (
143
+ self.language_to_service_language(params.language) if params.language else "en-IN"
144
+ ),
130
145
  "pitch": params.pitch,
131
146
  "pace": params.pace,
132
147
  "loudness": params.loudness,
@@ -186,7 +201,7 @@ class SarvamTTSService(TTSService):
186
201
  "pitch": self._settings["pitch"],
187
202
  "pace": self._settings["pace"],
188
203
  "loudness": self._settings["loudness"],
189
- "speech_sample_rate": self.sample_rate,
204
+ "sample_rate": self.sample_rate,
190
205
  "enable_preprocessing": self._settings["enable_preprocessing"],
191
206
  "model": self._model_name,
192
207
  }
@@ -240,3 +255,393 @@ class SarvamTTSService(TTSService):
240
255
  finally:
241
256
  await self.stop_ttfb_metrics()
242
257
  yield TTSStoppedFrame()
258
+
259
+
260
+ class SarvamTTSService(InterruptibleTTSService):
261
+ """WebSocket-based text-to-speech service using Sarvam AI.
262
+
263
+ Provides streaming TTS with real-time audio generation for multiple Indian languages.
264
+ Supports voice control parameters like pitch, pace, and loudness adjustment.
265
+
266
+ Example::
267
+
268
+ tts = SarvamTTSService(
269
+ api_key="your-api-key",
270
+ voice_id="anushka",
271
+ model="bulbul:v2",
272
+ params=SarvamTTSService.InputParams(
273
+ language=Language.HI,
274
+ pitch=0.1,
275
+ pace=1.2
276
+ )
277
+ )
278
+ """
279
+
280
+ class InputParams(BaseModel):
281
+ """Configuration parameters for Sarvam TTS.
282
+
283
+ Parameters:
284
+ pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0.
285
+ pace: Speech pace multiplier (0.3 to 3.0). Defaults to 1.0.
286
+ loudness: Volume multiplier (0.1 to 3.0). Defaults to 1.0.
287
+ enable_preprocessing: Enable text preprocessing. Defaults to False.
288
+ min_buffer_size: Minimum number of characters to buffer before generating audio.
289
+ Lower values reduce latency but may affect quality. Defaults to 50.
290
+ max_chunk_length: Maximum number of characters processed in a single chunk.
291
+ Controls memory usage and processing efficiency. Defaults to 200.
292
+ output_audio_codec: Audio codec format. Defaults to "linear16".
293
+ output_audio_bitrate: Audio bitrate. Defaults to "128k".
294
+ language: Target language for synthesis. Supports Bengali (bn-IN), English (en-IN),
295
+ Gujarati (gu-IN), Hindi (hi-IN), Kannada (kn-IN), Malayalam (ml-IN),
296
+ Marathi (mr-IN), Odia (od-IN), Punjabi (pa-IN), Tamil (ta-IN),
297
+ Telugu (te-IN). Defaults to en-IN.
298
+
299
+ Available Speakers:
300
+ Female: anushka, manisha, vidya, arya
301
+ Male: abhilash, karun, hitesh
302
+ """
303
+
304
+ pitch: Optional[float] = Field(default=0.0, ge=-0.75, le=0.75)
305
+ pace: Optional[float] = Field(default=1.0, ge=0.3, le=3.0)
306
+ loudness: Optional[float] = Field(default=1.0, ge=0.1, le=3.0)
307
+ enable_preprocessing: Optional[bool] = False
308
+ min_buffer_size: Optional[int] = 50
309
+ max_chunk_length: Optional[int] = 200
310
+ output_audio_codec: Optional[str] = "linear16"
311
+ output_audio_bitrate: Optional[str] = "128k"
312
+ language: Optional[Language] = Language.EN
313
+
314
+ def __init__(
315
+ self,
316
+ *,
317
+ api_key: str,
318
+ model: str = "bulbul:v2",
319
+ voice_id: str = "anushka",
320
+ url: str = "wss://api.sarvam.ai/text-to-speech/ws",
321
+ aiohttp_session: Optional[aiohttp.ClientSession] = None,
322
+ aggregate_sentences: Optional[bool] = True,
323
+ sample_rate: Optional[int] = None,
324
+ params: Optional[InputParams] = None,
325
+ **kwargs,
326
+ ):
327
+ """Initialize the Sarvam TTS service with voice and transport configuration.
328
+
329
+ Args:
330
+ api_key: Sarvam API key for authenticating TTS requests.
331
+ model: Identifier of the Sarvam speech model (default "bulbul:v2").
332
+ voice_id: Voice identifier for synthesis (default "anushka").
333
+ url: WebSocket URL for connecting to the TTS backend (default production URL).
334
+ aiohttp_session: Optional shared aiohttp session. To maintain backward compatibility.
335
+
336
+ .. deprecated:: 0.0.81
337
+ aiohttp_session is no longer used. This parameter will be removed in a future version.
338
+
339
+ aggregate_sentences: Whether to merge multiple sentences into one audio chunk (default True).
340
+ sample_rate: Desired sample rate for the output audio in Hz (overrides default if set).
341
+ params: Optional input parameters to override global configuration.
342
+ **kwargs: Optional keyword arguments forwarded to InterruptibleTTSService (such as
343
+ `push_stop_frames`, `sample_rate`, task manager parameters, event hooks, etc.)
344
+ to customize transport behavior or enable metrics support.
345
+
346
+ This method sets up the internal TTS configuration mapping, constructs the WebSocket
347
+ URL based on the chosen model, and initializes state flags before connecting.
348
+ """
349
+ # Initialize parent class first
350
+ super().__init__(
351
+ aggregate_sentences=aggregate_sentences,
352
+ push_text_frames=True,
353
+ pause_frame_processing=True,
354
+ push_stop_frames=True,
355
+ sample_rate=sample_rate,
356
+ **kwargs,
357
+ )
358
+ params = params or SarvamTTSService.InputParams()
359
+ if aiohttp_session is not None:
360
+ import warnings
361
+
362
+ with warnings.catch_warnings():
363
+ warnings.simplefilter("always")
364
+ warnings.warn(
365
+ "The 'aiohttp_session' parameter is deprecated and will be removed in a future version. ",
366
+ DeprecationWarning,
367
+ stacklevel=2,
368
+ )
369
+ # WebSocket endpoint URL
370
+ self._websocket_url = f"{url}?model={model}"
371
+ self._api_key = api_key
372
+ self.set_model_name(model)
373
+ self.set_voice(voice_id)
374
+ # Configuration parameters
375
+ self._settings = {
376
+ "target_language_code": (
377
+ self.language_to_service_language(params.language) if params.language else "en-IN"
378
+ ),
379
+ "pitch": params.pitch,
380
+ "pace": params.pace,
381
+ "speaker": voice_id,
382
+ "loudness": params.loudness,
383
+ "speech_sample_rate": 0,
384
+ "enable_preprocessing": params.enable_preprocessing,
385
+ "min_buffer_size": params.min_buffer_size,
386
+ "max_chunk_length": params.max_chunk_length,
387
+ "output_audio_codec": params.output_audio_codec,
388
+ "output_audio_bitrate": params.output_audio_bitrate,
389
+ }
390
+ self._started = False
391
+
392
+ self._receive_task = None
393
+ self._keepalive_task = None
394
+ self._disconnecting = False
395
+
396
+ def can_generate_metrics(self) -> bool:
397
+ """Check if this service can generate processing metrics.
398
+
399
+ Returns:
400
+ True, as Sarvam service supports metrics generation.
401
+ """
402
+ return True
403
+
404
+ def language_to_service_language(self, language: Language) -> Optional[str]:
405
+ """Convert a Language enum to Sarvam AI language format.
406
+
407
+ Args:
408
+ language: The language to convert.
409
+
410
+ Returns:
411
+ The Sarvam AI-specific language code, or None if not supported.
412
+ """
413
+ return language_to_sarvam_language(language)
414
+
415
+ async def start(self, frame: StartFrame):
416
+ """Start the Sarvam TTS service.
417
+
418
+ Args:
419
+ frame: The start frame containing initialization parameters.
420
+ """
421
+ await super().start(frame)
422
+
423
+ self._settings["speech_sample_rate"] = self.sample_rate
424
+ await self._connect()
425
+
426
+ async def stop(self, frame: EndFrame):
427
+ """Stop the Sarvam TTS service.
428
+
429
+ Args:
430
+ frame: The end frame.
431
+ """
432
+ await super().stop(frame)
433
+ await self._disconnect()
434
+
435
+ async def cancel(self, frame: CancelFrame):
436
+ """Cancel the Sarvam TTS service.
437
+
438
+ Args:
439
+ frame: The cancel frame.
440
+ """
441
+ await super().cancel(frame)
442
+ await self._disconnect()
443
+
444
+ async def flush_audio(self):
445
+ """Flush any pending audio synthesis by sending stop command."""
446
+ if self._websocket:
447
+ msg = {"type": "flush"}
448
+ await self._websocket.send(json.dumps(msg))
449
+
450
+ async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
451
+ """Push a frame downstream with special handling for stop conditions.
452
+
453
+ Args:
454
+ frame: The frame to push.
455
+ direction: The direction to push the frame.
456
+ """
457
+ await super().push_frame(frame, direction)
458
+ if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
459
+ self._started = False
460
+
461
+ async def process_frame(self, frame: Frame, direction: FrameDirection):
462
+ """Process a frame and flush audio if it's the end of a full response."""
463
+ if isinstance(frame, LLMFullResponseEndFrame):
464
+ await self.flush_audio()
465
+ return await super().process_frame(frame, direction)
466
+
467
+ async def _update_settings(self, settings: Mapping[str, Any]):
468
+ """Update service settings and reconnect if voice changed."""
469
+ prev_voice = self._voice_id
470
+ await super()._update_settings(settings)
471
+ if not prev_voice == self._voice_id:
472
+ logger.info(f"Switching TTS voice to: [{self._voice_id}]")
473
+ await self._send_config()
474
+
475
+ async def _connect(self):
476
+ """Connect to Sarvam WebSocket and start background tasks."""
477
+ await self._connect_websocket()
478
+
479
+ if self._websocket and not self._receive_task:
480
+ self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
481
+
482
+ if self._websocket and not self._keepalive_task:
483
+ self._keepalive_task = self.create_task(
484
+ self._keepalive_task_handler(),
485
+ )
486
+
487
+ async def _disconnect(self):
488
+ """Disconnect from Sarvam WebSocket and clean up tasks."""
489
+ try:
490
+ # First, set a flag to prevent new operations
491
+ self._disconnecting = True
492
+
493
+ # Cancel background tasks BEFORE closing websocket
494
+ if self._receive_task:
495
+ await self.cancel_task(self._receive_task, timeout=2.0)
496
+ self._receive_task = None
497
+
498
+ if self._keepalive_task:
499
+ await self.cancel_task(self._keepalive_task, timeout=2.0)
500
+ self._keepalive_task = None
501
+
502
+ # Now close the websocket
503
+ await self._disconnect_websocket()
504
+
505
+ except Exception as e:
506
+ logger.error(f"Error during disconnect: {e}")
507
+ finally:
508
+ # Reset state only after everything is cleaned up
509
+ self._started = False
510
+ self._websocket = None
511
+ self._disconnecting = False
512
+
513
+ async def _connect_websocket(self):
514
+ """Establish WebSocket connection to Sarvam API."""
515
+ try:
516
+ if self._websocket and self._websocket.state is State.OPEN:
517
+ return
518
+
519
+ self._websocket = await websocket_connect(
520
+ self._websocket_url,
521
+ additional_headers={
522
+ "api-subscription-key": self._api_key,
523
+ },
524
+ )
525
+ logger.debug("Connected to Sarvam TTS Websocket")
526
+ await self._send_config()
527
+
528
+ except Exception as e:
529
+ logger.error(f"{self} initialization error: {e}")
530
+ self._websocket = None
531
+ await self._call_event_handler("on_connection_error", f"{e}")
532
+
533
+ async def _send_config(self):
534
+ """Send initial configuration message."""
535
+ if not self._websocket:
536
+ raise Exception("WebSocket not connected")
537
+ self._settings["speaker"] = self._voice_id
538
+ logger.debug(f"Config being sent is {self._settings}")
539
+ config_message = {"type": "config", "data": self._settings}
540
+
541
+ try:
542
+ await self._websocket.send(json.dumps(config_message))
543
+ logger.debug("Configuration sent successfully")
544
+ except Exception as e:
545
+ logger.error(f"Failed to send config: {str(e)}")
546
+ await self.push_frame(ErrorFrame(f"Failed to send config: {str(e)}"))
547
+ raise
548
+
549
+ async def _disconnect_websocket(self):
550
+ """Close WebSocket connection and clean up state."""
551
+ try:
552
+ await self.stop_all_metrics()
553
+
554
+ if self._websocket:
555
+ logger.debug("Disconnecting from Sarvam")
556
+ await self._websocket.close()
557
+ except Exception as e:
558
+ logger.error(f"{self} error closing websocket: {e}")
559
+
560
+ def _get_websocket(self):
561
+ if self._websocket:
562
+ return self._websocket
563
+ raise Exception("Websocket not connected")
564
+
565
+ async def _receive_messages(self):
566
+ """Receive and process messages from Sarvam WebSocket."""
567
+ async for message in self._get_websocket():
568
+ if isinstance(message, str):
569
+ msg = json.loads(message)
570
+ if msg.get("type") == "audio":
571
+ # Check for interruption before processing audio
572
+ await self.stop_ttfb_metrics()
573
+ audio = base64.b64decode(msg["data"]["audio"])
574
+ frame = TTSAudioRawFrame(audio, self.sample_rate, 1)
575
+ await self.push_frame(frame)
576
+ elif msg.get("type") == "error":
577
+ error_msg = msg["data"]["message"]
578
+ logger.error(f"TTS Error: {error_msg}")
579
+
580
+ # If it's a timeout error, the connection might need to be reset
581
+ if "too long" in error_msg.lower() or "timeout" in error_msg.lower():
582
+ logger.warning("Connection timeout detected, service may need restart")
583
+
584
+ await self.push_frame(ErrorFrame(f"TTS Error: {error_msg}"))
585
+
586
+ async def _keepalive_task_handler(self):
587
+ """Handle keepalive messages to maintain WebSocket connection."""
588
+ KEEPALIVE_SLEEP = 20
589
+ while True:
590
+ await asyncio.sleep(KEEPALIVE_SLEEP)
591
+ await self._send_keepalive()
592
+
593
+ async def _send_keepalive(self):
594
+ """Send keepalive message to maintain connection."""
595
+ if self._disconnecting:
596
+ return
597
+
598
+ if self._websocket and self._websocket.state == State.OPEN:
599
+ msg = {"type": "ping"}
600
+ await self._websocket.send(json.dumps(msg))
601
+
602
+ async def _send_text(self, text: str):
603
+ """Send text to Sarvam WebSocket for synthesis."""
604
+ if self._disconnecting:
605
+ logger.warning("Service is disconnecting, ignoring text send")
606
+ return
607
+
608
+ if self._websocket and self._websocket.state == State.OPEN:
609
+ msg = {"type": "text", "data": {"text": text}}
610
+ await self._websocket.send(json.dumps(msg))
611
+ else:
612
+ logger.warning("WebSocket not ready, cannot send text")
613
+
614
+ @traced_tts
615
+ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
616
+ """Generate speech audio frames from input text using Sarvam TTS.
617
+
618
+ Sends text over WebSocket for synthesis and yields corresponding audio or status frames.
619
+
620
+ Args:
621
+ text: The text input to synthesize.
622
+
623
+ Yields:
624
+ Frame objects including TTSStartedFrame, TTSAudioRawFrame(s), or TTSStoppedFrame.
625
+ """
626
+ logger.debug(f"Generating TTS: [{text}]")
627
+
628
+ try:
629
+ if not self._websocket or self._websocket.state is State.CLOSED:
630
+ await self._connect()
631
+
632
+ try:
633
+ if not self._started:
634
+ await self.start_ttfb_metrics()
635
+ yield TTSStartedFrame()
636
+ self._started = True
637
+ await self._send_text(text)
638
+ await self.start_tts_usage_metrics(text)
639
+ except Exception as e:
640
+ logger.error(f"{self} error sending message: {e}")
641
+ yield TTSStoppedFrame()
642
+ await self._disconnect()
643
+ await self._connect()
644
+ return
645
+ yield None
646
+ except Exception as e:
647
+ logger.error(f"{self} exception: {e}")
@@ -10,7 +10,6 @@ import asyncio
10
10
  import datetime
11
11
  import os
12
12
  import re
13
- import warnings
14
13
  from dataclasses import dataclass, field
15
14
  from enum import Enum
16
15
  from typing import Any, AsyncGenerator
@@ -581,7 +580,6 @@ class SpeechmaticsSTTService(STTService):
581
580
  logger.debug(f"{self} Connected to Speechmatics STT service")
582
581
  except Exception as e:
583
582
  logger.error(f"{self} Error connecting to Speechmatics: {e}")
584
- finally:
585
583
  self._client = None
586
584
 
587
585
  async def _disconnect(self) -> None:
@@ -1108,6 +1106,8 @@ def _check_deprecated_args(kwargs: dict, params: SpeechmaticsSTTService.InputPar
1108
1106
 
1109
1107
  # Show deprecation warnings
1110
1108
  def _deprecation_warning(old: str, new: str | None = None):
1109
+ import warnings
1110
+
1111
1111
  with warnings.catch_warnings():
1112
1112
  warnings.simplefilter("always")
1113
1113
  if new:
@@ -34,7 +34,7 @@ from pipecat.frames.frames import (
34
34
  )
35
35
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
36
36
  from pipecat.services.ai_service import AIService
37
- from pipecat.transports.services.tavus import TavusCallbacks, TavusParams, TavusTransportClient
37
+ from pipecat.transports.tavus.transport import TavusCallbacks, TavusParams, TavusTransportClient
38
38
 
39
39
 
40
40
  class TavusVideoService(AIService):
@@ -122,6 +122,9 @@ class TTSService(AIService):
122
122
  self._voice = None
123
123
  self._voice_clone_params = None
124
124
 
125
+ self._tracing_enabled: bool = False
126
+
127
+
125
128
  if text_filter:
126
129
  import warnings
127
130
 
@@ -283,11 +286,13 @@ class TTSService(AIService):
283
286
  """
284
287
  import warnings
285
288
 
286
- warnings.warn(
287
- "`TTSService.say()` is deprecated. Push a `TTSSpeakFrame` instead.",
288
- DeprecationWarning,
289
- stacklevel=2,
290
- )
289
+ with warnings.catch_warnings():
290
+ warnings.simplefilter("always")
291
+ warnings.warn(
292
+ "`TTSService.say()` is deprecated. Push a `TTSSpeakFrame` instead.",
293
+ DeprecationWarning,
294
+ stacklevel=2,
295
+ )
291
296
 
292
297
  await self.queue_frame(TTSSpeakFrame(text))
293
298
 
@@ -304,6 +309,11 @@ class TTSService(AIService):
304
309
  await super().process_frame(frame, direction)
305
310
 
306
311
  if (
312
+ isinstance(frame, (TextFrame, LLMFullResponseStartFrame, LLMFullResponseEndFrame))
313
+ and frame.skip_tts
314
+ ):
315
+ await self.push_frame(frame, direction)
316
+ elif (
307
317
  isinstance(frame, TextFrame)
308
318
  and not isinstance(frame, InterimTranscriptionFrame)
309
319
  and not isinstance(frame, TranscriptionFrame)
@@ -13,8 +13,6 @@ from loguru import logger
13
13
  from pydantic import BaseModel, Field
14
14
 
15
15
  from pipecat.frames.frames import (
16
- CancelFrame,
17
- EndFrame,
18
16
  Frame,
19
17
  LLMFullResponseEndFrame,
20
18
  LLMFullResponseStartFrame,
@@ -22,7 +20,6 @@ from pipecat.frames.frames import (
22
20
  LLMTextFrame,
23
21
  LLMUpdateSettingsFrame,
24
22
  StartInterruptionFrame,
25
- StopInterruptionFrame,
26
23
  )
27
24
  from pipecat.processors.aggregators.llm_response import (
28
25
  LLMAssistantAggregatorParams,
@@ -32,13 +29,13 @@ from pipecat.processors.aggregators.openai_llm_context import (
32
29
  OpenAILLMContext,
33
30
  OpenAILLMContextFrame,
34
31
  )
32
+ from pipecat.processors.frame_processor import FrameDirection
33
+ from pipecat.services.llm_service import LLMService
35
34
  from pipecat.services.openai.llm import (
36
35
  OpenAIAssistantContextAggregator,
37
36
  OpenAIContextAggregatorPair,
38
37
  OpenAIUserContextAggregator,
39
38
  )
40
- from pipecat.processors.frame_processor import FrameDirection
41
- from pipecat.services.llm_service import LLMService
42
39
 
43
40
 
44
41
  class VistaarLLMService(LLMService):