dv-pipecat-ai 0.0.85.dev699__py3-none-any.whl → 0.0.85.dev814__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (43)
  1. {dv_pipecat_ai-0.0.85.dev699.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/METADATA +23 -18
  2. {dv_pipecat_ai-0.0.85.dev699.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/RECORD +43 -43
  3. pipecat/adapters/services/aws_nova_sonic_adapter.py +116 -6
  4. pipecat/pipeline/runner.py +6 -2
  5. pipecat/pipeline/task.py +40 -55
  6. pipecat/processors/aggregators/llm_context.py +40 -2
  7. pipecat/processors/frameworks/rtvi.py +1 -0
  8. pipecat/runner/daily.py +59 -20
  9. pipecat/runner/run.py +149 -67
  10. pipecat/runner/types.py +5 -5
  11. pipecat/services/assemblyai/models.py +6 -0
  12. pipecat/services/assemblyai/stt.py +13 -5
  13. pipecat/services/asyncai/tts.py +3 -0
  14. pipecat/services/aws/llm.py +33 -16
  15. pipecat/services/aws/nova_sonic/context.py +69 -0
  16. pipecat/services/aws/nova_sonic/llm.py +199 -89
  17. pipecat/services/aws/stt.py +2 -0
  18. pipecat/services/aws_nova_sonic/context.py +8 -12
  19. pipecat/services/cartesia/stt.py +77 -70
  20. pipecat/services/cartesia/tts.py +3 -1
  21. pipecat/services/deepgram/flux/stt.py +4 -0
  22. pipecat/services/elevenlabs/tts.py +82 -41
  23. pipecat/services/fish/tts.py +3 -0
  24. pipecat/services/google/stt.py +4 -0
  25. pipecat/services/lmnt/tts.py +2 -0
  26. pipecat/services/neuphonic/tts.py +3 -0
  27. pipecat/services/openai/tts.py +37 -6
  28. pipecat/services/piper/tts.py +7 -9
  29. pipecat/services/playht/tts.py +3 -0
  30. pipecat/services/rime/tts.py +9 -8
  31. pipecat/services/riva/stt.py +3 -1
  32. pipecat/services/sarvam/tts.py +87 -10
  33. pipecat/services/speechmatics/stt.py +3 -1
  34. pipecat/services/stt_service.py +23 -10
  35. pipecat/services/tts_service.py +64 -13
  36. pipecat/transports/base_input.py +3 -0
  37. pipecat/transports/base_output.py +71 -77
  38. pipecat/transports/smallwebrtc/connection.py +5 -0
  39. pipecat/transports/smallwebrtc/request_handler.py +42 -0
  40. pipecat/utils/string.py +1 -0
  41. {dv_pipecat_ai-0.0.85.dev699.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/WHEEL +0 -0
  42. {dv_pipecat_ai-0.0.85.dev699.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/licenses/LICENSE +0 -0
  43. {dv_pipecat_ai-0.0.85.dev699.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/top_level.txt +0 -0
pipecat/services/openai/tts.py

@@ -14,6 +14,7 @@ from typing import AsyncGenerator, Dict, Literal, Optional

 from loguru import logger
 from openai import AsyncOpenAI, BadRequestError
+from pydantic import BaseModel

 from pipecat.frames.frames import (
     ErrorFrame,
@@ -55,6 +56,17 @@ class OpenAITTSService(TTSService):

     OPENAI_SAMPLE_RATE = 24000  # OpenAI TTS always outputs at 24kHz

+    class InputParams(BaseModel):
+        """Input parameters for OpenAI TTS configuration.
+
+        Parameters:
+            instructions: Instructions to guide voice synthesis behavior.
+            speed: Voice speed control (0.25 to 4.0, default 1.0).
+        """
+
+        instructions: Optional[str] = None
+        speed: Optional[float] = None
+
     def __init__(
         self,
         *,
@@ -65,6 +77,7 @@ class OpenAITTSService(TTSService):
         sample_rate: Optional[int] = None,
         instructions: Optional[str] = None,
         speed: Optional[float] = None,
+        params: Optional[InputParams] = None,
         **kwargs,
     ):
         """Initialize OpenAI TTS service.
@@ -77,7 +90,11 @@ class OpenAITTSService(TTSService):
             sample_rate: Output audio sample rate in Hz. If None, uses OpenAI's default 24kHz.
             instructions: Optional instructions to guide voice synthesis behavior.
             speed: Voice speed control (0.25 to 4.0, default 1.0).
+            params: Optional synthesis controls (acting instructions, speed, ...).
             **kwargs: Additional keyword arguments passed to TTSService.
+
+        .. deprecated:: 0.0.91
+            The `instructions` and `speed` parameters are deprecated, use `InputParams` instead.
         """
         if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
             logger.warning(
@@ -86,12 +103,26 @@ class OpenAITTSService(TTSService):
             )
         super().__init__(sample_rate=sample_rate, **kwargs)

-        self._speed = speed
         self.set_model_name(model)
         self.set_voice(voice)
-        self._instructions = instructions
         self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)

+        if instructions or speed:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "The `instructions` and `speed` parameters are deprecated, use `InputParams` instead.",
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
+
+        self._settings = {
+            "instructions": params.instructions if params else instructions,
+            "speed": params.speed if params else speed,
+        }
+
     def can_generate_metrics(self) -> bool:
         """Check if this service can generate processing metrics.

@@ -144,11 +175,11 @@ class OpenAITTSService(TTSService):
             "response_format": "pcm",
         }

-        if self._instructions:
-            create_params["instructions"] = self._instructions
+        if self._settings["instructions"]:
+            create_params["instructions"] = self._settings["instructions"]

-        if self._speed:
-            create_params["speed"] = self._speed
+        if self._settings["speed"]:
+            create_params["speed"] = self._settings["speed"]

         async with self._client.audio.speech.with_streaming_response.create(
             **create_params
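
Taken together, these hunks consolidate the synthesis options into a single `_settings` dict and steer new callers toward `InputParams`, while keeping the old keyword arguments working behind a `DeprecationWarning`. A minimal usage sketch based on the new signature (the key and voice values are illustrative placeholders, not part of the diff)::

    from pipecat.services.openai.tts import OpenAITTSService

    # New style: group synthesis controls in InputParams.
    tts = OpenAITTSService(
        api_key="sk-...",  # placeholder
        voice="alloy",
        params=OpenAITTSService.InputParams(
            instructions="Speak in a calm, friendly tone.",
            speed=1.1,
        ),
    )

    # Old style still works, but now emits a DeprecationWarning:
    tts_legacy = OpenAITTSService(api_key="sk-...", speed=1.1)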
pipecat/services/piper/tts.py

@@ -14,7 +14,6 @@ from loguru import logger
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
-    TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
 )
@@ -99,16 +98,15 @@ class PiperTTSService(TTSService):

             await self.start_tts_usage_metrics(text)

+            yield TTSStartedFrame()
+
             CHUNK_SIZE = self.chunk_size

-            yield TTSStartedFrame()
-            async for chunk in response.content.iter_chunked(CHUNK_SIZE):
-                # remove wav header if present
-                if chunk.startswith(b"RIFF"):
-                    chunk = chunk[44:]
-                if len(chunk) > 0:
-                    await self.stop_ttfb_metrics()
-                    yield TTSAudioRawFrame(chunk, self.sample_rate, 1)
+            async for frame in self._stream_audio_frames_from_iterator(
+                response.content.iter_chunked(CHUNK_SIZE), strip_wav_header=True
+            ):
+                await self.stop_ttfb_metrics()
+                yield frame
         except Exception as e:
             logger.error(f"Error in run_tts: {e}")
             yield ErrorFrame(error=str(e))
pipecat/services/playht/tts.py

@@ -269,6 +269,8 @@ class PlayHTTTSService(InterruptibleTTSService):
                 raise ValueError("WebSocket URL is not a string")

             self._websocket = await websocket_connect(self._websocket_url)
+
+            await self._call_event_handler("on_connected")
         except ValueError as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -291,6 +293,7 @@ class PlayHTTTSService(InterruptibleTTSService):
         finally:
             self._request_id = None
             self._websocket = None
+            await self._call_event_handler("on_disconnected")

     async def _get_websocket_url(self):
         """Retrieve WebSocket URL from PlayHT API."""
pipecat/services/rime/tts.py

@@ -255,6 +255,8 @@ class RimeTTSService(AudioContextWordTTSService):
             url = f"{self._url}?{params}"
             headers = {"Authorization": f"Bearer {self._api_key}"}
             self._websocket = await websocket_connect(url, additional_headers=headers)
+
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -272,6 +274,7 @@ class RimeTTSService(AudioContextWordTTSService):
         finally:
             self._context_id = None
             self._websocket = None
+            await self._call_event_handler("on_disconnected")

     def _get_websocket(self):
         """Get active websocket connection or raise exception."""
@@ -553,15 +556,13 @@ class RimeHttpTTSService(TTSService):

             CHUNK_SIZE = self.chunk_size

-            async for chunk in response.content.iter_chunked(CHUNK_SIZE):
-                if need_to_strip_wav_header and chunk.startswith(b"RIFF"):
-                    chunk = chunk[44:]
-                    need_to_strip_wav_header = False
+            async for frame in self._stream_audio_frames_from_iterator(
+                response.content.iter_chunked(CHUNK_SIZE),
+                strip_wav_header=need_to_strip_wav_header,
+            ):
+                await self.stop_ttfb_metrics()
+                yield frame

-                if len(chunk) > 0:
-                    await self.stop_ttfb_metrics()
-                    frame = TTSAudioRawFrame(chunk, self.sample_rate, 1)
-                    yield frame
         except Exception as e:
             logger.exception(f"Error generating TTS: {e}")
             yield ErrorFrame(error=f"Rime TTS error: {str(e)}")
pipecat/services/riva/stt.py

@@ -583,7 +583,9 @@ class RivaSegmentedSTTService(SegmentedSTTService):
         self._config.language_code = self._language

     @traced_stt
-    async def _handle_transcription(self, transcript: str, language: Optional[Language] = None):
+    async def _handle_transcription(
+        self, transcript: str, is_final: bool, language: Optional[Language] = None
+    ):
         """Handle a transcription result with tracing."""
         pass

pipecat/services/sarvam/tts.py

@@ -77,17 +77,29 @@ class SarvamHttpTTSService(TTSService):

    Example::

-        tts = SarvamTTSService(
+        tts = SarvamHttpTTSService(
             api_key="your-api-key",
             voice_id="anushka",
             model="bulbul:v2",
             aiohttp_session=session,
-            params=SarvamTTSService.InputParams(
+            params=SarvamHttpTTSService.InputParams(
                 language=Language.HI,
                 pitch=0.1,
                 pace=1.2
             )
         )
+
+        # For bulbul v3 beta with any speaker:
+        tts_v3 = SarvamHttpTTSService(
+            api_key="your-api-key",
+            voice_id="speaker_name",
+            model="bulbul:v3",
+            aiohttp_session=session,
+            params=SarvamHttpTTSService.InputParams(
+                language=Language.HI,
+                temperature=0.8
+            )
+        )
    """

    class InputParams(BaseModel):
@@ -106,6 +118,14 @@ class SarvamHttpTTSService(TTSService):
         pace: Optional[float] = Field(default=1.0, ge=0.3, le=3.0)
         loudness: Optional[float] = Field(default=1.0, ge=0.1, le=3.0)
         enable_preprocessing: Optional[bool] = False
+        temperature: Optional[float] = Field(
+            default=0.6,
+            ge=0.01,
+            le=1.0,
+            description="Controls the randomness of the output for bulbul v3 beta. "
+            "Lower values make the output more focused and deterministic, while "
+            "higher values make it more random. Range: 0.01 to 1.0. Default: 0.6.",
+        )

     def __init__(
         self,
@@ -125,7 +145,7 @@ class SarvamHttpTTSService(TTSService):
             api_key: Sarvam AI API subscription key.
             aiohttp_session: Shared aiohttp session for making requests.
             voice_id: Speaker voice ID (e.g., "anushka", "meera"). Defaults to "anushka".
-            model: TTS model to use ("bulbul:v1" or "bulbul:v2"). Defaults to "bulbul:v2".
+            model: TTS model to use ("bulbul:v2", "bulbul:v3-beta", or "bulbul:v3"). Defaults to "bulbul:v2".
             base_url: Sarvam AI API base URL. Defaults to "https://api.sarvam.ai".
             sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000). If None, uses default.
             params: Additional voice and preprocessing parameters. If None, uses defaults.
@@ -139,16 +159,32 @@ class SarvamHttpTTSService(TTSService):
         self._base_url = base_url
         self._session = aiohttp_session

+        # Build base settings common to all models
         self._settings = {
             "language": (
                 self.language_to_service_language(params.language) if params.language else "en-IN"
             ),
-            "pitch": params.pitch,
-            "pace": params.pace,
-            "loudness": params.loudness,
             "enable_preprocessing": params.enable_preprocessing,
         }

+        # Add model-specific parameters
+        if model in ("bulbul:v3-beta", "bulbul:v3"):
+            self._settings.update(
+                {
+                    "temperature": getattr(params, "temperature", 0.6),
+                    "model": model,
+                }
+            )
+        else:
+            self._settings.update(
+                {
+                    "pitch": params.pitch,
+                    "pace": params.pace,
+                    "loudness": params.loudness,
+                    "model": model,
+                }
+            )
+
         self.set_model_name(model)
         self.set_voice(voice_id)

@@ -276,6 +312,18 @@ class SarvamTTSService(InterruptibleTTSService):
                 pace=1.2
             )
         )
+
+        # For bulbul v3 beta with any speaker and temperature:
+        # Note: pace and loudness are not supported for bulbul v3 and bulbul v3 beta
+        tts_v3 = SarvamTTSService(
+            api_key="your-api-key",
+            voice_id="speaker_name",
+            model="bulbul:v3",
+            params=SarvamTTSService.InputParams(
+                language=Language.HI,
+                temperature=0.8
+            )
+        )
     """

     class InputParams(BaseModel):
@@ -311,6 +359,14 @@ class SarvamTTSService(InterruptibleTTSService):
         output_audio_codec: Optional[str] = "linear16"
         output_audio_bitrate: Optional[str] = "128k"
         language: Optional[Language] = Language.EN
+        temperature: Optional[float] = Field(
+            default=0.6,
+            ge=0.01,
+            le=1.0,
+            description="Controls the randomness of the output for bulbul v3 beta. "
+            "Lower values make the output more focused and deterministic, while "
+            "higher values make it more random. Range: 0.01 to 1.0. Default: 0.6.",
+        )

     def __init__(
         self,
@@ -330,6 +386,7 @@ class SarvamTTSService(InterruptibleTTSService):
         Args:
             api_key: Sarvam API key for authenticating TTS requests.
             model: Identifier of the Sarvam speech model (default "bulbul:v2").
+                Supports "bulbul:v2", "bulbul:v3-beta" and "bulbul:v3".
             voice_id: Voice identifier for synthesis (default "anushka").
             url: WebSocket URL for connecting to the TTS backend (default production URL).
             aiohttp_session: Optional shared aiohttp session. To maintain backward compatibility.
@@ -372,15 +429,12 @@ class SarvamTTSService(InterruptibleTTSService):
         self._api_key = api_key
         self.set_model_name(model)
         self.set_voice(voice_id)
-        # Configuration parameters
+        # Build base settings common to all models
         self._settings = {
             "target_language_code": (
                 self.language_to_service_language(params.language) if params.language else "en-IN"
             ),
-            "pitch": params.pitch,
-            "pace": params.pace,
             "speaker": voice_id,
-            "loudness": params.loudness,
             "speech_sample_rate": 0,
             "enable_preprocessing": params.enable_preprocessing,
             "min_buffer_size": params.min_buffer_size,
@@ -388,6 +442,24 @@ class SarvamTTSService(InterruptibleTTSService):
             "output_audio_codec": params.output_audio_codec,
             "output_audio_bitrate": params.output_audio_bitrate,
         }
+
+        # Add model-specific parameters
+        if model in ("bulbul:v3-beta", "bulbul:v3"):
+            self._settings.update(
+                {
+                    "temperature": getattr(params, "temperature", 0.6),
+                    "model": model,
+                }
+            )
+        else:
+            self._settings.update(
+                {
+                    "pitch": params.pitch,
+                    "pace": params.pace,
+                    "loudness": params.loudness,
+                    "model": model,
+                }
+            )
         self._started = False

         self._receive_task = None
@@ -526,6 +598,7 @@ class SarvamTTSService(InterruptibleTTSService):
             logger.debug("Connected to Sarvam TTS Websocket")
             await self._send_config()

+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -557,6 +630,10 @@ class SarvamTTSService(InterruptibleTTSService):
                 await self._websocket.close()
         except Exception as e:
             logger.error(f"{self} error closing websocket: {e}")
+        finally:
+            self._started = False
+            self._websocket = None
+            await self._call_event_handler("on_disconnected")

     def _get_websocket(self):
         if self._websocket:
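
In both Sarvam services the settings dict is now assembled in two passes: base keys shared by every model, then a model-specific branch. The branch logic, distilled from the two constructor hunks above (a sketch for illustration, not code from the package)::

    def model_specific_settings(model: str, params) -> dict:
        if model in ("bulbul:v3-beta", "bulbul:v3"):
            # v3 models: temperature replaces the prosody controls.
            return {"temperature": getattr(params, "temperature", 0.6), "model": model}
        # v2 keeps the classic prosody controls.
        return {
            "pitch": params.pitch,
            "pace": params.pace,
            "loudness": params.loudness,
            "model": model,
        }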
pipecat/services/speechmatics/stt.py

@@ -577,6 +577,7 @@ class SpeechmaticsSTTService(STTService):
                 ),
             )
             logger.debug(f"{self} Connected to Speechmatics STT service")
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} Error connecting to Speechmatics: {e}")
             self._client = None
@@ -595,6 +596,7 @@ class SpeechmaticsSTTService(STTService):
             logger.error(f"{self} Error closing Speechmatics client: {e}")
         finally:
             self._client = None
+            await self._call_event_handler("on_disconnected")

     def _process_config(self) -> None:
         """Create a formatted STT transcription config.
@@ -618,7 +620,7 @@ class SpeechmaticsSTTService(STTService):
             transcription_config.additional_vocab = [
                 {
                     "content": e.content,
-                    "sounds_like": e.sounds_like,
+                    **({"sounds_like": e.sounds_like} if e.sounds_like else {}),
                 }
                 for e in self._params.additional_vocab
             ]
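
The `**({...} if condition else {})` idiom merges a key into the dict only when its value is truthy, so vocab entries without `sounds_like` no longer send an empty field. A standalone illustration of the idiom::

    sounds_like = ["pipe cat"]
    entry = {"content": "Pipecat", **({"sounds_like": sounds_like} if sounds_like else {})}
    # {'content': 'Pipecat', 'sounds_like': ['pipe cat']}

    sounds_like = None
    entry = {"content": "Pipecat", **({"sounds_like": sounds_like} if sounds_like else {})}
    # {'content': 'Pipecat'}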
pipecat/services/stt_service.py

@@ -36,6 +36,25 @@ class STTService(AIService):
     Provides common functionality for STT services including audio passthrough,
     muting, settings management, and audio processing. Subclasses must implement
     the run_stt method to provide actual speech recognition.
+
+    Event handlers:
+        on_connected: Called when connected to the STT service.
+        on_disconnected: Called when disconnected from the STT service.
+        on_connection_error: Called when an STT service connection error occurs.
+
+    Example::
+
+        @stt.event_handler("on_connected")
+        async def on_connected(stt: STTService):
+            logger.debug("STT connected")
+
+        @stt.event_handler("on_disconnected")
+        async def on_disconnected(stt: STTService):
+            logger.debug("STT disconnected")
+
+        @stt.event_handler("on_connection_error")
+        async def on_connection_error(stt: STTService, error: str):
+            logger.error(f"STT connection error: {error}")
     """

     def __init__(
@@ -66,6 +85,10 @@ class STTService(AIService):
         self._voicemail_detect: bool = False
         self._user_id: str = ""

+        self._register_event_handler("on_connected")
+        self._register_event_handler("on_disconnected")
+        self._register_event_handler("on_connection_error")
+
     @property
     def is_muted(self) -> bool:
         """Check if the STT service is currently muted.
@@ -307,15 +330,6 @@ class WebsocketSTTService(STTService, WebsocketService):

     Combines STT functionality with websocket connectivity, providing automatic
     error handling and reconnection capabilities.
-
-    Event handlers:
-        on_connection_error: Called when a websocket connection error occurs.
-
-    Example::
-
-        @stt.event_handler("on_connection_error")
-        async def on_connection_error(stt: STTService, error: str):
-            logger.error(f"STT connection error: {error}")
     """

     def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
@@ -327,7 +341,6 @@ class WebsocketSTTService(STTService, WebsocketService):
         """
         STTService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
-        self._register_event_handler("on_connection_error")

     async def _report_error(self, error: ErrorFrame):
         await self._call_event_handler("on_connection_error", error.error)
pipecat/services/tts_service.py

@@ -8,7 +8,17 @@

 import asyncio
 from abc import abstractmethod
-from typing import Any, AsyncGenerator, Callable, Dict, List, Mapping, Optional, Sequence, Tuple
+from typing import (
+    Any,
+    AsyncGenerator,
+    AsyncIterator,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+)

 from loguru import logger

@@ -49,6 +59,25 @@ class TTSService(AIService):
     Provides common functionality for TTS services including text aggregation,
     filtering, audio generation, and frame management. Supports configurable
     sentence aggregation, silence insertion, and frame processing control.
+
+    Event handlers:
+        on_connected: Called when connected to the TTS service.
+        on_disconnected: Called when disconnected from the TTS service.
+        on_connection_error: Called when a TTS service connection error occurs.
+
+    Example::
+
+        @tts.event_handler("on_connected")
+        async def on_connected(tts: TTSService):
+            logger.debug("TTS connected")
+
+        @tts.event_handler("on_disconnected")
+        async def on_disconnected(tts: TTSService):
+            logger.debug("TTS disconnected")
+
+        @tts.event_handler("on_connection_error")
+        async def on_connection_error(tts: TTSService, error: str):
+            logger.error(f"TTS connection error: {error}")
     """

     def __init__(
@@ -124,7 +153,6 @@ class TTSService(AIService):

         self._tracing_enabled: bool = False

-
         if text_filter:
             import warnings

@@ -143,6 +171,10 @@ class TTSService(AIService):

         self._processing_text: bool = False

+        self._register_event_handler("on_connected")
+        self._register_event_handler("on_disconnected")
+        self._register_event_handler("on_connection_error")
+
     @property
     def sample_rate(self) -> int:
         """Get the current sample rate for audio output.
@@ -384,6 +416,36 @@ class TTSService(AIService):
         ):
             await self._stop_frame_queue.put(frame)

+    async def _stream_audio_frames_from_iterator(
+        self, iterator: AsyncIterator[bytes], *, strip_wav_header: bool
+    ) -> AsyncGenerator[Frame, None]:
+        buffer = bytearray()
+        need_to_strip_wav_header = strip_wav_header
+        async for chunk in iterator:
+            if need_to_strip_wav_header and chunk.startswith(b"RIFF"):
+                chunk = chunk[44:]
+                need_to_strip_wav_header = False
+
+            # Append to current buffer.
+            buffer.extend(chunk)
+
+            # Round down to an even number of bytes.
+            aligned_length = len(buffer) & ~1  # 111111111...11110
+            if aligned_length > 0:
+                aligned_chunk = buffer[:aligned_length]
+                buffer = buffer[aligned_length:]  # keep any leftover byte
+
+                if len(aligned_chunk) > 0:
+                    frame = TTSAudioRawFrame(bytes(aligned_chunk), self.sample_rate, 1)
+                    yield frame
+
+        if len(buffer) > 0:
+            # Make sure we don't need an extra padding byte.
+            if len(buffer) % 2 == 1:
+                buffer.extend(b"\x00")
+            frame = TTSAudioRawFrame(bytes(buffer), self.sample_rate, 1)
+            yield frame
+
     async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         self._processing_text = False
         await self._text_aggregator.handle_interruption()
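
This helper, used by the Piper and Rime HTTP hunks earlier, aligns chunks to an even byte count because `TTSAudioRawFrame` carries 16-bit PCM, and an odd-length chunk would split a sample across frames. A standalone worked example of the masking arithmetic (illustrative, not from the diff)::

    buffer = bytearray(b"\x01\x02\x03\x04\x05")  # 5 bytes arrive
    aligned_length = len(buffer) & ~1            # 5 & 0b...11110 == 4
    aligned_chunk = buffer[:aligned_length]      # two complete 16-bit samples
    buffer = buffer[aligned_length:]             # b"\x05" waits for its pair

    # At end of stream, a dangling byte is zero-padded to a full sample:
    if len(buffer) % 2 == 1:
        buffer.extend(b"\x00")
    assert bytes(buffer) == b"\x05\x00"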
@@ -613,7 +675,6 @@ class WebsocketTTSService(TTSService, WebsocketService):
         """
         TTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
-        self._register_event_handler("on_connection_error")

     async def _report_error(self, error: ErrorFrame):
         await self._call_event_handler("on_connection_error", error.error)
@@ -665,15 +726,6 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService):
     """Base class for websocket-based TTS services that support word timestamps.

     Combines word timestamp functionality with websocket connectivity.
-
-    Event handlers:
-        on_connection_error: Called when a websocket connection error occurs.
-
-    Example::
-
-        @tts.event_handler("on_connection_error")
-        async def on_connection_error(tts: TTSService, error: str):
-            logger.error(f"TTS connection error: {error}")
     """

     def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
@@ -685,7 +737,6 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService):
         """
         WordTTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
-        self._register_event_handler("on_connection_error")

     async def _report_error(self, error: ErrorFrame):
         await self._call_event_handler("on_connection_error", error.error)
pipecat/transports/base_input.py

@@ -232,6 +232,9 @@ class BaseInputTransport(FrameProcessor):
         """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
+        # Stop audio filter.
+        if self._params.audio_in_filter:
+            await self._params.audio_in_filter.stop()

     async def set_transport_ready(self, frame: StartFrame):
         """Called when the transport is ready to stream.