dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/rime/tts.py
CHANGED
|
@@ -4,10 +4,16 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Rime text-to-speech service implementations.
|
|
8
|
+
|
|
9
|
+
This module provides both WebSocket and HTTP-based text-to-speech services
|
|
10
|
+
using Rime's API for streaming and batch audio synthesis.
|
|
11
|
+
"""
|
|
12
|
+
|
|
7
13
|
import base64
|
|
8
14
|
import json
|
|
9
15
|
import uuid
|
|
10
|
-
from typing import AsyncGenerator, Optional
|
|
16
|
+
from typing import Any, AsyncGenerator, Mapping, Optional
|
|
11
17
|
|
|
12
18
|
import aiohttp
|
|
13
19
|
from loguru import logger
|
|
@@ -33,7 +39,8 @@ from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator
|
|
|
33
39
|
from pipecat.utils.tracing.service_decorators import traced_tts
|
|
34
40
|
|
|
35
41
|
try:
|
|
36
|
-
import
|
|
42
|
+
from websockets.asyncio.client import connect as websocket_connect
|
|
43
|
+
from websockets.protocol import State
|
|
37
44
|
except ModuleNotFoundError as e:
|
|
38
45
|
logger.error(f"Exception: {e}")
|
|
39
46
|
logger.error("In order to use Rime, you need to `pip install pipecat-ai[rime]`.")
|
|
@@ -47,7 +54,7 @@ def language_to_rime_language(language: Language) -> str:
|
|
|
47
54
|
language: The pipecat Language enum value.
|
|
48
55
|
|
|
49
56
|
Returns:
|
|
50
|
-
|
|
57
|
+
Three-letter language code used by Rime (e.g., 'eng' for English).
|
|
51
58
|
"""
|
|
52
59
|
LANGUAGE_MAP = {
|
|
53
60
|
Language.DE: "ger",
|
|
@@ -67,7 +74,15 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
67
74
|
"""
|
|
68
75
|
|
|
69
76
|
class InputParams(BaseModel):
|
|
70
|
-
"""Configuration parameters for Rime TTS service.
|
|
77
|
+
"""Configuration parameters for Rime TTS service.
|
|
78
|
+
|
|
79
|
+
Parameters:
|
|
80
|
+
language: Language for synthesis. Defaults to English.
|
|
81
|
+
speed_alpha: Speech speed multiplier. Defaults to 1.0.
|
|
82
|
+
reduce_latency: Whether to reduce latency at potential quality cost.
|
|
83
|
+
pause_between_brackets: Whether to add pauses between bracketed content.
|
|
84
|
+
phonemize_between_brackets: Whether to phonemize bracketed content.
|
|
85
|
+
"""
|
|
71
86
|
|
|
72
87
|
language: Optional[Language] = Language.EN
|
|
73
88
|
speed_alpha: Optional[float] = 1.0
|
|
@@ -85,6 +100,7 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
85
100
|
sample_rate: Optional[int] = None,
|
|
86
101
|
params: Optional[InputParams] = None,
|
|
87
102
|
text_aggregator: Optional[BaseTextAggregator] = None,
|
|
103
|
+
aggregate_sentences: Optional[bool] = True,
|
|
88
104
|
**kwargs,
|
|
89
105
|
):
|
|
90
106
|
"""Initialize Rime TTS service.
|
|
@@ -96,10 +112,13 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
96
112
|
model: Model ID to use for synthesis.
|
|
97
113
|
sample_rate: Audio sample rate in Hz.
|
|
98
114
|
params: Additional configuration parameters.
|
|
115
|
+
text_aggregator: Custom text aggregator for processing input text.
|
|
116
|
+
aggregate_sentences: Whether to aggregate sentences within the TTSService.
|
|
117
|
+
**kwargs: Additional arguments passed to parent class.
|
|
99
118
|
"""
|
|
100
119
|
# Initialize with parent class settings for proper frame handling
|
|
101
120
|
super().__init__(
|
|
102
|
-
aggregate_sentences=
|
|
121
|
+
aggregate_sentences=aggregate_sentences,
|
|
103
122
|
push_text_frames=False,
|
|
104
123
|
push_stop_frames=True,
|
|
105
124
|
pause_frame_processing=True,
|
|
@@ -135,17 +154,43 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
135
154
|
self._cumulative_time = 0 # Accumulates time across messages
|
|
136
155
|
|
|
137
156
|
def can_generate_metrics(self) -> bool:
|
|
157
|
+
"""Check if this service can generate processing metrics.
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
True, as Rime service supports metrics generation.
|
|
161
|
+
"""
|
|
138
162
|
return True
|
|
139
163
|
|
|
140
164
|
def language_to_service_language(self, language: Language) -> str | None:
|
|
141
|
-
"""Convert pipecat language to Rime language code.
|
|
165
|
+
"""Convert pipecat language to Rime language code.
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
language: The language to convert.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
The Rime-specific language code, or None if not supported.
|
|
172
|
+
"""
|
|
142
173
|
return language_to_rime_language(language)
|
|
143
174
|
|
|
144
175
|
async def set_model(self, model: str):
|
|
145
|
-
"""Update the TTS model.
|
|
176
|
+
"""Update the TTS model.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
model: The model name to use for synthesis.
|
|
180
|
+
"""
|
|
146
181
|
self._model = model
|
|
147
182
|
await super().set_model(model)
|
|
148
183
|
|
|
184
|
+
async def _update_settings(self, settings: Mapping[str, Any]):
|
|
185
|
+
"""Update service settings and reconnect if voice changed."""
|
|
186
|
+
prev_voice = self._voice_id
|
|
187
|
+
await super()._update_settings(settings)
|
|
188
|
+
if not prev_voice == self._voice_id:
|
|
189
|
+
self._settings["speaker"] = self._voice_id
|
|
190
|
+
logger.info(f"Switching TTS voice to: [{self._voice_id}]")
|
|
191
|
+
await self._disconnect()
|
|
192
|
+
await self._connect()
|
|
193
|
+
|
|
149
194
|
def _build_msg(self, text: str = "") -> dict:
|
|
150
195
|
"""Build JSON message for Rime API."""
|
|
151
196
|
return {"text": text, "contextId": self._context_id}
|
|
@@ -159,18 +204,30 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
159
204
|
return {"operation": "eos"}
|
|
160
205
|
|
|
161
206
|
async def start(self, frame: StartFrame):
|
|
162
|
-
"""Start the service and establish websocket connection.
|
|
207
|
+
"""Start the service and establish websocket connection.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
frame: The start frame containing initialization parameters.
|
|
211
|
+
"""
|
|
163
212
|
await super().start(frame)
|
|
164
213
|
self._settings["samplingRate"] = self.sample_rate
|
|
165
214
|
await self._connect()
|
|
166
215
|
|
|
167
216
|
async def stop(self, frame: EndFrame):
|
|
168
|
-
"""Stop the service and close connection.
|
|
217
|
+
"""Stop the service and close connection.
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
frame: The end frame.
|
|
221
|
+
"""
|
|
169
222
|
await super().stop(frame)
|
|
170
223
|
await self._disconnect()
|
|
171
224
|
|
|
172
225
|
async def cancel(self, frame: CancelFrame):
|
|
173
|
-
"""Cancel current operation and clean up.
|
|
226
|
+
"""Cancel current operation and clean up.
|
|
227
|
+
|
|
228
|
+
Args:
|
|
229
|
+
frame: The cancel frame.
|
|
230
|
+
"""
|
|
174
231
|
await super().cancel(frame)
|
|
175
232
|
await self._disconnect()
|
|
176
233
|
|
|
@@ -192,13 +249,13 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
192
249
|
async def _connect_websocket(self):
|
|
193
250
|
"""Connect to Rime websocket API with configured settings."""
|
|
194
251
|
try:
|
|
195
|
-
if self._websocket and self._websocket.
|
|
252
|
+
if self._websocket and self._websocket.state is State.OPEN:
|
|
196
253
|
return
|
|
197
254
|
|
|
198
255
|
params = "&".join(f"{k}={v}" for k, v in self._settings.items())
|
|
199
256
|
url = f"{self._url}?{params}"
|
|
200
257
|
headers = {"Authorization": f"Bearer {self._api_key}"}
|
|
201
|
-
self._websocket = await
|
|
258
|
+
self._websocket = await websocket_connect(url, additional_headers=headers)
|
|
202
259
|
except Exception as e:
|
|
203
260
|
logger.error(f"{self} initialization error: {e}")
|
|
204
261
|
self._websocket = None
|
|
@@ -261,6 +318,7 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
261
318
|
return word_pairs
|
|
262
319
|
|
|
263
320
|
async def flush_audio(self):
|
|
321
|
+
"""Flush any pending audio synthesis."""
|
|
264
322
|
if not self._context_id or not self._websocket:
|
|
265
323
|
return
|
|
266
324
|
|
|
@@ -310,7 +368,12 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
310
368
|
self._context_id = None
|
|
311
369
|
|
|
312
370
|
async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
|
|
313
|
-
"""Push frame and handle end-of-turn conditions.
|
|
371
|
+
"""Push frame and handle end-of-turn conditions.
|
|
372
|
+
|
|
373
|
+
Args:
|
|
374
|
+
frame: The frame to push.
|
|
375
|
+
direction: The direction to push the frame.
|
|
376
|
+
"""
|
|
314
377
|
await super().push_frame(frame, direction)
|
|
315
378
|
if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
|
|
316
379
|
if isinstance(frame, TTSStoppedFrame):
|
|
@@ -318,17 +381,17 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
318
381
|
|
|
319
382
|
@traced_tts
|
|
320
383
|
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
|
321
|
-
"""Generate speech from text.
|
|
384
|
+
"""Generate speech from text using Rime's streaming API.
|
|
322
385
|
|
|
323
386
|
Args:
|
|
324
387
|
text: The text to convert to speech.
|
|
325
388
|
|
|
326
389
|
Yields:
|
|
327
|
-
|
|
390
|
+
Frame: Audio frames containing the synthesized speech.
|
|
328
391
|
"""
|
|
329
392
|
logger.debug(f"{self}: Generating TTS [{text}]")
|
|
330
393
|
try:
|
|
331
|
-
if not self._websocket or self._websocket.
|
|
394
|
+
if not self._websocket or self._websocket.state is State.CLOSED:
|
|
332
395
|
await self._connect()
|
|
333
396
|
|
|
334
397
|
try:
|
|
@@ -354,7 +417,24 @@ class RimeTTSService(AudioContextWordTTSService):
|
|
|
354
417
|
|
|
355
418
|
|
|
356
419
|
class RimeHttpTTSService(TTSService):
|
|
420
|
+
"""Rime HTTP-based text-to-speech service.
|
|
421
|
+
|
|
422
|
+
Provides text-to-speech synthesis using Rime's HTTP API for batch processing.
|
|
423
|
+
Suitable for use cases where streaming is not required.
|
|
424
|
+
"""
|
|
425
|
+
|
|
357
426
|
class InputParams(BaseModel):
|
|
427
|
+
"""Configuration parameters for Rime HTTP TTS service.
|
|
428
|
+
|
|
429
|
+
Parameters:
|
|
430
|
+
language: Language for synthesis. Defaults to English.
|
|
431
|
+
pause_between_brackets: Whether to add pauses between bracketed content.
|
|
432
|
+
phonemize_between_brackets: Whether to phonemize bracketed content.
|
|
433
|
+
inline_speed_alpha: Inline speed control markup.
|
|
434
|
+
speed_alpha: Speech speed multiplier. Defaults to 1.0.
|
|
435
|
+
reduce_latency: Whether to reduce latency at potential quality cost.
|
|
436
|
+
"""
|
|
437
|
+
|
|
358
438
|
language: Optional[Language] = Language.EN
|
|
359
439
|
pause_between_brackets: Optional[bool] = False
|
|
360
440
|
phonemize_between_brackets: Optional[bool] = False
|
|
@@ -373,6 +453,17 @@ class RimeHttpTTSService(TTSService):
|
|
|
373
453
|
params: Optional[InputParams] = None,
|
|
374
454
|
**kwargs,
|
|
375
455
|
):
|
|
456
|
+
"""Initialize Rime HTTP TTS service.
|
|
457
|
+
|
|
458
|
+
Args:
|
|
459
|
+
api_key: Rime API key for authentication.
|
|
460
|
+
voice_id: ID of the voice to use.
|
|
461
|
+
aiohttp_session: Shared aiohttp session for HTTP requests.
|
|
462
|
+
model: Model ID to use for synthesis.
|
|
463
|
+
sample_rate: Audio sample rate in Hz.
|
|
464
|
+
params: Additional configuration parameters.
|
|
465
|
+
**kwargs: Additional arguments passed to parent TTSService.
|
|
466
|
+
"""
|
|
376
467
|
super().__init__(sample_rate=sample_rate, **kwargs)
|
|
377
468
|
|
|
378
469
|
params = params or RimeHttpTTSService.InputParams()
|
|
@@ -396,14 +487,34 @@ class RimeHttpTTSService(TTSService):
|
|
|
396
487
|
self._settings["inlineSpeedAlpha"] = params.inline_speed_alpha
|
|
397
488
|
|
|
398
489
|
def can_generate_metrics(self) -> bool:
|
|
490
|
+
"""Check if this service can generate processing metrics.
|
|
491
|
+
|
|
492
|
+
Returns:
|
|
493
|
+
True, as Rime HTTP service supports metrics generation.
|
|
494
|
+
"""
|
|
399
495
|
return True
|
|
400
496
|
|
|
401
497
|
def language_to_service_language(self, language: Language) -> str | None:
|
|
402
|
-
"""Convert pipecat language to Rime language code.
|
|
498
|
+
"""Convert pipecat language to Rime language code.
|
|
499
|
+
|
|
500
|
+
Args:
|
|
501
|
+
language: The language to convert.
|
|
502
|
+
|
|
503
|
+
Returns:
|
|
504
|
+
The Rime-specific language code, or None if not supported.
|
|
505
|
+
"""
|
|
403
506
|
return language_to_rime_language(language)
|
|
404
507
|
|
|
405
508
|
@traced_tts
|
|
406
509
|
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
|
510
|
+
"""Generate speech from text using Rime's HTTP API.
|
|
511
|
+
|
|
512
|
+
Args:
|
|
513
|
+
text: The text to synthesize into speech.
|
|
514
|
+
|
|
515
|
+
Yields:
|
|
516
|
+
Frame: Audio frames containing the synthesized speech.
|
|
517
|
+
"""
|
|
407
518
|
logger.debug(f"{self}: Generating TTS [{text}]")
|
|
408
519
|
|
|
409
520
|
headers = {
|
pipecat/services/riva/stt.py
CHANGED
|
@@ -4,7 +4,10 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""NVIDIA Riva Speech-to-Text service implementations for real-time and batch transcription."""
|
|
8
|
+
|
|
7
9
|
import asyncio
|
|
10
|
+
from concurrent.futures import CancelledError as FuturesCancelledError
|
|
8
11
|
from typing import AsyncGenerator, List, Mapping, Optional
|
|
9
12
|
|
|
10
13
|
from loguru import logger
|
|
@@ -21,7 +24,6 @@ from pipecat.frames.frames import (
|
|
|
21
24
|
)
|
|
22
25
|
from pipecat.services.stt_service import SegmentedSTTService, STTService
|
|
23
26
|
from pipecat.transcriptions.language import Language
|
|
24
|
-
from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
|
|
25
27
|
from pipecat.utils.time import time_now_iso8601
|
|
26
28
|
from pipecat.utils.tracing.service_decorators import traced_stt
|
|
27
29
|
|
|
@@ -87,7 +89,20 @@ def language_to_riva_language(language: Language) -> Optional[str]:
|
|
|
87
89
|
|
|
88
90
|
|
|
89
91
|
class RivaSTTService(STTService):
|
|
92
|
+
"""Real-time speech-to-text service using NVIDIA Riva streaming ASR.
|
|
93
|
+
|
|
94
|
+
Provides real-time transcription capabilities using NVIDIA's Riva ASR models
|
|
95
|
+
through streaming recognition. Supports interim results and continuous audio
|
|
96
|
+
processing for low-latency applications.
|
|
97
|
+
"""
|
|
98
|
+
|
|
90
99
|
class InputParams(BaseModel):
|
|
100
|
+
"""Configuration parameters for Riva STT service.
|
|
101
|
+
|
|
102
|
+
Parameters:
|
|
103
|
+
language: Target language for transcription. Defaults to EN_US.
|
|
104
|
+
"""
|
|
105
|
+
|
|
91
106
|
language: Optional[Language] = Language.EN_US
|
|
92
107
|
|
|
93
108
|
def __init__(
|
|
@@ -103,6 +118,16 @@ class RivaSTTService(STTService):
|
|
|
103
118
|
params: Optional[InputParams] = None,
|
|
104
119
|
**kwargs,
|
|
105
120
|
):
|
|
121
|
+
"""Initialize the Riva STT service.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
api_key: NVIDIA API key for authentication.
|
|
125
|
+
server: Riva server address. Defaults to NVIDIA Cloud Function endpoint.
|
|
126
|
+
model_function_map: Mapping containing 'function_id' and 'model_name' for the ASR model.
|
|
127
|
+
sample_rate: Audio sample rate in Hz. If None, uses pipeline default.
|
|
128
|
+
params: Additional configuration parameters for Riva.
|
|
129
|
+
**kwargs: Additional arguments passed to STTService.
|
|
130
|
+
"""
|
|
106
131
|
super().__init__(sample_rate=sample_rate, **kwargs)
|
|
107
132
|
|
|
108
133
|
params = params or RivaSTTService.InputParams()
|
|
@@ -142,15 +167,29 @@ class RivaSTTService(STTService):
|
|
|
142
167
|
|
|
143
168
|
self._asr_service = riva.client.ASRService(auth)
|
|
144
169
|
|
|
145
|
-
self._queue =
|
|
170
|
+
self._queue = None
|
|
146
171
|
self._config = None
|
|
147
172
|
self._thread_task = None
|
|
148
173
|
self._response_task = None
|
|
149
174
|
|
|
150
175
|
def can_generate_metrics(self) -> bool:
|
|
176
|
+
"""Check if this service can generate processing metrics.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
False - this service does not support metrics generation.
|
|
180
|
+
"""
|
|
151
181
|
return False
|
|
152
182
|
|
|
153
183
|
async def set_model(self, model: str):
|
|
184
|
+
"""Set the ASR model for transcription.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
model: Model name to set.
|
|
188
|
+
|
|
189
|
+
Note:
|
|
190
|
+
Model cannot be changed after initialization. Use model_function_map
|
|
191
|
+
parameter in constructor instead.
|
|
192
|
+
"""
|
|
154
193
|
logger.warning(f"Cannot set model after initialization. Set model and function id like so:")
|
|
155
194
|
example = {"function_id": "<UUID>", "model_name": "<model_name>"}
|
|
156
195
|
logger.warning(
|
|
@@ -158,6 +197,11 @@ class RivaSTTService(STTService):
|
|
|
158
197
|
)
|
|
159
198
|
|
|
160
199
|
async def start(self, frame: StartFrame):
|
|
200
|
+
"""Start the Riva STT service and initialize streaming configuration.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
frame: StartFrame indicating pipeline start.
|
|
204
|
+
"""
|
|
161
205
|
await super().start(frame)
|
|
162
206
|
|
|
163
207
|
if self._config:
|
|
@@ -194,19 +238,30 @@ class RivaSTTService(STTService):
|
|
|
194
238
|
riva.client.add_custom_configuration_to_config(config, self._custom_configuration)
|
|
195
239
|
|
|
196
240
|
self._config = config
|
|
241
|
+
self._queue = asyncio.Queue()
|
|
197
242
|
|
|
198
243
|
if not self._thread_task:
|
|
199
244
|
self._thread_task = self.create_task(self._thread_task_handler())
|
|
200
245
|
|
|
201
246
|
if not self._response_task:
|
|
202
|
-
self._response_queue =
|
|
247
|
+
self._response_queue = asyncio.Queue()
|
|
203
248
|
self._response_task = self.create_task(self._response_task_handler())
|
|
204
249
|
|
|
205
250
|
async def stop(self, frame: EndFrame):
|
|
251
|
+
"""Stop the Riva STT service and clean up resources.
|
|
252
|
+
|
|
253
|
+
Args:
|
|
254
|
+
frame: EndFrame indicating pipeline stop.
|
|
255
|
+
"""
|
|
206
256
|
await super().stop(frame)
|
|
207
257
|
await self._stop_tasks()
|
|
208
258
|
|
|
209
259
|
async def cancel(self, frame: CancelFrame):
|
|
260
|
+
"""Cancel the Riva STT service operation.
|
|
261
|
+
|
|
262
|
+
Args:
|
|
263
|
+
frame: CancelFrame indicating operation cancellation.
|
|
264
|
+
"""
|
|
210
265
|
await super().cancel(frame)
|
|
211
266
|
await self._stop_tasks()
|
|
212
267
|
|
|
@@ -225,7 +280,6 @@ class RivaSTTService(STTService):
|
|
|
225
280
|
streaming_config=self._config,
|
|
226
281
|
)
|
|
227
282
|
for response in responses:
|
|
228
|
-
self.reset_watchdog()
|
|
229
283
|
if not response.results:
|
|
230
284
|
continue
|
|
231
285
|
asyncio.run_coroutine_threadsafe(
|
|
@@ -260,7 +314,7 @@ class RivaSTTService(STTService):
|
|
|
260
314
|
await self.push_frame(
|
|
261
315
|
TranscriptionFrame(
|
|
262
316
|
transcript,
|
|
263
|
-
|
|
317
|
+
self._user_id,
|
|
264
318
|
time_now_iso8601(),
|
|
265
319
|
self._language_code,
|
|
266
320
|
result=result,
|
|
@@ -275,7 +329,7 @@ class RivaSTTService(STTService):
|
|
|
275
329
|
await self.push_frame(
|
|
276
330
|
InterimTranscriptionFrame(
|
|
277
331
|
transcript,
|
|
278
|
-
|
|
332
|
+
self._user_id,
|
|
279
333
|
time_now_iso8601(),
|
|
280
334
|
self._language_code,
|
|
281
335
|
result=result,
|
|
@@ -289,18 +343,43 @@ class RivaSTTService(STTService):
|
|
|
289
343
|
self._response_queue.task_done()
|
|
290
344
|
|
|
291
345
|
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
|
|
346
|
+
"""Process audio data for speech-to-text transcription.
|
|
347
|
+
|
|
348
|
+
Args:
|
|
349
|
+
audio: Raw audio bytes to transcribe.
|
|
350
|
+
|
|
351
|
+
Yields:
|
|
352
|
+
None - transcription results are pushed to the pipeline via frames.
|
|
353
|
+
"""
|
|
292
354
|
await self.start_ttfb_metrics()
|
|
293
355
|
await self.start_processing_metrics()
|
|
294
356
|
await self._queue.put(audio)
|
|
295
357
|
yield None
|
|
296
358
|
|
|
297
359
|
def __next__(self) -> bytes:
|
|
360
|
+
"""Get the next audio chunk for Riva processing.
|
|
361
|
+
|
|
362
|
+
Returns:
|
|
363
|
+
Audio bytes from the queue.
|
|
364
|
+
|
|
365
|
+
Raises:
|
|
366
|
+
StopIteration: When the thread is no longer running.
|
|
367
|
+
"""
|
|
298
368
|
if not self._thread_running:
|
|
299
369
|
raise StopIteration
|
|
300
|
-
|
|
301
|
-
|
|
370
|
+
|
|
371
|
+
try:
|
|
372
|
+
future = asyncio.run_coroutine_threadsafe(self._queue.get(), self.get_event_loop())
|
|
373
|
+
return future.result()
|
|
374
|
+
except FuturesCancelledError:
|
|
375
|
+
raise StopIteration
|
|
302
376
|
|
|
303
377
|
def __iter__(self):
|
|
378
|
+
"""Return iterator for audio chunk processing.
|
|
379
|
+
|
|
380
|
+
Returns:
|
|
381
|
+
Self as iterator.
|
|
382
|
+
"""
|
|
304
383
|
return self
|
|
305
384
|
|
|
306
385
|
|
|
@@ -310,17 +389,20 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
310
389
|
By default, this service uses NVIDIA's Riva Canary ASR API to perform speech-to-text
|
|
311
390
|
transcription on audio segments. It inherits from SegmentedSTTService to handle
|
|
312
391
|
audio buffering and speech detection.
|
|
313
|
-
|
|
314
|
-
Args:
|
|
315
|
-
api_key: NVIDIA API key for authentication
|
|
316
|
-
server: Riva server address (defaults to NVIDIA Cloud Function endpoint)
|
|
317
|
-
model_function_map: Mapping of model name and its corresponding NVIDIA Cloud Function ID
|
|
318
|
-
sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate
|
|
319
|
-
params: Additional configuration parameters for Riva
|
|
320
|
-
**kwargs: Additional arguments passed to SegmentedSTTService
|
|
321
392
|
"""
|
|
322
393
|
|
|
323
394
|
class InputParams(BaseModel):
|
|
395
|
+
"""Configuration parameters for Riva segmented STT service.
|
|
396
|
+
|
|
397
|
+
Parameters:
|
|
398
|
+
language: Target language for transcription. Defaults to EN_US.
|
|
399
|
+
profanity_filter: Whether to filter profanity from results.
|
|
400
|
+
automatic_punctuation: Whether to add automatic punctuation.
|
|
401
|
+
verbatim_transcripts: Whether to return verbatim transcripts.
|
|
402
|
+
boosted_lm_words: List of words to boost in language model.
|
|
403
|
+
boosted_lm_score: Score boost for specified words.
|
|
404
|
+
"""
|
|
405
|
+
|
|
324
406
|
language: Optional[Language] = Language.EN_US
|
|
325
407
|
profanity_filter: bool = False
|
|
326
408
|
automatic_punctuation: bool = True
|
|
@@ -341,6 +423,16 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
341
423
|
params: Optional[InputParams] = None,
|
|
342
424
|
**kwargs,
|
|
343
425
|
):
|
|
426
|
+
"""Initialize the Riva segmented STT service.
|
|
427
|
+
|
|
428
|
+
Args:
|
|
429
|
+
api_key: NVIDIA API key for authentication
|
|
430
|
+
server: Riva server address (defaults to NVIDIA Cloud Function endpoint)
|
|
431
|
+
model_function_map: Mapping of model name and its corresponding NVIDIA Cloud Function ID
|
|
432
|
+
sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate
|
|
433
|
+
params: Additional configuration parameters for Riva
|
|
434
|
+
**kwargs: Additional arguments passed to SegmentedSTTService
|
|
435
|
+
"""
|
|
344
436
|
super().__init__(sample_rate=sample_rate, **kwargs)
|
|
345
437
|
|
|
346
438
|
params = params or RivaSegmentedSTTService.InputParams()
|
|
@@ -380,7 +472,14 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
380
472
|
self._settings = {"language": self._language_enum}
|
|
381
473
|
|
|
382
474
|
def language_to_service_language(self, language: Language) -> Optional[str]:
|
|
383
|
-
"""Convert pipecat Language enum to Riva's language code.
|
|
475
|
+
"""Convert pipecat Language enum to Riva's language code.
|
|
476
|
+
|
|
477
|
+
Args:
|
|
478
|
+
language: Language enum value.
|
|
479
|
+
|
|
480
|
+
Returns:
|
|
481
|
+
Riva language code or None if not supported.
|
|
482
|
+
"""
|
|
384
483
|
return language_to_riva_language(language)
|
|
385
484
|
|
|
386
485
|
def _initialize_client(self):
|
|
@@ -435,10 +534,23 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
435
534
|
return config
|
|
436
535
|
|
|
437
536
|
def can_generate_metrics(self) -> bool:
|
|
438
|
-
"""
|
|
537
|
+
"""Check if this service can generate processing metrics.
|
|
538
|
+
|
|
539
|
+
Returns:
|
|
540
|
+
True - this service supports metrics generation.
|
|
541
|
+
"""
|
|
439
542
|
return True
|
|
440
543
|
|
|
441
544
|
async def set_model(self, model: str):
|
|
545
|
+
"""Set the ASR model for transcription.
|
|
546
|
+
|
|
547
|
+
Args:
|
|
548
|
+
model: Model name to set.
|
|
549
|
+
|
|
550
|
+
Note:
|
|
551
|
+
Model cannot be changed after initialization. Use model_function_map
|
|
552
|
+
parameter in constructor instead.
|
|
553
|
+
"""
|
|
442
554
|
logger.warning(f"Cannot set model after initialization. Set model and function id like so:")
|
|
443
555
|
example = {"function_id": "<UUID>", "model_name": "<model_name>"}
|
|
444
556
|
logger.warning(
|
|
@@ -446,13 +558,21 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
446
558
|
)
|
|
447
559
|
|
|
448
560
|
async def start(self, frame: StartFrame):
|
|
449
|
-
"""Initialize the service when the pipeline starts.
|
|
561
|
+
"""Initialize the service when the pipeline starts.
|
|
562
|
+
|
|
563
|
+
Args:
|
|
564
|
+
frame: StartFrame indicating pipeline start.
|
|
565
|
+
"""
|
|
450
566
|
await super().start(frame)
|
|
451
567
|
self._initialize_client()
|
|
452
568
|
self._config = self._create_recognition_config()
|
|
453
569
|
|
|
454
570
|
async def set_language(self, language: Language):
|
|
455
|
-
"""Set the language for the STT service.
|
|
571
|
+
"""Set the language for the STT service.
|
|
572
|
+
|
|
573
|
+
Args:
|
|
574
|
+
language: Target language for transcription.
|
|
575
|
+
"""
|
|
456
576
|
logger.info(f"Switching STT language to: [{language}]")
|
|
457
577
|
self._language_enum = language
|
|
458
578
|
self._language = self.language_to_service_language(language) or "en-US"
|
|
@@ -520,7 +640,10 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
520
640
|
if text:
|
|
521
641
|
logger.debug(f"Transcription: [{text}]")
|
|
522
642
|
yield TranscriptionFrame(
|
|
523
|
-
text,
|
|
643
|
+
text,
|
|
644
|
+
self._user_id,
|
|
645
|
+
time_now_iso8601(),
|
|
646
|
+
self._language_enum,
|
|
524
647
|
)
|
|
525
648
|
transcription_found = True
|
|
526
649
|
|
|
@@ -539,7 +662,12 @@ class RivaSegmentedSTTService(SegmentedSTTService):
|
|
|
539
662
|
|
|
540
663
|
|
|
541
664
|
class ParakeetSTTService(RivaSTTService):
|
|
542
|
-
"""Deprecated
|
|
665
|
+
"""Deprecated speech-to-text service using NVIDIA Parakeet models.
|
|
666
|
+
|
|
667
|
+
.. deprecated:: 0.0.66
|
|
668
|
+
This class is deprecated. Use `RivaSTTService` instead for equivalent functionality
|
|
669
|
+
with Parakeet models by specifying the appropriate model_function_map.
|
|
670
|
+
"""
|
|
543
671
|
|
|
544
672
|
def __init__(
|
|
545
673
|
self,
|
|
@@ -554,6 +682,16 @@ class ParakeetSTTService(RivaSTTService):
|
|
|
554
682
|
params: Optional[RivaSTTService.InputParams] = None, # Use parent class's type
|
|
555
683
|
**kwargs,
|
|
556
684
|
):
|
|
685
|
+
"""Initialize the Parakeet STT service.
|
|
686
|
+
|
|
687
|
+
Args:
|
|
688
|
+
api_key: NVIDIA API key for authentication.
|
|
689
|
+
server: Riva server address. Defaults to NVIDIA Cloud Function endpoint.
|
|
690
|
+
model_function_map: Mapping containing 'function_id' and 'model_name' for Parakeet model.
|
|
691
|
+
sample_rate: Audio sample rate in Hz. If None, uses pipeline default.
|
|
692
|
+
params: Additional configuration parameters for Riva.
|
|
693
|
+
**kwargs: Additional arguments passed to RivaSTTService.
|
|
694
|
+
"""
|
|
557
695
|
super().__init__(
|
|
558
696
|
api_key=api_key,
|
|
559
697
|
server=server,
|