dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/google/stt.py
CHANGED
|
@@ -4,12 +4,18 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""Google Cloud Speech-to-Text V2 service implementation for Pipecat.
|
|
8
|
+
|
|
9
|
+
This module provides a Google Cloud Speech-to-Text V2 service with streaming
|
|
10
|
+
support, enabling real-time speech recognition with features like automatic
|
|
11
|
+
punctuation, voice activity detection, and multi-language support.
|
|
12
|
+
"""
|
|
13
|
+
|
|
7
14
|
import asyncio
|
|
8
15
|
import json
|
|
9
16
|
import os
|
|
10
17
|
import time
|
|
11
18
|
|
|
12
|
-
from pipecat.utils.asyncio.watchdog_async_iterator import WatchdogAsyncIterator
|
|
13
19
|
from pipecat.utils.tracing.service_decorators import traced_stt
|
|
14
20
|
|
|
15
21
|
# Suppress gRPC fork warnings
|
|
@@ -353,9 +359,15 @@ class GoogleSTTService(STTService):
|
|
|
353
359
|
|
|
354
360
|
Provides real-time speech recognition using Google Cloud's Speech-to-Text V2 API
|
|
355
361
|
with streaming support. Handles audio transcription and optional voice activity detection.
|
|
362
|
+
Implements automatic stream reconnection after 4 minutes to stay within Google's 5-minute per-stream connection limit.
|
|
356
363
|
|
|
357
|
-
|
|
364
|
+
Parameters:
|
|
358
365
|
InputParams: Configuration parameters for the STT service.
|
|
366
|
+
STREAMING_LIMIT: Stream age in milliseconds (4 minutes) after which the stream is reconnected, kept below Google Cloud's per-stream connection limit.
|
|
367
|
+
|
|
368
|
+
Raises:
|
|
369
|
+
ValueError: If neither credentials nor credentials_path is provided.
|
|
370
|
+
ValueError: If project ID is not found in credentials.
|
|
359
371
|
"""
|
|
360
372
|
|
|
361
373
|
# Google Cloud's STT service has a connection time limit of 5 minutes per stream.
|
|
@@ -367,7 +379,7 @@ class GoogleSTTService(STTService):
|
|
|
367
379
|
class InputParams(BaseModel):
|
|
368
380
|
"""Configuration parameters for Google Speech-to-Text.
|
|
369
381
|
|
|
370
|
-
|
|
382
|
+
Parameters:
|
|
371
383
|
languages: Single language or list of recognition languages. First language is primary.
|
|
372
384
|
model: Speech recognition model to use.
|
|
373
385
|
use_separate_recognition_per_channel: Process each audio channel separately.
|
|
@@ -396,13 +408,25 @@ class GoogleSTTService(STTService):
|
|
|
396
408
|
@field_validator("languages", mode="before")
|
|
397
409
|
@classmethod
|
|
398
410
|
def validate_languages(cls, v) -> List[Language]:
|
|
411
|
+
"""Ensure languages is always a list.
|
|
412
|
+
|
|
413
|
+
Args:
|
|
414
|
+
v: Single Language enum or list of Language enums.
|
|
415
|
+
|
|
416
|
+
Returns:
|
|
417
|
+
List[Language]: List of configured languages.
|
|
418
|
+
"""
|
|
399
419
|
if isinstance(v, Language):
|
|
400
420
|
return [v]
|
|
401
421
|
return v
|
|
402
422
|
|
|
403
423
|
@property
|
|
404
424
|
def language_list(self) -> List[Language]:
|
|
405
|
-
"""Get languages as a guaranteed list.
|
|
425
|
+
"""Get languages as a guaranteed list.
|
|
426
|
+
|
|
427
|
+
Returns:
|
|
428
|
+
List[Language]: List of configured languages.
|
|
429
|
+
"""
|
|
406
430
|
assert isinstance(self.languages, list)
|
|
407
431
|
return self.languages
|
|
408
432
|
|
|
@@ -425,10 +449,6 @@ class GoogleSTTService(STTService):
|
|
|
425
449
|
sample_rate: Audio sample rate in Hertz.
|
|
426
450
|
params: Configuration parameters for the service.
|
|
427
451
|
**kwargs: Additional arguments passed to STTService.
|
|
428
|
-
|
|
429
|
-
Raises:
|
|
430
|
-
ValueError: If neither credentials nor credentials_path is provided.
|
|
431
|
-
ValueError: If project ID is not found in credentials.
|
|
432
452
|
"""
|
|
433
453
|
super().__init__(sample_rate=sample_rate, **kwargs)
|
|
434
454
|
|
|
@@ -501,6 +521,11 @@ class GoogleSTTService(STTService):
|
|
|
501
521
|
}
|
|
502
522
|
|
|
503
523
|
def can_generate_metrics(self) -> bool:
|
|
524
|
+
"""Check if the service can generate metrics.
|
|
525
|
+
|
|
526
|
+
Returns:
|
|
527
|
+
bool: True, as this service supports metrics generation.
|
|
528
|
+
"""
|
|
504
529
|
return True
|
|
505
530
|
|
|
506
531
|
def language_to_service_language(self, language: Language | List[Language]) -> str | List[str]:
|
|
@@ -548,7 +573,11 @@ class GoogleSTTService(STTService):
|
|
|
548
573
|
await self._reconnect_if_needed()
|
|
549
574
|
|
|
550
575
|
async def set_model(self, model: str):
|
|
551
|
-
"""Update the service's recognition model.
|
|
576
|
+
"""Update the service's recognition model.
|
|
577
|
+
|
|
578
|
+
Args:
|
|
579
|
+
model: The new recognition model to use.
|
|
580
|
+
"""
|
|
552
581
|
logger.debug(f"Switching STT model to: {model}")
|
|
553
582
|
await super().set_model(model)
|
|
554
583
|
self._settings["model"] = model
|
|
@@ -556,14 +585,29 @@ class GoogleSTTService(STTService):
|
|
|
556
585
|
await self._reconnect_if_needed()
|
|
557
586
|
|
|
558
587
|
async def start(self, frame: StartFrame):
|
|
588
|
+
"""Start the STT service and establish connection.
|
|
589
|
+
|
|
590
|
+
Args:
|
|
591
|
+
frame: The start frame triggering the service start.
|
|
592
|
+
"""
|
|
559
593
|
await super().start(frame)
|
|
560
594
|
await self._connect()
|
|
561
595
|
|
|
562
596
|
async def stop(self, frame: EndFrame):
|
|
597
|
+
"""Stop the STT service and clean up resources.
|
|
598
|
+
|
|
599
|
+
Args:
|
|
600
|
+
frame: The end frame triggering the service stop.
|
|
601
|
+
"""
|
|
563
602
|
await super().stop(frame)
|
|
564
603
|
await self._disconnect()
|
|
565
604
|
|
|
566
605
|
async def cancel(self, frame: CancelFrame):
|
|
606
|
+
"""Cancel the STT service and clean up resources.
|
|
607
|
+
|
|
608
|
+
Args:
|
|
609
|
+
frame: The cancel frame triggering the service cancellation.
|
|
610
|
+
"""
|
|
567
611
|
await super().cancel(frame)
|
|
568
612
|
await self._disconnect()
|
|
569
613
|
|
|
@@ -585,7 +629,7 @@ class GoogleSTTService(STTService):
|
|
|
585
629
|
"""Update service options dynamically.
|
|
586
630
|
|
|
587
631
|
Args:
|
|
588
|
-
languages: New list of
|
|
632
|
+
languages: New list of recognition languages.
|
|
589
633
|
model: New recognition model.
|
|
590
634
|
enable_automatic_punctuation: Enable/disable automatic punctuation.
|
|
591
635
|
enable_spoken_punctuation: Enable/disable spoken punctuation.
|
|
@@ -736,7 +780,6 @@ class GoogleSTTService(STTService):
|
|
|
736
780
|
if self._request_queue.empty():
|
|
737
781
|
# wait for 10ms in case we don't have audio
|
|
738
782
|
await asyncio.sleep(0.01)
|
|
739
|
-
self.reset_watchdog()
|
|
740
783
|
continue
|
|
741
784
|
|
|
742
785
|
# Start bi-directional streaming
|
|
@@ -767,7 +810,14 @@ class GoogleSTTService(STTService):
|
|
|
767
810
|
await self.push_frame(ErrorFrame(str(e)))
|
|
768
811
|
|
|
769
812
|
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
|
|
770
|
-
"""Process an audio chunk for STT transcription.
|
|
813
|
+
"""Process an audio chunk for STT transcription.
|
|
814
|
+
|
|
815
|
+
Args:
|
|
816
|
+
audio: Raw audio bytes to transcribe.
|
|
817
|
+
|
|
818
|
+
Yields:
|
|
819
|
+
Frame: None (actual transcription frames are pushed via internal processing).
|
|
820
|
+
"""
|
|
771
821
|
if self._streaming_task:
|
|
772
822
|
# Queue the audio data
|
|
773
823
|
await self.start_ttfb_metrics()
|
|
@@ -784,9 +834,7 @@ class GoogleSTTService(STTService):
|
|
|
784
834
|
async def _process_responses(self, streaming_recognize):
|
|
785
835
|
"""Process streaming recognition responses."""
|
|
786
836
|
try:
|
|
787
|
-
async for response in WatchdogAsyncIterator(
|
|
788
|
-
streaming_recognize, manager=self.task_manager
|
|
789
|
-
):
|
|
837
|
+
async for response in streaming_recognize:
|
|
790
838
|
# Check streaming limit
|
|
791
839
|
if (int(time.time() * 1000) - self._stream_start_time) > self.STREAMING_LIMIT:
|
|
792
840
|
logger.debug("Stream timeout reached in response processing")
|
|
@@ -810,7 +858,7 @@ class GoogleSTTService(STTService):
|
|
|
810
858
|
await self.push_frame(
|
|
811
859
|
TranscriptionFrame(
|
|
812
860
|
transcript,
|
|
813
|
-
|
|
861
|
+
self._user_id,
|
|
814
862
|
time_now_iso8601(),
|
|
815
863
|
primary_language,
|
|
816
864
|
result=result,
|
|
@@ -828,7 +876,7 @@ class GoogleSTTService(STTService):
|
|
|
828
876
|
await self.push_frame(
|
|
829
877
|
InterimTranscriptionFrame(
|
|
830
878
|
transcript,
|
|
831
|
-
|
|
879
|
+
self._user_id,
|
|
832
880
|
time_now_iso8601(),
|
|
833
881
|
primary_language,
|
|
834
882
|
result=result,
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from pipecat.frames.frames import TTSAudioRawFrame
|
|
5
|
+
from pipecat.services.google.tts import GoogleTTSService
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
async def test_chirp_tts():
|
|
9
|
+
# Path to the service account key file (hard-coded here; not read from the environment)
|
|
10
|
+
credentials_path = (
|
|
11
|
+
"/Users/kalicharanvemuru/Documents/Code/pipecat/examples/ringg-chatbot/creds.json"
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
if not credentials_path or not os.path.exists(credentials_path):
|
|
15
|
+
raise ValueError(
|
|
16
|
+
"Please set GOOGLE_APPLICATION_CREDENTIALS environment variable to your service account key file"
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Initialize the TTS service with Chirp voice
|
|
20
|
+
tts = GoogleTTSService(
|
|
21
|
+
credentials_path=credentials_path,
|
|
22
|
+
voice_id="en-US-Chirp3-HD-Charon", # Using Chirp3 HD Charon voice
|
|
23
|
+
sample_rate=24000,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
# Test text
|
|
27
|
+
test_text = "Hello, this is a test of the Google TTS service with Chirp voice."
|
|
28
|
+
|
|
29
|
+
print(f"Testing TTS with text: {test_text}")
|
|
30
|
+
|
|
31
|
+
# Generate speech
|
|
32
|
+
try:
|
|
33
|
+
async for frame in tts.run_tts(test_text):
|
|
34
|
+
if isinstance(frame, TTSAudioRawFrame):
|
|
35
|
+
print(f"Received audio chunk of size: {len(frame.audio)} bytes")
|
|
36
|
+
else:
|
|
37
|
+
print(f"Received frame: {frame.__class__.__name__}")
|
|
38
|
+
|
|
39
|
+
print("TTS generation completed successfully!")
|
|
40
|
+
except Exception as e:
|
|
41
|
+
print(f"Error during TTS generation: {str(e)}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
if __name__ == "__main__":
|
|
45
|
+
asyncio.run(test_chirp_tts())
|