amd-gaia: amd_gaia-0.14.3-py3-none-any.whl → amd_gaia-0.15.1-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in that registry.
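The file-level summary below can also be checked independently. Here is a minimal sketch using only the Python standard library, assuming both wheels have been fetched from the public registry first (for example with `pip download amd-gaia==0.14.3 --no-deps` and `pip download amd-gaia==0.15.1 --no-deps`):

```python
import zipfile

def wheel_files(path: str) -> set[str]:
    """Return the set of file paths contained in a wheel (zip) archive."""
    with zipfile.ZipFile(path) as whl:
        return set(whl.namelist())

old = wheel_files("amd_gaia-0.14.3-py3-none-any.whl")
new = wheel_files("amd_gaia-0.15.1-py3-none-any.whl")

# Files present in only one of the two wheels.
print("added:", *sorted(new - old), sep="\n  ")
print("removed:", *sorted(old - new), sep="\n  ")
```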
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/audio/kokoro_tts.py
CHANGED
@@ -1,599 +1,599 @@

The side-by-side diff viewer did not survive extraction here: each of the file's 599 lines appears once as removed and once as re-added, interleaved with line numbers and column separators, and the before and after contents are identical as shown. The change is therefore presumably whitespace-only (line endings or similar), which is consistent with the equal added and removed line counts reported for most files in the summary above. The file's contents are reproduced once below.

```python
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT

import queue
import threading
import time

import numpy as np
import psutil

try:
    import sounddevice as sd
except ImportError:
    sd = None

try:
    import soundfile as sf
except ImportError:
    sf = None

try:
    from kokoro import KPipeline
except ImportError:
    KPipeline = None

from gaia.logger import get_logger


class KokoroTTS:
    log = get_logger(__name__)

    def __init__(self):
        # Check for required dependencies
        missing = []
        if sd is None:
            missing.append("sounddevice")
        if sf is None:
            missing.append("soundfile")
        if KPipeline is None:
            missing.append("kokoro>=0.3.1")

        if missing:
            error_msg = (
                f"\n❌ Error: Missing required talk dependencies: {', '.join(missing)}\n\n"
                f"Please install the talk dependencies:\n"
                f'  uv pip install -e ".[talk]"\n\n'
                f"Or install packages directly:\n"
                f"  uv pip install {' '.join(missing)}\n"
            )
            raise ImportError(error_msg)

        self.log = self.__class__.log

        # Initialize Kokoro pipeline with American English
        self.pipeline = KPipeline(lang_code="a")  # 'a' for American English

        # Available voice configurations with metadata
        self.available_voices = {
            # American English Voices 🇺🇸
            "af_alloy": {
                "name": "American Female - Alloy",
                "quality": "C",
                "duration": "MM",
            },
            "af_aoede": {
                "name": "American Female - Aoede",
                "quality": "C+",
                "duration": "H",
            },
            "af_bella": {
                "name": "American Female - Bella",
                "quality": "A-",
                "duration": "HH",
            },
            "af_jessica": {
                "name": "American Female - Jessica",
                "quality": "D",
                "duration": "MM",
            },
            "af_kore": {
                "name": "American Female - Kore",
                "quality": "C+",
                "duration": "H",
            },
            "af_nicole": {
                "name": "American Female - Nicole",
                "quality": "B-",
                "duration": "HH",
            },
            "af_nova": {
                "name": "American Female - Nova",
                "quality": "C",
                "duration": "MM",
            },
            "af_river": {
                "name": "American Female - River",
                "quality": "D",
                "duration": "MM",
            },
            "af_sarah": {
                "name": "American Female - Sarah",
                "quality": "C+",
                "duration": "H",
            },
            "af_sky": {
                "name": "American Female - Sky",
                "quality": "C-",
                "duration": "M",
            },
            "am_adam": {
                "name": "American Male - Adam",
                "quality": "F+",
                "duration": "H",
            },
            "am_echo": {
                "name": "American Male - Echo",
                "quality": "D",
                "duration": "MM",
            },
            "am_eric": {
                "name": "American Male - Eric",
                "quality": "D",
                "duration": "MM",
            },
            "am_fenrir": {
                "name": "American Male - Fenrir",
                "quality": "C+",
                "duration": "H",
            },
            "am_liam": {
                "name": "American Male - Liam",
                "quality": "D",
                "duration": "MM",
            },
            "am_michael": {
                "name": "American Male - Michael",
                "quality": "C+",
                "duration": "H",
            },
            "am_onyx": {
                "name": "American Male - Onyx",
                "quality": "D",
                "duration": "MM",
            },
            "am_puck": {
                "name": "American Male - Puck",
                "quality": "C+",
                "duration": "H",
            },
            # British English Voices 🇬🇧
            "bf_alice": {
                "name": "British Female - Alice",
                "quality": "D",
                "duration": "MM",
            },
            "bf_emma": {
                "name": "British Female - Emma",
                "quality": "B-",
                "duration": "HH",
            },
            "bf_isabella": {
                "name": "British Female - Isabella",
                "quality": "C",
                "duration": "MM",
            },
            "bf_lily": {
                "name": "British Female - Lily",
                "quality": "D",
                "duration": "MM",
            },
            "bm_daniel": {
                "name": "British Male - Daniel",
                "quality": "D",
                "duration": "MM",
            },
            "bm_fable": {
                "name": "British Male - Fable",
                "quality": "C",
                "duration": "MM",
            },
            "bm_george": {
                "name": "British Male - George",
                "quality": "C",
                "duration": "MM",
            },
            "bm_lewis": {
                "name": "British Male - Lewis",
                "quality": "D+",
                "duration": "H",
            },
        }

        # Default to highest quality voice (Bella)
        self.voice_name = "af_bella"
        self.chunk_size = 150  # Optimal token chunk size for best quality
        self.log.debug(
            f"Loaded voice: {self.voice_name} - {self.available_voices[self.voice_name]['name']} (Quality: {self.available_voices[self.voice_name]['quality']})"
        )

    def preprocess_text(self, text: str) -> str:
        """
        Preprocess text to add appropriate pauses and improve speech flow.
        Removes asterisks and adds pause markers.
        """
        # First remove all asterisks from the text
        text = text.replace("*", "")

        # Add pauses after bullet points and numbered lists
        lines = text.split("\n")
        processed_lines = []

        for line in lines:
            line = line.strip()
            if not line:  # Skip empty lines
                continue

            # Check for various list formats and add pauses
            if (
                line.startswith(("•", "-", "*"))  # Bullet points
                or (
                    len(line) > 2 and line[0].isdigit() and line[1] == "."
                )  # Numbered lists
                or (len(line) > 2 and line[0].isalpha() and line[1] in [")", "."])
            ):  # Lettered lists
                # For list items, ensure we add pause regardless of existing punctuation
                if line[-1] in ".!?:":
                    line = line[:-1]  # Remove existing punctuation
                line = line.replace(")", "...")  # Add pause after list items
                processed_lines.append(f"{line}...")
            else:
                # Add a period at the end of non-empty lines if they don't already have ending punctuation
                if not line[-1] in ".!?:":
                    processed_lines.append(line + ".")
                else:
                    processed_lines.append(line)

        return " ".join(processed_lines)  # Join with spaces instead of newlines

    def generate_speech(
        self, text: str, stream_callback=None
    ) -> tuple[list[float], str, dict]:
        """Generate speech from text using Kokoro TTS with quality optimizations."""
        self.log.debug(f"Generating speech for text of length {len(text)}")

        process = psutil.Process()
        start_memory = process.memory_info().rss / 1024 / 1024
        start_time = time.time()

        # Generate audio using the pipeline with chunking for optimal quality
        audio_chunks = []
        phonemes = []
        total_duration = 0

        # Split text into chunks of optimal size (100-200 tokens)
        sentences = text.split(".")
        current_chunk = []
        current_length = 0

        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue

            sentence_length = len(sentence.split())

            if current_length + sentence_length > self.chunk_size:
                # Process current chunk
                chunk_text = ". ".join(current_chunk) + "."
                generator = self.pipeline(chunk_text, voice=self.voice_name, speed=1)
                for _, phoneme_seq, audio in generator:
                    audio_chunks.append(audio)
                    phonemes.append(phoneme_seq)
                    chunk_duration = len(audio) / 24000
                    total_duration += chunk_duration

                    if stream_callback and callable(stream_callback):
                        stream_callback(audio)

                current_chunk = [sentence]
                current_length = sentence_length
            else:
                current_chunk.append(sentence)
                current_length += sentence_length

        # Process remaining chunk if any
        if current_chunk:
            chunk_text = ". ".join(current_chunk) + "."
            generator = self.pipeline(chunk_text, voice=self.voice_name, speed=1)
            for _, phoneme_seq, audio in generator:
                audio_chunks.append(audio)
                phonemes.append(phoneme_seq)
                chunk_duration = len(audio) / 24000
                total_duration += chunk_duration

                if stream_callback and callable(stream_callback):
                    stream_callback(audio)

        # Combine all audio chunks
        audio = np.concatenate(audio_chunks)
        combined_phonemes = " ".join(phonemes)

        end_time = time.time()
        end_memory = process.memory_info().rss / 1024 / 1024
        processing_time = end_time - start_time
        peak_memory = end_memory - start_memory

        stats = {
            "processing_time": round(processing_time, 3),
            "audio_duration": round(total_duration, 3),
            "realtime_ratio": round(processing_time / total_duration, 2),
            "peak_memory": round(peak_memory, 2),
        }

        return audio, combined_phonemes, stats

    def generate_speech_streaming(
        self, text_queue: queue.Queue, status_callback=None, interrupt_event=None
    ) -> None:
        """Optimized streaming TTS with separate processing and playback threads."""
        self.log.debug("Starting speech streaming")
        buffer = ""
        audio_buffer = queue.Queue(maxsize=100)  # Buffer for processed audio chunks

        # Initialize audio stream
        stream = sd.OutputStream(
            samplerate=24000,
            channels=1,
            dtype=np.float32,
            blocksize=2400,  # 100ms buffer
            latency="low",
        )
        stream.start()
        self.log.debug("Audio stream initialized")

        # Playback thread function
        def audio_playback_thread():
            try:
                while True:
                    try:
                        audio_chunk = audio_buffer.get(timeout=0.1)
                        if audio_chunk is None:  # Exit signal
                            if status_callback:
                                status_callback(False)
                            break
                        if interrupt_event and interrupt_event.is_set():
                            break
                        if status_callback:
                            status_callback(True)
                        stream.write(np.array(audio_chunk, dtype=np.float32))
                    except queue.Empty:
                        continue
            except Exception as e:
                self.log.error(f"Error in playback thread: {e}")
                if status_callback:
                    status_callback(False)
            finally:
                stream.stop()
                stream.close()
                if status_callback:
                    status_callback(False)

        # Start playback thread
        playback_thread = threading.Thread(target=audio_playback_thread)
        playback_thread.daemon = True
        playback_thread.start()

        try:
            while True:
                try:
                    chunk = text_queue.get(timeout=0.1)

                    if chunk == "__END__" or (
                        interrupt_event and interrupt_event.is_set()
                    ):
                        if buffer.strip():
                            # Process final buffer
                            processed_text = self.preprocess_text(buffer.strip())
                            if processed_text:  # Only process if there's actual text
                                self.generate_speech(
                                    processed_text, stream_callback=audio_buffer.put
                                )
                        audio_buffer.put(None)  # Signal playback thread to exit
                        break

                    buffer += chunk

                    # Find complete sentences for immediate processing
                    sentences = buffer.split(".")
                    if len(sentences) > 1:
                        # Process complete sentences immediately
                        text_to_process = ".".join(sentences[:-1]) + "."
                        if (
                            text_to_process.strip()
                        ):  # Only process if there's actual text
                            processed_text = self.preprocess_text(text_to_process)
                            if processed_text:  # Double check after preprocessing
                                self.generate_speech(
                                    processed_text, stream_callback=audio_buffer.put
                                )
                        buffer = sentences[-1]

                except queue.Empty:
                    continue

        except Exception as e:
            self.log.error(f"Error in streaming: {e}")
            audio_buffer.put(None)  # Ensure playback thread exits
        finally:
            audio_buffer.put(None)  # Ensure playback thread exits
            playback_thread.join(timeout=2.0)

    def set_voice(self, voice_name: str) -> None:
        """Change the current voice."""
        self.log.info(f"Changing voice to: {voice_name}")
        if voice_name not in self.available_voices:
            self.log.error(f"Unknown voice '{voice_name}'")
            raise ValueError(
                f"Unknown voice '{voice_name}'. Available voices: {list(self.available_voices.keys())}"
            )

        self.voice_name = voice_name
        self.log.info(
            f"Changed voice to: {voice_name} - {self.available_voices[voice_name]['name']} (Quality: {self.available_voices[voice_name]['quality']})"
        )

    def list_available_voices(self) -> dict[str, dict]:
        """Get all available voice names and their descriptions."""
        return self.available_voices

    # Test methods remain largely unchanged, just updated to use new generate_speech method
    def test_preprocessing(self, test_text: str) -> str:
        """Test the text preprocessing functionality."""
        try:
            processed_text = self.preprocess_text(test_text)
            print("\nOriginal text:")
            print(test_text)
            print("\nProcessed text:")
            print(processed_text)
            return processed_text
        except Exception as e:
            self.log.error(f"Error during preprocessing test: {e}")
            return None

    def test_generate_audio_file(
        self, test_text: str, output_file: str = "output.wav"
    ) -> None:
        """Test basic audio generation and file saving."""
        try:
            print("\nGenerating audio...")
            audio, _, stats = self.generate_speech(test_text)

            # Save audio to file
            sf.write(output_file, np.array(audio), 24000)
            print(f"Saved audio to: {output_file}")

            print("\nPerformance stats:")
            print(f"- Processing time: {stats['processing_time']:.3f}s")
            print(f"- Audio duration: {stats['audio_duration']:.3f}s")
            print(f"- Realtime ratio: {stats['realtime_ratio']:.2f}x (lower is better)")
            print(f"- Peak memory usage: {stats['peak_memory']:.2f} MB")
        except Exception as e:
            self.log.error(f"Error during audio generation test: {e}")

    def test_streaming_playback(self, test_text: str) -> None:
        """Test streaming audio generation with progress display."""
        try:
            # Setup audio stream
            stream = sd.OutputStream(samplerate=24000, channels=1, dtype=np.float32)
            stream.start()

            # Create audio queue and initialize tracking variables
            audio_queue = queue.Queue(maxsize=100)
            words = test_text.split()
            total_words = len(words)
            total_chunks = 0
            current_processing_chunk = 0
            current_playback_chunk = 0
            spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
            spinner_idx = 0

            # Count total chunks
            def count_chunks(_):
                nonlocal total_chunks
                total_chunks += 1

            print("\nAnalyzing text length...")
            self.generate_speech(test_text, stream_callback=count_chunks)

            # Define and start streaming thread
            def stream_audio():
                nonlocal current_playback_chunk, spinner_idx
                while True:
                    try:
                        chunk = audio_queue.get()
                        if chunk is None:
                            break

                        chunk_array = np.array(chunk, dtype=np.float32)
                        stream.write(chunk_array)
                        current_playback_chunk += 1

                        # Update progress display
                        word_position = int(
                            (current_playback_chunk / total_chunks) * total_words
                        )
                        current_text = " ".join(
                            words[
                                max(0, word_position - 5) : min(
                                    total_words, word_position + 5
                                )
                            ]
                        )
                        current_text = current_text[:60].ljust(60)

                        process_progress = int(
                            (current_processing_chunk / total_chunks) * 50
                        )
                        playback_progress = int(
                            (current_playback_chunk / total_chunks) * 50
                        )
                        spinner_idx = (spinner_idx + 1) % len(spinner_chars)

                        print("\033[K", end="")
                        print(
                            f"\r{spinner_chars[spinner_idx]} Processing: [{'=' * process_progress}{' ' * (50-process_progress)}] {(current_processing_chunk/total_chunks)*100:.1f}%"
                        )
                        print(
                            f"{spinner_chars[spinner_idx]} Playback: [{'=' * playback_progress}{' ' * (50-playback_progress)}] {(current_playback_chunk/total_chunks)*100:.1f}%"
                        )
                        print(
                            f"{spinner_chars[spinner_idx]} Current: {current_text}",
                            end="\033[2A\r",
                        )

                        audio_queue.task_done()
                    except queue.Empty:
                        continue

            print("\nGenerating and streaming audio...")
            print("\n\n")
            stream_thread = threading.Thread(target=stream_audio)
            stream_thread.start()

            def process_chunk(chunk):
                nonlocal current_processing_chunk
                current_processing_chunk += 1
                audio_queue.put(chunk)

            processed_text = self.preprocess_text(test_text)
            _, _, stats = self.generate_speech(
                processed_text, stream_callback=process_chunk
            )

            audio_queue.put(None)
            stream_thread.join()

            print("\n\n\n")
            stream.stop()
            stream.close()

            print("\nStreaming test completed")
            print(f"Realtime ratio: {stats['realtime_ratio']:.2f}x (lower is better)")

        except Exception as e:
            self.log.error(f"Error during streaming test: {e}")


def main():
    """Run all TTS tests."""
    test_text = """
Let's play a game of trivia. I'll ask you a series of questions on a particular topic, and you try to answer them to the best of your ability. We can keep track of your score and see how well you do.

Here's your first question:

**Question 1:** Which American author wrote the classic novel "To Kill a Mockingbird"?

A) F. Scott Fitzgerald
B) Harper Lee
C) Jane Austen
D) J. K. Rowling
E) Edgar Allan Poe

Let me know your answer!
"""

    tts = KokoroTTS()

    print("Running preprocessing test...")
    processed_text = tts.test_preprocessing(test_text)

    print("\nRunning streaming test...")
    tts.test_streaming_playback(processed_text)

    print("\nRunning audio generation test...")
    tts.test_generate_audio_file(processed_text)


if __name__ == "__main__":
    main()
```
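For context, here is a minimal usage sketch of the class as shipped. The input text is illustrative; the constructor raises ImportError unless the optional talk dependencies (sounddevice, soundfile, kokoro) are installed:

```python
from gaia.audio.kokoro_tts import KokoroTTS

tts = KokoroTTS()           # defaults to the af_bella voice
tts.set_voice("af_nicole")  # any key from list_available_voices()

# Returns the raw 24 kHz audio, the phoneme sequence, and timing stats.
audio, phonemes, stats = tts.generate_speech("Hello from GAIA.")
print(f"Synthesized {stats['audio_duration']}s of audio "
      f"in {stats['processing_time']}s")
```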