amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/audio/audio_client.py
CHANGED
|
@@ -1,439 +1,439 @@
|
|
|
1
|
-
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: MIT
|
|
3
|
-
|
|
4
|
-
import asyncio
|
|
5
|
-
import queue
|
|
6
|
-
import threading
|
|
7
|
-
import time
|
|
8
|
-
|
|
9
|
-
from gaia.llm.llm_client import LLMClient
|
|
10
|
-
from gaia.logger import get_logger
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class AudioClient:
|
|
14
|
-
"""Handles all audio-related functionality including TTS, ASR, and voice chat."""
|
|
15
|
-
|
|
16
|
-
def __init__(
|
|
17
|
-
self,
|
|
18
|
-
whisper_model_size="base",
|
|
19
|
-
audio_device_index=None, # Use default input device
|
|
20
|
-
silence_threshold=0.5,
|
|
21
|
-
enable_tts=True,
|
|
22
|
-
logging_level="INFO",
|
|
23
|
-
use_claude=False,
|
|
24
|
-
use_chatgpt=False,
|
|
25
|
-
system_prompt=None,
|
|
26
|
-
):
|
|
27
|
-
self.log = get_logger(__name__)
|
|
28
|
-
self.log.setLevel(getattr(__import__("logging"), logging_level))
|
|
29
|
-
|
|
30
|
-
# Audio configuration
|
|
31
|
-
self.whisper_model_size = whisper_model_size
|
|
32
|
-
self.audio_device_index = audio_device_index
|
|
33
|
-
self.silence_threshold = silence_threshold
|
|
34
|
-
self.enable_tts = enable_tts
|
|
35
|
-
|
|
36
|
-
# Audio state
|
|
37
|
-
self.is_speaking = False
|
|
38
|
-
self.tts_thread = None
|
|
39
|
-
self.whisper_asr = None
|
|
40
|
-
self.transcription_queue = queue.Queue()
|
|
41
|
-
self.tts = None
|
|
42
|
-
|
|
43
|
-
# Initialize LLM client
|
|
44
|
-
self.llm_client = LLMClient(
|
|
45
|
-
use_claude=use_claude,
|
|
46
|
-
use_openai=use_chatgpt,
|
|
47
|
-
system_prompt=system_prompt,
|
|
48
|
-
)
|
|
49
|
-
|
|
50
|
-
self.log.info("Audio client initialized.")
|
|
51
|
-
|
|
52
|
-
async def start_voice_chat(self, message_processor_callback):
|
|
53
|
-
"""Start a voice-based chat session."""
|
|
54
|
-
try:
|
|
55
|
-
self.log.debug("Initializing voice chat...")
|
|
56
|
-
print(
|
|
57
|
-
"Starting voice chat.\n"
|
|
58
|
-
"Say 'stop' to quit application "
|
|
59
|
-
"or 'restart' to clear the chat history.\n"
|
|
60
|
-
"Press Enter key to stop during audio playback."
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
# Initialize TTS before starting voice chat
|
|
64
|
-
self.initialize_tts()
|
|
65
|
-
|
|
66
|
-
from gaia.audio.whisper_asr import WhisperAsr
|
|
67
|
-
|
|
68
|
-
# Create WhisperAsr with custom thresholds
|
|
69
|
-
# Your audio shows energy levels of 0.02-0.03 when speaking
|
|
70
|
-
self.whisper_asr = WhisperAsr(
|
|
71
|
-
model_size=self.whisper_model_size,
|
|
72
|
-
device_index=self.audio_device_index,
|
|
73
|
-
transcription_queue=self.transcription_queue,
|
|
74
|
-
silence_threshold=0.01, # Set higher to ensure detection (your levels are 0.01-0.2+)
|
|
75
|
-
min_audio_length=16000 * 1.0, # 1 second minimum at 16kHz
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
# Log the thresholds being used (reduce verbosity)
|
|
79
|
-
self.log.debug(
|
|
80
|
-
f"Audio settings: SILENCE_THRESHOLD={self.whisper_asr.SILENCE_THRESHOLD}, "
|
|
81
|
-
f"MIN_LENGTH={self.whisper_asr.MIN_AUDIO_LENGTH/self.whisper_asr.RATE:.1f}s"
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
device_name = self.whisper_asr.get_device_name()
|
|
85
|
-
self.log.debug(f"Using audio device: {device_name}")
|
|
86
|
-
|
|
87
|
-
# Start recording
|
|
88
|
-
self.log.debug("Starting audio recording...")
|
|
89
|
-
self.whisper_asr.start_recording()
|
|
90
|
-
|
|
91
|
-
# Start the processing thread after recording is initialized
|
|
92
|
-
self.log.debug("Starting audio processing thread...")
|
|
93
|
-
process_thread = threading.Thread(
|
|
94
|
-
target=self._process_audio_wrapper, args=(message_processor_callback,)
|
|
95
|
-
)
|
|
96
|
-
process_thread.daemon = True
|
|
97
|
-
process_thread.start()
|
|
98
|
-
|
|
99
|
-
# Keep the main thread alive while processing
|
|
100
|
-
self.log.debug("Listening for voice input...")
|
|
101
|
-
try:
|
|
102
|
-
while True:
|
|
103
|
-
if not process_thread.is_alive():
|
|
104
|
-
self.log.debug("Process thread stopped unexpectedly")
|
|
105
|
-
break
|
|
106
|
-
if not self.whisper_asr or not self.whisper_asr.is_recording:
|
|
107
|
-
self.log.warning("Recording stopped unexpectedly")
|
|
108
|
-
break
|
|
109
|
-
await asyncio.sleep(0.1)
|
|
110
|
-
|
|
111
|
-
except KeyboardInterrupt:
|
|
112
|
-
self.log.info("Received keyboard interrupt")
|
|
113
|
-
print("\nStopping voice chat...")
|
|
114
|
-
except Exception as e:
|
|
115
|
-
self.log.error(f"Error in main processing loop: {str(e)}")
|
|
116
|
-
raise
|
|
117
|
-
finally:
|
|
118
|
-
if self.whisper_asr:
|
|
119
|
-
self.log.debug("Stopping recording...")
|
|
120
|
-
self.whisper_asr.stop_recording()
|
|
121
|
-
self.log.debug("Waiting for process thread to finish...")
|
|
122
|
-
process_thread.join(timeout=2.0)
|
|
123
|
-
|
|
124
|
-
except ImportError:
|
|
125
|
-
self.log.error(
|
|
126
|
-
'WhisperAsr not found. Please install voice support with: uv pip install ".[talk]"'
|
|
127
|
-
)
|
|
128
|
-
raise
|
|
129
|
-
except Exception as e:
|
|
130
|
-
self.log.error(f"Failed to initialize voice chat: {str(e)}")
|
|
131
|
-
raise
|
|
132
|
-
finally:
|
|
133
|
-
if self.whisper_asr:
|
|
134
|
-
self.whisper_asr.stop_recording()
|
|
135
|
-
self.log.info("Voice recording stopped")
|
|
136
|
-
|
|
137
|
-
async def process_voice_input(self, text, get_stats_callback=None):
|
|
138
|
-
"""Process transcribed voice input and get AI response"""
|
|
139
|
-
|
|
140
|
-
# Initialize TTS streaming
|
|
141
|
-
text_queue = None
|
|
142
|
-
tts_finished = threading.Event() # Add event to track TTS completion
|
|
143
|
-
interrupt_event = threading.Event() # Add event for keyboard interrupts
|
|
144
|
-
|
|
145
|
-
try:
|
|
146
|
-
# Check if we're currently generating and halt if needed
|
|
147
|
-
if self.llm_client.is_generating():
|
|
148
|
-
self.log.debug("Generation in progress, halting...")
|
|
149
|
-
if self.llm_client.halt_generation():
|
|
150
|
-
print("\nGeneration interrupted.")
|
|
151
|
-
await asyncio.sleep(0.5)
|
|
152
|
-
|
|
153
|
-
# Pause audio recording before sending query
|
|
154
|
-
if self.whisper_asr:
|
|
155
|
-
self.whisper_asr.pause_recording()
|
|
156
|
-
self.log.debug("Recording paused before generation")
|
|
157
|
-
|
|
158
|
-
self.log.debug(f"Sending message to LLM: {text[:50]}...")
|
|
159
|
-
print("\nGaia: ", end="", flush=True)
|
|
160
|
-
|
|
161
|
-
# Keyboard listener thread for both generation and playback
|
|
162
|
-
def keyboard_listener():
|
|
163
|
-
input() # Wait for any input
|
|
164
|
-
|
|
165
|
-
# Use LLMClient to halt generation
|
|
166
|
-
if self.llm_client.halt_generation():
|
|
167
|
-
print("\nGeneration interrupted.")
|
|
168
|
-
else:
|
|
169
|
-
print("\nInterrupt requested.")
|
|
170
|
-
|
|
171
|
-
interrupt_event.set()
|
|
172
|
-
if text_queue:
|
|
173
|
-
text_queue.put("__HALT__") # Signal TTS to stop immediately
|
|
174
|
-
|
|
175
|
-
# Start keyboard listener thread
|
|
176
|
-
keyboard_thread = threading.Thread(target=keyboard_listener)
|
|
177
|
-
keyboard_thread.daemon = True
|
|
178
|
-
keyboard_thread.start()
|
|
179
|
-
|
|
180
|
-
if self.enable_tts:
|
|
181
|
-
text_queue = queue.Queue(maxsize=100)
|
|
182
|
-
|
|
183
|
-
# Define status callback to update speaking state
|
|
184
|
-
def tts_status_callback(is_speaking):
|
|
185
|
-
self.is_speaking = is_speaking
|
|
186
|
-
if not is_speaking: # When TTS finishes speaking
|
|
187
|
-
tts_finished.set()
|
|
188
|
-
if self.whisper_asr:
|
|
189
|
-
self.whisper_asr.resume_recording()
|
|
190
|
-
else: # When TTS starts speaking
|
|
191
|
-
if self.whisper_asr:
|
|
192
|
-
self.whisper_asr.pause_recording()
|
|
193
|
-
self.log.debug(f"TTS speaking state: {is_speaking}")
|
|
194
|
-
|
|
195
|
-
self.tts_thread = threading.Thread(
|
|
196
|
-
target=self.tts.generate_speech_streaming,
|
|
197
|
-
args=(text_queue,),
|
|
198
|
-
kwargs={
|
|
199
|
-
"status_callback": tts_status_callback,
|
|
200
|
-
"interrupt_event": interrupt_event,
|
|
201
|
-
},
|
|
202
|
-
daemon=True,
|
|
203
|
-
)
|
|
204
|
-
self.tts_thread.start()
|
|
205
|
-
|
|
206
|
-
# Use LLMClient streaming instead of WebSocket
|
|
207
|
-
accumulated_response = ""
|
|
208
|
-
initial_buffer = "" # Buffer for the start of response
|
|
209
|
-
initial_buffer_sent = False
|
|
210
|
-
|
|
211
|
-
try:
|
|
212
|
-
# Start LLM generation with streaming
|
|
213
|
-
response_stream = self.llm_client.generate(text, stream=True)
|
|
214
|
-
|
|
215
|
-
# Process streaming response
|
|
216
|
-
for chunk in response_stream:
|
|
217
|
-
if interrupt_event.is_set():
|
|
218
|
-
self.log.debug("Keyboard interrupt detected, stopping...")
|
|
219
|
-
if text_queue:
|
|
220
|
-
text_queue.put("__END__")
|
|
221
|
-
break
|
|
222
|
-
|
|
223
|
-
if self.transcription_queue.qsize() > 0:
|
|
224
|
-
self.log.debug(
|
|
225
|
-
"New input detected during generation, stopping..."
|
|
226
|
-
)
|
|
227
|
-
if text_queue:
|
|
228
|
-
text_queue.put("__END__")
|
|
229
|
-
# Use LLMClient to halt generation
|
|
230
|
-
if self.llm_client.halt_generation():
|
|
231
|
-
self.log.debug("Generation interrupted for new input.")
|
|
232
|
-
return
|
|
233
|
-
|
|
234
|
-
if chunk:
|
|
235
|
-
print(chunk, end="", flush=True)
|
|
236
|
-
if text_queue:
|
|
237
|
-
if not initial_buffer_sent:
|
|
238
|
-
initial_buffer += chunk
|
|
239
|
-
# Send if we've reached 20 chars or if we get a clear end marker
|
|
240
|
-
if len(initial_buffer) >= 20 or chunk.endswith(
|
|
241
|
-
("\n", ". ", "! ", "? ")
|
|
242
|
-
):
|
|
243
|
-
text_queue.put(initial_buffer)
|
|
244
|
-
initial_buffer_sent = True
|
|
245
|
-
else:
|
|
246
|
-
text_queue.put(chunk)
|
|
247
|
-
accumulated_response += chunk
|
|
248
|
-
|
|
249
|
-
# Send any remaining buffered content
|
|
250
|
-
if text_queue:
|
|
251
|
-
if not initial_buffer_sent and initial_buffer:
|
|
252
|
-
# Small delay for very short responses
|
|
253
|
-
if len(initial_buffer) <= 20:
|
|
254
|
-
await asyncio.sleep(0.1)
|
|
255
|
-
text_queue.put(initial_buffer)
|
|
256
|
-
text_queue.put("__END__")
|
|
257
|
-
|
|
258
|
-
except Exception as e:
|
|
259
|
-
if text_queue:
|
|
260
|
-
text_queue.put("__END__")
|
|
261
|
-
raise e
|
|
262
|
-
finally:
|
|
263
|
-
if self.tts_thread and self.tts_thread.is_alive():
|
|
264
|
-
self.tts_thread.join(timeout=1.0) # Add timeout to thread join
|
|
265
|
-
keyboard_thread.join(timeout=1.0) # Add timeout to keyboard thread join
|
|
266
|
-
|
|
267
|
-
print("\n")
|
|
268
|
-
# Get performance stats from LLMClient
|
|
269
|
-
if get_stats_callback:
|
|
270
|
-
# First try the provided callback for backward compatibility
|
|
271
|
-
stats = get_stats_callback()
|
|
272
|
-
else:
|
|
273
|
-
# Use LLMClient stats
|
|
274
|
-
stats = self.llm_client.get_performance_stats()
|
|
275
|
-
|
|
276
|
-
if stats:
|
|
277
|
-
from pprint import pprint
|
|
278
|
-
|
|
279
|
-
formatted_stats = {
|
|
280
|
-
k: round(v, 1) if isinstance(v, float) else v
|
|
281
|
-
for k, v in stats.items()
|
|
282
|
-
}
|
|
283
|
-
pprint(formatted_stats)
|
|
284
|
-
|
|
285
|
-
except Exception as e:
|
|
286
|
-
if text_queue:
|
|
287
|
-
text_queue.put("__END__")
|
|
288
|
-
raise e
|
|
289
|
-
finally:
|
|
290
|
-
if self.tts_thread and self.tts_thread.is_alive():
|
|
291
|
-
# Wait for TTS to finish before resuming recording
|
|
292
|
-
tts_finished.wait(timeout=2.0) # Add reasonable timeout
|
|
293
|
-
self.tts_thread.join(timeout=1.0)
|
|
294
|
-
|
|
295
|
-
# Only resume recording after TTS is completely finished
|
|
296
|
-
if self.whisper_asr:
|
|
297
|
-
self.whisper_asr.resume_recording()
|
|
298
|
-
|
|
299
|
-
def initialize_tts(self):
|
|
300
|
-
"""Initialize TTS if enabled."""
|
|
301
|
-
if self.enable_tts:
|
|
302
|
-
try:
|
|
303
|
-
from gaia.audio.kokoro_tts import KokoroTTS
|
|
304
|
-
|
|
305
|
-
self.tts = KokoroTTS()
|
|
306
|
-
self.log.debug("TTS initialized successfully")
|
|
307
|
-
except Exception as e:
|
|
308
|
-
raise RuntimeError(
|
|
309
|
-
f'Failed to initialize TTS:\n{e}\nInstall talk dependencies with: uv pip install ".[talk]"\nYou can also use --no-tts option to disable TTS'
|
|
310
|
-
)
|
|
311
|
-
|
|
312
|
-
async def speak_text(self, text: str) -> None:
|
|
313
|
-
"""Speak text using initialized TTS, if available."""
|
|
314
|
-
if not self.enable_tts:
|
|
315
|
-
return
|
|
316
|
-
if not getattr(self, "tts", None):
|
|
317
|
-
self.log.debug("TTS is not initialized; skipping speak_text")
|
|
318
|
-
return
|
|
319
|
-
# Reuse the streaming path used in process_voice_input
|
|
320
|
-
text_queue = queue.Queue(maxsize=100)
|
|
321
|
-
interrupt_event = threading.Event()
|
|
322
|
-
tts_thread = threading.Thread(
|
|
323
|
-
target=self.tts.generate_speech_streaming,
|
|
324
|
-
args=(text_queue,),
|
|
325
|
-
kwargs={"interrupt_event": interrupt_event},
|
|
326
|
-
daemon=True,
|
|
327
|
-
)
|
|
328
|
-
tts_thread.start()
|
|
329
|
-
# Send the whole text and end
|
|
330
|
-
text_queue.put(text)
|
|
331
|
-
text_queue.put("__END__")
|
|
332
|
-
tts_thread.join(timeout=5.0)
|
|
333
|
-
|
|
334
|
-
def _process_audio_wrapper(self, message_processor_callback):
|
|
335
|
-
"""Wrapper method to process audio and handle transcriptions"""
|
|
336
|
-
try:
|
|
337
|
-
accumulated_text = []
|
|
338
|
-
current_display = ""
|
|
339
|
-
last_transcription_time = time.time()
|
|
340
|
-
spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
|
|
341
|
-
dots_animation = [" ", ". ", ".. ", "..."]
|
|
342
|
-
spinner_idx = 0
|
|
343
|
-
dots_idx = 0
|
|
344
|
-
animation_counter = 0
|
|
345
|
-
self.is_speaking = False # Initialize speaking state
|
|
346
|
-
|
|
347
|
-
while self.whisper_asr and self.whisper_asr.is_recording:
|
|
348
|
-
try:
|
|
349
|
-
text = self.transcription_queue.get(timeout=0.1)
|
|
350
|
-
|
|
351
|
-
current_time = time.time()
|
|
352
|
-
time_since_last = current_time - last_transcription_time
|
|
353
|
-
cleaned_text = text.lower().strip().rstrip(".!?")
|
|
354
|
-
|
|
355
|
-
# Handle special commands
|
|
356
|
-
if cleaned_text in ["stop"]:
|
|
357
|
-
print("\nStopping voice chat...")
|
|
358
|
-
self.whisper_asr.stop_recording()
|
|
359
|
-
break
|
|
360
|
-
|
|
361
|
-
# Update animations
|
|
362
|
-
spinner_idx = (spinner_idx + 1) % len(spinner_chars)
|
|
363
|
-
animation_counter += 1
|
|
364
|
-
if animation_counter % 4 == 0: # Update dots every fourth cycle
|
|
365
|
-
dots_idx = (dots_idx + 1) % len(dots_animation)
|
|
366
|
-
spinner = spinner_chars[spinner_idx]
|
|
367
|
-
dots = dots_animation[dots_idx]
|
|
368
|
-
|
|
369
|
-
# Normal text processing - only if it's not a system message
|
|
370
|
-
if text != current_display:
|
|
371
|
-
# Clear the current line and display updated text with spinner
|
|
372
|
-
print(f"\r\033[K{spinner} {text}", end="", flush=True)
|
|
373
|
-
current_display = text
|
|
374
|
-
|
|
375
|
-
# Only add new text if it's significantly different
|
|
376
|
-
if not any(text in existing for existing in accumulated_text):
|
|
377
|
-
accumulated_text = [text] # Replace instead of append
|
|
378
|
-
last_transcription_time = current_time
|
|
379
|
-
|
|
380
|
-
# Process accumulated text after silence threshold
|
|
381
|
-
if time_since_last > self.silence_threshold:
|
|
382
|
-
if accumulated_text:
|
|
383
|
-
complete_text = accumulated_text[
|
|
384
|
-
-1
|
|
385
|
-
] # Use only the last transcription
|
|
386
|
-
print() # Add a newline before agent response
|
|
387
|
-
asyncio.run(message_processor_callback(complete_text))
|
|
388
|
-
accumulated_text = []
|
|
389
|
-
current_display = ""
|
|
390
|
-
|
|
391
|
-
except queue.Empty:
|
|
392
|
-
# Update animations
|
|
393
|
-
spinner_idx = (spinner_idx + 1) % len(spinner_chars)
|
|
394
|
-
animation_counter += 1
|
|
395
|
-
if animation_counter % 4 == 0:
|
|
396
|
-
dots_idx = (dots_idx + 1) % len(dots_animation)
|
|
397
|
-
spinner = spinner_chars[spinner_idx]
|
|
398
|
-
dots = dots_animation[dots_idx]
|
|
399
|
-
|
|
400
|
-
if current_display:
|
|
401
|
-
print(
|
|
402
|
-
f"\r\033[K{spinner} {current_display}", end="", flush=True
|
|
403
|
-
)
|
|
404
|
-
else:
|
|
405
|
-
# Access the class-level speaking state
|
|
406
|
-
status = (
|
|
407
|
-
"Speaking"
|
|
408
|
-
if getattr(self, "is_speaking", False)
|
|
409
|
-
else "Listening"
|
|
410
|
-
)
|
|
411
|
-
print(f"\r\033[K{spinner} {status}{dots}", end="", flush=True)
|
|
412
|
-
|
|
413
|
-
if (
|
|
414
|
-
accumulated_text
|
|
415
|
-
and (time.time() - last_transcription_time)
|
|
416
|
-
> self.silence_threshold
|
|
417
|
-
):
|
|
418
|
-
complete_text = accumulated_text[-1]
|
|
419
|
-
print() # Add a newline before agent response
|
|
420
|
-
asyncio.run(message_processor_callback(complete_text))
|
|
421
|
-
accumulated_text = []
|
|
422
|
-
current_display = ""
|
|
423
|
-
|
|
424
|
-
except Exception as e:
|
|
425
|
-
self.log.error(f"Error in process_audio_wrapper: {str(e)}")
|
|
426
|
-
finally:
|
|
427
|
-
if self.whisper_asr:
|
|
428
|
-
self.whisper_asr.stop_recording()
|
|
429
|
-
if self.tts_thread and self.tts_thread.is_alive():
|
|
430
|
-
self.tts_thread.join(timeout=1.0) # Add timeout to thread join
|
|
431
|
-
|
|
432
|
-
async def halt_generation(self):
|
|
433
|
-
"""Send a request to halt the current generation."""
|
|
434
|
-
if self.llm_client.halt_generation():
|
|
435
|
-
self.log.debug("Successfully halted generation via LLMClient")
|
|
436
|
-
print("\nGeneration interrupted.")
|
|
437
|
-
else:
|
|
438
|
-
self.log.debug("Halt requested - generation will stop on next iteration")
|
|
439
|
-
print("\nInterrupt requested.")
|
|
1
|
+
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import queue
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
from gaia.llm import create_client
|
|
10
|
+
from gaia.logger import get_logger
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AudioClient:
|
|
14
|
+
"""Handles all audio-related functionality including TTS, ASR, and voice chat."""
|
|
15
|
+
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
whisper_model_size="base",
|
|
19
|
+
audio_device_index=None, # Use default input device
|
|
20
|
+
silence_threshold=0.5,
|
|
21
|
+
enable_tts=True,
|
|
22
|
+
logging_level="INFO",
|
|
23
|
+
use_claude=False,
|
|
24
|
+
use_chatgpt=False,
|
|
25
|
+
system_prompt=None,
|
|
26
|
+
):
|
|
27
|
+
self.log = get_logger(__name__)
|
|
28
|
+
self.log.setLevel(getattr(__import__("logging"), logging_level))
|
|
29
|
+
|
|
30
|
+
# Audio configuration
|
|
31
|
+
self.whisper_model_size = whisper_model_size
|
|
32
|
+
self.audio_device_index = audio_device_index
|
|
33
|
+
self.silence_threshold = silence_threshold
|
|
34
|
+
self.enable_tts = enable_tts
|
|
35
|
+
|
|
36
|
+
# Audio state
|
|
37
|
+
self.is_speaking = False
|
|
38
|
+
self.tts_thread = None
|
|
39
|
+
self.whisper_asr = None
|
|
40
|
+
self.transcription_queue = queue.Queue()
|
|
41
|
+
self.tts = None
|
|
42
|
+
|
|
43
|
+
# Initialize LLM client - factory auto-detects provider from flags
|
|
44
|
+
self.llm_client = create_client(
|
|
45
|
+
use_claude=use_claude,
|
|
46
|
+
use_openai=use_chatgpt,
|
|
47
|
+
system_prompt=system_prompt,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
self.log.info("Audio client initialized.")
|
|
51
|
+
|
|
52
|
+
async def start_voice_chat(self, message_processor_callback):
|
|
53
|
+
"""Start a voice-based chat session."""
|
|
54
|
+
try:
|
|
55
|
+
self.log.debug("Initializing voice chat...")
|
|
56
|
+
print(
|
|
57
|
+
"Starting voice chat.\n"
|
|
58
|
+
"Say 'stop' to quit application "
|
|
59
|
+
"or 'restart' to clear the chat history.\n"
|
|
60
|
+
"Press Enter key to stop during audio playback."
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Initialize TTS before starting voice chat
|
|
64
|
+
self.initialize_tts()
|
|
65
|
+
|
|
66
|
+
from gaia.audio.whisper_asr import WhisperAsr
|
|
67
|
+
|
|
68
|
+
# Create WhisperAsr with custom thresholds
|
|
69
|
+
# Your audio shows energy levels of 0.02-0.03 when speaking
|
|
70
|
+
self.whisper_asr = WhisperAsr(
|
|
71
|
+
model_size=self.whisper_model_size,
|
|
72
|
+
device_index=self.audio_device_index,
|
|
73
|
+
transcription_queue=self.transcription_queue,
|
|
74
|
+
silence_threshold=0.01, # Set higher to ensure detection (your levels are 0.01-0.2+)
|
|
75
|
+
min_audio_length=16000 * 1.0, # 1 second minimum at 16kHz
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Log the thresholds being used (reduce verbosity)
|
|
79
|
+
self.log.debug(
|
|
80
|
+
f"Audio settings: SILENCE_THRESHOLD={self.whisper_asr.SILENCE_THRESHOLD}, "
|
|
81
|
+
f"MIN_LENGTH={self.whisper_asr.MIN_AUDIO_LENGTH/self.whisper_asr.RATE:.1f}s"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
device_name = self.whisper_asr.get_device_name()
|
|
85
|
+
self.log.debug(f"Using audio device: {device_name}")
|
|
86
|
+
|
|
87
|
+
# Start recording
|
|
88
|
+
self.log.debug("Starting audio recording...")
|
|
89
|
+
self.whisper_asr.start_recording()
|
|
90
|
+
|
|
91
|
+
# Start the processing thread after recording is initialized
|
|
92
|
+
self.log.debug("Starting audio processing thread...")
|
|
93
|
+
process_thread = threading.Thread(
|
|
94
|
+
target=self._process_audio_wrapper, args=(message_processor_callback,)
|
|
95
|
+
)
|
|
96
|
+
process_thread.daemon = True
|
|
97
|
+
process_thread.start()
|
|
98
|
+
|
|
99
|
+
# Keep the main thread alive while processing
|
|
100
|
+
self.log.debug("Listening for voice input...")
|
|
101
|
+
try:
|
|
102
|
+
while True:
|
|
103
|
+
if not process_thread.is_alive():
|
|
104
|
+
self.log.debug("Process thread stopped unexpectedly")
|
|
105
|
+
break
|
|
106
|
+
if not self.whisper_asr or not self.whisper_asr.is_recording:
|
|
107
|
+
self.log.warning("Recording stopped unexpectedly")
|
|
108
|
+
break
|
|
109
|
+
await asyncio.sleep(0.1)
|
|
110
|
+
|
|
111
|
+
except KeyboardInterrupt:
|
|
112
|
+
self.log.info("Received keyboard interrupt")
|
|
113
|
+
print("\nStopping voice chat...")
|
|
114
|
+
except Exception as e:
|
|
115
|
+
self.log.error(f"Error in main processing loop: {str(e)}")
|
|
116
|
+
raise
|
|
117
|
+
finally:
|
|
118
|
+
if self.whisper_asr:
|
|
119
|
+
self.log.debug("Stopping recording...")
|
|
120
|
+
self.whisper_asr.stop_recording()
|
|
121
|
+
self.log.debug("Waiting for process thread to finish...")
|
|
122
|
+
process_thread.join(timeout=2.0)
|
|
123
|
+
|
|
124
|
+
except ImportError:
|
|
125
|
+
self.log.error(
|
|
126
|
+
'WhisperAsr not found. Please install voice support with: uv pip install ".[talk]"'
|
|
127
|
+
)
|
|
128
|
+
raise
|
|
129
|
+
except Exception as e:
|
|
130
|
+
self.log.error(f"Failed to initialize voice chat: {str(e)}")
|
|
131
|
+
raise
|
|
132
|
+
finally:
|
|
133
|
+
if self.whisper_asr:
|
|
134
|
+
self.whisper_asr.stop_recording()
|
|
135
|
+
self.log.info("Voice recording stopped")
|
|
136
|
+
|
|
137
|
+
async def process_voice_input(self, text, get_stats_callback=None):
    """Process one transcribed utterance: stream an LLM reply and optionally speak it.

    Visible flow:
      1. Halt any in-flight generation and pause ASR recording.
      2. Start a daemon keyboard-listener thread that interrupts generation
         and TTS on any console input.
      3. If TTS is enabled, start a streaming TTS worker fed by a bounded queue.
      4. Stream LLM chunks to stdout and the TTS queue, buffering the first
         ~20 chars so TTS starts with a meaningful phrase.
      5. Print performance stats, then resume recording in the outer finally.

    Args:
        text: The transcribed user utterance to send to the LLM.
        get_stats_callback: Optional zero-arg callable returning a stats dict;
            used instead of ``self.llm_client.get_performance_stats()`` for
            backward compatibility.

    Raises:
        Exception: re-raises any error from generation after signalling the
            TTS worker to stop via the ``"__END__"`` sentinel.
    """

    # Initialize TTS streaming state. text_queue stays None when TTS is off;
    # the keyboard listener closure reads it late-bound, so it sees the queue
    # created further down once TTS is enabled.
    text_queue = None
    tts_finished = threading.Event()  # Set by the TTS status callback when playback ends
    interrupt_event = threading.Event()  # Set by the keyboard listener on user interrupt

    try:
        # Check if we're currently generating and halt if needed before
        # submitting the new query.
        if self.llm_client.is_generating():
            self.log.debug("Generation in progress, halting...")
            if self.llm_client.halt_generation():
                print("\nGeneration interrupted.")
                await asyncio.sleep(0.5)

        # Pause audio recording before sending query so the mic does not
        # pick up the assistant's own TTS output.
        if self.whisper_asr:
            self.whisper_asr.pause_recording()
            self.log.debug("Recording paused before generation")

        self.log.debug(f"Sending message to LLM: {text[:50]}...")
        print("\nGaia: ", end="", flush=True)

        # Keyboard listener thread for both generation and playback.
        # Blocks in input(); any line typed by the user triggers the halt.
        def keyboard_listener():
            input()  # Wait for any input

            # Use LLMClient to halt generation
            if self.llm_client.halt_generation():
                print("\nGeneration interrupted.")
            else:
                print("\nInterrupt requested.")

            interrupt_event.set()
            if text_queue:
                text_queue.put("__HALT__")  # Signal TTS to stop immediately

        # Start keyboard listener thread (daemon, so it never blocks shutdown;
        # its join below may time out while it waits in input()).
        keyboard_thread = threading.Thread(target=keyboard_listener)
        keyboard_thread.daemon = True
        keyboard_thread.start()

        if self.enable_tts:
            # Bounded queue provides back-pressure between the LLM stream
            # and the TTS worker.
            text_queue = queue.Queue(maxsize=100)

            # Define status callback to update speaking state and to pause or
            # resume ASR recording around TTS playback.
            def tts_status_callback(is_speaking):
                self.is_speaking = is_speaking
                if not is_speaking:  # When TTS finishes speaking
                    tts_finished.set()
                    if self.whisper_asr:
                        self.whisper_asr.resume_recording()
                else:  # When TTS starts speaking
                    if self.whisper_asr:
                        self.whisper_asr.pause_recording()
                self.log.debug(f"TTS speaking state: {is_speaking}")

            self.tts_thread = threading.Thread(
                target=self.tts.generate_speech_streaming,
                args=(text_queue,),
                kwargs={
                    "status_callback": tts_status_callback,
                    "interrupt_event": interrupt_event,
                },
                daemon=True,
            )
            self.tts_thread.start()

        # Use LLMClient streaming instead of WebSocket.
        accumulated_response = ""
        initial_buffer = ""  # Buffer for the start of response so TTS gets a phrase, not a fragment
        initial_buffer_sent = False

        try:
            # Start LLM generation with streaming
            response_stream = self.llm_client.generate(text, stream=True)

            # Process streaming response
            for chunk in response_stream:
                if interrupt_event.is_set():
                    self.log.debug("Keyboard interrupt detected, stopping...")
                    if text_queue:
                        text_queue.put("__END__")
                    break

                # New speech arrived while generating: abandon this reply
                # and return so the new utterance can be processed.
                if self.transcription_queue.qsize() > 0:
                    self.log.debug(
                        "New input detected during generation, stopping..."
                    )
                    if text_queue:
                        text_queue.put("__END__")
                    # Use LLMClient to halt generation
                    if self.llm_client.halt_generation():
                        self.log.debug("Generation interrupted for new input.")
                    return

                if chunk:
                    print(chunk, end="", flush=True)
                    if text_queue:
                        if not initial_buffer_sent:
                            initial_buffer += chunk
                            # Send if we've reached 20 chars or if we get a clear end marker
                            if len(initial_buffer) >= 20 or chunk.endswith(
                                ("\n", ". ", "! ", "? ")
                            ):
                                text_queue.put(initial_buffer)
                                initial_buffer_sent = True
                        else:
                            text_queue.put(chunk)
                    accumulated_response += chunk

            # Send any remaining buffered content, then the end-of-stream
            # sentinel the TTS worker watches for.
            if text_queue:
                if not initial_buffer_sent and initial_buffer:
                    # Small delay for very short responses
                    if len(initial_buffer) <= 20:
                        await asyncio.sleep(0.1)
                    text_queue.put(initial_buffer)
                text_queue.put("__END__")

        except Exception as e:
            # Make sure the TTS worker is released before propagating.
            if text_queue:
                text_queue.put("__END__")
            raise e
        finally:
            if self.tts_thread and self.tts_thread.is_alive():
                self.tts_thread.join(timeout=1.0)  # Add timeout to thread join
            keyboard_thread.join(timeout=1.0)  # Add timeout to keyboard thread join

        print("\n")
        # Get performance stats from LLMClient
        if get_stats_callback:
            # First try the provided callback for backward compatibility
            stats = get_stats_callback()
        else:
            # Use LLMClient stats
            stats = self.llm_client.get_performance_stats()

        if stats:
            from pprint import pprint

            # Round float metrics to one decimal for readable console output.
            formatted_stats = {
                k: round(v, 1) if isinstance(v, float) else v
                for k, v in stats.items()
            }
            pprint(formatted_stats)

    except Exception as e:
        # Outer safety net mirrors the inner handler for errors raised
        # outside the streaming try (e.g. during setup).
        if text_queue:
            text_queue.put("__END__")
        raise e
    finally:
        if self.tts_thread and self.tts_thread.is_alive():
            # Wait for TTS to finish before resuming recording
            tts_finished.wait(timeout=2.0)  # Add reasonable timeout
            self.tts_thread.join(timeout=1.0)

        # Only resume recording after TTS is completely finished
        if self.whisper_asr:
            self.whisper_asr.resume_recording()
|
299
|
+
def initialize_tts(self):
    """Initialize the Kokoro TTS engine when TTS is enabled.

    Does nothing when ``self.enable_tts`` is false.

    Raises:
        RuntimeError: if the TTS engine or its optional dependencies cannot
            be loaded; the original failure is chained as the cause.
    """
    # Guard clause instead of nesting the whole body under the flag.
    if not self.enable_tts:
        return
    try:
        # Imported lazily so the "[talk]" extra is only required when TTS is on.
        from gaia.audio.kokoro_tts import KokoroTTS

        self.tts = KokoroTTS()
        self.log.debug("TTS initialized successfully")
    except Exception as e:
        # Chain the original exception (``from e``) so the real import or
        # initialization failure stays visible in the traceback.
        raise RuntimeError(
            f'Failed to initialize TTS:\n{e}\nInstall talk dependencies with: uv pip install ".[talk]"\nYou can also use --no-tts option to disable TTS'
        ) from e
|
312
|
+
async def speak_text(self, text: str) -> None:
    """Speak *text* through the configured TTS engine, if one is available.

    No-op when TTS is disabled or the engine was never initialized.
    """
    if not self.enable_tts:
        return
    if not getattr(self, "tts", None):
        self.log.debug("TTS is not initialized; skipping speak_text")
        return

    # Drive the streaming TTS worker through a bounded queue, mirroring the
    # pipeline used for live voice responses.
    stream_queue = queue.Queue(maxsize=100)
    stop_signal = threading.Event()
    worker = threading.Thread(
        target=self.tts.generate_speech_streaming,
        args=(stream_queue,),
        kwargs={"interrupt_event": stop_signal},
        daemon=True,
    )
    worker.start()

    # Hand over the full utterance, then the end-of-stream sentinel.
    stream_queue.put(text)
    stream_queue.put("__END__")
    worker.join(timeout=5.0)
|
334
|
+
def _process_audio_wrapper(self, message_processor_callback):
    """Poll the transcription queue and dispatch utterances to the processor.

    Runs until recording stops. On each transcription it updates a console
    spinner line; after ``self.silence_threshold`` seconds without new text it
    calls ``message_processor_callback`` (a coroutine function) with the last
    transcription via ``asyncio.run`` — note that this blocks the polling loop
    for the duration of the response. Saying "stop" ends the session.

    Args:
        message_processor_callback: async callable taking the final
            transcription string (e.g. ``process_voice_input``).
    """
    try:
        accumulated_text = []        # Most recent candidate transcription(s); replaced, not appended
        current_display = ""         # What is currently shown on the spinner line
        last_transcription_time = time.time()
        spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
        dots_animation = [" ", ". ", ".. ", "..."]
        spinner_idx = 0
        dots_idx = 0
        animation_counter = 0
        self.is_speaking = False  # Initialize speaking state

        while self.whisper_asr and self.whisper_asr.is_recording:
            try:
                # Short timeout keeps the spinner animating via queue.Empty.
                text = self.transcription_queue.get(timeout=0.1)

                current_time = time.time()
                time_since_last = current_time - last_transcription_time
                # Normalize for command matching (case/punctuation-insensitive).
                cleaned_text = text.lower().strip().rstrip(".!?")

                # Handle special commands
                if cleaned_text in ["stop"]:
                    print("\nStopping voice chat...")
                    self.whisper_asr.stop_recording()
                    break

                # Update animations
                spinner_idx = (spinner_idx + 1) % len(spinner_chars)
                animation_counter += 1
                if animation_counter % 4 == 0:  # Update dots every fourth cycle
                    dots_idx = (dots_idx + 1) % len(dots_animation)
                spinner = spinner_chars[spinner_idx]
                # NOTE(review): `dots` is unused in this branch (only the
                # queue.Empty branch prints it) — appears to be dead here.
                dots = dots_animation[dots_idx]

                # Normal text processing - only if it's not a system message
                if text != current_display:
                    # Clear the current line and display updated text with spinner
                    # (\r\033[K = carriage return + ANSI erase-to-end-of-line).
                    print(f"\r\033[K{spinner} {text}", end="", flush=True)
                    current_display = text

                    # Only add new text if it's significantly different
                    if not any(text in existing for existing in accumulated_text):
                        accumulated_text = [text]  # Replace instead of append
                        last_transcription_time = current_time

                # Process accumulated text after silence threshold
                if time_since_last > self.silence_threshold:
                    if accumulated_text:
                        complete_text = accumulated_text[
                            -1
                        ]  # Use only the last transcription
                        print()  # Add a newline before agent response
                        asyncio.run(message_processor_callback(complete_text))
                        accumulated_text = []
                        current_display = ""

            except queue.Empty:
                # No new transcription this tick: keep animating and check
                # whether the silence threshold has been crossed.
                spinner_idx = (spinner_idx + 1) % len(spinner_chars)
                animation_counter += 1
                if animation_counter % 4 == 0:
                    dots_idx = (dots_idx + 1) % len(dots_animation)
                spinner = spinner_chars[spinner_idx]
                dots = dots_animation[dots_idx]

                if current_display:
                    print(
                        f"\r\033[K{spinner} {current_display}", end="", flush=True
                    )
                else:
                    # Access the class-level speaking state (set by the TTS
                    # status callback) to pick the idle caption.
                    status = (
                        "Speaking"
                        if getattr(self, "is_speaking", False)
                        else "Listening"
                    )
                    print(f"\r\033[K{spinner} {status}{dots}", end="", flush=True)

                # Same silence-threshold dispatch as the main branch, so an
                # utterance is still processed when the queue goes quiet.
                if (
                    accumulated_text
                    and (time.time() - last_transcription_time)
                    > self.silence_threshold
                ):
                    complete_text = accumulated_text[-1]
                    print()  # Add a newline before agent response
                    asyncio.run(message_processor_callback(complete_text))
                    accumulated_text = []
                    current_display = ""

    except Exception as e:
        # Best-effort loop: log and fall through to cleanup rather than crash
        # the whole voice session.
        self.log.error(f"Error in process_audio_wrapper: {str(e)}")
    finally:
        if self.whisper_asr:
            self.whisper_asr.stop_recording()
        if self.tts_thread and self.tts_thread.is_alive():
            self.tts_thread.join(timeout=1.0)  # Add timeout to thread join
|
432
|
+
async def halt_generation(self):
    """Send a request to halt the current generation.

    Reports whether the client confirmed the halt immediately or merely
    queued it for the next generation step.
    """
    halted_now = self.llm_client.halt_generation()
    if halted_now:
        self.log.debug("Successfully halted generation via LLMClient")
        print("\nGeneration interrupted.")
        return
    self.log.debug("Halt requested - generation will stop on next iteration")
    print("\nInterrupt requested.")