amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
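
Beyond the per-file counts, the list above shows three structural moves in 0.15.1: the monolithic gaia/llm/llm_client.py (729 lines) is deleted in favor of gaia/llm/base_client.py, gaia/llm/exceptions.py, gaia/llm/factory.py, and per-backend modules under gaia/llm/providers/ (claude, lemonade, openai_provider); a new gaia/agents/summarize/ package is added; and gaia/agents/code/app.py is replaced by gaia/agents/code/cli.py. Below is a minimal sketch of the base/factory/provider pattern this layout suggests; every class and function name in it is an illustrative assumption, not gaia's actual API.

# Illustrative sketch only: the names below are assumptions drawn from the new
# file layout (base_client.py, factory.py, providers/), not gaia's real API.
from abc import ABC, abstractmethod


class BaseLLMClient(ABC):
    """Shared interface each provider implements (assumed role of base_client.py)."""

    @abstractmethod
    def generate(self, prompt: str, max_tokens: int = 512) -> str:
        """Return the model's completion for a prompt."""


class LemonadeProvider(BaseLLMClient):
    """Local-inference backend (assumed role of providers/lemonade.py)."""

    def generate(self, prompt: str, max_tokens: int = 512) -> str:
        return f"[lemonade stub] {prompt[:40]}"  # stand-in for a real local call


class OpenAIProvider(BaseLLMClient):
    """Hosted backend (assumed role of providers/openai_provider.py)."""

    def generate(self, prompt: str, max_tokens: int = 512) -> str:
        return f"[openai stub] {prompt[:40]}"  # stand-in for a real API call


def create_llm_client(provider: str) -> BaseLLMClient:
    """Assumed role of factory.py: resolve a provider name to a client."""
    registry = {"lemonade": LemonadeProvider, "openai": OpenAIProvider}
    if provider not in registry:
        raise ValueError(f"Unknown LLM provider: {provider!r}")
    return registry[provider]()

The point of such a factory is that callers select a backend by name and code against one interface, which matches the one-module-per-provider layout replacing the single llm_client.py.
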
gaia/apps/summarize/app.py
CHANGED
@@ -1,633 +1,116 @@
-#!/usr/bin/env python3
-# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
-# SPDX-License-Identifier: MIT
-
-"""
-Gaia Summarizer Application -
-"""
-
[old lines 9-116: import statements and earlier definitions were blanked by the diff extraction and could not be recovered]
-        # First try simple heuristics
-        email_patterns = [
-            r"From:\s*\S+",
-            r"To:\s*\S+",
-            r"Subject:\s*\S+",
-            r"Dear\s+\w+",
-            r"Sincerely,?\s*\n",
-            r"Best regards,?\s*\n",
-        ]
-
-        transcript_patterns = [
-            r"\w+:\s*[^\n]+",  # Speaker: dialogue
-            r"\[\d{1,2}:\d{2}\]",  # Time stamps
-            r"\(\d{1,2}:\d{2}\)",
-        ]
-
-        # Count pattern matches
-        email_score = sum(
-            1
-            for pattern in email_patterns
-            if re.search(pattern, content[:500], re.IGNORECASE)
-        )
-        transcript_score = sum(
-            1 for pattern in transcript_patterns if re.search(pattern, content[:500])
-        )
-
-        if email_score > transcript_score and email_score >= 2:
-            detected_type = "email"
-        elif transcript_score > email_score and transcript_score >= 2:
-            detected_type = "transcript"
-        else:
-            # Use LLM as fallback with retry logic
-            detection_prompt = (
-                """Analyze this text and determine if it's a meeting transcript or an email.
-
-A meeting transcript typically has:
-- Multiple speakers with dialogue
-- Time stamps or speaker labels
-- Conversational flow
-
-An email typically has:
-- From/To/Subject headers or email-like structure
-- Formal greeting and closing
-- Single author perspective
-
-Respond with ONLY one word: 'transcript' or 'email'
-
-Text to analyze:
-"""
-                + content[:1000]
-            )  # Only use first 1000 chars for detection
-
-            for attempt in range(self.max_retries):
-                try:
-                    response = self.llm_client.generate(
-                        detection_prompt, model=self.config.model, max_tokens=10
-                    )
-
-                    detected_type = response.strip().lower()
-                    if detected_type not in ["transcript", "email"]:
-                        # Default to transcript if unclear
-                        detected_type = "transcript"
-                    break
-                except Exception as e:
-                    if attempt < self.max_retries - 1:
-                        self.log.warning(
-                            f"Content type detection attempt {attempt + 1} failed: {e}. Retrying..."
-                        )
-                        time.sleep(self.retry_delay * (attempt + 1))
-                    else:
-                        self.log.error(
-                            f"Failed to detect content type after {self.max_retries} attempts"
-                        )
-                        detected_type = "transcript"  # Default fallback
-
-        self.log.info(f"Auto-detected content type: {detected_type}")
-        return detected_type
-
-    def generate_summary_prompt(
-        self, content: str, content_type: str, style: str
-    ) -> str:
-        """Generate the prompt for a specific summary style"""
-        style_instruction = SUMMARY_STYLES.get(style, SUMMARY_STYLES["brief"])
-
-        if style == "participants" and content_type == "email":
-            # Special handling for email participants
-            prompt = f"""Extract the sender and all recipients from this email.
-
-Format your response as JSON:
-{{
-    "sender": "sender email/name",
-    "recipients": ["recipient1", "recipient2"],
-    "cc": ["cc1", "cc2"] (if any),
-    "bcc": ["bcc1"] (if any)
-}}
-
-Email content:
-{content}"""
-        elif style == "action_items":
-            prompt = f"""Extract all action items from this {content_type}.
-
-{style_instruction}
-
-Format each action item with:
-- The specific action required
-- Who is responsible (if mentioned)
-- Any deadline or timeline (if mentioned)
-
-If no action items are found, respond with "No specific action items identified."
-
-Content:
-{content}"""
-        else:
-            prompt = f"""Analyze this {content_type} and {style_instruction}
-
-Content:
-{content}"""
-
-        return prompt
-
-    def generate_combined_prompt(
-        self, content: str, content_type: str, styles: List[str]
-    ) -> str:
-        """Generate a single prompt for multiple summary styles"""
-        sections = []
-        for style in styles:
-            style_instruction = SUMMARY_STYLES.get(style, SUMMARY_STYLES["brief"])
-            sections.append(f"- {style.upper()}: {style_instruction}")
-
-        prompt = f"""Analyze this {content_type} and generate the following summaries:
-
-{chr(10).join(sections)}
-
-Format your response with clear section headers for each style.
-
-Content:
-{content}"""
-
-        return prompt
-
-    def summarize_with_style(
-        self, content: str, content_type: str, style: str
-    ) -> Dict[str, Any]:
-        """Generate a summary for a specific style with retry logic"""
-        start_time = time.time()
-
-        # Set appropriate system prompt
-        system_prompt = SYSTEM_PROMPTS.get(content_type, SYSTEM_PROMPTS["transcript"])
-        self.chat_sdk.config.system_prompt = system_prompt
-
-        # Generate prompt
-        prompt = self.generate_summary_prompt(content, content_type, style)
-
-        # Check if content might exceed token limits
-        estimated_tokens = len(content.split()) + len(prompt.split())
-        if estimated_tokens > 3000:  # Conservative estimate
-            self.log.warning(
-                f"Content may exceed token limits. Estimated tokens: {estimated_tokens}"
-            )
-
-        # Get summary with retry logic
-        response = None
-        last_error = None
-
-        for attempt in range(self.max_retries):
-            try:
-                response = self.chat_sdk.send(prompt)
-                break
-            except Exception as e:
-                last_error = e
-                error_msg = str(e).lower()
-
-                # Check for specific error types
-                if "token" in error_msg and "limit" in error_msg:
-                    # Token limit error - reduce content or max_tokens
-                    self.log.warning(
-                        f"Token limit exceeded. Attempting with reduced content..."
-                    )
-                    # Truncate content to 75% of original
-                    truncated_content = (
-                        content[: int(len(content) * 0.75)]
-                        + "\n\n[Content truncated due to length...]"
-                    )
-                    prompt = self.generate_summary_prompt(
-                        truncated_content, content_type, style
-                    )
-                elif "connection" in error_msg or "timeout" in error_msg:
-                    self.log.warning(f"Connection error on attempt {attempt + 1}: {e}")
-                    if attempt < self.max_retries - 1:
-                        time.sleep(self.retry_delay * (attempt + 1))
-                        continue
-                else:
-                    self.log.error(f"Unexpected error on attempt {attempt + 1}: {e}")
-
-                if attempt >= self.max_retries - 1:
-                    raise RuntimeError(
-                        f"Failed to generate {style} summary after {self.max_retries} attempts: {last_error}"
-                    )
-
-        # Get performance stats
-        try:
-            perf_stats = self.llm_client.get_performance_stats()
-        except Exception as e:
-            self.log.warning(f"Failed to get performance stats: {e}")
-            perf_stats = {}
-
-        # Calculate processing time
-        processing_time_ms = int((time.time() - start_time) * 1000)
-
-        # Format result based on style
-        result = {"text": response.text}
-
-        # Add style-specific fields
-        if style == "action_items":
-            # Try to parse action items from response
-            lines = response.text.strip().split("\n")
-            items = []
-            for line in lines:
-                line = line.strip()
-                # Include all non-empty lines except obvious headers
-                if (
-                    line
-                    and not line.lower().startswith("action items:")
-                    and not line.startswith("**Action")
-                ):
-                    items.append(line)
-            if items:
-                result["items"] = items
-
-        elif style == "participants":
-            if content_type == "email":
-                # Try to parse JSON response for email participants
-                try:
-                    participants_data = json.loads(response.text)
-                    result.update(participants_data)
-                except:
-                    # Fallback to text if not valid JSON
-                    pass
-            else:
-                # Extract participants from transcript response
-                lines = response.text.strip().split("\n")
-                participants = []
-                for line in lines:
-                    line = line.strip()
-                    # Include all non-empty lines (HTML viewer will format properly)
-                    if line and not line.lower().startswith("participants:"):
-                        participants.append(line)
-                if participants:
-                    result["participants"] = participants
-
-        # Add performance data
-        result["performance"] = {
-            "total_tokens": perf_stats.get("input_tokens", 0)
-            + perf_stats.get("output_tokens", 0),
-            "prompt_tokens": perf_stats.get("input_tokens", 0),
-            "completion_tokens": perf_stats.get("output_tokens", 0),
-            "time_to_first_token_ms": int(
-                perf_stats.get("time_to_first_token", 0) * 1000
-            ),
-            "tokens_per_second": perf_stats.get("tokens_per_second", 0),
-            "processing_time_ms": processing_time_ms,
-        }
-
-        return result
-
-    def summarize_combined(
-        self, content: str, content_type: str, styles: List[str]
-    ) -> Dict[str, Dict[str, Any]]:
-        """Generate summaries for multiple styles in a single LLM call"""
-        start_time = time.time()
-
-        # Set appropriate system prompt
-        system_prompt = SYSTEM_PROMPTS.get(content_type, SYSTEM_PROMPTS["transcript"])
-        self.chat_sdk.config.system_prompt = system_prompt
-
-        # Generate combined prompt
-        prompt = self.generate_combined_prompt(content, content_type, styles)
-
-        # Get combined summary
-        response = self.chat_sdk.send(prompt)
-
-        # Get performance stats
-        perf_stats = self.llm_client.get_performance_stats()
-
-        # Calculate processing time
-        processing_time_ms = int((time.time() - start_time) * 1000)
-
-        # Parse response into sections
-        # This is a simple parser - in production, might want more robust parsing
-        response_text = response.text
-        results = {}
-
-        for style in styles:
-            # Look for style header in response
-            style_upper = style.upper()
-            start_markers = [
-                f"{style_upper}:",
-                f"**{style_upper}**:",
-                f"# {style_upper}",
-                f"## {style_upper}",
-            ]
-
-            section_start = -1
-            for marker in start_markers:
-                idx = response_text.find(marker)
-                if idx != -1:
-                    section_start = idx + len(marker)
-                    break
-
-            if section_start == -1:
-                # Fallback - use entire response for first style
-                if not results:
-                    results[style] = {"text": response_text.strip()}
-                continue
-
-            # Find end of section (next style header or end of text)
-            section_end = len(response_text)
-            for next_style in styles:
-                if next_style == style:
-                    continue
-                next_upper = next_style.upper()
-                for marker in [
-                    f"{next_upper}:",
-                    f"**{next_upper}**:",
-                    f"# {next_upper}",
-                    f"## {next_upper}",
-                ]:
-                    idx = response_text.find(marker, section_start)
-                    if idx != -1 and idx < section_end:
-                        section_end = idx
-
-            section_text = response_text[section_start:section_end].strip()
-            results[style] = {"text": section_text}
-
-        # Add shared performance data to each result
-        base_perf = {
-            "total_tokens": perf_stats.get("input_tokens", 0)
-            + perf_stats.get("output_tokens", 0),
-            "prompt_tokens": perf_stats.get("input_tokens", 0),
-            "completion_tokens": perf_stats.get("output_tokens", 0),
-            "time_to_first_token_ms": int(
-                perf_stats.get("time_to_first_token", 0) * 1000
-            ),
-            "tokens_per_second": perf_stats.get("tokens_per_second", 0),
-            "processing_time_ms": processing_time_ms,
-        }
-
-        # Distribute performance metrics proportionally (simplified)
-        style_count = len(styles)
-        for style in results:
-            results[style]["performance"] = {
-                **base_perf,
-                "total_tokens": base_perf["total_tokens"] // style_count,
-                "completion_tokens": base_perf["completion_tokens"] // style_count,
-            }
-
-        return results
-
-    def summarize(
-        self, content: str, input_file: Optional[str] = None
-    ) -> Dict[str, Any]:
-        """Main summarization method"""
-        start_time = time.time()
-
-        # Detect content type
-        content_type = self.detect_content_type(content)
-
-        # Filter applicable styles
-        applicable_styles = self.config.styles.copy()
-        if content_type == "email" and "participants" in applicable_styles:
-            # Keep participants for email but handle differently
-            pass
-
-        # Generate summaries
-        if self.config.combined_prompt and len(applicable_styles) > 1:
-            # Use combined prompt for efficiency
-            summaries = self.summarize_combined(
-                content, content_type, applicable_styles
-            )
-        else:
-            # Generate each style independently
-            summaries = {}
-            for style in applicable_styles:
-                summaries[style] = self.summarize_with_style(
-                    content, content_type, style
-                )
-
-        # Calculate aggregate performance
-        total_processing_time = int((time.time() - start_time) * 1000)
-
-        # Build output structure
-        if len(applicable_styles) == 1:
-            # Single style output
-            style = applicable_styles[0]
-            output = {
-                "metadata": {
-                    "input_file": input_file or "stdin",
-                    "input_type": content_type,
-                    "model": self.config.model,
-                    "timestamp": datetime.now().isoformat(),
-                    "processing_time_ms": total_processing_time,
-                    "summary_style": style,
-                },
-                "summary": summaries[style],
-                "performance": summaries[style].get("performance", {}),
-                "original_content": content,
-            }
-        else:
-            # Multiple styles output
-            output = {
-                "metadata": {
-                    "input_file": input_file or "stdin",
-                    "input_type": content_type,
-                    "model": self.config.model,
-                    "timestamp": datetime.now().isoformat(),
-                    "processing_time_ms": total_processing_time,
-                    "summary_styles": applicable_styles,
-                },
-                "summaries": summaries,
-                "aggregate_performance": {
-                    "total_tokens": sum(
-                        s.get("performance", {}).get("total_tokens", 0)
-                        for s in summaries.values()
-                    ),
-                    "total_processing_time_ms": total_processing_time,
-                    "model_info": {
-                        "model": self.config.model,
-                        "use_local": not (
-                            self.config.use_claude or self.config.use_chatgpt
-                        ),
-                        "use_claude": self.config.use_claude,
-                        "use_chatgpt": self.config.use_chatgpt,
-                    },
-                },
-                "original_content": content,
-            }
-
-        return output
-
-    def summarize_file(self, file_path: Path) -> Dict[str, Any]:
-        """Summarize a single file"""
-        self.log.info(f"Summarizing file: {file_path}")
-
-        # Validate file exists
-        if not file_path.exists():
-            raise FileNotFoundError(f"File not found: {file_path}")
-
-        # Check file size
-        file_size_mb = file_path.stat().st_size / (1024 * 1024)
-        if file_size_mb > 10:
-            self.log.warning(
-                f"Large file ({file_size_mb:.1f}MB) may exceed token limits"
-            )
-
-        try:
-            content = file_path.read_text(encoding="utf-8")
-            if not content.strip():
-                raise ValueError(f"File is empty: {file_path}")
-            return self.summarize(content, str(file_path))
-        except UnicodeDecodeError:
-            # Try alternative encodings
-            for encoding in ["latin-1", "cp1252"]:
-                try:
-                    content = file_path.read_text(encoding=encoding)
-                    self.log.info(f"Successfully read file with {encoding} encoding")
-                    return self.summarize(content, str(file_path))
-                except UnicodeDecodeError:
-                    continue
-            raise ValueError(
-                f"Unable to decode file {file_path}. File may be binary or use unsupported encoding."
-            )
-        except Exception as e:
-            self.log.error(f"Error processing file {file_path}: {e}")
-            raise
-
-    def summarize_directory(self, dir_path: Path) -> List[Dict[str, Any]]:
-        """Summarize all files in a directory"""
-        self.log.info(f"Summarizing directory: {dir_path}")
-
-        # Validate directory exists
-        if not dir_path.exists():
-            raise FileNotFoundError(f"Directory not found: {dir_path}")
-        if not dir_path.is_dir():
-            raise ValueError(f"Path is not a directory: {dir_path}")
-
-        results = []
-        errors = []
-
-        # Find all text files
-        text_extensions = [".txt", ".md", ".log", ".email", ".transcript"]
-        files = []
-        for ext in text_extensions:
-            files.extend(dir_path.glob(f"*{ext}"))
-
-        if not files:
-            self.log.warning(f"No text files found in {dir_path}")
-            return results
-
-        self.log.info(f"Found {len(files)} files to process")
-
-        for i, file_path in enumerate(sorted(files), 1):
-            try:
-                self.log.info(f"Processing file {i}/{len(files)}: {file_path.name}")
-                result = self.summarize_file(file_path)
-                results.append(result)
-            except Exception as e:
-                error_msg = f"Failed to summarize {file_path}: {e}"
-                self.log.error(error_msg)
-                errors.append(error_msg)
-                continue
-
-        if errors:
-            self.log.warning(
-                f"Completed with {len(errors)} errors:\n" + "\n".join(errors)
-            )
-
-        return results
+#!/usr/bin/env python3
+# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+"""
+Gaia Summarizer Application - Thin wrapper that delegates to SummarizerAgent
+"""
+
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Literal, Optional
+
+from gaia.agents.summarize.agent import SummarizerAgent
+from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME
+from gaia.logger import get_logger
+
+
+# Utility functions for email validation (used by CLI and other tools)
+def validate_email_address(email: str) -> bool:
+    """Validate email address format"""
+    email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
+    return re.match(email_pattern, email.strip()) is not None
+
+
+def validate_email_list(email_list: str) -> list[str]:
+    """Validate and parse comma-separated email list"""
+    if not email_list:
+        return []
+    emails = [e.strip() for e in email_list.split(",") if e.strip()]
+    invalid_emails = [e for e in emails if not validate_email_address(e)]
+    if invalid_emails:
+        raise ValueError(f"Invalid email address(es): {', '.join(invalid_emails)}")
+    return emails
+
+
+@dataclass
+class SummaryConfig:
+    """Configuration for summarization"""
+
+    model: str = DEFAULT_MODEL_NAME
+    max_tokens: int = 1024
+    input_type: Literal["transcript", "email", "auto"] = "auto"
+    styles: List[str] = None
+    combined_prompt: bool = False
+    use_claude: bool = False
+    use_chatgpt: bool = False
+
+    def __post_init__(self):
+        if self.styles is None:
+            self.styles = ["executive", "participants", "action_items"]
+
+        # Auto-detect OpenAI models (gpt-*) to use ChatGPT
+        if self.model and self.model.lower().startswith("gpt"):
+            self.use_chatgpt = True
+
+
+class SummarizerApp:
+    """Main application class for summarization (delegates to SummarizerAgent)"""
+
+    def __init__(self, config: Optional[SummaryConfig] = None):
+        self.config = config or SummaryConfig()
+        self.log = get_logger(__name__)
+        self.agent = SummarizerAgent(
+            model=self.config.model,
+            max_tokens=self.config.max_tokens,
+            styles=self.config.styles,
+            combined_prompt=self.config.combined_prompt,
+            use_claude=self.config.use_claude,
+            use_chatgpt=self.config.use_chatgpt,
+        )
+
+    def summarize_file(
+        self,
+        file_path: Path,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> Dict[str, Any]:
+        # Always convert file_path to Path object if it's a string
+        if not isinstance(file_path, Path):
+            file_path = Path(file_path)
+        return self.agent.summarize_file(
+            file_path,
+            styles=styles,
+            combined_prompt=combined_prompt,
+            input_type=input_type,
+        )
+
+    def summarize_directory(
+        self,
+        dir_path: Path,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> List[Dict[str, Any]]:
+        return self.agent.summarize_directory(
+            dir_path,
+            styles=styles,
+            combined_prompt=combined_prompt,
+            input_type=input_type,
+        )
+
+    def summarize(
+        self,
+        content: str,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> Dict[str, Any]:
+        return self.agent.summarize(
+            content,
+            styles=styles,
+            combined_prompt=combined_prompt,
+            input_type=input_type,
+        )