PyPI - amd-gaia - Versions diffs - 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl - Mend

amd-gaia 0.14.3-py3-none-any.whl → 0.15.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (181) [hide | show]

{amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
amd_gaia-0.15.1.dist-info/RECORD +178 -0
{amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
{amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
gaia/__init__.py +29 -29
gaia/agents/__init__.py +19 -19
gaia/agents/base/__init__.py +9 -9
gaia/agents/base/agent.py +2177 -2177
gaia/agents/base/api_agent.py +120 -120
gaia/agents/base/console.py +1841 -1841
gaia/agents/base/errors.py +237 -237
gaia/agents/base/mcp_agent.py +86 -86
gaia/agents/base/tools.py +83 -83
gaia/agents/blender/agent.py +556 -556
gaia/agents/blender/agent_simple.py +133 -135
gaia/agents/blender/app.py +211 -211
gaia/agents/blender/app_simple.py +41 -41
gaia/agents/blender/core/__init__.py +16 -16
gaia/agents/blender/core/materials.py +506 -506
gaia/agents/blender/core/objects.py +316 -316
gaia/agents/blender/core/rendering.py +225 -225
gaia/agents/blender/core/scene.py +220 -220
gaia/agents/blender/core/view.py +146 -146
gaia/agents/chat/__init__.py +9 -9
gaia/agents/chat/agent.py +835 -835
gaia/agents/chat/app.py +1058 -1058
gaia/agents/chat/session.py +508 -508
gaia/agents/chat/tools/__init__.py +15 -15
gaia/agents/chat/tools/file_tools.py +96 -96
gaia/agents/chat/tools/rag_tools.py +1729 -1729
gaia/agents/chat/tools/shell_tools.py +436 -436
gaia/agents/code/__init__.py +7 -7
gaia/agents/code/agent.py +549 -549
gaia/agents/code/cli.py +377 -0
gaia/agents/code/models.py +135 -135
gaia/agents/code/orchestration/__init__.py +24 -24
gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
gaia/agents/code/orchestration/checklist_generator.py +713 -713
gaia/agents/code/orchestration/factories/__init__.py +9 -9
gaia/agents/code/orchestration/factories/base.py +63 -63
gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
gaia/agents/code/orchestration/factories/python_factory.py +106 -106
gaia/agents/code/orchestration/orchestrator.py +841 -841
gaia/agents/code/orchestration/project_analyzer.py +391 -391
gaia/agents/code/orchestration/steps/__init__.py +67 -67
gaia/agents/code/orchestration/steps/base.py +188 -188
gaia/agents/code/orchestration/steps/error_handler.py +314 -314
gaia/agents/code/orchestration/steps/nextjs.py +828 -828
gaia/agents/code/orchestration/steps/python.py +307 -307
gaia/agents/code/orchestration/template_catalog.py +469 -469
gaia/agents/code/orchestration/workflows/__init__.py +14 -14
gaia/agents/code/orchestration/workflows/base.py +80 -80
gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
gaia/agents/code/orchestration/workflows/python.py +94 -94
gaia/agents/code/prompts/__init__.py +11 -11
gaia/agents/code/prompts/base_prompt.py +77 -77
gaia/agents/code/prompts/code_patterns.py +2036 -2036
gaia/agents/code/prompts/nextjs_prompt.py +40 -40
gaia/agents/code/prompts/python_prompt.py +109 -109
gaia/agents/code/schema_inference.py +365 -365
gaia/agents/code/system_prompt.py +41 -41
gaia/agents/code/tools/__init__.py +42 -42
gaia/agents/code/tools/cli_tools.py +1138 -1138
gaia/agents/code/tools/code_formatting.py +319 -319
gaia/agents/code/tools/code_tools.py +769 -769
gaia/agents/code/tools/error_fixing.py +1347 -1347
gaia/agents/code/tools/external_tools.py +180 -180
gaia/agents/code/tools/file_io.py +845 -845
gaia/agents/code/tools/prisma_tools.py +190 -190
gaia/agents/code/tools/project_management.py +1016 -1016
gaia/agents/code/tools/testing.py +321 -321
gaia/agents/code/tools/typescript_tools.py +122 -122
gaia/agents/code/tools/validation_parsing.py +461 -461
gaia/agents/code/tools/validation_tools.py +806 -806
gaia/agents/code/tools/web_dev_tools.py +1758 -1758
gaia/agents/code/validators/__init__.py +16 -16
gaia/agents/code/validators/antipattern_checker.py +241 -241
gaia/agents/code/validators/ast_analyzer.py +197 -197
gaia/agents/code/validators/requirements_validator.py +145 -145
gaia/agents/code/validators/syntax_validator.py +171 -171
gaia/agents/docker/__init__.py +7 -7
gaia/agents/docker/agent.py +642 -642
gaia/agents/emr/__init__.py +8 -8
gaia/agents/emr/agent.py +1506 -1506
gaia/agents/emr/cli.py +1322 -1322
gaia/agents/emr/constants.py +475 -475
gaia/agents/emr/dashboard/__init__.py +4 -4
gaia/agents/emr/dashboard/server.py +1974 -1974
gaia/agents/jira/__init__.py +11 -11
gaia/agents/jira/agent.py +894 -894
gaia/agents/jira/jql_templates.py +299 -299
gaia/agents/routing/__init__.py +7 -7
gaia/agents/routing/agent.py +567 -570
gaia/agents/routing/system_prompt.py +75 -75
gaia/agents/summarize/__init__.py +11 -0
gaia/agents/summarize/agent.py +885 -0
gaia/agents/summarize/prompts.py +129 -0
gaia/api/__init__.py +23 -23
gaia/api/agent_registry.py +238 -238
gaia/api/app.py +305 -305
gaia/api/openai_server.py +575 -575
gaia/api/schemas.py +186 -186
gaia/api/sse_handler.py +373 -373
gaia/apps/__init__.py +4 -4
gaia/apps/llm/__init__.py +6 -6
gaia/apps/llm/app.py +173 -169
gaia/apps/summarize/app.py +116 -633
gaia/apps/summarize/html_viewer.py +133 -133
gaia/apps/summarize/pdf_formatter.py +284 -284
gaia/audio/__init__.py +2 -2
gaia/audio/audio_client.py +439 -439
gaia/audio/audio_recorder.py +269 -269
gaia/audio/kokoro_tts.py +599 -599
gaia/audio/whisper_asr.py +432 -432
gaia/chat/__init__.py +16 -16
gaia/chat/app.py +430 -430
gaia/chat/prompts.py +522 -522
gaia/chat/sdk.py +1228 -1225
gaia/cli.py +5481 -5621
gaia/database/__init__.py +10 -10
gaia/database/agent.py +176 -176
gaia/database/mixin.py +290 -290
gaia/database/testing.py +64 -64
gaia/eval/batch_experiment.py +2332 -2332
gaia/eval/claude.py +542 -542
gaia/eval/config.py +37 -37
gaia/eval/email_generator.py +512 -512
gaia/eval/eval.py +3179 -3179
gaia/eval/groundtruth.py +1130 -1130
gaia/eval/transcript_generator.py +582 -582
gaia/eval/webapp/README.md +167 -167
gaia/eval/webapp/package-lock.json +875 -875
gaia/eval/webapp/package.json +20 -20
gaia/eval/webapp/public/app.js +3402 -3402
gaia/eval/webapp/public/index.html +87 -87
gaia/eval/webapp/public/styles.css +3661 -3661
gaia/eval/webapp/server.js +415 -415
gaia/eval/webapp/test-setup.js +72 -72
gaia/llm/__init__.py +9 -2
gaia/llm/base_client.py +60 -0
gaia/llm/exceptions.py +12 -0
gaia/llm/factory.py +70 -0
gaia/llm/lemonade_client.py +3236 -3221
gaia/llm/lemonade_manager.py +294 -294
gaia/llm/providers/__init__.py +9 -0
gaia/llm/providers/claude.py +108 -0
gaia/llm/providers/lemonade.py +120 -0
gaia/llm/providers/openai_provider.py +79 -0
gaia/llm/vlm_client.py +382 -382
gaia/logger.py +189 -189
gaia/mcp/agent_mcp_server.py +245 -245
gaia/mcp/blender_mcp_client.py +138 -138
gaia/mcp/blender_mcp_server.py +648 -648
gaia/mcp/context7_cache.py +332 -332
gaia/mcp/external_services.py +518 -518
gaia/mcp/mcp_bridge.py +811 -550
gaia/mcp/servers/__init__.py +6 -6
gaia/mcp/servers/docker_mcp.py +83 -83
gaia/perf_analysis.py +361 -0
gaia/rag/__init__.py +10 -10
gaia/rag/app.py +293 -293
gaia/rag/demo.py +304 -304
gaia/rag/pdf_utils.py +235 -235
gaia/rag/sdk.py +2194 -2194
gaia/security.py +163 -163
gaia/talk/app.py +289 -289
gaia/talk/sdk.py +538 -538
gaia/testing/__init__.py +87 -87
gaia/testing/assertions.py +330 -330
gaia/testing/fixtures.py +333 -333
gaia/testing/mocks.py +493 -493
gaia/util.py +46 -46
gaia/utils/__init__.py +33 -33
gaia/utils/file_watcher.py +675 -675
gaia/utils/parsing.py +223 -223
gaia/version.py +100 -100
amd_gaia-0.14.3.dist-info/RECORD +0 -168
gaia/agents/code/app.py +0 -266
gaia/llm/llm_client.py +0 -729
{amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
{amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0

gaia/eval/transcript_generator.py CHANGED Viewed

@@ -1,582 +1,582 @@
-# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
-# SPDX-License-Identifier: MIT
-import argparse
-import json
-from datetime import datetime
-from pathlib import Path
-from gaia.eval.claude import ClaudeClient
-from gaia.eval.config import DEFAULT_CLAUDE_MODEL
-from gaia.logger import get_logger
-class TranscriptGenerator:
-    """Generates example meeting transcripts for testing transcript summarization."""
-    def __init__(self, claude_model=None, max_tokens=8192):
-        self.log = get_logger(__name__)
-        # Initialize Claude client for dynamic content generation
-        if claude_model is None:
-            claude_model = DEFAULT_CLAUDE_MODEL
-        try:
-            self.claude_client = ClaudeClient(model=claude_model, max_tokens=max_tokens)
-            self.log.info(f"Initialized Claude client with model: {claude_model}")
-        except Exception as e:
-            self.log.error(f"Failed to initialize Claude client: {e}")
-            raise ValueError(
-                f"Could not initialize Claude client. Please ensure ANTHROPIC_API_KEY is set. Error: {e}"
-            )
-        # Meeting templates with different use cases
-        self.meeting_templates = {
-            "standup": {
-                "description": "Daily team standup meeting with status updates and blockers",
-                "participants": [
-                    "Alice Chen (Scrum Master)",
-                    "Bob Martinez (Developer)",
-                    "Carol Kim (Developer)",
-                    "David Wilson (QA Engineer)",
-                ],
-                "duration_minutes": 15,
-                "context": "A software development team's daily standup meeting where team members share their progress, current tasks, and any blockers they're facing.",
-            },
-            "planning": {
-                "description": "Sprint planning meeting for upcoming development cycle",
-                "participants": [
-                    "Sarah Johnson (Product Owner)",
-                    "Mike Thompson (Scrum Master)",
-                    "Lisa Wang (Senior Developer)",
-                    "Tom Rodriguez (Developer)",
-                    "Emma Davis (UX Designer)",
-                ],
-                "duration_minutes": 60,
-                "context": "A sprint planning session where the team reviews the product backlog, estimates story points, and commits to work for the upcoming sprint.",
-            },
-            "client_call": {
-                "description": "Client requirements gathering and project discussion",
-                "participants": [
-                    "Jennifer Adams (Account Manager)",
-                    "Robert Smith (Client - CTO)",
-                    "Maria Garcia (Client - Product Manager)",
-                    "Alex Brown (Technical Lead)",
-                ],
-                "duration_minutes": 45,
-                "context": "A client meeting to discuss project requirements, gather feedback, and align on technical solutions and timeline.",
-            },
-            "design_review": {
-                "description": "Technical design review for new system architecture",
-                "participants": [
-                    "Dr. Kevin Liu (Principal Architect)",
-                    "Priya Patel (Senior Developer)",
-                    "James Miller (DevOps Engineer)",
-                    "Sophie Turner (Security Engineer)",
-                    "Ryan O'Connor (Database Specialist)",
-                ],
-                "duration_minutes": 90,
-                "context": "A technical architecture review meeting where the team discusses system design, evaluates trade-offs, and makes architectural decisions.",
-            },
-            "performance_review": {
-                "description": "Quarterly performance review and goal setting",
-                "participants": [
-                    "Linda Zhang (Engineering Manager)",
-                    "Chris Anderson (Senior Software Engineer)",
-                ],
-                "duration_minutes": 30,
-                "context": "A one-on-one performance review meeting between a manager and employee to discuss accomplishments, areas for growth, and career goals.",
-            },
-            "all_hands": {
-                "description": "Company all-hands meeting with quarterly updates",
-                "participants": [
-                    "Mark Taylor (CEO)",
-                    "Rachel Green (CTO)",
-                    "John Lee (VP Sales)",
-                    "Amy White (VP Marketing)",
-                ],
-                "duration_minutes": 45,
-                "context": "A company-wide meeting where leadership shares business updates, financial results, and strategic direction with all employees.",
-            },
-            "budget_planning": {
-                "description": "Annual budget planning and resource allocation",
-                "participants": [
-                    "Patricia Brown (CFO)",
-                    "Daniel Kim (VP Engineering)",
-                    "Michelle Jones (VP Sales)",
-                    "Steve Wilson (VP Marketing)",
-                ],
-                "duration_minutes": 75,
-                "context": "A budget planning meeting where department heads discuss resource needs, budget allocations, and strategic investments for the upcoming year.",
-            },
-            "product_roadmap": {
-                "description": "Product roadmap discussion and feature prioritization",
-                "participants": [
-                    "Nicole Davis (Product Manager)",
-                    "Frank Chen (Engineering Lead)",
-                    "Jessica Miller (Senior Designer)",
-                    "Carlos Ruiz (Data Analyst)",
-                ],
-                "duration_minutes": 60,
-                "context": "A product planning meeting to review customer feedback, prioritize features, and define the product roadmap for the next quarter.",
-            },
-        }
-    def _estimate_tokens(self, text):
-        """Rough token estimation (approximately 4 characters per token)."""
-        return len(text) // 4
-    def _validate_transcript_format(self, content):
-        """
-        Validate that the transcript doesn't contain forbidden sections.
-        Returns (is_valid, warnings) tuple.
-        """
-        warnings = []
-        content_lower = content.lower()
-        # Check for common summary section headers
-        forbidden_patterns = [
-            ("summary:", "Summary section"),
-            ("action items:", "Action items section"),
-            ("action item:", "Action items section"),
-            ("key decisions:", "Key decisions section"),
-            ("decisions:", "Decisions section"),
-            ("next steps:", "Next steps section"),
-            ("follow-up:", "Follow-up section"),
-            ("follow up:", "Follow-up section"),
-            ("takeaways:", "Takeaways section"),
-            ("conclusions:", "Conclusions section"),
-            ("meeting notes:", "Meeting notes section"),
-            ("key points:", "Key points section"),
-        ]
-        for pattern, description in forbidden_patterns:
-            if pattern in content_lower:
-                # Check if it's not just part of dialogue (should have newline before it)
-                lines = content.split("\n")
-                for line in lines:
-                    line_lower = line.lower().strip()
-                    # If line starts with the pattern (not just mentioned in dialogue)
-                    if line_lower.startswith(pattern) or line_lower.startswith(
-                        "**" + pattern
-                    ):
-                        warnings.append(f"Found forbidden section: {description}")
-                        break
-        is_valid = len(warnings) == 0
-        return is_valid, warnings
-    def _generate_transcript_with_claude(self, meeting_type, target_tokens):
-        """Generate a meeting transcript using Claude based on the meeting type and target token count."""
-        if meeting_type not in self.meeting_templates:
-            raise ValueError(f"Unknown meeting type: {meeting_type}")
-        template = self.meeting_templates[meeting_type]
-        # Create a detailed prompt for Claude
-        prompt = f"""Generate a realistic meeting transcript for the following scenario:
-Meeting Type: {template['description']}
-Context: {template['context']}
-Participants: {', '.join(template['participants'])}
-Duration: {template['duration_minutes']} minutes
-Target Length: Approximately {target_tokens} tokens (about {target_tokens * 4} characters)
-CRITICAL FORMATTING REQUIREMENTS - The transcript MUST contain ONLY these three sections:
-1. **Meeting Header** - Include:
-   - Meeting title/type
-   - Date and time
-   - Location (can be virtual/in-person/hybrid)
-2. **Participant List** - List all participants with their roles
-3. **Transcript** - The actual meeting dialogue with:
-   - Natural dialogue between the participants that reflects their roles
-   - Realistic conversation flow appropriate for this type of meeting
-   - Specific technical details, decisions, and action items MENTIONED IN DIALOGUE (not as separate sections)
-   - Natural interruptions, clarifications, and back-and-forth discussion
-   - Format: "Speaker Name: What they said"
-CRITICAL: You MUST NOT include any of the following:
-- Summary or overview sections (DO NOT end with "Summary:" or "In summary:")
-- Action items section (decisions/tasks should only be mentioned within the dialogue)
-- Key decisions section
-- Next steps section
-- Follow-up items section
-- Takeaways or conclusions section
-- Meeting notes section
-- Any other meta-commentary or analysis AFTER the dialogue ends
-The transcript should end naturally with the last line of dialogue from a participant. Do not add any commentary, summary, or analysis after the dialogue ends.
-The transcript should be approximately {target_tokens} tokens long and feel authentic and professional, with each participant contributing meaningfully based on their role.
-Generate ONLY the three sections listed above (header, participants, transcript dialogue). The file should end when the dialogue ends."""
-        try:
-            # Generate the transcript using Claude with usage tracking
-            self.log.info(
-                f"Generating {meeting_type} transcript with Claude (target: {target_tokens} tokens)"
-            )
-            response = self.claude_client.get_completion_with_usage(prompt)
-            generated_content = (
-                response["content"][0].text
-                if isinstance(response["content"], list)
-                else response["content"]
-            )
-            actual_tokens = self._estimate_tokens(generated_content)
-            self.log.info(
-                f"Generated transcript: {actual_tokens} tokens (target: {target_tokens})"
-            )
-            return generated_content, response["usage"], response["cost"]
-        except Exception as e:
-            self.log.error(f"Error generating transcript with Claude: {e}")
-            raise RuntimeError(f"Failed to generate transcript for {meeting_type}: {e}")
-    def _extend_content_with_claude(
-        self, base_content, target_tokens, meeting_type, current_usage, current_cost
-    ):
-        """Extend existing content to reach target token count using Claude."""
-        current_tokens = self._estimate_tokens(base_content)
-        if current_tokens >= target_tokens:
-            return base_content, current_usage, current_cost
-        needed_tokens = target_tokens - current_tokens
-        template = self.meeting_templates[meeting_type]
-        extension_prompt = f"""Continue the following meeting transcript to make it approximately {needed_tokens} more tokens longer.
-Current transcript:
-{base_content}
-Please add more realistic dialogue that:
-1. Maintains the same tone and context as the existing transcript
-2. Continues naturally from where it left off
-3. Adds approximately {needed_tokens} more tokens of content
-4. Includes meaningful discussion relevant to a {template['description']}
-5. Maintains the same participants and their roles
-CRITICAL REQUIREMENTS:
-- Generate ONLY additional dialogue in the format "Speaker Name: What they said"
-- DO NOT conclude or wrap up the meeting
-- DO NOT add summary sections (no "Summary:", "In summary:", etc.)
-- DO NOT add action items sections (no "Action Items:", etc.)
-- DO NOT add key decisions, next steps, or takeaways sections
-- DO NOT add any meta-commentary or analysis
-- Just continue the natural, ongoing conversation between participants
-- The meeting should feel like it's still in progress, not ending
-Even though you're extending the transcript, do NOT treat this as the end of the meeting. The conversation should continue naturally without any concluding sections.
-Generate only the additional transcript dialogue (without repeating the existing content)."""
-        try:
-            self.log.info(f"Extending transcript by ~{needed_tokens} tokens")
-            response = self.claude_client.get_completion_with_usage(extension_prompt)
-            extension_content = (
-                response["content"][0].text
-                if isinstance(response["content"], list)
-                else response["content"]
-            )
-            extended_content = base_content + "\n\n" + extension_content
-            # Combine usage and cost data
-            total_usage = {
-                "input_tokens": current_usage["input_tokens"]
-                + response["usage"]["input_tokens"],
-                "output_tokens": current_usage["output_tokens"]
-                + response["usage"]["output_tokens"],
-                "total_tokens": current_usage["total_tokens"]
-                + response["usage"]["total_tokens"],
-            }
-            total_cost = {
-                "input_cost": current_cost["input_cost"]
-                + response["cost"]["input_cost"],
-                "output_cost": current_cost["output_cost"]
-                + response["cost"]["output_cost"],
-                "total_cost": current_cost["total_cost"]
-                + response["cost"]["total_cost"],
-            }
-            actual_tokens = self._estimate_tokens(extended_content)
-            self.log.info(f"Extended transcript to {actual_tokens} tokens")
-            return extended_content, total_usage, total_cost
-        except Exception as e:
-            self.log.error(f"Error extending transcript with Claude: {e}")
-            # Return original content if extension fails
-            return base_content, current_usage, current_cost
-    def generate_transcript(self, meeting_type, target_tokens=1000):
-        """Generate a single meeting transcript of specified type and approximate token count using Claude."""
-        if meeting_type not in self.meeting_templates:
-            raise ValueError(f"Unknown meeting type: {meeting_type}")
-        template = self.meeting_templates[meeting_type]
-        try:
-            # Generate transcript with Claude
-            content, usage, cost = self._generate_transcript_with_claude(
-                meeting_type, target_tokens
-            )
-            actual_tokens = self._estimate_tokens(content)
-            # If we're significantly under target, try to extend
-            if actual_tokens < target_tokens * 0.8:  # If less than 80% of target
-                self.log.info(
-                    f"Transcript too short ({actual_tokens} tokens), extending to reach target"
-                )
-                content, usage, cost = self._extend_content_with_claude(
-                    content, target_tokens, meeting_type, usage, cost
-                )
-                actual_tokens = self._estimate_tokens(content)
-            # Add metadata
-            metadata = {
-                "meeting_type": meeting_type,
-                "description": template["description"],
-                "participants": template["participants"],
-                "estimated_duration_minutes": template["duration_minutes"],
-                "estimated_tokens": actual_tokens,
-                "target_tokens": target_tokens,
-                "generated_date": datetime.now().isoformat(),
-                "claude_model": self.claude_client.model,
-                "claude_usage": usage,
-                "claude_cost": cost,
-            }
-            return content, metadata
-        except Exception as e:
-            self.log.error(f"Failed to generate transcript for {meeting_type}: {e}")
-            raise
-    def generate_transcript_set(self, output_dir, target_tokens=1000, count_per_type=1):
-        """Generate a set of meeting transcripts and save them to the output directory."""
-        output_dir = Path(output_dir)
-        # Create meetings subdirectory for organized output
-        meetings_dir = output_dir / "meetings"
-        meetings_dir.mkdir(parents=True, exist_ok=True)
-        output_dir = meetings_dir  # Use meetings subdirectory as base
-        generated_files = []
-        all_metadata = []
-        total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
-        total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
-        for meeting_type in self.meeting_templates.keys():
-            for i in range(count_per_type):
-                self.log.info(
-                    f"Generating {meeting_type} transcript {i+1}/{count_per_type}"
-                )
-                # Generate transcript
-                content, metadata = self.generate_transcript(
-                    meeting_type, target_tokens
-                )
-                # Create filename
-                if count_per_type == 1:
-                    filename = f"{meeting_type}_meeting.txt"
-                else:
-                    filename = f"{meeting_type}_meeting_{i+1}.txt"
-                # Save transcript file
-                file_path = output_dir / filename
-                with open(file_path, "w", encoding="utf-8") as f:
-                    f.write(content)
-                # Update metadata with file info
-                metadata["filename"] = filename
-                metadata["file_path"] = str(file_path)
-                metadata["file_size_bytes"] = len(content.encode("utf-8"))
-                generated_files.append(str(file_path))
-                all_metadata.append(metadata)
-                # Accumulate usage and cost
-                usage = metadata["claude_usage"]
-                cost = metadata["claude_cost"]
-                total_usage["input_tokens"] += usage["input_tokens"]
-                total_usage["output_tokens"] += usage["output_tokens"]
-                total_usage["total_tokens"] += usage["total_tokens"]
-                total_cost["input_cost"] += cost["input_cost"]
-                total_cost["output_cost"] += cost["output_cost"]
-                total_cost["total_cost"] += cost["total_cost"]
-                self.log.info(
-                    f"Generated {filename} ({metadata['estimated_tokens']} tokens, ${cost['total_cost']:.4f})"
-                )
-        # Create summary metadata file
-        summary = {
-            "generation_info": {
-                "generated_date": datetime.now().isoformat(),
-                "total_files": len(generated_files),
-                "target_tokens_per_file": target_tokens,
-                "meeting_types": list(self.meeting_templates.keys()),
-                "files_per_type": count_per_type,
-                "claude_model": self.claude_client.model,
-                "total_claude_usage": total_usage,
-                "total_claude_cost": total_cost,
-            },
-            "transcripts": all_metadata,
-        }
-        summary_path = output_dir / "transcript_metadata.json"
-        with open(summary_path, "w", encoding="utf-8") as f:
-            json.dump(summary, f, indent=2)
-        self.log.info(
-            f"Generated {len(generated_files)} transcript files in {output_dir}"
-        )
-        self.log.info(
-            f"Total cost: ${total_cost['total_cost']:.4f} ({total_usage['total_tokens']:,} tokens)"
-        )
-        self.log.info(f"Summary metadata saved to {summary_path}")
-        return {
-            "output_directory": str(output_dir),
-            "generated_files": generated_files,
-            "metadata_file": str(summary_path),
-            "summary": summary,
-        }
-def main():
-    """Command line interface for transcript generation."""
-    parser = argparse.ArgumentParser(
-        description="Generate example meeting transcripts using Claude AI for testing transcript summarization",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-  # Generate one transcript of each type with ~1000 tokens
-  python -m gaia.eval.transcript_generator -o ./output/transcripts
-  # Generate larger transcripts (~3000 tokens each)
-  python -m gaia.eval.transcript_generator -o ./output/transcripts --target-tokens 3000
-  # Generate multiple transcripts per type
-  python -m gaia.eval.transcript_generator -o ./output/transcripts --count-per-type 3
-  # Generate specific meeting types only
-  python -m gaia.eval.transcript_generator -o ./output/transcripts --meeting-types standup planning
-  # Generate small transcripts for quick testing
-  python -m gaia.eval.transcript_generator -o ./test_transcripts --target-tokens 500
-  # Use different Claude model
-  python -m gaia.eval.transcript_generator -o ./output/transcripts --claude-model claude-3-opus-20240229
-        """,
-    )
-    parser.add_argument(
-        "-o",
-        "--output-dir",
-        type=str,
-        required=True,
-        help="Output directory for generated transcript files",
-    )
-    parser.add_argument(
-        "--target-tokens",
-        type=int,
-        default=1000,
-        help="Target token count per transcript (approximate, default: 1000)",
-    )
-    parser.add_argument(
-        "--count-per-type",
-        type=int,
-        default=1,
-        help="Number of transcripts to generate per meeting type (default: 1)",
-    )
-    parser.add_argument(
-        "--meeting-types",
-        nargs="+",
-        choices=[
-            "standup",
-            "planning",
-            "client_call",
-            "design_review",
-            "performance_review",
-            "all_hands",
-            "budget_planning",
-            "product_roadmap",
-        ],
-        help="Specific meeting types to generate (default: all types)",
-    )
-    parser.add_argument(
-        "--claude-model",
-        type=str,
-        default=None,
-        help=f"Claude model to use for transcript generation (default: {DEFAULT_CLAUDE_MODEL})",
-    )
-    args = parser.parse_args()
-    try:
-        generator = TranscriptGenerator(claude_model=args.claude_model)
-    except Exception as e:
-        print(f"❌ Error initializing transcript generator: {e}")
-        print("Make sure ANTHROPIC_API_KEY is set in your environment.")
-        return 1
-    try:
-        # Filter meeting types if specified
-        original_templates = None
-        if args.meeting_types:
-            # Temporarily filter the templates
-            original_templates = generator.meeting_templates.copy()
-            generator.meeting_templates = {
-                k: v
-                for k, v in generator.meeting_templates.items()
-                if k in args.meeting_types
-            }
-        result = generator.generate_transcript_set(
-            output_dir=args.output_dir,
-            target_tokens=args.target_tokens,
-            count_per_type=args.count_per_type,
-        )
-        print("✅ Successfully generated meeting transcripts")
-        print(f"  Output directory: {result['output_directory']}")
-        print(f"  Generated files: {len(result['generated_files'])}")
-        print(f"  Metadata file: {result['metadata_file']}")
-        # Show summary stats
-        summary = result["summary"]
-        generation_info = summary["generation_info"]
-        total_tokens = generation_info["total_claude_usage"]["total_tokens"]
-        total_cost = generation_info["total_claude_cost"]["total_cost"]
-        avg_tokens = (
-            total_tokens / len(summary["transcripts"]) if summary["transcripts"] else 0
-        )
-        print(f"  Total tokens used: {total_tokens:,}")
-        print(f"  Total cost: ${total_cost:.4f}")
-        print(f"  Average tokens per file: {avg_tokens:.0f}")
-        print(f"  Average cost per file: ${total_cost/len(summary['transcripts']):.4f}")
-        print(f"  Meeting types: {', '.join(generation_info['meeting_types'])}")
-        print(f"  Claude model: {generation_info['claude_model']}")
-        # Restore original templates if they were filtered
-        if args.meeting_types and original_templates is not None:
-            generator.meeting_templates = original_templates
-    except Exception as e:
-        print(f"❌ Error generating transcripts: {e}")
-        return 1
-    return 0
-if __name__ == "__main__":
-    exit(main())
+# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+import argparse
+import json
+from datetime import datetime
+from pathlib import Path
+from gaia.eval.claude import ClaudeClient
+from gaia.eval.config import DEFAULT_CLAUDE_MODEL
+from gaia.logger import get_logger
class TranscriptGenerator:
    """Generates example meeting transcripts for testing transcript summarization.

    An instance owns a Claude client (``claude_client``), a logger (``log``)
    and a table of meeting templates (``meeting_templates``) keyed by meeting
    type.  Transcripts are produced by prompting Claude with a template and a
    target length; usage and cost figures returned by the client are carried
    along in the metadata.
    """

    # Lower-case line prefixes that mark a summary-style section, paired with
    # the label reported in validation warnings.
    _FORBIDDEN_SECTIONS = (
        ("summary:", "Summary section"),
        ("action items:", "Action items section"),
        ("action item:", "Action items section"),
        ("key decisions:", "Key decisions section"),
        ("decisions:", "Decisions section"),
        ("next steps:", "Next steps section"),
        ("follow-up:", "Follow-up section"),
        ("follow up:", "Follow-up section"),
        ("takeaways:", "Takeaways section"),
        ("conclusions:", "Conclusions section"),
        ("meeting notes:", "Meeting notes section"),
        ("key points:", "Key points section"),
    )

    # Keys summed when combining usage / cost records from several calls.
    _USAGE_KEYS = ("input_tokens", "output_tokens", "total_tokens")
    _COST_KEYS = ("input_cost", "output_cost", "total_cost")

    def __init__(self, claude_model=None, max_tokens=8192):
        """Create the Claude client and the built-in meeting templates.

        Args:
            claude_model: Model name handed to ``ClaudeClient``; falls back to
                ``DEFAULT_CLAUDE_MODEL`` when ``None``.
            max_tokens: Forwarded to ``ClaudeClient`` as ``max_tokens``.

        Raises:
            ValueError: If the Claude client cannot be constructed.  The
                original exception is attached as the cause.
        """
        self.log = get_logger(__name__)
        # Initialize Claude client for dynamic content generation
        if claude_model is None:
            claude_model = DEFAULT_CLAUDE_MODEL
        try:
            self.claude_client = ClaudeClient(model=claude_model, max_tokens=max_tokens)
            self.log.info(f"Initialized Claude client with model: {claude_model}")
        except Exception as e:
            self.log.error(f"Failed to initialize Claude client: {e}")
            raise ValueError(
                f"Could not initialize Claude client. Please ensure ANTHROPIC_API_KEY is set. Error: {e}"
            ) from e
        # Meeting templates with different use cases
        self.meeting_templates = {
            "standup": {
                "description": "Daily team standup meeting with status updates and blockers",
                "participants": [
                    "Alice Chen (Scrum Master)",
                    "Bob Martinez (Developer)",
                    "Carol Kim (Developer)",
                    "David Wilson (QA Engineer)",
                ],
                "duration_minutes": 15,
                "context": "A software development team's daily standup meeting where team members share their progress, current tasks, and any blockers they're facing.",
            },
            "planning": {
                "description": "Sprint planning meeting for upcoming development cycle",
                "participants": [
                    "Sarah Johnson (Product Owner)",
                    "Mike Thompson (Scrum Master)",
                    "Lisa Wang (Senior Developer)",
                    "Tom Rodriguez (Developer)",
                    "Emma Davis (UX Designer)",
                ],
                "duration_minutes": 60,
                "context": "A sprint planning session where the team reviews the product backlog, estimates story points, and commits to work for the upcoming sprint.",
            },
            "client_call": {
                "description": "Client requirements gathering and project discussion",
                "participants": [
                    "Jennifer Adams (Account Manager)",
                    "Robert Smith (Client - CTO)",
                    "Maria Garcia (Client - Product Manager)",
                    "Alex Brown (Technical Lead)",
                ],
                "duration_minutes": 45,
                "context": "A client meeting to discuss project requirements, gather feedback, and align on technical solutions and timeline.",
            },
            "design_review": {
                "description": "Technical design review for new system architecture",
                "participants": [
                    "Dr. Kevin Liu (Principal Architect)",
                    "Priya Patel (Senior Developer)",
                    "James Miller (DevOps Engineer)",
                    "Sophie Turner (Security Engineer)",
                    "Ryan O'Connor (Database Specialist)",
                ],
                "duration_minutes": 90,
                "context": "A technical architecture review meeting where the team discusses system design, evaluates trade-offs, and makes architectural decisions.",
            },
            "performance_review": {
                "description": "Quarterly performance review and goal setting",
                "participants": [
                    "Linda Zhang (Engineering Manager)",
                    "Chris Anderson (Senior Software Engineer)",
                ],
                "duration_minutes": 30,
                "context": "A one-on-one performance review meeting between a manager and employee to discuss accomplishments, areas for growth, and career goals.",
            },
            "all_hands": {
                "description": "Company all-hands meeting with quarterly updates",
                "participants": [
                    "Mark Taylor (CEO)",
                    "Rachel Green (CTO)",
                    "John Lee (VP Sales)",
                    "Amy White (VP Marketing)",
                ],
                "duration_minutes": 45,
                "context": "A company-wide meeting where leadership shares business updates, financial results, and strategic direction with all employees.",
            },
            "budget_planning": {
                "description": "Annual budget planning and resource allocation",
                "participants": [
                    "Patricia Brown (CFO)",
                    "Daniel Kim (VP Engineering)",
                    "Michelle Jones (VP Sales)",
                    "Steve Wilson (VP Marketing)",
                ],
                "duration_minutes": 75,
                "context": "A budget planning meeting where department heads discuss resource needs, budget allocations, and strategic investments for the upcoming year.",
            },
            "product_roadmap": {
                "description": "Product roadmap discussion and feature prioritization",
                "participants": [
                    "Nicole Davis (Product Manager)",
                    "Frank Chen (Engineering Lead)",
                    "Jessica Miller (Senior Designer)",
                    "Carlos Ruiz (Data Analyst)",
                ],
                "duration_minutes": 60,
                "context": "A product planning meeting to review customer feedback, prioritize features, and define the product roadmap for the next quarter.",
            },
        }

    @staticmethod
    def _response_text(response):
        """Return the generated text from a Claude completion response.

        ``response["content"]`` is either a list whose first element exposes
        a ``.text`` attribute, or the text itself.
        """
        content = response["content"]
        return content[0].text if isinstance(content, list) else content

    @staticmethod
    def _sum_records(first, second, keys):
        """Return a new dict holding ``first[k] + second[k]`` for each key."""
        return {key: first[key] + second[key] for key in keys}

    def _estimate_tokens(self, text):
        """Rough token estimation (approximately 4 characters per token)."""
        return len(text) // 4

    def _validate_transcript_format(self, content):
        """Check that the transcript contains no summary-style sections.

        A section counts as present when some line *starts* with one of the
        forbidden headers (case-insensitive).  Markdown decoration around the
        header (``**``, ``#``, ``_``) is ignored, so ``## Summary:`` and
        ``**Summary**:`` are detected as well as a bare ``Summary:``.  A header
        that only appears later in a line, e.g. inside a speaker's sentence,
        is not flagged.

        Args:
            content: Full transcript text.

        Returns:
            tuple: ``(is_valid, warnings)`` where ``warnings`` lists one
            message per distinct forbidden section, in the order of
            ``_FORBIDDEN_SECTIONS``.
        """
        # Normalise every line once instead of re-splitting per pattern.
        line_starts = [
            line.strip().lower().replace("*", "").lstrip("#_ \t")
            for line in content.split("\n")
        ]
        warnings = []
        for pattern, description in self._FORBIDDEN_SECTIONS:
            if not any(start.startswith(pattern) for start in line_starts):
                continue
            message = f"Found forbidden section: {description}"
            # Several patterns share a label; report each section only once.
            if message not in warnings:
                warnings.append(message)
        return not warnings, warnings

    def _generate_transcript_with_claude(self, meeting_type, target_tokens):
        """Generate a meeting transcript using Claude based on the meeting type and target token count.

        Args:
            meeting_type: Key into ``meeting_templates``.
            target_tokens: Approximate length requested in the prompt.

        Returns:
            tuple: ``(content, usage, cost)`` taken from the client response.

        Raises:
            ValueError: If ``meeting_type`` is not a known template.
            RuntimeError: If the Claude call or response handling fails; the
                original exception is attached as the cause.
        """
        if meeting_type not in self.meeting_templates:
            raise ValueError(f"Unknown meeting type: {meeting_type}")
        template = self.meeting_templates[meeting_type]
        # Create a detailed prompt for Claude
        prompt = f"""Generate a realistic meeting transcript for the following scenario:
Meeting Type: {template['description']}
Context: {template['context']}
Participants: {', '.join(template['participants'])}
Duration: {template['duration_minutes']} minutes
Target Length: Approximately {target_tokens} tokens (about {target_tokens * 4} characters)
CRITICAL FORMATTING REQUIREMENTS - The transcript MUST contain ONLY these three sections:
1. **Meeting Header** - Include:
   - Meeting title/type
   - Date and time
   - Location (can be virtual/in-person/hybrid)
2. **Participant List** - List all participants with their roles
3. **Transcript** - The actual meeting dialogue with:
   - Natural dialogue between the participants that reflects their roles
   - Realistic conversation flow appropriate for this type of meeting
   - Specific technical details, decisions, and action items MENTIONED IN DIALOGUE (not as separate sections)
   - Natural interruptions, clarifications, and back-and-forth discussion
   - Format: "Speaker Name: What they said"
CRITICAL: You MUST NOT include any of the following:
- Summary or overview sections (DO NOT end with "Summary:" or "In summary:")
- Action items section (decisions/tasks should only be mentioned within the dialogue)
- Key decisions section
- Next steps section
- Follow-up items section
- Takeaways or conclusions section
- Meeting notes section
- Any other meta-commentary or analysis AFTER the dialogue ends
The transcript should end naturally with the last line of dialogue from a participant. Do not add any commentary, summary, or analysis after the dialogue ends.
The transcript should be approximately {target_tokens} tokens long and feel authentic and professional, with each participant contributing meaningfully based on their role.
Generate ONLY the three sections listed above (header, participants, transcript dialogue). The file should end when the dialogue ends."""
        try:
            # Generate the transcript using Claude with usage tracking
            self.log.info(
                f"Generating {meeting_type} transcript with Claude (target: {target_tokens} tokens)"
            )
            response = self.claude_client.get_completion_with_usage(prompt)
            generated_content = self._response_text(response)
            actual_tokens = self._estimate_tokens(generated_content)
            self.log.info(
                f"Generated transcript: {actual_tokens} tokens (target: {target_tokens})"
            )
            return generated_content, response["usage"], response["cost"]
        except Exception as e:
            self.log.error(f"Error generating transcript with Claude: {e}")
            raise RuntimeError(
                f"Failed to generate transcript for {meeting_type}: {e}"
            ) from e

    def _extend_content_with_claude(
        self, base_content, target_tokens, meeting_type, current_usage, current_cost
    ):
        """Extend existing content to reach target token count using Claude.

        This is best-effort: if the extension call fails, the error is logged
        and the inputs are returned unchanged.

        Args:
            base_content: Transcript generated so far.
            target_tokens: Desired estimated token count.
            meeting_type: Key into ``meeting_templates``.
            current_usage: Usage record accumulated so far.
            current_cost: Cost record accumulated so far.

        Returns:
            tuple: ``(content, usage, cost)`` with the extension appended and
            the usage/cost of the extra call added in, or the inputs unchanged
            when no extension was needed or the call failed.
        """
        current_tokens = self._estimate_tokens(base_content)
        if current_tokens >= target_tokens:
            return base_content, current_usage, current_cost
        needed_tokens = target_tokens - current_tokens
        template = self.meeting_templates[meeting_type]
        extension_prompt = f"""Continue the following meeting transcript to make it approximately {needed_tokens} more tokens longer.
Current transcript:
{base_content}
Please add more realistic dialogue that:
1. Maintains the same tone and context as the existing transcript
2. Continues naturally from where it left off
3. Adds approximately {needed_tokens} more tokens of content
4. Includes meaningful discussion relevant to a {template['description']}
5. Maintains the same participants and their roles
CRITICAL REQUIREMENTS:
- Generate ONLY additional dialogue in the format "Speaker Name: What they said"
- DO NOT conclude or wrap up the meeting
- DO NOT add summary sections (no "Summary:", "In summary:", etc.)
- DO NOT add action items sections (no "Action Items:", etc.)
- DO NOT add key decisions, next steps, or takeaways sections
- DO NOT add any meta-commentary or analysis
- Just continue the natural, ongoing conversation between participants
- The meeting should feel like it's still in progress, not ending
Even though you're extending the transcript, do NOT treat this as the end of the meeting. The conversation should continue naturally without any concluding sections.
Generate only the additional transcript dialogue (without repeating the existing content)."""
        try:
            self.log.info(f"Extending transcript by ~{needed_tokens} tokens")
            response = self.claude_client.get_completion_with_usage(extension_prompt)
            extended_content = base_content + "\n\n" + self._response_text(response)
            # Combine usage and cost data
            total_usage = self._sum_records(
                current_usage, response["usage"], self._USAGE_KEYS
            )
            total_cost = self._sum_records(
                current_cost, response["cost"], self._COST_KEYS
            )
            actual_tokens = self._estimate_tokens(extended_content)
            self.log.info(f"Extended transcript to {actual_tokens} tokens")
            return extended_content, total_usage, total_cost
        except Exception as e:
            self.log.error(f"Error extending transcript with Claude: {e}")
            # Return original content if extension fails
            return base_content, current_usage, current_cost

    def generate_transcript(self, meeting_type, target_tokens=1000):
        """Generate a single meeting transcript of specified type and approximate token count using Claude.

        If the first result is estimated at under 80% of ``target_tokens``,
        one extension pass is attempted.  The final text is checked with
        ``_validate_transcript_format`` and any findings are logged as
        warnings; they do not cause a failure.

        Args:
            meeting_type: Key into ``meeting_templates``.
            target_tokens: Approximate length to aim for.

        Returns:
            tuple: ``(content, metadata)`` where ``metadata`` describes the
            template, token estimate, model, usage and cost.

        Raises:
            ValueError: If ``meeting_type`` is not a known template.
            RuntimeError: Propagated from the initial generation call.
        """
        if meeting_type not in self.meeting_templates:
            raise ValueError(f"Unknown meeting type: {meeting_type}")
        template = self.meeting_templates[meeting_type]
        try:
            # Generate transcript with Claude
            content, usage, cost = self._generate_transcript_with_claude(
                meeting_type, target_tokens
            )
            actual_tokens = self._estimate_tokens(content)
            # If we're significantly under target, try to extend
            if actual_tokens < target_tokens * 0.8:  # If less than 80% of target
                self.log.info(
                    f"Transcript too short ({actual_tokens} tokens), extending to reach target"
                )
                content, usage, cost = self._extend_content_with_claude(
                    content, target_tokens, meeting_type, usage, cost
                )
                actual_tokens = self._estimate_tokens(content)
            # Surface prompt violations instead of silently saving them.
            _, format_warnings = self._validate_transcript_format(content)
            for format_warning in format_warnings:
                self.log.warning(f"{meeting_type} transcript: {format_warning}")
            # Add metadata
            metadata = {
                "meeting_type": meeting_type,
                "description": template["description"],
                "participants": template["participants"],
                "estimated_duration_minutes": template["duration_minutes"],
                "estimated_tokens": actual_tokens,
                "target_tokens": target_tokens,
                "generated_date": datetime.now().isoformat(),
                "claude_model": self.claude_client.model,
                "claude_usage": usage,
                "claude_cost": cost,
            }
            return content, metadata
        except Exception as e:
            self.log.error(f"Failed to generate transcript for {meeting_type}: {e}")
            raise

    def generate_transcript_set(self, output_dir, target_tokens=1000, count_per_type=1):
        """Generate a set of meeting transcripts and save them to the output directory.

        Files are written to ``<output_dir>/meetings``: one ``.txt`` per
        transcript plus ``transcript_metadata.json`` summarising the run.

        Args:
            output_dir: Base directory; the ``meetings`` subdirectory is
                created if needed.
            target_tokens: Approximate length of each transcript.
            count_per_type: Number of transcripts per entry in
                ``meeting_templates``.

        Returns:
            dict: ``output_directory``, ``generated_files``,
            ``metadata_file`` and the ``summary`` written to the JSON file.
        """
        # Create meetings subdirectory for organized output
        meetings_dir = Path(output_dir) / "meetings"
        meetings_dir.mkdir(parents=True, exist_ok=True)
        generated_files = []
        all_metadata = []
        total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
        for meeting_type in self.meeting_templates:
            for i in range(count_per_type):
                self.log.info(
                    f"Generating {meeting_type} transcript {i+1}/{count_per_type}"
                )
                # Generate transcript
                content, metadata = self.generate_transcript(
                    meeting_type, target_tokens
                )
                # Number the files only when there are several per type
                if count_per_type == 1:
                    filename = f"{meeting_type}_meeting.txt"
                else:
                    filename = f"{meeting_type}_meeting_{i+1}.txt"
                # Save transcript file
                file_path = meetings_dir / filename
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(content)
                # Update metadata with file info
                metadata["filename"] = filename
                metadata["file_path"] = str(file_path)
                metadata["file_size_bytes"] = len(content.encode("utf-8"))
                generated_files.append(str(file_path))
                all_metadata.append(metadata)
                # Accumulate usage and cost
                cost = metadata["claude_cost"]
                total_usage = self._sum_records(
                    total_usage, metadata["claude_usage"], self._USAGE_KEYS
                )
                total_cost = self._sum_records(total_cost, cost, self._COST_KEYS)
                self.log.info(
                    f"Generated {filename} ({metadata['estimated_tokens']} tokens, ${cost['total_cost']:.4f})"
                )
        # Create summary metadata file
        summary = {
            "generation_info": {
                "generated_date": datetime.now().isoformat(),
                "total_files": len(generated_files),
                "target_tokens_per_file": target_tokens,
                "meeting_types": list(self.meeting_templates),
                "files_per_type": count_per_type,
                "claude_model": self.claude_client.model,
                "total_claude_usage": total_usage,
                "total_claude_cost": total_cost,
            },
            "transcripts": all_metadata,
        }
        summary_path = meetings_dir / "transcript_metadata.json"
        with open(summary_path, "w", encoding="utf-8") as f:
            json.dump(summary, f, indent=2)
        self.log.info(
            f"Generated {len(generated_files)} transcript files in {meetings_dir}"
        )
        self.log.info(
            f"Total cost: ${total_cost['total_cost']:.4f} ({total_usage['total_tokens']:,} tokens)"
        )
        self.log.info(f"Summary metadata saved to {summary_path}")
        return {
            "output_directory": str(meetings_dir),
            "generated_files": generated_files,
            "metadata_file": str(summary_path),
            "summary": summary,
        }
def main():
    """Command line interface for transcript generation.

    Parses the command line, builds a ``TranscriptGenerator``, optionally
    restricts it to the requested meeting types, generates the transcript set
    and prints a short report.

    Returns:
        int: ``0`` on success, ``1`` if the generator could not be created or
        generation failed.
    """
    parser = argparse.ArgumentParser(
        description="Generate example meeting transcripts using Claude AI for testing transcript summarization",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate one transcript of each type with ~1000 tokens
  python -m gaia.eval.transcript_generator -o ./output/transcripts
  # Generate larger transcripts (~3000 tokens each)
  python -m gaia.eval.transcript_generator -o ./output/transcripts --target-tokens 3000
  # Generate multiple transcripts per type
  python -m gaia.eval.transcript_generator -o ./output/transcripts --count-per-type 3
  # Generate specific meeting types only
  python -m gaia.eval.transcript_generator -o ./output/transcripts --meeting-types standup planning
  # Generate small transcripts for quick testing
  python -m gaia.eval.transcript_generator -o ./test_transcripts --target-tokens 500
  # Use different Claude model
  python -m gaia.eval.transcript_generator -o ./output/transcripts --claude-model claude-3-opus-20240229
        """,
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        type=str,
        required=True,
        help="Output directory for generated transcript files",
    )
    parser.add_argument(
        "--target-tokens",
        type=int,
        default=1000,
        help="Target token count per transcript (approximate, default: 1000)",
    )
    parser.add_argument(
        "--count-per-type",
        type=int,
        default=1,
        help="Number of transcripts to generate per meeting type (default: 1)",
    )
    parser.add_argument(
        "--meeting-types",
        nargs="+",
        choices=[
            "standup",
            "planning",
            "client_call",
            "design_review",
            "performance_review",
            "all_hands",
            "budget_planning",
            "product_roadmap",
        ],
        help="Specific meeting types to generate (default: all types)",
    )
    parser.add_argument(
        "--claude-model",
        type=str,
        default=None,
        help=f"Claude model to use for transcript generation (default: {DEFAULT_CLAUDE_MODEL})",
    )
    args = parser.parse_args()

    try:
        generator = TranscriptGenerator(claude_model=args.claude_model)
    except Exception as e:
        print(f"❌ Error initializing transcript generator: {e}")
        print("Make sure ANTHROPIC_API_KEY is set in your environment.")
        return 1

    # Keep a handle on the full template table so it can always be restored.
    original_templates = generator.meeting_templates
    try:
        # Filter meeting types if specified
        if args.meeting_types:
            generator.meeting_templates = {
                k: v for k, v in original_templates.items() if k in args.meeting_types
            }
        result = generator.generate_transcript_set(
            output_dir=args.output_dir,
            target_tokens=args.target_tokens,
            count_per_type=args.count_per_type,
        )
        print("✅ Successfully generated meeting transcripts")
        print(f"  Output directory: {result['output_directory']}")
        print(f"  Generated files: {len(result['generated_files'])}")
        print(f"  Metadata file: {result['metadata_file']}")
        # Show summary stats
        summary = result["summary"]
        generation_info = summary["generation_info"]
        total_tokens = generation_info["total_claude_usage"]["total_tokens"]
        total_cost = generation_info["total_claude_cost"]["total_cost"]
        # Guard both averages: a run may legitimately produce zero files
        # (e.g. --count-per-type 0), which must not be reported as an error.
        file_count = len(summary["transcripts"])
        avg_tokens = total_tokens / file_count if file_count else 0
        avg_cost = total_cost / file_count if file_count else 0.0
        print(f"  Total tokens used: {total_tokens:,}")
        print(f"  Total cost: ${total_cost:.4f}")
        print(f"  Average tokens per file: {avg_tokens:.0f}")
        print(f"  Average cost per file: ${avg_cost:.4f}")
        print(f"  Meeting types: {', '.join(generation_info['meeting_types'])}")
        print(f"  Claude model: {generation_info['claude_model']}")
    except Exception as e:
        print(f"❌ Error generating transcripts: {e}")
        return 1
    finally:
        # Restore the unfiltered templates on success and on failure alike.
        generator.meeting_templates = original_templates
    return 0
# Script entry point: propagate main()'s return value as the process exit
# status. SystemExit is raised directly because the bare exit() helper is
# installed by the site module and is missing when site is disabled.
if __name__ == "__main__":
    raise SystemExit(main())

amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl

amd-gaia 0.14.3py3-none-any.whl → 0.15.1py3-none-any.whl