amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5621
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.14.3.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -729
  180. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,582 +1,582 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
-
4
- import argparse
5
- import json
6
- from datetime import datetime
7
- from pathlib import Path
8
-
9
- from gaia.eval.claude import ClaudeClient
10
- from gaia.eval.config import DEFAULT_CLAUDE_MODEL
11
- from gaia.logger import get_logger
12
-
13
-
14
- class TranscriptGenerator:
15
- """Generates example meeting transcripts for testing transcript summarization."""
16
-
17
- def __init__(self, claude_model=None, max_tokens=8192):
18
- self.log = get_logger(__name__)
19
-
20
- # Initialize Claude client for dynamic content generation
21
- if claude_model is None:
22
- claude_model = DEFAULT_CLAUDE_MODEL
23
- try:
24
- self.claude_client = ClaudeClient(model=claude_model, max_tokens=max_tokens)
25
- self.log.info(f"Initialized Claude client with model: {claude_model}")
26
- except Exception as e:
27
- self.log.error(f"Failed to initialize Claude client: {e}")
28
- raise ValueError(
29
- f"Could not initialize Claude client. Please ensure ANTHROPIC_API_KEY is set. Error: {e}"
30
- )
31
-
32
- # Meeting templates with different use cases
33
- self.meeting_templates = {
34
- "standup": {
35
- "description": "Daily team standup meeting with status updates and blockers",
36
- "participants": [
37
- "Alice Chen (Scrum Master)",
38
- "Bob Martinez (Developer)",
39
- "Carol Kim (Developer)",
40
- "David Wilson (QA Engineer)",
41
- ],
42
- "duration_minutes": 15,
43
- "context": "A software development team's daily standup meeting where team members share their progress, current tasks, and any blockers they're facing.",
44
- },
45
- "planning": {
46
- "description": "Sprint planning meeting for upcoming development cycle",
47
- "participants": [
48
- "Sarah Johnson (Product Owner)",
49
- "Mike Thompson (Scrum Master)",
50
- "Lisa Wang (Senior Developer)",
51
- "Tom Rodriguez (Developer)",
52
- "Emma Davis (UX Designer)",
53
- ],
54
- "duration_minutes": 60,
55
- "context": "A sprint planning session where the team reviews the product backlog, estimates story points, and commits to work for the upcoming sprint.",
56
- },
57
- "client_call": {
58
- "description": "Client requirements gathering and project discussion",
59
- "participants": [
60
- "Jennifer Adams (Account Manager)",
61
- "Robert Smith (Client - CTO)",
62
- "Maria Garcia (Client - Product Manager)",
63
- "Alex Brown (Technical Lead)",
64
- ],
65
- "duration_minutes": 45,
66
- "context": "A client meeting to discuss project requirements, gather feedback, and align on technical solutions and timeline.",
67
- },
68
- "design_review": {
69
- "description": "Technical design review for new system architecture",
70
- "participants": [
71
- "Dr. Kevin Liu (Principal Architect)",
72
- "Priya Patel (Senior Developer)",
73
- "James Miller (DevOps Engineer)",
74
- "Sophie Turner (Security Engineer)",
75
- "Ryan O'Connor (Database Specialist)",
76
- ],
77
- "duration_minutes": 90,
78
- "context": "A technical architecture review meeting where the team discusses system design, evaluates trade-offs, and makes architectural decisions.",
79
- },
80
- "performance_review": {
81
- "description": "Quarterly performance review and goal setting",
82
- "participants": [
83
- "Linda Zhang (Engineering Manager)",
84
- "Chris Anderson (Senior Software Engineer)",
85
- ],
86
- "duration_minutes": 30,
87
- "context": "A one-on-one performance review meeting between a manager and employee to discuss accomplishments, areas for growth, and career goals.",
88
- },
89
- "all_hands": {
90
- "description": "Company all-hands meeting with quarterly updates",
91
- "participants": [
92
- "Mark Taylor (CEO)",
93
- "Rachel Green (CTO)",
94
- "John Lee (VP Sales)",
95
- "Amy White (VP Marketing)",
96
- ],
97
- "duration_minutes": 45,
98
- "context": "A company-wide meeting where leadership shares business updates, financial results, and strategic direction with all employees.",
99
- },
100
- "budget_planning": {
101
- "description": "Annual budget planning and resource allocation",
102
- "participants": [
103
- "Patricia Brown (CFO)",
104
- "Daniel Kim (VP Engineering)",
105
- "Michelle Jones (VP Sales)",
106
- "Steve Wilson (VP Marketing)",
107
- ],
108
- "duration_minutes": 75,
109
- "context": "A budget planning meeting where department heads discuss resource needs, budget allocations, and strategic investments for the upcoming year.",
110
- },
111
- "product_roadmap": {
112
- "description": "Product roadmap discussion and feature prioritization",
113
- "participants": [
114
- "Nicole Davis (Product Manager)",
115
- "Frank Chen (Engineering Lead)",
116
- "Jessica Miller (Senior Designer)",
117
- "Carlos Ruiz (Data Analyst)",
118
- ],
119
- "duration_minutes": 60,
120
- "context": "A product planning meeting to review customer feedback, prioritize features, and define the product roadmap for the next quarter.",
121
- },
122
- }
123
-
124
- def _estimate_tokens(self, text):
125
- """Rough token estimation (approximately 4 characters per token)."""
126
- return len(text) // 4
127
-
128
- def _validate_transcript_format(self, content):
129
- """
130
- Validate that the transcript doesn't contain forbidden sections.
131
- Returns (is_valid, warnings) tuple.
132
- """
133
- warnings = []
134
- content_lower = content.lower()
135
-
136
- # Check for common summary section headers
137
- forbidden_patterns = [
138
- ("summary:", "Summary section"),
139
- ("action items:", "Action items section"),
140
- ("action item:", "Action items section"),
141
- ("key decisions:", "Key decisions section"),
142
- ("decisions:", "Decisions section"),
143
- ("next steps:", "Next steps section"),
144
- ("follow-up:", "Follow-up section"),
145
- ("follow up:", "Follow-up section"),
146
- ("takeaways:", "Takeaways section"),
147
- ("conclusions:", "Conclusions section"),
148
- ("meeting notes:", "Meeting notes section"),
149
- ("key points:", "Key points section"),
150
- ]
151
-
152
- for pattern, description in forbidden_patterns:
153
- if pattern in content_lower:
154
- # Check if it's not just part of dialogue (should have newline before it)
155
- lines = content.split("\n")
156
- for line in lines:
157
- line_lower = line.lower().strip()
158
- # If line starts with the pattern (not just mentioned in dialogue)
159
- if line_lower.startswith(pattern) or line_lower.startswith(
160
- "**" + pattern
161
- ):
162
- warnings.append(f"Found forbidden section: {description}")
163
- break
164
-
165
- is_valid = len(warnings) == 0
166
- return is_valid, warnings
167
-
168
- def _generate_transcript_with_claude(self, meeting_type, target_tokens):
169
- """Generate a meeting transcript using Claude based on the meeting type and target token count."""
170
- if meeting_type not in self.meeting_templates:
171
- raise ValueError(f"Unknown meeting type: {meeting_type}")
172
-
173
- template = self.meeting_templates[meeting_type]
174
-
175
- # Create a detailed prompt for Claude
176
- prompt = f"""Generate a realistic meeting transcript for the following scenario:
177
-
178
- Meeting Type: {template['description']}
179
- Context: {template['context']}
180
- Participants: {', '.join(template['participants'])}
181
- Duration: {template['duration_minutes']} minutes
182
- Target Length: Approximately {target_tokens} tokens (about {target_tokens * 4} characters)
183
-
184
- CRITICAL FORMATTING REQUIREMENTS - The transcript MUST contain ONLY these three sections:
185
-
186
- 1. **Meeting Header** - Include:
187
- - Meeting title/type
188
- - Date and time
189
- - Location (can be virtual/in-person/hybrid)
190
-
191
- 2. **Participant List** - List all participants with their roles
192
-
193
- 3. **Transcript** - The actual meeting dialogue with:
194
- - Natural dialogue between the participants that reflects their roles
195
- - Realistic conversation flow appropriate for this type of meeting
196
- - Specific technical details, decisions, and action items MENTIONED IN DIALOGUE (not as separate sections)
197
- - Natural interruptions, clarifications, and back-and-forth discussion
198
- - Format: "Speaker Name: What they said"
199
-
200
- CRITICAL: You MUST NOT include any of the following:
201
- - Summary or overview sections (DO NOT end with "Summary:" or "In summary:")
202
- - Action items section (decisions/tasks should only be mentioned within the dialogue)
203
- - Key decisions section
204
- - Next steps section
205
- - Follow-up items section
206
- - Takeaways or conclusions section
207
- - Meeting notes section
208
- - Any other meta-commentary or analysis AFTER the dialogue ends
209
-
210
- The transcript should end naturally with the last line of dialogue from a participant. Do not add any commentary, summary, or analysis after the dialogue ends.
211
-
212
- The transcript should be approximately {target_tokens} tokens long and feel authentic and professional, with each participant contributing meaningfully based on their role.
213
-
214
- Generate ONLY the three sections listed above (header, participants, transcript dialogue). The file should end when the dialogue ends."""
215
-
216
- try:
217
- # Generate the transcript using Claude with usage tracking
218
- self.log.info(
219
- f"Generating {meeting_type} transcript with Claude (target: {target_tokens} tokens)"
220
- )
221
- response = self.claude_client.get_completion_with_usage(prompt)
222
-
223
- generated_content = (
224
- response["content"][0].text
225
- if isinstance(response["content"], list)
226
- else response["content"]
227
- )
228
- actual_tokens = self._estimate_tokens(generated_content)
229
-
230
- self.log.info(
231
- f"Generated transcript: {actual_tokens} tokens (target: {target_tokens})"
232
- )
233
-
234
- return generated_content, response["usage"], response["cost"]
235
-
236
- except Exception as e:
237
- self.log.error(f"Error generating transcript with Claude: {e}")
238
- raise RuntimeError(f"Failed to generate transcript for {meeting_type}: {e}")
239
-
240
- def _extend_content_with_claude(
241
- self, base_content, target_tokens, meeting_type, current_usage, current_cost
242
- ):
243
- """Extend existing content to reach target token count using Claude."""
244
- current_tokens = self._estimate_tokens(base_content)
245
-
246
- if current_tokens >= target_tokens:
247
- return base_content, current_usage, current_cost
248
-
249
- needed_tokens = target_tokens - current_tokens
250
- template = self.meeting_templates[meeting_type]
251
-
252
- extension_prompt = f"""Continue the following meeting transcript to make it approximately {needed_tokens} more tokens longer.
253
-
254
- Current transcript:
255
- {base_content}
256
-
257
- Please add more realistic dialogue that:
258
- 1. Maintains the same tone and context as the existing transcript
259
- 2. Continues naturally from where it left off
260
- 3. Adds approximately {needed_tokens} more tokens of content
261
- 4. Includes meaningful discussion relevant to a {template['description']}
262
- 5. Maintains the same participants and their roles
263
-
264
- CRITICAL REQUIREMENTS:
265
- - Generate ONLY additional dialogue in the format "Speaker Name: What they said"
266
- - DO NOT conclude or wrap up the meeting
267
- - DO NOT add summary sections (no "Summary:", "In summary:", etc.)
268
- - DO NOT add action items sections (no "Action Items:", etc.)
269
- - DO NOT add key decisions, next steps, or takeaways sections
270
- - DO NOT add any meta-commentary or analysis
271
- - Just continue the natural, ongoing conversation between participants
272
- - The meeting should feel like it's still in progress, not ending
273
-
274
- Even though you're extending the transcript, do NOT treat this as the end of the meeting. The conversation should continue naturally without any concluding sections.
275
-
276
- Generate only the additional transcript dialogue (without repeating the existing content)."""
277
-
278
- try:
279
- self.log.info(f"Extending transcript by ~{needed_tokens} tokens")
280
- response = self.claude_client.get_completion_with_usage(extension_prompt)
281
-
282
- extension_content = (
283
- response["content"][0].text
284
- if isinstance(response["content"], list)
285
- else response["content"]
286
- )
287
- extended_content = base_content + "\n\n" + extension_content
288
-
289
- # Combine usage and cost data
290
- total_usage = {
291
- "input_tokens": current_usage["input_tokens"]
292
- + response["usage"]["input_tokens"],
293
- "output_tokens": current_usage["output_tokens"]
294
- + response["usage"]["output_tokens"],
295
- "total_tokens": current_usage["total_tokens"]
296
- + response["usage"]["total_tokens"],
297
- }
298
-
299
- total_cost = {
300
- "input_cost": current_cost["input_cost"]
301
- + response["cost"]["input_cost"],
302
- "output_cost": current_cost["output_cost"]
303
- + response["cost"]["output_cost"],
304
- "total_cost": current_cost["total_cost"]
305
- + response["cost"]["total_cost"],
306
- }
307
-
308
- actual_tokens = self._estimate_tokens(extended_content)
309
- self.log.info(f"Extended transcript to {actual_tokens} tokens")
310
-
311
- return extended_content, total_usage, total_cost
312
-
313
- except Exception as e:
314
- self.log.error(f"Error extending transcript with Claude: {e}")
315
- # Return original content if extension fails
316
- return base_content, current_usage, current_cost
317
-
318
- def generate_transcript(self, meeting_type, target_tokens=1000):
319
- """Generate a single meeting transcript of specified type and approximate token count using Claude."""
320
- if meeting_type not in self.meeting_templates:
321
- raise ValueError(f"Unknown meeting type: {meeting_type}")
322
-
323
- template = self.meeting_templates[meeting_type]
324
-
325
- try:
326
- # Generate transcript with Claude
327
- content, usage, cost = self._generate_transcript_with_claude(
328
- meeting_type, target_tokens
329
- )
330
- actual_tokens = self._estimate_tokens(content)
331
-
332
- # If we're significantly under target, try to extend
333
- if actual_tokens < target_tokens * 0.8: # If less than 80% of target
334
- self.log.info(
335
- f"Transcript too short ({actual_tokens} tokens), extending to reach target"
336
- )
337
- content, usage, cost = self._extend_content_with_claude(
338
- content, target_tokens, meeting_type, usage, cost
339
- )
340
- actual_tokens = self._estimate_tokens(content)
341
-
342
- # Add metadata
343
- metadata = {
344
- "meeting_type": meeting_type,
345
- "description": template["description"],
346
- "participants": template["participants"],
347
- "estimated_duration_minutes": template["duration_minutes"],
348
- "estimated_tokens": actual_tokens,
349
- "target_tokens": target_tokens,
350
- "generated_date": datetime.now().isoformat(),
351
- "claude_model": self.claude_client.model,
352
- "claude_usage": usage,
353
- "claude_cost": cost,
354
- }
355
-
356
- return content, metadata
357
-
358
- except Exception as e:
359
- self.log.error(f"Failed to generate transcript for {meeting_type}: {e}")
360
- raise
361
-
362
- def generate_transcript_set(self, output_dir, target_tokens=1000, count_per_type=1):
363
- """Generate a set of meeting transcripts and save them to the output directory."""
364
- output_dir = Path(output_dir)
365
- # Create meetings subdirectory for organized output
366
- meetings_dir = output_dir / "meetings"
367
- meetings_dir.mkdir(parents=True, exist_ok=True)
368
- output_dir = meetings_dir # Use meetings subdirectory as base
369
-
370
- generated_files = []
371
- all_metadata = []
372
- total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
373
- total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
374
-
375
- for meeting_type in self.meeting_templates.keys():
376
- for i in range(count_per_type):
377
- self.log.info(
378
- f"Generating {meeting_type} transcript {i+1}/{count_per_type}"
379
- )
380
-
381
- # Generate transcript
382
- content, metadata = self.generate_transcript(
383
- meeting_type, target_tokens
384
- )
385
-
386
- # Create filename
387
- if count_per_type == 1:
388
- filename = f"{meeting_type}_meeting.txt"
389
- else:
390
- filename = f"{meeting_type}_meeting_{i+1}.txt"
391
-
392
- # Save transcript file
393
- file_path = output_dir / filename
394
- with open(file_path, "w", encoding="utf-8") as f:
395
- f.write(content)
396
-
397
- # Update metadata with file info
398
- metadata["filename"] = filename
399
- metadata["file_path"] = str(file_path)
400
- metadata["file_size_bytes"] = len(content.encode("utf-8"))
401
-
402
- generated_files.append(str(file_path))
403
- all_metadata.append(metadata)
404
-
405
- # Accumulate usage and cost
406
- usage = metadata["claude_usage"]
407
- cost = metadata["claude_cost"]
408
- total_usage["input_tokens"] += usage["input_tokens"]
409
- total_usage["output_tokens"] += usage["output_tokens"]
410
- total_usage["total_tokens"] += usage["total_tokens"]
411
- total_cost["input_cost"] += cost["input_cost"]
412
- total_cost["output_cost"] += cost["output_cost"]
413
- total_cost["total_cost"] += cost["total_cost"]
414
-
415
- self.log.info(
416
- f"Generated {filename} ({metadata['estimated_tokens']} tokens, ${cost['total_cost']:.4f})"
417
- )
418
-
419
- # Create summary metadata file
420
- summary = {
421
- "generation_info": {
422
- "generated_date": datetime.now().isoformat(),
423
- "total_files": len(generated_files),
424
- "target_tokens_per_file": target_tokens,
425
- "meeting_types": list(self.meeting_templates.keys()),
426
- "files_per_type": count_per_type,
427
- "claude_model": self.claude_client.model,
428
- "total_claude_usage": total_usage,
429
- "total_claude_cost": total_cost,
430
- },
431
- "transcripts": all_metadata,
432
- }
433
-
434
- summary_path = output_dir / "transcript_metadata.json"
435
- with open(summary_path, "w", encoding="utf-8") as f:
436
- json.dump(summary, f, indent=2)
437
-
438
- self.log.info(
439
- f"Generated {len(generated_files)} transcript files in {output_dir}"
440
- )
441
- self.log.info(
442
- f"Total cost: ${total_cost['total_cost']:.4f} ({total_usage['total_tokens']:,} tokens)"
443
- )
444
- self.log.info(f"Summary metadata saved to {summary_path}")
445
-
446
- return {
447
- "output_directory": str(output_dir),
448
- "generated_files": generated_files,
449
- "metadata_file": str(summary_path),
450
- "summary": summary,
451
- }
452
-
453
-
454
- def main():
455
- """Command line interface for transcript generation."""
456
- parser = argparse.ArgumentParser(
457
- description="Generate example meeting transcripts using Claude AI for testing transcript summarization",
458
- formatter_class=argparse.RawDescriptionHelpFormatter,
459
- epilog="""
460
- Examples:
461
- # Generate one transcript of each type with ~1000 tokens
462
- python -m gaia.eval.transcript_generator -o ./output/transcripts
463
-
464
- # Generate larger transcripts (~3000 tokens each)
465
- python -m gaia.eval.transcript_generator -o ./output/transcripts --target-tokens 3000
466
-
467
- # Generate multiple transcripts per type
468
- python -m gaia.eval.transcript_generator -o ./output/transcripts --count-per-type 3
469
-
470
- # Generate specific meeting types only
471
- python -m gaia.eval.transcript_generator -o ./output/transcripts --meeting-types standup planning
472
-
473
- # Generate small transcripts for quick testing
474
- python -m gaia.eval.transcript_generator -o ./test_transcripts --target-tokens 500
475
-
476
- # Use different Claude model
477
- python -m gaia.eval.transcript_generator -o ./output/transcripts --claude-model claude-3-opus-20240229
478
- """,
479
- )
480
-
481
- parser.add_argument(
482
- "-o",
483
- "--output-dir",
484
- type=str,
485
- required=True,
486
- help="Output directory for generated transcript files",
487
- )
488
- parser.add_argument(
489
- "--target-tokens",
490
- type=int,
491
- default=1000,
492
- help="Target token count per transcript (approximate, default: 1000)",
493
- )
494
- parser.add_argument(
495
- "--count-per-type",
496
- type=int,
497
- default=1,
498
- help="Number of transcripts to generate per meeting type (default: 1)",
499
- )
500
- parser.add_argument(
501
- "--meeting-types",
502
- nargs="+",
503
- choices=[
504
- "standup",
505
- "planning",
506
- "client_call",
507
- "design_review",
508
- "performance_review",
509
- "all_hands",
510
- "budget_planning",
511
- "product_roadmap",
512
- ],
513
- help="Specific meeting types to generate (default: all types)",
514
- )
515
- parser.add_argument(
516
- "--claude-model",
517
- type=str,
518
- default=None,
519
- help=f"Claude model to use for transcript generation (default: {DEFAULT_CLAUDE_MODEL})",
520
- )
521
-
522
- args = parser.parse_args()
523
-
524
- try:
525
- generator = TranscriptGenerator(claude_model=args.claude_model)
526
- except Exception as e:
527
- print(f"❌ Error initializing transcript generator: {e}")
528
- print("Make sure ANTHROPIC_API_KEY is set in your environment.")
529
- return 1
530
-
531
- try:
532
- # Filter meeting types if specified
533
- original_templates = None
534
- if args.meeting_types:
535
- # Temporarily filter the templates
536
- original_templates = generator.meeting_templates.copy()
537
- generator.meeting_templates = {
538
- k: v
539
- for k, v in generator.meeting_templates.items()
540
- if k in args.meeting_types
541
- }
542
-
543
- result = generator.generate_transcript_set(
544
- output_dir=args.output_dir,
545
- target_tokens=args.target_tokens,
546
- count_per_type=args.count_per_type,
547
- )
548
-
549
- print("✅ Successfully generated meeting transcripts")
550
- print(f" Output directory: {result['output_directory']}")
551
- print(f" Generated files: {len(result['generated_files'])}")
552
- print(f" Metadata file: {result['metadata_file']}")
553
-
554
- # Show summary stats
555
- summary = result["summary"]
556
- generation_info = summary["generation_info"]
557
- total_tokens = generation_info["total_claude_usage"]["total_tokens"]
558
- total_cost = generation_info["total_claude_cost"]["total_cost"]
559
- avg_tokens = (
560
- total_tokens / len(summary["transcripts"]) if summary["transcripts"] else 0
561
- )
562
-
563
- print(f" Total tokens used: {total_tokens:,}")
564
- print(f" Total cost: ${total_cost:.4f}")
565
- print(f" Average tokens per file: {avg_tokens:.0f}")
566
- print(f" Average cost per file: ${total_cost/len(summary['transcripts']):.4f}")
567
- print(f" Meeting types: {', '.join(generation_info['meeting_types'])}")
568
- print(f" Claude model: {generation_info['claude_model']}")
569
-
570
- # Restore original templates if they were filtered
571
- if args.meeting_types and original_templates is not None:
572
- generator.meeting_templates = original_templates
573
-
574
- except Exception as e:
575
- print(f"❌ Error generating transcripts: {e}")
576
- return 1
577
-
578
- return 0
579
-
580
-
581
if __name__ == "__main__":
    # The bare `exit` helper is injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit propagates main()'s status code without relying on it.
    raise SystemExit(main())
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ import argparse
5
+ import json
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+
9
+ from gaia.eval.claude import ClaudeClient
10
+ from gaia.eval.config import DEFAULT_CLAUDE_MODEL
11
+ from gaia.logger import get_logger
12
+
13
+
14
+ class TranscriptGenerator:
15
+ """Generates example meeting transcripts for testing transcript summarization."""
16
+
17
def __init__(self, claude_model=None, max_tokens=8192):
    """Set up the logger, the Claude client, and the built-in meeting templates.

    Args:
        claude_model: Model name passed to ``ClaudeClient``. ``None`` selects
            ``DEFAULT_CLAUDE_MODEL``.
        max_tokens: Passed to ``ClaudeClient`` as ``max_tokens``.

    Raises:
        ValueError: If constructing the Claude client fails. The underlying
            exception is chained as ``__cause__`` so its traceback is kept.
    """
    self.log = get_logger(__name__)

    # Initialize Claude client for dynamic content generation
    if claude_model is None:
        claude_model = DEFAULT_CLAUDE_MODEL
    try:
        self.claude_client = ClaudeClient(model=claude_model, max_tokens=max_tokens)
        self.log.info(f"Initialized Claude client with model: {claude_model}")
    except Exception as e:
        self.log.error(f"Failed to initialize Claude client: {e}")
        # Chain explicitly so callers can still inspect the root cause.
        raise ValueError(
            f"Could not initialize Claude client. Please ensure ANTHROPIC_API_KEY is set. Error: {e}"
        ) from e

    # Meeting templates with different use cases. Each entry supplies the
    # description, participants, duration and context used to build prompts.
    self.meeting_templates = {
        "standup": {
            "description": "Daily team standup meeting with status updates and blockers",
            "participants": [
                "Alice Chen (Scrum Master)",
                "Bob Martinez (Developer)",
                "Carol Kim (Developer)",
                "David Wilson (QA Engineer)",
            ],
            "duration_minutes": 15,
            "context": "A software development team's daily standup meeting where team members share their progress, current tasks, and any blockers they're facing.",
        },
        "planning": {
            "description": "Sprint planning meeting for upcoming development cycle",
            "participants": [
                "Sarah Johnson (Product Owner)",
                "Mike Thompson (Scrum Master)",
                "Lisa Wang (Senior Developer)",
                "Tom Rodriguez (Developer)",
                "Emma Davis (UX Designer)",
            ],
            "duration_minutes": 60,
            "context": "A sprint planning session where the team reviews the product backlog, estimates story points, and commits to work for the upcoming sprint.",
        },
        "client_call": {
            "description": "Client requirements gathering and project discussion",
            "participants": [
                "Jennifer Adams (Account Manager)",
                "Robert Smith (Client - CTO)",
                "Maria Garcia (Client - Product Manager)",
                "Alex Brown (Technical Lead)",
            ],
            "duration_minutes": 45,
            "context": "A client meeting to discuss project requirements, gather feedback, and align on technical solutions and timeline.",
        },
        "design_review": {
            "description": "Technical design review for new system architecture",
            "participants": [
                "Dr. Kevin Liu (Principal Architect)",
                "Priya Patel (Senior Developer)",
                "James Miller (DevOps Engineer)",
                "Sophie Turner (Security Engineer)",
                "Ryan O'Connor (Database Specialist)",
            ],
            "duration_minutes": 90,
            "context": "A technical architecture review meeting where the team discusses system design, evaluates trade-offs, and makes architectural decisions.",
        },
        "performance_review": {
            "description": "Quarterly performance review and goal setting",
            "participants": [
                "Linda Zhang (Engineering Manager)",
                "Chris Anderson (Senior Software Engineer)",
            ],
            "duration_minutes": 30,
            "context": "A one-on-one performance review meeting between a manager and employee to discuss accomplishments, areas for growth, and career goals.",
        },
        "all_hands": {
            "description": "Company all-hands meeting with quarterly updates",
            "participants": [
                "Mark Taylor (CEO)",
                "Rachel Green (CTO)",
                "John Lee (VP Sales)",
                "Amy White (VP Marketing)",
            ],
            "duration_minutes": 45,
            "context": "A company-wide meeting where leadership shares business updates, financial results, and strategic direction with all employees.",
        },
        "budget_planning": {
            "description": "Annual budget planning and resource allocation",
            "participants": [
                "Patricia Brown (CFO)",
                "Daniel Kim (VP Engineering)",
                "Michelle Jones (VP Sales)",
                "Steve Wilson (VP Marketing)",
            ],
            "duration_minutes": 75,
            "context": "A budget planning meeting where department heads discuss resource needs, budget allocations, and strategic investments for the upcoming year.",
        },
        "product_roadmap": {
            "description": "Product roadmap discussion and feature prioritization",
            "participants": [
                "Nicole Davis (Product Manager)",
                "Frank Chen (Engineering Lead)",
                "Jessica Miller (Senior Designer)",
                "Carlos Ruiz (Data Analyst)",
            ],
            "duration_minutes": 60,
            "context": "A product planning meeting to review customer feedback, prioritize features, and define the product roadmap for the next quarter.",
        },
    }
123
+
124
+ def _estimate_tokens(self, text):
125
+ """Rough token estimation (approximately 4 characters per token)."""
126
+ return len(text) // 4
127
+
128
+ def _validate_transcript_format(self, content):
129
+ """
130
+ Validate that the transcript doesn't contain forbidden sections.
131
+ Returns (is_valid, warnings) tuple.
132
+ """
133
+ warnings = []
134
+ content_lower = content.lower()
135
+
136
+ # Check for common summary section headers
137
+ forbidden_patterns = [
138
+ ("summary:", "Summary section"),
139
+ ("action items:", "Action items section"),
140
+ ("action item:", "Action items section"),
141
+ ("key decisions:", "Key decisions section"),
142
+ ("decisions:", "Decisions section"),
143
+ ("next steps:", "Next steps section"),
144
+ ("follow-up:", "Follow-up section"),
145
+ ("follow up:", "Follow-up section"),
146
+ ("takeaways:", "Takeaways section"),
147
+ ("conclusions:", "Conclusions section"),
148
+ ("meeting notes:", "Meeting notes section"),
149
+ ("key points:", "Key points section"),
150
+ ]
151
+
152
+ for pattern, description in forbidden_patterns:
153
+ if pattern in content_lower:
154
+ # Check if it's not just part of dialogue (should have newline before it)
155
+ lines = content.split("\n")
156
+ for line in lines:
157
+ line_lower = line.lower().strip()
158
+ # If line starts with the pattern (not just mentioned in dialogue)
159
+ if line_lower.startswith(pattern) or line_lower.startswith(
160
+ "**" + pattern
161
+ ):
162
+ warnings.append(f"Found forbidden section: {description}")
163
+ break
164
+
165
+ is_valid = len(warnings) == 0
166
+ return is_valid, warnings
167
+
168
+ def _generate_transcript_with_claude(self, meeting_type, target_tokens):
169
+ """Generate a meeting transcript using Claude based on the meeting type and target token count."""
170
+ if meeting_type not in self.meeting_templates:
171
+ raise ValueError(f"Unknown meeting type: {meeting_type}")
172
+
173
+ template = self.meeting_templates[meeting_type]
174
+
175
+ # Create a detailed prompt for Claude
176
+ prompt = f"""Generate a realistic meeting transcript for the following scenario:
177
+
178
+ Meeting Type: {template['description']}
179
+ Context: {template['context']}
180
+ Participants: {', '.join(template['participants'])}
181
+ Duration: {template['duration_minutes']} minutes
182
+ Target Length: Approximately {target_tokens} tokens (about {target_tokens * 4} characters)
183
+
184
+ CRITICAL FORMATTING REQUIREMENTS - The transcript MUST contain ONLY these three sections:
185
+
186
+ 1. **Meeting Header** - Include:
187
+ - Meeting title/type
188
+ - Date and time
189
+ - Location (can be virtual/in-person/hybrid)
190
+
191
+ 2. **Participant List** - List all participants with their roles
192
+
193
+ 3. **Transcript** - The actual meeting dialogue with:
194
+ - Natural dialogue between the participants that reflects their roles
195
+ - Realistic conversation flow appropriate for this type of meeting
196
+ - Specific technical details, decisions, and action items MENTIONED IN DIALOGUE (not as separate sections)
197
+ - Natural interruptions, clarifications, and back-and-forth discussion
198
+ - Format: "Speaker Name: What they said"
199
+
200
+ CRITICAL: You MUST NOT include any of the following:
201
+ - Summary or overview sections (DO NOT end with "Summary:" or "In summary:")
202
+ - Action items section (decisions/tasks should only be mentioned within the dialogue)
203
+ - Key decisions section
204
+ - Next steps section
205
+ - Follow-up items section
206
+ - Takeaways or conclusions section
207
+ - Meeting notes section
208
+ - Any other meta-commentary or analysis AFTER the dialogue ends
209
+
210
+ The transcript should end naturally with the last line of dialogue from a participant. Do not add any commentary, summary, or analysis after the dialogue ends.
211
+
212
+ The transcript should be approximately {target_tokens} tokens long and feel authentic and professional, with each participant contributing meaningfully based on their role.
213
+
214
+ Generate ONLY the three sections listed above (header, participants, transcript dialogue). The file should end when the dialogue ends."""
215
+
216
+ try:
217
+ # Generate the transcript using Claude with usage tracking
218
+ self.log.info(
219
+ f"Generating {meeting_type} transcript with Claude (target: {target_tokens} tokens)"
220
+ )
221
+ response = self.claude_client.get_completion_with_usage(prompt)
222
+
223
+ generated_content = (
224
+ response["content"][0].text
225
+ if isinstance(response["content"], list)
226
+ else response["content"]
227
+ )
228
+ actual_tokens = self._estimate_tokens(generated_content)
229
+
230
+ self.log.info(
231
+ f"Generated transcript: {actual_tokens} tokens (target: {target_tokens})"
232
+ )
233
+
234
+ return generated_content, response["usage"], response["cost"]
235
+
236
+ except Exception as e:
237
+ self.log.error(f"Error generating transcript with Claude: {e}")
238
+ raise RuntimeError(f"Failed to generate transcript for {meeting_type}: {e}")
239
+
240
+ def _extend_content_with_claude(
241
+ self, base_content, target_tokens, meeting_type, current_usage, current_cost
242
+ ):
243
+ """Extend existing content to reach target token count using Claude."""
244
+ current_tokens = self._estimate_tokens(base_content)
245
+
246
+ if current_tokens >= target_tokens:
247
+ return base_content, current_usage, current_cost
248
+
249
+ needed_tokens = target_tokens - current_tokens
250
+ template = self.meeting_templates[meeting_type]
251
+
252
+ extension_prompt = f"""Continue the following meeting transcript to make it approximately {needed_tokens} more tokens longer.
253
+
254
+ Current transcript:
255
+ {base_content}
256
+
257
+ Please add more realistic dialogue that:
258
+ 1. Maintains the same tone and context as the existing transcript
259
+ 2. Continues naturally from where it left off
260
+ 3. Adds approximately {needed_tokens} more tokens of content
261
+ 4. Includes meaningful discussion relevant to a {template['description']}
262
+ 5. Maintains the same participants and their roles
263
+
264
+ CRITICAL REQUIREMENTS:
265
+ - Generate ONLY additional dialogue in the format "Speaker Name: What they said"
266
+ - DO NOT conclude or wrap up the meeting
267
+ - DO NOT add summary sections (no "Summary:", "In summary:", etc.)
268
+ - DO NOT add action items sections (no "Action Items:", etc.)
269
+ - DO NOT add key decisions, next steps, or takeaways sections
270
+ - DO NOT add any meta-commentary or analysis
271
+ - Just continue the natural, ongoing conversation between participants
272
+ - The meeting should feel like it's still in progress, not ending
273
+
274
+ Even though you're extending the transcript, do NOT treat this as the end of the meeting. The conversation should continue naturally without any concluding sections.
275
+
276
+ Generate only the additional transcript dialogue (without repeating the existing content)."""
277
+
278
+ try:
279
+ self.log.info(f"Extending transcript by ~{needed_tokens} tokens")
280
+ response = self.claude_client.get_completion_with_usage(extension_prompt)
281
+
282
+ extension_content = (
283
+ response["content"][0].text
284
+ if isinstance(response["content"], list)
285
+ else response["content"]
286
+ )
287
+ extended_content = base_content + "\n\n" + extension_content
288
+
289
+ # Combine usage and cost data
290
+ total_usage = {
291
+ "input_tokens": current_usage["input_tokens"]
292
+ + response["usage"]["input_tokens"],
293
+ "output_tokens": current_usage["output_tokens"]
294
+ + response["usage"]["output_tokens"],
295
+ "total_tokens": current_usage["total_tokens"]
296
+ + response["usage"]["total_tokens"],
297
+ }
298
+
299
+ total_cost = {
300
+ "input_cost": current_cost["input_cost"]
301
+ + response["cost"]["input_cost"],
302
+ "output_cost": current_cost["output_cost"]
303
+ + response["cost"]["output_cost"],
304
+ "total_cost": current_cost["total_cost"]
305
+ + response["cost"]["total_cost"],
306
+ }
307
+
308
+ actual_tokens = self._estimate_tokens(extended_content)
309
+ self.log.info(f"Extended transcript to {actual_tokens} tokens")
310
+
311
+ return extended_content, total_usage, total_cost
312
+
313
+ except Exception as e:
314
+ self.log.error(f"Error extending transcript with Claude: {e}")
315
+ # Return original content if extension fails
316
+ return base_content, current_usage, current_cost
317
+
318
def generate_transcript(self, meeting_type, target_tokens=1000):
    """Produce one transcript of the given type, plus metadata describing it.

    Args:
        meeting_type: Key into ``self.meeting_templates``.
        target_tokens: Approximate token count to aim for.

    Returns:
        ``(content, metadata)``. ``metadata`` records the template details,
        the estimated and target token counts, the generation timestamp,
        the Claude model, and the usage and cost figures.

    Raises:
        ValueError: If ``meeting_type`` is not a known template.
        Exception: Any failure during generation is logged and re-raised.
    """
    if meeting_type not in self.meeting_templates:
        raise ValueError(f"Unknown meeting type: {meeting_type}")

    template = self.meeting_templates[meeting_type]

    try:
        # First pass: ask Claude for the whole transcript.
        transcript, usage, cost = self._generate_transcript_with_claude(
            meeting_type, target_tokens
        )
        token_estimate = self._estimate_tokens(transcript)

        # A result below 80% of the target gets one extension attempt.
        if token_estimate < target_tokens * 0.8:
            self.log.info(
                f"Transcript too short ({token_estimate} tokens), extending to reach target"
            )
            transcript, usage, cost = self._extend_content_with_claude(
                transcript, target_tokens, meeting_type, usage, cost
            )
            token_estimate = self._estimate_tokens(transcript)

        # Describe what was generated.
        details = {}
        details["meeting_type"] = meeting_type
        details["description"] = template["description"]
        details["participants"] = template["participants"]
        details["estimated_duration_minutes"] = template["duration_minutes"]
        details["estimated_tokens"] = token_estimate
        details["target_tokens"] = target_tokens
        details["generated_date"] = datetime.now().isoformat()
        details["claude_model"] = self.claude_client.model
        details["claude_usage"] = usage
        details["claude_cost"] = cost

        return transcript, details

    except Exception as e:
        self.log.error(f"Failed to generate transcript for {meeting_type}: {e}")
        raise
361
+
362
def generate_transcript_set(self, output_dir, target_tokens=1000, count_per_type=1):
    """Generate transcripts for every template and write them to disk.

    Files are written to a ``meetings`` subdirectory of ``output_dir``
    (created if missing), together with a ``transcript_metadata.json``
    summary of the run.

    Args:
        output_dir: Base directory for the output.
        target_tokens: Approximate token count per transcript.
        count_per_type: Number of transcripts to generate per meeting type.

    Returns:
        Dict with ``output_directory``, ``generated_files``,
        ``metadata_file`` and the in-memory ``summary``.
    """
    # Everything is written beneath <output_dir>/meetings.
    output_dir = Path(output_dir) / "meetings"
    output_dir.mkdir(parents=True, exist_ok=True)

    generated_files = []
    all_metadata = []
    total_usage = dict.fromkeys(("input_tokens", "output_tokens", "total_tokens"), 0)
    total_cost = dict.fromkeys(("input_cost", "output_cost", "total_cost"), 0.0)

    for meeting_type in self.meeting_templates:
        for ordinal in range(1, count_per_type + 1):
            self.log.info(
                f"Generating {meeting_type} transcript {ordinal}/{count_per_type}"
            )

            content, metadata = self.generate_transcript(meeting_type, target_tokens)

            # A numeric suffix is only needed with several files per type.
            suffix = "" if count_per_type == 1 else f"_{ordinal}"
            filename = f"{meeting_type}_meeting{suffix}.txt"

            file_path = output_dir / filename
            file_path.write_text(content, encoding="utf-8")

            # Record where the transcript ended up.
            metadata.update(
                filename=filename,
                file_path=str(file_path),
                file_size_bytes=len(content.encode("utf-8")),
            )

            generated_files.append(str(file_path))
            all_metadata.append(metadata)

            # Fold this transcript's usage and cost into the run totals.
            usage = metadata["claude_usage"]
            cost = metadata["claude_cost"]
            for field in total_usage:
                total_usage[field] += usage[field]
            for field in total_cost:
                total_cost[field] += cost[field]

            self.log.info(
                f"Generated {filename} ({metadata['estimated_tokens']} tokens, ${cost['total_cost']:.4f})"
            )

    # Summary of the whole run, saved next to the transcripts.
    generation_info = {
        "generated_date": datetime.now().isoformat(),
        "total_files": len(generated_files),
        "target_tokens_per_file": target_tokens,
        "meeting_types": list(self.meeting_templates),
        "files_per_type": count_per_type,
        "claude_model": self.claude_client.model,
        "total_claude_usage": total_usage,
        "total_claude_cost": total_cost,
    }
    summary = {"generation_info": generation_info, "transcripts": all_metadata}

    summary_path = output_dir / "transcript_metadata.json"
    with summary_path.open("w", encoding="utf-8") as handle:
        json.dump(summary, handle, indent=2)

    self.log.info(
        f"Generated {len(generated_files)} transcript files in {output_dir}"
    )
    self.log.info(
        f"Total cost: ${total_cost['total_cost']:.4f} ({total_usage['total_tokens']:,} tokens)"
    )
    self.log.info(f"Summary metadata saved to {summary_path}")

    return {
        "output_directory": str(output_dir),
        "generated_files": generated_files,
        "metadata_file": str(summary_path),
        "summary": summary,
    }
452
+
453
+
454
def main():
    """Command line interface for transcript generation.

    Parses the command line, builds a ``TranscriptGenerator``, optionally
    narrows its templates to ``--meeting-types``, generates the transcript
    set and prints a summary.

    Returns:
        Exit status: ``0`` on success, ``1`` when the generator cannot be
        initialised or transcript generation fails.
    """
    parser = argparse.ArgumentParser(
        description="Generate example meeting transcripts using Claude AI for testing transcript summarization",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Generate one transcript of each type with ~1000 tokens
python -m gaia.eval.transcript_generator -o ./output/transcripts

# Generate larger transcripts (~3000 tokens each)
python -m gaia.eval.transcript_generator -o ./output/transcripts --target-tokens 3000

# Generate multiple transcripts per type
python -m gaia.eval.transcript_generator -o ./output/transcripts --count-per-type 3

# Generate specific meeting types only
python -m gaia.eval.transcript_generator -o ./output/transcripts --meeting-types standup planning

# Generate small transcripts for quick testing
python -m gaia.eval.transcript_generator -o ./test_transcripts --target-tokens 500

# Use different Claude model
python -m gaia.eval.transcript_generator -o ./output/transcripts --claude-model claude-3-opus-20240229
""",
    )

    parser.add_argument(
        "-o",
        "--output-dir",
        type=str,
        required=True,
        help="Output directory for generated transcript files",
    )
    parser.add_argument(
        "--target-tokens",
        type=int,
        default=1000,
        help="Target token count per transcript (approximate, default: 1000)",
    )
    parser.add_argument(
        "--count-per-type",
        type=int,
        default=1,
        help="Number of transcripts to generate per meeting type (default: 1)",
    )
    parser.add_argument(
        "--meeting-types",
        nargs="+",
        choices=[
            "standup",
            "planning",
            "client_call",
            "design_review",
            "performance_review",
            "all_hands",
            "budget_planning",
            "product_roadmap",
        ],
        help="Specific meeting types to generate (default: all types)",
    )
    parser.add_argument(
        "--claude-model",
        type=str,
        default=None,
        help=f"Claude model to use for transcript generation (default: {DEFAULT_CLAUDE_MODEL})",
    )

    args = parser.parse_args()

    try:
        generator = TranscriptGenerator(claude_model=args.claude_model)
    except Exception as e:
        print(f"❌ Error initializing transcript generator: {e}")
        print("Make sure ANTHROPIC_API_KEY is set in your environment.")
        return 1

    # Holds the unfiltered templates while a --meeting-types filter is active.
    original_templates = None
    try:
        # Filter meeting types if specified
        if args.meeting_types:
            # Temporarily filter the templates
            original_templates = generator.meeting_templates.copy()
            generator.meeting_templates = {
                k: v
                for k, v in generator.meeting_templates.items()
                if k in args.meeting_types
            }

        result = generator.generate_transcript_set(
            output_dir=args.output_dir,
            target_tokens=args.target_tokens,
            count_per_type=args.count_per_type,
        )

        print("✅ Successfully generated meeting transcripts")
        print(f" Output directory: {result['output_directory']}")
        print(f" Generated files: {len(result['generated_files'])}")
        print(f" Metadata file: {result['metadata_file']}")

        # Show summary stats
        summary = result["summary"]
        generation_info = summary["generation_info"]
        total_tokens = generation_info["total_claude_usage"]["total_tokens"]
        total_cost = generation_info["total_claude_cost"]["total_cost"]

        # Guard both averages: a run can legitimately produce no transcripts
        # (for example --count-per-type 0), which must not divide by zero.
        transcript_count = len(summary["transcripts"])
        avg_tokens = total_tokens / transcript_count if transcript_count else 0
        avg_cost = total_cost / transcript_count if transcript_count else 0.0

        print(f" Total tokens used: {total_tokens:,}")
        print(f" Total cost: ${total_cost:.4f}")
        print(f" Average tokens per file: {avg_tokens:.0f}")
        print(f" Average cost per file: ${avg_cost:.4f}")
        print(f" Meeting types: {', '.join(generation_info['meeting_types'])}")
        print(f" Claude model: {generation_info['claude_model']}")

    except Exception as e:
        print(f"❌ Error generating transcripts: {e}")
        return 1
    finally:
        # Restore original templates if they were filtered, on failure too.
        if original_templates is not None:
            generator.meeting_templates = original_templates

    return 0
579
+
580
+
581
if __name__ == "__main__":
    # The bare `exit` helper is injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit propagates main()'s status code without relying on it.
    raise SystemExit(main())