amd-gaia 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5632
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -723
  180. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,633 +1,116 @@
1
- #!/usr/bin/env python3
2
- # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
3
- # SPDX-License-Identifier: MIT
4
-
5
- """
6
- Gaia Summarizer Application - Processes meeting transcripts and emails to generate summaries
7
- """
8
-
9
- import json
10
- import re
11
- import time
12
- from dataclasses import dataclass
13
- from datetime import datetime
14
- from pathlib import Path
15
- from typing import Any, Dict, List, Literal, Optional
16
-
17
- from gaia.chat.sdk import ChatConfig, ChatSDK
18
- from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME
19
- from gaia.logger import get_logger
20
-
21
-
22
- def validate_email_address(email: str) -> bool:
23
- """Validate email address format"""
24
- email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
25
- return re.match(email_pattern, email.strip()) is not None
26
-
27
-
28
- def validate_email_list(email_list: str) -> List[str]:
29
- """Validate and parse comma-separated email list"""
30
- if not email_list:
31
- return []
32
-
33
- emails = [e.strip() for e in email_list.split(",") if e.strip()]
34
- invalid_emails = [e for e in emails if not validate_email_address(e)]
35
-
36
- if invalid_emails:
37
- raise ValueError(f"Invalid email address(es): {', '.join(invalid_emails)}")
38
-
39
- return emails
40
-
41
-
42
- # Summary style definitions
43
- SUMMARY_STYLES = {
44
- "brief": "Generate a concise 2-3 sentence summary highlighting the most important points.",
45
- "detailed": "Generate a comprehensive summary with all key details, context, and nuances.",
46
- "bullets": "Generate key points in a clear bullet-point format, focusing on actionable items.",
47
- "executive": "Generate a high-level executive summary focusing on decisions, outcomes, and strategic implications.",
48
- "participants": "Extract and list all meeting participants with their roles if mentioned.",
49
- "action_items": "Extract all action items with owners and deadlines where specified.",
50
- }
51
-
52
- # System prompts for different content types
53
- SYSTEM_PROMPTS = {
54
- "transcript": "You are a professional meeting summarizer. Analyze meeting transcripts to extract key information, decisions, and action items. Be precise and comprehensive.",
55
- "email": "You are a professional email summarizer. Analyze emails to extract key information, requests, and required actions. Focus on the sender's intent and recipient's needed response.",
56
- }
57
-
58
-
59
- @dataclass
60
- class SummaryConfig:
61
- """Configuration for summarization"""
62
-
63
- model: str = DEFAULT_MODEL_NAME
64
- max_tokens: int = 1024
65
- input_type: Literal["transcript", "email", "auto"] = "auto"
66
- styles: List[str] = None
67
- combined_prompt: bool = False
68
- use_claude: bool = False
69
- use_chatgpt: bool = False
70
-
71
- def __post_init__(self):
72
- if self.styles is None:
73
- self.styles = ["executive", "participants", "action_items"]
74
- # Validate styles
75
- valid_styles = set(SUMMARY_STYLES.keys())
76
- invalid_styles = [s for s in self.styles if s not in valid_styles]
77
- if invalid_styles:
78
- raise ValueError(
79
- f"Invalid style(s): {', '.join(invalid_styles)}. Valid styles: {', '.join(valid_styles)}"
80
- )
81
-
82
- # Auto-detect OpenAI models (gpt-*) to use ChatGPT
83
- if self.model.lower().startswith("gpt"):
84
- self.use_chatgpt = True
85
-
86
-
87
- class SummarizerApp:
88
- """Main application class for summarization"""
89
-
90
- def __init__(self, config: Optional[SummaryConfig] = None):
91
- """Initialize the summarizer application"""
92
- self.config = config or SummaryConfig()
93
- self.log = get_logger(__name__)
94
-
95
- # Initialize base chat SDK
96
- chat_config = ChatConfig(
97
- model=self.config.model,
98
- max_tokens=self.config.max_tokens,
99
- use_claude=self.config.use_claude,
100
- use_chatgpt=self.config.use_chatgpt,
101
- show_stats=True,
102
- )
103
- self.chat_sdk = ChatSDK(chat_config)
104
-
105
- # Direct access to LLM client for performance stats
106
- self.llm_client = self.chat_sdk.llm_client
107
-
108
- # Retry configuration
109
- self.max_retries = 3
110
- self.retry_delay = 1.0 # seconds
111
-
112
- def detect_content_type(self, content: str) -> str:
113
- """Auto-detect if content is a transcript or email using LLM"""
114
- if self.config.input_type != "auto":
115
- return self.config.input_type
116
-
117
- # First try simple heuristics
118
- email_patterns = [
119
- r"From:\s*\S+",
120
- r"To:\s*\S+",
121
- r"Subject:\s*\S+",
122
- r"Dear\s+\w+",
123
- r"Sincerely,?\s*\n",
124
- r"Best regards,?\s*\n",
125
- ]
126
-
127
- transcript_patterns = [
128
- r"\w+:\s*[^\n]+", # Speaker: dialogue
129
- r"\[\d{1,2}:\d{2}\]", # Time stamps
130
- r"\(\d{1,2}:\d{2}\)",
131
- ]
132
-
133
- # Count pattern matches
134
- email_score = sum(
135
- 1
136
- for pattern in email_patterns
137
- if re.search(pattern, content[:500], re.IGNORECASE)
138
- )
139
- transcript_score = sum(
140
- 1 for pattern in transcript_patterns if re.search(pattern, content[:500])
141
- )
142
-
143
- if email_score > transcript_score and email_score >= 2:
144
- detected_type = "email"
145
- elif transcript_score > email_score and transcript_score >= 2:
146
- detected_type = "transcript"
147
- else:
148
- # Use LLM as fallback with retry logic
149
- detection_prompt = (
150
- """Analyze this text and determine if it's a meeting transcript or an email.
151
-
152
- A meeting transcript typically has:
153
- - Multiple speakers with dialogue
154
- - Time stamps or speaker labels
155
- - Conversational flow
156
-
157
- An email typically has:
158
- - From/To/Subject headers or email-like structure
159
- - Formal greeting and closing
160
- - Single author perspective
161
-
162
- Respond with ONLY one word: 'transcript' or 'email'
163
-
164
- Text to analyze:
165
- """
166
- + content[:1000]
167
- ) # Only use first 1000 chars for detection
168
-
169
- for attempt in range(self.max_retries):
170
- try:
171
- response = self.llm_client.generate(
172
- detection_prompt, model=self.config.model, max_tokens=10
173
- )
174
-
175
- detected_type = response.strip().lower()
176
- if detected_type not in ["transcript", "email"]:
177
- # Default to transcript if unclear
178
- detected_type = "transcript"
179
- break
180
- except Exception as e:
181
- if attempt < self.max_retries - 1:
182
- self.log.warning(
183
- f"Content type detection attempt {attempt + 1} failed: {e}. Retrying..."
184
- )
185
- time.sleep(self.retry_delay * (attempt + 1))
186
- else:
187
- self.log.error(
188
- f"Failed to detect content type after {self.max_retries} attempts"
189
- )
190
- detected_type = "transcript" # Default fallback
191
-
192
- self.log.info(f"Auto-detected content type: {detected_type}")
193
- return detected_type
194
-
195
- def generate_summary_prompt(
196
- self, content: str, content_type: str, style: str
197
- ) -> str:
198
- """Generate the prompt for a specific summary style"""
199
- style_instruction = SUMMARY_STYLES.get(style, SUMMARY_STYLES["brief"])
200
-
201
- if style == "participants" and content_type == "email":
202
- # Special handling for email participants
203
- prompt = f"""Extract the sender and all recipients from this email.
204
-
205
- Format your response as JSON:
206
- {{
207
- "sender": "sender email/name",
208
- "recipients": ["recipient1", "recipient2"],
209
- "cc": ["cc1", "cc2"] (if any),
210
- "bcc": ["bcc1"] (if any)
211
- }}
212
-
213
- Email content:
214
- {content}"""
215
- elif style == "action_items":
216
- prompt = f"""Extract all action items from this {content_type}.
217
-
218
- {style_instruction}
219
-
220
- Format each action item with:
221
- - The specific action required
222
- - Who is responsible (if mentioned)
223
- - Any deadline or timeline (if mentioned)
224
-
225
- If no action items are found, respond with "No specific action items identified."
226
-
227
- Content:
228
- {content}"""
229
- else:
230
- prompt = f"""Analyze this {content_type} and {style_instruction}
231
-
232
- Content:
233
- {content}"""
234
-
235
- return prompt
236
-
237
- def generate_combined_prompt(
238
- self, content: str, content_type: str, styles: List[str]
239
- ) -> str:
240
- """Generate a single prompt for multiple summary styles"""
241
- sections = []
242
- for style in styles:
243
- style_instruction = SUMMARY_STYLES.get(style, SUMMARY_STYLES["brief"])
244
- sections.append(f"- {style.upper()}: {style_instruction}")
245
-
246
- prompt = f"""Analyze this {content_type} and generate the following summaries:
247
-
248
- {chr(10).join(sections)}
249
-
250
- Format your response with clear section headers for each style.
251
-
252
- Content:
253
- {content}"""
254
-
255
- return prompt
256
-
257
- def summarize_with_style(
258
- self, content: str, content_type: str, style: str
259
- ) -> Dict[str, Any]:
260
- """Generate a summary for a specific style with retry logic"""
261
- start_time = time.time()
262
-
263
- # Set appropriate system prompt
264
- system_prompt = SYSTEM_PROMPTS.get(content_type, SYSTEM_PROMPTS["transcript"])
265
- self.chat_sdk.config.system_prompt = system_prompt
266
-
267
- # Generate prompt
268
- prompt = self.generate_summary_prompt(content, content_type, style)
269
-
270
- # Check if content might exceed token limits
271
- estimated_tokens = len(content.split()) + len(prompt.split())
272
- if estimated_tokens > 3000: # Conservative estimate
273
- self.log.warning(
274
- f"Content may exceed token limits. Estimated tokens: {estimated_tokens}"
275
- )
276
-
277
- # Get summary with retry logic
278
- response = None
279
- last_error = None
280
-
281
- for attempt in range(self.max_retries):
282
- try:
283
- response = self.chat_sdk.send(prompt)
284
- break
285
- except Exception as e:
286
- last_error = e
287
- error_msg = str(e).lower()
288
-
289
- # Check for specific error types
290
- if "token" in error_msg and "limit" in error_msg:
291
- # Token limit error - reduce content or max_tokens
292
- self.log.warning(
293
- f"Token limit exceeded. Attempting with reduced content..."
294
- )
295
- # Truncate content to 75% of original
296
- truncated_content = (
297
- content[: int(len(content) * 0.75)]
298
- + "\n\n[Content truncated due to length...]"
299
- )
300
- prompt = self.generate_summary_prompt(
301
- truncated_content, content_type, style
302
- )
303
- elif "connection" in error_msg or "timeout" in error_msg:
304
- self.log.warning(f"Connection error on attempt {attempt + 1}: {e}")
305
- if attempt < self.max_retries - 1:
306
- time.sleep(self.retry_delay * (attempt + 1))
307
- continue
308
- else:
309
- self.log.error(f"Unexpected error on attempt {attempt + 1}: {e}")
310
-
311
- if attempt >= self.max_retries - 1:
312
- raise RuntimeError(
313
- f"Failed to generate {style} summary after {self.max_retries} attempts: {last_error}"
314
- )
315
-
316
- # Get performance stats
317
- try:
318
- perf_stats = self.llm_client.get_performance_stats()
319
- except Exception as e:
320
- self.log.warning(f"Failed to get performance stats: {e}")
321
- perf_stats = {}
322
-
323
- # Calculate processing time
324
- processing_time_ms = int((time.time() - start_time) * 1000)
325
-
326
- # Format result based on style
327
- result = {"text": response.text}
328
-
329
- # Add style-specific fields
330
- if style == "action_items":
331
- # Try to parse action items from response
332
- lines = response.text.strip().split("\n")
333
- items = []
334
- for line in lines:
335
- line = line.strip()
336
- # Include all non-empty lines except obvious headers
337
- if (
338
- line
339
- and not line.lower().startswith("action items:")
340
- and not line.startswith("**Action")
341
- ):
342
- items.append(line)
343
- if items:
344
- result["items"] = items
345
-
346
- elif style == "participants":
347
- if content_type == "email":
348
- # Try to parse JSON response for email participants
349
- try:
350
- participants_data = json.loads(response.text)
351
- result.update(participants_data)
352
- except:
353
- # Fallback to text if not valid JSON
354
- pass
355
- else:
356
- # Extract participants from transcript response
357
- lines = response.text.strip().split("\n")
358
- participants = []
359
- for line in lines:
360
- line = line.strip()
361
- # Include all non-empty lines (HTML viewer will format properly)
362
- if line and not line.lower().startswith("participants:"):
363
- participants.append(line)
364
- if participants:
365
- result["participants"] = participants
366
-
367
- # Add performance data
368
- result["performance"] = {
369
- "total_tokens": perf_stats.get("input_tokens", 0)
370
- + perf_stats.get("output_tokens", 0),
371
- "prompt_tokens": perf_stats.get("input_tokens", 0),
372
- "completion_tokens": perf_stats.get("output_tokens", 0),
373
- "time_to_first_token_ms": int(
374
- perf_stats.get("time_to_first_token", 0) * 1000
375
- ),
376
- "tokens_per_second": perf_stats.get("tokens_per_second", 0),
377
- "processing_time_ms": processing_time_ms,
378
- }
379
-
380
- return result
381
-
382
- def summarize_combined(
383
- self, content: str, content_type: str, styles: List[str]
384
- ) -> Dict[str, Dict[str, Any]]:
385
- """Generate summaries for multiple styles in a single LLM call"""
386
- start_time = time.time()
387
-
388
- # Set appropriate system prompt
389
- system_prompt = SYSTEM_PROMPTS.get(content_type, SYSTEM_PROMPTS["transcript"])
390
- self.chat_sdk.config.system_prompt = system_prompt
391
-
392
- # Generate combined prompt
393
- prompt = self.generate_combined_prompt(content, content_type, styles)
394
-
395
- # Get combined summary
396
- response = self.chat_sdk.send(prompt)
397
-
398
- # Get performance stats
399
- perf_stats = self.llm_client.get_performance_stats()
400
-
401
- # Calculate processing time
402
- processing_time_ms = int((time.time() - start_time) * 1000)
403
-
404
- # Parse response into sections
405
- # This is a simple parser - in production, might want more robust parsing
406
- response_text = response.text
407
- results = {}
408
-
409
- for style in styles:
410
- # Look for style header in response
411
- style_upper = style.upper()
412
- start_markers = [
413
- f"{style_upper}:",
414
- f"**{style_upper}**:",
415
- f"# {style_upper}",
416
- f"## {style_upper}",
417
- ]
418
-
419
- section_start = -1
420
- for marker in start_markers:
421
- idx = response_text.find(marker)
422
- if idx != -1:
423
- section_start = idx + len(marker)
424
- break
425
-
426
- if section_start == -1:
427
- # Fallback - use entire response for first style
428
- if not results:
429
- results[style] = {"text": response_text.strip()}
430
- continue
431
-
432
- # Find end of section (next style header or end of text)
433
- section_end = len(response_text)
434
- for next_style in styles:
435
- if next_style == style:
436
- continue
437
- next_upper = next_style.upper()
438
- for marker in [
439
- f"{next_upper}:",
440
- f"**{next_upper}**:",
441
- f"# {next_upper}",
442
- f"## {next_upper}",
443
- ]:
444
- idx = response_text.find(marker, section_start)
445
- if idx != -1 and idx < section_end:
446
- section_end = idx
447
-
448
- section_text = response_text[section_start:section_end].strip()
449
- results[style] = {"text": section_text}
450
-
451
- # Add shared performance data to each result
452
- base_perf = {
453
- "total_tokens": perf_stats.get("input_tokens", 0)
454
- + perf_stats.get("output_tokens", 0),
455
- "prompt_tokens": perf_stats.get("input_tokens", 0),
456
- "completion_tokens": perf_stats.get("output_tokens", 0),
457
- "time_to_first_token_ms": int(
458
- perf_stats.get("time_to_first_token", 0) * 1000
459
- ),
460
- "tokens_per_second": perf_stats.get("tokens_per_second", 0),
461
- "processing_time_ms": processing_time_ms,
462
- }
463
-
464
- # Distribute performance metrics proportionally (simplified)
465
- style_count = len(styles)
466
- for style in results:
467
- results[style]["performance"] = {
468
- **base_perf,
469
- "total_tokens": base_perf["total_tokens"] // style_count,
470
- "completion_tokens": base_perf["completion_tokens"] // style_count,
471
- }
472
-
473
- return results
474
-
475
- def summarize(
476
- self, content: str, input_file: Optional[str] = None
477
- ) -> Dict[str, Any]:
478
- """Main summarization method"""
479
- start_time = time.time()
480
-
481
- # Detect content type
482
- content_type = self.detect_content_type(content)
483
-
484
- # Filter applicable styles
485
- applicable_styles = self.config.styles.copy()
486
- if content_type == "email" and "participants" in applicable_styles:
487
- # Keep participants for email but handle differently
488
- pass
489
-
490
- # Generate summaries
491
- if self.config.combined_prompt and len(applicable_styles) > 1:
492
- # Use combined prompt for efficiency
493
- summaries = self.summarize_combined(
494
- content, content_type, applicable_styles
495
- )
496
- else:
497
- # Generate each style independently
498
- summaries = {}
499
- for style in applicable_styles:
500
- summaries[style] = self.summarize_with_style(
501
- content, content_type, style
502
- )
503
-
504
- # Calculate aggregate performance
505
- total_processing_time = int((time.time() - start_time) * 1000)
506
-
507
- # Build output structure
508
- if len(applicable_styles) == 1:
509
- # Single style output
510
- style = applicable_styles[0]
511
- output = {
512
- "metadata": {
513
- "input_file": input_file or "stdin",
514
- "input_type": content_type,
515
- "model": self.config.model,
516
- "timestamp": datetime.now().isoformat(),
517
- "processing_time_ms": total_processing_time,
518
- "summary_style": style,
519
- },
520
- "summary": summaries[style],
521
- "performance": summaries[style].get("performance", {}),
522
- "original_content": content,
523
- }
524
- else:
525
- # Multiple styles output
526
- output = {
527
- "metadata": {
528
- "input_file": input_file or "stdin",
529
- "input_type": content_type,
530
- "model": self.config.model,
531
- "timestamp": datetime.now().isoformat(),
532
- "processing_time_ms": total_processing_time,
533
- "summary_styles": applicable_styles,
534
- },
535
- "summaries": summaries,
536
- "aggregate_performance": {
537
- "total_tokens": sum(
538
- s.get("performance", {}).get("total_tokens", 0)
539
- for s in summaries.values()
540
- ),
541
- "total_processing_time_ms": total_processing_time,
542
- "model_info": {
543
- "model": self.config.model,
544
- "use_local": not (
545
- self.config.use_claude or self.config.use_chatgpt
546
- ),
547
- "use_claude": self.config.use_claude,
548
- "use_chatgpt": self.config.use_chatgpt,
549
- },
550
- },
551
- "original_content": content,
552
- }
553
-
554
- return output
555
-
556
- def summarize_file(self, file_path: Path) -> Dict[str, Any]:
557
- """Summarize a single file"""
558
- self.log.info(f"Summarizing file: {file_path}")
559
-
560
- # Validate file exists
561
- if not file_path.exists():
562
- raise FileNotFoundError(f"File not found: {file_path}")
563
-
564
- # Check file size
565
- file_size_mb = file_path.stat().st_size / (1024 * 1024)
566
- if file_size_mb > 10:
567
- self.log.warning(
568
- f"Large file ({file_size_mb:.1f}MB) may exceed token limits"
569
- )
570
-
571
- try:
572
- content = file_path.read_text(encoding="utf-8")
573
- if not content.strip():
574
- raise ValueError(f"File is empty: {file_path}")
575
- return self.summarize(content, str(file_path))
576
- except UnicodeDecodeError:
577
- # Try alternative encodings
578
- for encoding in ["latin-1", "cp1252"]:
579
- try:
580
- content = file_path.read_text(encoding=encoding)
581
- self.log.info(f"Successfully read file with {encoding} encoding")
582
- return self.summarize(content, str(file_path))
583
- except UnicodeDecodeError:
584
- continue
585
- raise ValueError(
586
- f"Unable to decode file {file_path}. File may be binary or use unsupported encoding."
587
- )
588
- except Exception as e:
589
- self.log.error(f"Error processing file {file_path}: {e}")
590
- raise
591
-
592
- def summarize_directory(self, dir_path: Path) -> List[Dict[str, Any]]:
593
- """Summarize all files in a directory"""
594
- self.log.info(f"Summarizing directory: {dir_path}")
595
-
596
- # Validate directory exists
597
- if not dir_path.exists():
598
- raise FileNotFoundError(f"Directory not found: {dir_path}")
599
- if not dir_path.is_dir():
600
- raise ValueError(f"Path is not a directory: {dir_path}")
601
-
602
- results = []
603
- errors = []
604
-
605
- # Find all text files
606
- text_extensions = [".txt", ".md", ".log", ".email", ".transcript"]
607
- files = []
608
- for ext in text_extensions:
609
- files.extend(dir_path.glob(f"*{ext}"))
610
-
611
- if not files:
612
- self.log.warning(f"No text files found in {dir_path}")
613
- return results
614
-
615
- self.log.info(f"Found {len(files)} files to process")
616
-
617
- for i, file_path in enumerate(sorted(files), 1):
618
- try:
619
- self.log.info(f"Processing file {i}/{len(files)}: {file_path.name}")
620
- result = self.summarize_file(file_path)
621
- results.append(result)
622
- except Exception as e:
623
- error_msg = f"Failed to summarize {file_path}: {e}"
624
- self.log.error(error_msg)
625
- errors.append(error_msg)
626
- continue
627
-
628
- if errors:
629
- self.log.warning(
630
- f"Completed with {len(errors)} errors:\n" + "\n".join(errors)
631
- )
632
-
633
- return results
1
+ #!/usr/bin/env python3
2
+ # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """
6
+ Gaia Summarizer Application - Thin wrapper that delegates to SummarizerAgent
7
+ """
8
+
9
+ import re
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Literal, Optional
13
+
14
+ from gaia.agents.summarize.agent import SummarizerAgent
15
+ from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME
16
+ from gaia.logger import get_logger
17
+
18
+
19
+ # Utility functions for email validation (used by CLI and other tools)
20
+ def validate_email_address(email: str) -> bool:
21
+ """Validate email address format"""
22
+ email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
23
+ return re.match(email_pattern, email.strip()) is not None
24
+
25
+
26
+ def validate_email_list(email_list: str) -> list[str]:
27
+ """Validate and parse comma-separated email list"""
28
+ if not email_list:
29
+ return []
30
+ emails = [e.strip() for e in email_list.split(",") if e.strip()]
31
+ invalid_emails = [e for e in emails if not validate_email_address(e)]
32
+ if invalid_emails:
33
+ raise ValueError(f"Invalid email address(es): {', '.join(invalid_emails)}")
34
+ return emails
35
+
36
+
37
+ @dataclass
38
+ class SummaryConfig:
39
+ """Configuration for summarization"""
40
+
41
+ model: str = DEFAULT_MODEL_NAME
42
+ max_tokens: int = 1024
43
+ input_type: Literal["transcript", "email", "auto"] = "auto"
44
+ styles: List[str] = None
45
+ combined_prompt: bool = False
46
+ use_claude: bool = False
47
+ use_chatgpt: bool = False
48
+
49
+ def __post_init__(self):
50
+ if self.styles is None:
51
+ self.styles = ["executive", "participants", "action_items"]
52
+
53
+ # Auto-detect OpenAI models (gpt-*) to use ChatGPT
54
+ if self.model and self.model.lower().startswith("gpt"):
55
+ self.use_chatgpt = True
56
+
57
+
58
+ class SummarizerApp:
59
+ """Main application class for summarization (delegates to SummarizerAgent)"""
60
+
61
+ def __init__(self, config: Optional[SummaryConfig] = None):
62
+ self.config = config or SummaryConfig()
63
+ self.log = get_logger(__name__)
64
+ self.agent = SummarizerAgent(
65
+ model=self.config.model,
66
+ max_tokens=self.config.max_tokens,
67
+ styles=self.config.styles,
68
+ combined_prompt=self.config.combined_prompt,
69
+ use_claude=self.config.use_claude,
70
+ use_chatgpt=self.config.use_chatgpt,
71
+ )
72
+
73
+ def summarize_file(
74
+ self,
75
+ file_path: Path,
76
+ styles: Optional[List[str]] = None,
77
+ combined_prompt: Optional[bool] = None,
78
+ input_type: str = "auto",
79
+ ) -> Dict[str, Any]:
80
+ # Always convert file_path to Path object if it's a string
81
+ if not isinstance(file_path, Path):
82
+ file_path = Path(file_path)
83
+ return self.agent.summarize_file(
84
+ file_path,
85
+ styles=styles,
86
+ combined_prompt=combined_prompt,
87
+ input_type=input_type,
88
+ )
89
+
90
+ def summarize_directory(
91
+ self,
92
+ dir_path: Path,
93
+ styles: Optional[List[str]] = None,
94
+ combined_prompt: Optional[bool] = None,
95
+ input_type: str = "auto",
96
+ ) -> List[Dict[str, Any]]:
97
+ return self.agent.summarize_directory(
98
+ dir_path,
99
+ styles=styles,
100
+ combined_prompt=combined_prompt,
101
+ input_type=input_type,
102
+ )
103
+
104
+ def summarize(
105
+ self,
106
+ content: str,
107
+ styles: Optional[List[str]] = None,
108
+ combined_prompt: Optional[bool] = None,
109
+ input_type: str = "auto",
110
+ ) -> Dict[str, Any]:
111
+ return self.agent.summarize(
112
+ content,
113
+ styles=styles,
114
+ combined_prompt=combined_prompt,
115
+ input_type=input_type,
116
+ )