amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
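
Beyond the per-file counts, the list above shows three structural moves in 0.15.1: the monolithic gaia/llm/llm_client.py (729 lines) is deleted in favor of gaia/llm/base_client.py, gaia/llm/exceptions.py, gaia/llm/factory.py, and per-backend modules under gaia/llm/providers/ (claude, lemonade, openai_provider); a new gaia/agents/summarize/ package is added; and gaia/agents/code/app.py is replaced by gaia/agents/code/cli.py. Below is a minimal sketch of the base/factory/provider pattern this layout suggests; every class and function name in it is an illustrative assumption, not gaia's actual API.

# Illustrative sketch only: the names below are assumptions drawn from the new
# file layout (base_client.py, factory.py, providers/), not gaia's real API.
from abc import ABC, abstractmethod


class BaseLLMClient(ABC):
    """Shared interface each provider implements (assumed role of base_client.py)."""

    @abstractmethod
    def generate(self, prompt: str, max_tokens: int = 512) -> str:
        """Return the model's completion for a prompt."""


class LemonadeProvider(BaseLLMClient):
    """Local-inference backend (assumed role of providers/lemonade.py)."""

    def generate(self, prompt: str, max_tokens: int = 512) -> str:
        return f"[lemonade stub] {prompt[:40]}"  # stand-in for a real local call


class OpenAIProvider(BaseLLMClient):
    """Hosted backend (assumed role of providers/openai_provider.py)."""

    def generate(self, prompt: str, max_tokens: int = 512) -> str:
        return f"[openai stub] {prompt[:40]}"  # stand-in for a real API call


def create_llm_client(provider: str) -> BaseLLMClient:
    """Assumed role of factory.py: resolve a provider name to a client."""
    registry = {"lemonade": LemonadeProvider, "openai": OpenAIProvider}
    if provider not in registry:
        raise ValueError(f"Unknown LLM provider: {provider!r}")
    return registry[provider]()

The point of such a factory is that callers select a backend by name and code against one interface, which matches the one-module-per-provider layout replacing the single llm_client.py.
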
gaia/apps/summarize/app.py
CHANGED
@@ -1,633 +1,116 @@
-#!/usr/bin/env python3
-# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
-# SPDX-License-Identifier: MIT
-
-"""
-Gaia Summarizer Application -
-"""
-
[old lines 9-116: import statements and earlier definitions were blanked by the diff extraction and could not be recovered]
-        # First try simple heuristics
-        email_patterns = [
-            r"From:\s*\S+",
-            r"To:\s*\S+",
-            r"Subject:\s*\S+",
-            r"Dear\s+\w+",
-            r"Sincerely,?\s*\n",
-            r"Best regards,?\s*\n",
-        ]
-
-        transcript_patterns = [
-            r"\w+:\s*[^\n]+",  # Speaker: dialogue
-            r"\[\d{1,2}:\d{2}\]",  # Time stamps
-            r"\(\d{1,2}:\d{2}\)",
-        ]
-
-        # Count pattern matches
-        email_score = sum(
-            1
-            for pattern in email_patterns
-            if re.search(pattern, content[:500], re.IGNORECASE)
-        )
-        transcript_score = sum(
-            1 for pattern in transcript_patterns if re.search(pattern, content[:500])
-        )
-
-        if email_score > transcript_score and email_score >= 2:
-            detected_type = "email"
-        elif transcript_score > email_score and transcript_score >= 2:
-            detected_type = "transcript"
-        else:
-            # Use LLM as fallback with retry logic
-            detection_prompt = (
-                """Analyze this text and determine if it's a meeting transcript or an email.
-
-A meeting transcript typically has:
-- Multiple speakers with dialogue
-- Time stamps or speaker labels
-- Conversational flow
-
-An email typically has:
-- From/To/Subject headers or email-like structure
-- Formal greeting and closing
-- Single author perspective
-
-Respond with ONLY one word: 'transcript' or 'email'
-
-Text to analyze:
-"""
-                + content[:1000]
-            )  # Only use first 1000 chars for detection
-
-            for attempt in range(self.max_retries):
-                try:
-                    response = self.llm_client.generate(
-                        detection_prompt, model=self.config.model, max_tokens=10
-                    )
-
-                    detected_type = response.strip().lower()
-                    if detected_type not in ["transcript", "email"]:
-                        # Default to transcript if unclear
-                        detected_type = "transcript"
-                    break
-                except Exception as e:
-                    if attempt < self.max_retries - 1:
-                        self.log.warning(
-                            f"Content type detection attempt {attempt + 1} failed: {e}. Retrying..."
-                        )
-                        time.sleep(self.retry_delay * (attempt + 1))
-                    else:
-                        self.log.error(
-                            f"Failed to detect content type after {self.max_retries} attempts"
-                        )
-                        detected_type = "transcript"  # Default fallback
-
-        self.log.info(f"Auto-detected content type: {detected_type}")
-        return detected_type
-
-    def generate_summary_prompt(
-        self, content: str, content_type: str, style: str
-    ) -> str:
-        """Generate the prompt for a specific summary style"""
-        style_instruction = SUMMARY_STYLES.get(style, SUMMARY_STYLES["brief"])
-
-        if style == "participants" and content_type == "email":
-            # Special handling for email participants
-            prompt = f"""Extract the sender and all recipients from this email.
-
-Format your response as JSON:
-{{
-    "sender": "sender email/name",
-    "recipients": ["recipient1", "recipient2"],
-    "cc": ["cc1", "cc2"] (if any),
-    "bcc": ["bcc1"] (if any)
-}}
-
-Email content:
-{content}"""
-        elif style == "action_items":
-            prompt = f"""Extract all action items from this {content_type}.
-
-{style_instruction}
-
-Format each action item with:
-- The specific action required
-- Who is responsible (if mentioned)
-- Any deadline or timeline (if mentioned)
-
-If no action items are found, respond with "No specific action items identified."
-
-Content:
-{content}"""
-        else:
-            prompt = f"""Analyze this {content_type} and {style_instruction}
-
-Content:
-{content}"""
-
-        return prompt
-
-    def generate_combined_prompt(
-        self, content: str, content_type: str, styles: List[str]
-    ) -> str:
-        """Generate a single prompt for multiple summary styles"""
-        sections = []
-        for style in styles:
-            style_instruction = SUMMARY_STYLES.get(style, SUMMARY_STYLES["brief"])
-            sections.append(f"- {style.upper()}: {style_instruction}")
-
-        prompt = f"""Analyze this {content_type} and generate the following summaries:
-
-{chr(10).join(sections)}
-
-Format your response with clear section headers for each style.
-
-Content:
-{content}"""
-
-        return prompt
-
-    def summarize_with_style(
-        self, content: str, content_type: str, style: str
-    ) -> Dict[str, Any]:
-        """Generate a summary for a specific style with retry logic"""
-        start_time = time.time()
-
-        # Set appropriate system prompt
-        system_prompt = SYSTEM_PROMPTS.get(content_type, SYSTEM_PROMPTS["transcript"])
-        self.chat_sdk.config.system_prompt = system_prompt
-
-        # Generate prompt
-        prompt = self.generate_summary_prompt(content, content_type, style)
-
-        # Check if content might exceed token limits
-        estimated_tokens = len(content.split()) + len(prompt.split())
-        if estimated_tokens > 3000:  # Conservative estimate
-            self.log.warning(
-                f"Content may exceed token limits. Estimated tokens: {estimated_tokens}"
-            )
-
-        # Get summary with retry logic
-        response = None
-        last_error = None
-
-        for attempt in range(self.max_retries):
-            try:
-                response = self.chat_sdk.send(prompt)
-                break
-            except Exception as e:
-                last_error = e
-                error_msg = str(e).lower()
-
-                # Check for specific error types
-                if "token" in error_msg and "limit" in error_msg:
-                    # Token limit error - reduce content or max_tokens
-                    self.log.warning(
-                        f"Token limit exceeded. Attempting with reduced content..."
-                    )
-                    # Truncate content to 75% of original
-                    truncated_content = (
-                        content[: int(len(content) * 0.75)]
-                        + "\n\n[Content truncated due to length...]"
-                    )
-                    prompt = self.generate_summary_prompt(
-                        truncated_content, content_type, style
-                    )
-                elif "connection" in error_msg or "timeout" in error_msg:
-                    self.log.warning(f"Connection error on attempt {attempt + 1}: {e}")
-                    if attempt < self.max_retries - 1:
-                        time.sleep(self.retry_delay * (attempt + 1))
-                        continue
-                else:
-                    self.log.error(f"Unexpected error on attempt {attempt + 1}: {e}")
-
-                if attempt >= self.max_retries - 1:
-                    raise RuntimeError(
-                        f"Failed to generate {style} summary after {self.max_retries} attempts: {last_error}"
-                    )
-
-        # Get performance stats
-        try:
-            perf_stats = self.llm_client.get_performance_stats()
-        except Exception as e:
-            self.log.warning(f"Failed to get performance stats: {e}")
-            perf_stats = {}
-
-        # Calculate processing time
-        processing_time_ms = int((time.time() - start_time) * 1000)
-
-        # Format result based on style
-        result = {"text": response.text}
-
-        # Add style-specific fields
-        if style == "action_items":
-            # Try to parse action items from response
-            lines = response.text.strip().split("\n")
-            items = []
-            for line in lines:
-                line = line.strip()
-                # Include all non-empty lines except obvious headers
-                if (
-                    line
-                    and not line.lower().startswith("action items:")
-                    and not line.startswith("**Action")
-                ):
-                    items.append(line)
-            if items:
-                result["items"] = items
-
-        elif style == "participants":
-            if content_type == "email":
-                # Try to parse JSON response for email participants
-                try:
-                    participants_data = json.loads(response.text)
-                    result.update(participants_data)
-                except:
-                    # Fallback to text if not valid JSON
-                    pass
-            else:
-                # Extract participants from transcript response
-                lines = response.text.strip().split("\n")
-                participants = []
-                for line in lines:
-                    line = line.strip()
-                    # Include all non-empty lines (HTML viewer will format properly)
-                    if line and not line.lower().startswith("participants:"):
-                        participants.append(line)
-                if participants:
-                    result["participants"] = participants
-
-        # Add performance data
-        result["performance"] = {
-            "total_tokens": perf_stats.get("input_tokens", 0)
-            + perf_stats.get("output_tokens", 0),
-            "prompt_tokens": perf_stats.get("input_tokens", 0),
-            "completion_tokens": perf_stats.get("output_tokens", 0),
-            "time_to_first_token_ms": int(
-                perf_stats.get("time_to_first_token", 0) * 1000
-            ),
-            "tokens_per_second": perf_stats.get("tokens_per_second", 0),
-            "processing_time_ms": processing_time_ms,
-        }
-
-        return result
-
-    def summarize_combined(
-        self, content: str, content_type: str, styles: List[str]
-    ) -> Dict[str, Dict[str, Any]]:
-        """Generate summaries for multiple styles in a single LLM call"""
-        start_time = time.time()
-
-        # Set appropriate system prompt
-        system_prompt = SYSTEM_PROMPTS.get(content_type, SYSTEM_PROMPTS["transcript"])
-        self.chat_sdk.config.system_prompt = system_prompt
-
-        # Generate combined prompt
-        prompt = self.generate_combined_prompt(content, content_type, styles)
-
-        # Get combined summary
-        response = self.chat_sdk.send(prompt)
-
-        # Get performance stats
-        perf_stats = self.llm_client.get_performance_stats()
-
-        # Calculate processing time
-        processing_time_ms = int((time.time() - start_time) * 1000)
-
-        # Parse response into sections
-        # This is a simple parser - in production, might want more robust parsing
-        response_text = response.text
-        results = {}
-
-        for style in styles:
-            # Look for style header in response
-            style_upper = style.upper()
-            start_markers = [
-                f"{style_upper}:",
-                f"**{style_upper}**:",
-                f"# {style_upper}",
-                f"## {style_upper}",
-            ]
-
-            section_start = -1
-            for marker in start_markers:
-                idx = response_text.find(marker)
-                if idx != -1:
-                    section_start = idx + len(marker)
-                    break
-
-            if section_start == -1:
-                # Fallback - use entire response for first style
-                if not results:
-                    results[style] = {"text": response_text.strip()}
-                continue
-
-            # Find end of section (next style header or end of text)
-            section_end = len(response_text)
-            for next_style in styles:
-                if next_style == style:
-                    continue
-                next_upper = next_style.upper()
-                for marker in [
-                    f"{next_upper}:",
-                    f"**{next_upper}**:",
-                    f"# {next_upper}",
-                    f"## {next_upper}",
-                ]:
-                    idx = response_text.find(marker, section_start)
-                    if idx != -1 and idx < section_end:
-                        section_end = idx
-
-            section_text = response_text[section_start:section_end].strip()
-            results[style] = {"text": section_text}
-
-        # Add shared performance data to each result
-        base_perf = {
-            "total_tokens": perf_stats.get("input_tokens", 0)
-            + perf_stats.get("output_tokens", 0),
-            "prompt_tokens": perf_stats.get("input_tokens", 0),
-            "completion_tokens": perf_stats.get("output_tokens", 0),
-            "time_to_first_token_ms": int(
-                perf_stats.get("time_to_first_token", 0) * 1000
-            ),
-            "tokens_per_second": perf_stats.get("tokens_per_second", 0),
-            "processing_time_ms": processing_time_ms,
-        }
-
-        # Distribute performance metrics proportionally (simplified)
-        style_count = len(styles)
-        for style in results:
-            results[style]["performance"] = {
-                **base_perf,
-                "total_tokens": base_perf["total_tokens"] // style_count,
-                "completion_tokens": base_perf["completion_tokens"] // style_count,
-            }
-
-        return results
-
-    def summarize(
-        self, content: str, input_file: Optional[str] = None
-    ) -> Dict[str, Any]:
-        """Main summarization method"""
-        start_time = time.time()
-
-        # Detect content type
-        content_type = self.detect_content_type(content)
-
-        # Filter applicable styles
-        applicable_styles = self.config.styles.copy()
-        if content_type == "email" and "participants" in applicable_styles:
-            # Keep participants for email but handle differently
-            pass
-
-        # Generate summaries
-        if self.config.combined_prompt and len(applicable_styles) > 1:
-            # Use combined prompt for efficiency
-            summaries = self.summarize_combined(
-                content, content_type, applicable_styles
-            )
-        else:
-            # Generate each style independently
-            summaries = {}
-            for style in applicable_styles:
-                summaries[style] = self.summarize_with_style(
-                    content, content_type, style
-                )
-
-        # Calculate aggregate performance
-        total_processing_time = int((time.time() - start_time) * 1000)
-
-        # Build output structure
-        if len(applicable_styles) == 1:
-            # Single style output
-            style = applicable_styles[0]
-            output = {
-                "metadata": {
-                    "input_file": input_file or "stdin",
-                    "input_type": content_type,
-                    "model": self.config.model,
-                    "timestamp": datetime.now().isoformat(),
-                    "processing_time_ms": total_processing_time,
-                    "summary_style": style,
-                },
-                "summary": summaries[style],
-                "performance": summaries[style].get("performance", {}),
-                "original_content": content,
-            }
-        else:
-            # Multiple styles output
-            output = {
-                "metadata": {
-                    "input_file": input_file or "stdin",
-                    "input_type": content_type,
-                    "model": self.config.model,
-                    "timestamp": datetime.now().isoformat(),
-                    "processing_time_ms": total_processing_time,
-                    "summary_styles": applicable_styles,
-                },
-                "summaries": summaries,
-                "aggregate_performance": {
-                    "total_tokens": sum(
-                        s.get("performance", {}).get("total_tokens", 0)
-                        for s in summaries.values()
-                    ),
-                    "total_processing_time_ms": total_processing_time,
-                    "model_info": {
-                        "model": self.config.model,
-                        "use_local": not (
-                            self.config.use_claude or self.config.use_chatgpt
-                        ),
-                        "use_claude": self.config.use_claude,
-                        "use_chatgpt": self.config.use_chatgpt,
-                    },
-                },
-                "original_content": content,
-            }
-
-        return output
-
-    def summarize_file(self, file_path: Path) -> Dict[str, Any]:
-        """Summarize a single file"""
-        self.log.info(f"Summarizing file: {file_path}")
-
-        # Validate file exists
-        if not file_path.exists():
-            raise FileNotFoundError(f"File not found: {file_path}")
-
-        # Check file size
-        file_size_mb = file_path.stat().st_size / (1024 * 1024)
-        if file_size_mb > 10:
-            self.log.warning(
-                f"Large file ({file_size_mb:.1f}MB) may exceed token limits"
-            )
-
-        try:
-            content = file_path.read_text(encoding="utf-8")
-            if not content.strip():
-                raise ValueError(f"File is empty: {file_path}")
-            return self.summarize(content, str(file_path))
-        except UnicodeDecodeError:
-            # Try alternative encodings
-            for encoding in ["latin-1", "cp1252"]:
-                try:
-                    content = file_path.read_text(encoding=encoding)
-                    self.log.info(f"Successfully read file with {encoding} encoding")
-                    return self.summarize(content, str(file_path))
-                except UnicodeDecodeError:
-                    continue
-            raise ValueError(
-                f"Unable to decode file {file_path}. File may be binary or use unsupported encoding."
-            )
-        except Exception as e:
-            self.log.error(f"Error processing file {file_path}: {e}")
-            raise
-
-    def summarize_directory(self, dir_path: Path) -> List[Dict[str, Any]]:
-        """Summarize all files in a directory"""
-        self.log.info(f"Summarizing directory: {dir_path}")
-
-        # Validate directory exists
-        if not dir_path.exists():
-            raise FileNotFoundError(f"Directory not found: {dir_path}")
-        if not dir_path.is_dir():
-            raise ValueError(f"Path is not a directory: {dir_path}")
-
-        results = []
-        errors = []
-
-        # Find all text files
-        text_extensions = [".txt", ".md", ".log", ".email", ".transcript"]
-        files = []
-        for ext in text_extensions:
-            files.extend(dir_path.glob(f"*{ext}"))
-
-        if not files:
-            self.log.warning(f"No text files found in {dir_path}")
-            return results
-
-        self.log.info(f"Found {len(files)} files to process")
-
-        for i, file_path in enumerate(sorted(files), 1):
-            try:
-                self.log.info(f"Processing file {i}/{len(files)}: {file_path.name}")
-                result = self.summarize_file(file_path)
-                results.append(result)
-            except Exception as e:
-                error_msg = f"Failed to summarize {file_path}: {e}"
-                self.log.error(error_msg)
-                errors.append(error_msg)
-                continue
-
-        if errors:
-            self.log.warning(
-                f"Completed with {len(errors)} errors:\n" + "\n".join(errors)
-            )
-
-        return results
+#!/usr/bin/env python3
+# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+"""
+Gaia Summarizer Application - Thin wrapper that delegates to SummarizerAgent
+"""
+
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Literal, Optional
+
+from gaia.agents.summarize.agent import SummarizerAgent
+from gaia.llm.lemonade_client import DEFAULT_MODEL_NAME
+from gaia.logger import get_logger
+
+
+# Utility functions for email validation (used by CLI and other tools)
+def validate_email_address(email: str) -> bool:
+    """Validate email address format"""
+    email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
+    return re.match(email_pattern, email.strip()) is not None
+
+
+def validate_email_list(email_list: str) -> list[str]:
+    """Validate and parse comma-separated email list"""
+    if not email_list:
+        return []
+    emails = [e.strip() for e in email_list.split(",") if e.strip()]
+    invalid_emails = [e for e in emails if not validate_email_address(e)]
+    if invalid_emails:
+        raise ValueError(f"Invalid email address(es): {', '.join(invalid_emails)}")
+    return emails
+
+
+@dataclass
+class SummaryConfig:
+    """Configuration for summarization"""
+
+    model: str = DEFAULT_MODEL_NAME
+    max_tokens: int = 1024
+    input_type: Literal["transcript", "email", "auto"] = "auto"
+    styles: List[str] = None
+    combined_prompt: bool = False
+    use_claude: bool = False
+    use_chatgpt: bool = False
+
+    def __post_init__(self):
+        if self.styles is None:
+            self.styles = ["executive", "participants", "action_items"]
+
+        # Auto-detect OpenAI models (gpt-*) to use ChatGPT
+        if self.model and self.model.lower().startswith("gpt"):
+            self.use_chatgpt = True
+
+
+class SummarizerApp:
+    """Main application class for summarization (delegates to SummarizerAgent)"""
+
+    def __init__(self, config: Optional[SummaryConfig] = None):
+        self.config = config or SummaryConfig()
+        self.log = get_logger(__name__)
+        self.agent = SummarizerAgent(
+            model=self.config.model,
+            max_tokens=self.config.max_tokens,
+            styles=self.config.styles,
+            combined_prompt=self.config.combined_prompt,
+            use_claude=self.config.use_claude,
+            use_chatgpt=self.config.use_chatgpt,
+        )
+
+    def summarize_file(
+        self,
+        file_path: Path,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> Dict[str, Any]:
+        # Always convert file_path to Path object if it's a string
+        if not isinstance(file_path, Path):
+            file_path = Path(file_path)
+        return self.agent.summarize_file(
+            file_path,
+            styles=styles,
+            combined_prompt=combined_prompt,
+            input_type=input_type,
+        )
+
+    def summarize_directory(
+        self,
+        dir_path: Path,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> List[Dict[str, Any]]:
+        return self.agent.summarize_directory(
+            dir_path,
+            styles=styles,
+            combined_prompt=combined_prompt,
+            input_type=input_type,
+        )
+
+    def summarize(
+        self,
+        content: str,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> Dict[str, Any]:
+        return self.agent.summarize(
+            content,
+            styles=styles,
+            combined_prompt=combined_prompt,
+            input_type=input_type,
+        )