amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
gaia/agents/summarize/agent.py (new file, entry 98 above)
@@ -0,0 +1,885 @@
+# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+"""
+SummarizerAgent: GAIA agent for advanced text/document summarization.
+"""
+
+import json
+import re
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, Generator, List, Optional
+
+from gaia.agents.base import Agent
+from gaia.chat.sdk import ChatConfig, ChatSDK
+from gaia.logger import get_logger
+from gaia.rag.sdk import RAGSDK
+
+from .prompts import (
+    DETECTION_PROMPT_TEMPLATE,
+    DOCUMENT_SUMMARY_TEMPLATE,
+    ITERATIVE_SUMMARY_TEMPLATE,
+    SUMMARY_STYLES,
+    SYSTEM_PROMPTS,
+)
+
+
+class Chunker:
+    def __init__(self):
+        self.logger = get_logger(__name__)
+        # Simple sentence splitter to avoid NLTK dependency
+        self._sentence_split_regex = re.compile(r"(?<=[.!?])\s+(?=[A-Z])")
+
+    def count_tokens(self, text: str) -> int:
+        """Simple estimation; Lemonade Server does not expose a tokenize endpoint."""
+        chars = len(text)
+        words = len(text.split())
+        est_by_chars = chars // 4
+        est_by_words = int(words * 1.3)
+        num_tokens = max(est_by_chars, est_by_words)
+
+        self.logger.info(f"Approximated token count: {num_tokens} tokens")
+        return num_tokens
+
+    def chunk_text(self, text: str, max_tokens: int, overlap_tokens: int) -> List[str]:
+        if not text:
+            return []
+
+        paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
+        chunks = []
+        current_chunk = []
+        current_tokens = 0
+
+        for para in paragraphs:
+            para_tokens = self.count_tokens(para)
+
+            # Split very long paragraphs into sentences using simple heuristics
+            units = [para]
+            if para_tokens > max_tokens:
+                units = [
+                    s.strip()
+                    for s in self._sentence_split_regex.split(para)
+                    if s.strip()
+                ]
+
+            for unit in units:
+                unit_tokens = self.count_tokens(unit)
+
+                if current_tokens + unit_tokens > max_tokens:
+                    # Output current chunk
+                    if current_chunk:
+                        chunk_text = " ".join(current_chunk)
+                        chunks.append(chunk_text)
+                        self.logger.info(
+                            f"Created chunk {len(chunks)}: {len(chunk_text)} chars"
+                        )
+
+                    # Prepare next chunk with overlap
+                    if overlap_tokens > 0:
+                        overlap = []
+                        overlap_count = 0
+                        for u in reversed(current_chunk):
+                            t = self.count_tokens(u)
+                            if overlap_count + t > overlap_tokens:
+                                break
+                            overlap.insert(0, u)
+                            overlap_count += t
+                        current_chunk = overlap
+                        current_tokens = sum(self.count_tokens(x) for x in overlap)
+                    else:
+                        current_chunk = []
+                        current_tokens = 0
+
+                # Add new unit
+                current_chunk.append(unit)
+                current_tokens += unit_tokens
+
+        # Push the last chunk
+        if current_chunk:
+            chunk_text = " ".join(current_chunk)
+            chunks.append(chunk_text)
+            try:
+                self.logger.info(
+                    f"Created chunk {len(chunks)}: {len(chunk_text)} chars"
+                )
+            except Exception as e:
+                self.logger.warning(f"Failed to log chunk creation: {e}")
+
+        self.logger.info(f"Total chunks created: {len(chunks)}")
+        return chunks
+
+
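The Chunker estimates tokens as max(chars // 4, words * 1.3) and greedily packs paragraph (or sentence) units up to max_tokens. A quick sketch of driving it directly; this assumes the 0.15.2 wheel is installed so the module path shown above is importable:

    from gaia.agents.summarize.agent import Chunker

    chunker = Chunker()
    # Ten ~200-word paragraphs; count_tokens returns max(chars // 4, words * 1.3)
    text = "\n\n".join(f"Paragraph {i}. " + "word " * 200 for i in range(10))
    print(chunker.count_tokens(text))
    # With a 300-token budget each ~260-token paragraph becomes its own chunk;
    # no overlap is carried here because no trailing unit fits within 15 tokens.
    chunks = chunker.chunk_text(text, max_tokens=300, overlap_tokens=15)
    print(len(chunks))  # 10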
+class SummarizerAgent(Agent):
+
+    DEFAULT_MODEL = "Qwen3-4B-Instruct-2507-GGUF"
+
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        max_ctx_size: int = 8192,
+        styles: Optional[List[str]] = None,
+        combined_prompt: bool = False,
+        use_claude: bool = False,
+        use_chatgpt: bool = False,
+    ):
+        self.model = model or self.DEFAULT_MODEL
+        self.max_tokens = max_tokens
+        self.styles = styles or ["executive", "participants", "action_items"]
+        self.combined_prompt = combined_prompt
+        self.use_claude = use_claude
+        self.use_chatgpt = use_chatgpt
+        self.log = get_logger(__name__)
+        chat_config = ChatConfig(
+            model=self.model,
+            max_tokens=self.max_tokens,
+            use_claude=self.use_claude,
+            use_chatgpt=self.use_chatgpt,
+            show_stats=True,
+        )
+        self.chat_sdk = ChatSDK(chat_config)
+        self.rag_sdk = RAGSDK()
+        self.chunker = Chunker()
+        self.llm_client = self.chat_sdk.llm_client
+        self.rag_sdk.llm_client = self.llm_client
+        self.max_retries = 3
+        self.retry_delay = 1.0
+        # Default 8192 balances context size with TTFT for responsive UI.
+        # Can be increased for larger documents if TTFT is not critical.
+        self.max_ctx_size = max_ctx_size
+        self.overlap_tokens_ratio = 0.05
+        self.chunk_tokens = int(self.max_ctx_size * 0.7)
+        self.overlap_tokens = int(self.chunk_tokens * self.overlap_tokens_ratio)
+
+        # Load prompts from prompts.py
+        self.summary_styles = SUMMARY_STYLES
+        self.system_prompts = SYSTEM_PROMPTS
+        self.iterative_summary_template = ITERATIVE_SUMMARY_TEMPLATE
+        self.document_summary_template = DOCUMENT_SUMMARY_TEMPLATE
+        self.detection_prompt_template = DETECTION_PROMPT_TEMPLATE
+
+        # Initialize parent class after setting required attributes
+        super().__init__()
+
+        # Disk cache for extracted text
+        self._text_cache_dir = Path(".gaia") / "text_cache"
+        try:
+            self._text_cache_dir.mkdir(parents=True, exist_ok=True)
+        except Exception as e:
+            raise RuntimeError("Failed to create text cache directory") from e
+
+    def _get_system_prompt(self, content_type: Optional[str] = None) -> str:
+        """Return the system prompt for the agent.
+
+        Args:
+            content_type: Optional content type (email, transcript, pdf).
+                If None, returns default transcript prompt.
+
+        Returns:
+            System prompt string for the specified content type.
+        """
+        if content_type is None:
+            content_type = "transcript"
+        return self.system_prompts.get(
+            content_type, self.system_prompts.get("transcript", "")
+        )
+
+    def _register_tools(self) -> None:
+        """Register tools for the agent. No tools needed for summarizer."""
+
+    def _prepare_chat(self, input_type: str) -> None:
+        """Clear prior chat context and set system prompt for the given input type."""
+        try:
+            self.chat_sdk.clear_history()
+        except Exception as e:
+            self.log.warning(f"Failed to clear chat history: {e}")
+        system_prompt = self._get_system_prompt(input_type)
+        if not system_prompt:
+            raise KeyError(f"Missing system prompt for '{input_type}' in prompts")
+        self.chat_sdk.config.system_prompt = system_prompt
+
+    def _validate_styles(self, styles: Any) -> None:
+        """Validate provided style or list of styles against prompt definitions."""
+        allowed = set((self.summary_styles or {}).keys())
+        provided = styles if isinstance(styles, list) else [styles]
+        invalid = [s for s in provided if s not in allowed]
+        if invalid:
+            allowed_list = ", ".join(sorted(allowed))
+            raise ValueError(
+                f"Unsupported style(s): {', '.join(invalid)}. Allowed styles: {allowed_list}"
+            )
+
+    def _should_use_iterative(self, text: str) -> bool:
+        """Decide if iterative summarization is needed based on estimated tokens."""
+        # Reserve 25% of context for prompts, instructions, and output
+        # Apply additional 15% safety margin to account for token estimation variance
+        effective_limit = int(self.max_ctx_size * 0.75 * 0.87)  # 0.87 = 1/1.15
+        content_tokens = self.chunker.count_tokens(text)
+        should_iterate = content_tokens > effective_limit
+
+        if should_iterate:
+            self.log.info(
+                f"Using iterative summarization: {content_tokens} tokens > {effective_limit} effective limit "
+                f"(65% of {self.max_ctx_size} max context with safety margin)"
+            )
+
+        return should_iterate
+
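For the default max_ctx_size of 8192, these ratios pin down the thresholds; the arithmetic below simply restates the constants defined in __init__ and _should_use_iterative:

    max_ctx_size = 8192
    effective_limit = int(max_ctx_size * 0.75 * 0.87)  # 5345 est. tokens before iterating
    chunk_tokens = int(max_ctx_size * 0.7)             # 5734 est. tokens per chunk
    overlap_tokens = int(chunk_tokens * 0.05)          # 286 est. tokens carried between chunks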
+    def _iterative_summarize(
+        self,
+        text: str,
+        style: str = "brief",
+        content_type: str = "pdf",
+    ) -> Dict[str, Any]:
+        """Iteratively fold large text; reuse streaming generator to avoid duplication."""
+        final_text = ""
+        final_stats: Dict[str, Any] = {}
+        for evt in self._iterative_summary_events(text, content_type, style):
+            if evt.get("is_complete"):
+                final_text = evt.get("text", "")
+                final_stats = evt.get("performance", {})
+        return {"text": final_text, "performance": final_stats}
+
+    def _summarize_content(
+        self,
+        content: str,
+        input_file: Optional[str],
+        input_type: str,
+        styles: Optional[List[str]],
+        combined_prompt: Optional[bool],
+    ) -> Dict[str, Any]:
+        """Summarize content choosing iterative vs direct path, returning structured output."""
+        should_iterate = self._should_use_iterative(content)
+
+        if should_iterate:
+            if input_type == "pdf":
+                self.log.info("Large content detected; using iterative summarization")
+                brief = self._iterative_summarize(
+                    content, "brief", content_type=input_type
+                )
+                return self.summarize(
+                    brief.get("text", ""),
+                    input_file,
+                    input_type=input_type,
+                    styles=styles,
+                    combined_prompt=combined_prompt,
+                )
+            else:
+                self.log.warning(
+                    f"Content is large enough for iterative summarization but input type is '{input_type}'. "
+                    f"Attempting direct summarization which may exceed token limits. "
+                    f"Consider splitting the content manually or converting to PDF."
+                )
+
+        return self.summarize(
+            content,
+            input_file,
+            input_type=input_type,
+            styles=styles,
+            combined_prompt=combined_prompt,
+        )
+
+    def _stream_summary_content(self, content: str, input_type: str, style: str):
+        """Stream summary for content, using iterative folding for large inputs."""
+        self._prepare_chat(input_type)
+        if not self._should_use_iterative(content):
+            prompt = self.generate_summary_prompt(content, input_type, style)
+            for chunk in self.chat_sdk.send_stream(prompt):
+                if chunk.is_complete:
+                    yield {
+                        "text": "",
+                        "is_complete": True,
+                        "performance": chunk.stats or {},
+                    }
+                else:
+                    yield {"text": chunk.text, "is_complete": False}
+            return
+        # Large inputs: delegate to unified iterative streaming generator
+        yield from self._iterative_summary_events(content, input_type, style)
+
+    def _stream_chunk_and_accumulate(
+        self,
+        prompt: str,
+        chunk_index: int,
+        total_chunks: int,
+        label: str = "LLM Prompt",
+    ):
+        """Helper to stream a chunk's LLM response and return the accumulated text."""
+        self.log.info(
+            f"[{label} - chunk {chunk_index+1}/{total_chunks}] {prompt[:500]}..."
+        )
+        streamed_text = ""
+        for part in self.chat_sdk.send_stream(prompt):
+            if part.is_complete:
+                return streamed_text.strip()
+            else:
+                streamed_text += part.text
+                yield {"text": part.text, "is_complete": False}
+        return streamed_text.strip()
+
+    def _iterative_summary_events(self, content: str, input_type: str, style: str):
+        """Unified generator for iterative summarization: streams per-chunk and yields final stats."""
+        self._prepare_chat(input_type)
+        summary_so_far = ""
+        chunk_tokens = int(self.max_ctx_size * 0.7)
+        overlap_tokens = int(chunk_tokens * self.overlap_tokens_ratio)
+        chunks = self.chunker.chunk_text(content, chunk_tokens, overlap_tokens)
+        for i, chunk in enumerate(chunks):
+            style_instruction = (self.summary_styles or {}).get(style)
+            if not style_instruction:
+                raise KeyError(f"Missing style '{style}' in prompts")
+            if i == 0:
+                base_prompt = self.document_summary_template.format(
+                    style_instruction=style_instruction, document_text=chunk
+                )
+            else:
+                base_prompt = self.iterative_summary_template.format(
+                    style_instruction=style_instruction,
+                    previous_summary=summary_so_far,
+                    new_chunk=chunk,
+                )
+            try:
+                completed = yield from self._stream_chunk_and_accumulate(
+                    base_prompt, i, len(chunks)
+                )
+                if completed:
+                    summary_so_far = (
+                        summary_so_far + ("\n" if summary_so_far else "") + completed
+                    )
+                yield {"text": "\n", "is_complete": False}
+            except Exception as e:
+                self.log.error(f"Failed to process chunk {i+1}/{len(chunks)}: {e}")
+                raise
+        try:
+            perf_stats = self.llm_client.get_performance_stats()
+        except Exception as e:
+            self.log.warning(f"Failed to retrieve performance stats: {e}")
+            perf_stats = {}
+        yield {
+            "text": summary_so_far,
+            "is_complete": True,
+            "performance": {
+                "total_tokens": perf_stats.get("input_tokens", 0)
+                + perf_stats.get("output_tokens", 0),
+                "prompt_tokens": perf_stats.get("input_tokens", 0),
+                "completion_tokens": perf_stats.get("output_tokens", 0),
+                "time_to_first_token_ms": int(
+                    perf_stats.get("time_to_first_token", 0) * 1000
+                ),
+                "tokens_per_second": perf_stats.get("tokens_per_second", 0),
+            },
+        }
+
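_iterative_summary_events leans on a Python detail worth calling out: a generator's return value is delivered to the caller of yield from, while its yields stream past. A self-contained sketch of that pattern (the names here are illustrative, not from the package):

    def stream_parts(parts):
        text = ""
        for p in parts:
            text += p
            yield {"text": p, "is_complete": False}  # streamed to the outer consumer
        return text.strip()  # becomes the value of the "yield from" expression

    def consume():
        accumulated = yield from stream_parts(["Hello ", "world"])
        print(repr(accumulated))  # 'Hello world'

    list(consume())  # drain the outer generator so the print executes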
+    def detect_content_type(self, content: str, input_type: str = "auto") -> str:
+        if input_type != "auto":
+            return input_type
+
+        email_patterns = [
+            r"From:\s*[\w\s]+",
+            r"To:\s*[\w\s]+",
+            r"Subject:\s*[\w\s]+",
+            r"Dear\s+[A-Z]+",
+            r"Sincerely,\s*[A-Z]+",
+            r"Best regards,\s*[A-Z]+",
+            r"Re:\s*[\w\s]+",
+            r"cc:\s*[\w\s]+",
+        ]
+
+        transcript_patterns = [
+            r"\w+\s*:\s*[^\n]+",
+            r"\[.*:\d{1,2}:\d{2}\]",
+            r"\(\d{1,2}:\d{2}\)",
+            r"Meeting\s+Transcript",
+            r"Project\s+Update",
+            r"Action\s+item",
+            r"Summary\s+of\s+discussion",
+            r"discuss\s+about",
+            r"can you give us an update",
+            r"how's\s+the\s+design\s+coming",
+            r"any\s+blockers",
+            r"next\s+step",
+            r"review\s+before\s+development",
+        ]
+
+        email_score = sum(
+            1
+            for pattern in email_patterns
+            if re.search(pattern, content, re.IGNORECASE)
+        )
+        transcript_score = sum(
+            1
+            for pattern in transcript_patterns
+            if re.search(pattern, content, re.IGNORECASE)
+        )
+
+        if email_score >= 2:
+            detected_type = "email"
+        elif transcript_score >= 3:
+            detected_type = "transcript"
+        else:
+            # Scores are ambiguous: default to transcript, then refine via the
+            # LLM if a detection prompt is available
+            detected_type = "transcript"
+            if self.detection_prompt_template:
+                detection_prompt = self.detection_prompt_template.format(
+                    text_excerpt=content
+                )
+
+                # Retry the LLM call, accepting only "transcript" or "email"
+                for attempt in range(self.max_retries):
+                    try:
+                        response = self.llm_client.generate(
+                            detection_prompt, model=self.model
+                        )
+                        text = (response or "").strip().lower()
+                        m = re.findall(r"[a-z]+", text)
+                        detected_type = m[0] if m else ""
+                        if detected_type not in ["transcript", "email"]:
+                            if "transcript" in text:
+                                detected_type = "transcript"
+                            elif "email" in text:
+                                detected_type = "email"
+                            else:
+                                detected_type = "transcript"
+                        break
+                    except Exception as e:
+                        if attempt < self.max_retries - 1:
+                            self.log.warning(
+                                f"Content type detection attempt {attempt + 1} failed: {e}. Retrying..."
+                            )
+                            time.sleep(self.retry_delay * (attempt + 1))
+                        else:
+                            self.log.error(
+                                f"Failed to detect content type after {self.max_retries} attempts"
+                            )
+                            detected_type = "transcript"
+
+        self.log.info(f"Auto-detected content type: {detected_type}")
+        return detected_type
+
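The regex scoring is cheap enough to run before any LLM call; a standalone rerun of just the email branch (three patterns copied from above, sample text made up):

    import re

    email_patterns = [r"From:\s*[\w\s]+", r"To:\s*[\w\s]+", r"Subject:\s*[\w\s]+"]
    sample = "From: alice\nTo: bob\nSubject: Q3 review"
    email_score = sum(1 for p in email_patterns if re.search(p, sample, re.IGNORECASE))
    print(email_score)  # 3 -> >= 2, classified as "email" with no LLM round trip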
+    def generate_summary_prompt(
+        self, content: str, content_type: str, style: str
+    ) -> str:
+        style_instruction = (self.summary_styles or {}).get(style)
+        if not style_instruction:
+            raise KeyError(f"Missing style '{style}' in prompts")
+        if style == "participants" and content_type == "email":
+            prompt = f"""Extract the sender and all recipients from this email.\n\nFormat your response as JSON:\n{{\n \"sender\": \"sender email/name\",\n \"recipients\": [\"recipient1\", \"recipient2\"],\n \"cc\": [\"cc1\", \"cc2\"] (if any),\n \"bcc\": [\"bcc1\"] (if any)\n}}\n\nEmail content:\n{content}"""
+        elif style == "action_items":
+            prompt = f"""Extract all action items from this {content_type}.\n\n{style_instruction}\n\nFormat each action item with:\n- The specific action required\n- Who is responsible (if mentioned)\n- Any deadline or timeline (if mentioned)\n\nIf no action items are found, respond with \"No specific action items identified.\"\n\nContent:\n{content}"""
+        else:
+            prompt = f"""Analyze this {content_type} and {style_instruction}\n\nContent:\n{content}"""
+        return prompt
+
+    def generate_combined_prompt(
+        self, content: str, content_type: str, styles: List[str]
+    ) -> str:
+        sections = []
+        for style in styles:
+            style_instruction = (self.summary_styles or {}).get(style)
+            if not style_instruction:
+                raise KeyError(f"Missing style '{style}' in prompts")
+            sections.append(f"- {style.upper()}: {style_instruction}")
+        prompt = f"""Analyze this {content_type} and generate the following summaries:\n\n{chr(10).join(sections)}\n\nFormat your response with clear section headers for each style.\n\nContent:\n{content}"""
+        return prompt
+
+    def summarize_with_style(
+        self, content: str, content_type: str, style: str
+    ) -> Dict[str, Any]:
+        start_time = time.time()
+        system_prompt = self._get_system_prompt(content_type)
+        style_instruction = (self.summary_styles or {}).get(style)
+        if not style_instruction:
+            raise KeyError(f"Missing style '{style}' in prompts")
+        # Merge style guidance into the system prompt for consistent behavior
+        self.chat_sdk.config.system_prompt = system_prompt
+        prompt = self.generate_summary_prompt(content, content_type, style)
+        response = None
+        last_error = None
+        for attempt in range(self.max_retries):
+            try:
+                response = self.chat_sdk.send(prompt)
+                break
+            except Exception as e:
+                last_error = e
+                error_msg = str(e).lower()
+                if "token" in error_msg and "limit" in error_msg:
+                    self.log.warning(
+                        "Token limit exceeded. Attempting with reduced content..."
+                    )
+                    truncated_content = (
+                        content[: int(len(content) * 0.75)]
+                        + "\n\n[Content truncated due to length...]"
+                    )
+                    prompt = self.generate_summary_prompt(
+                        truncated_content, content_type, style
+                    )
+                elif "connection" in error_msg or "timeout" in error_msg:
+                    self.log.warning(f"Connection error on attempt {attempt + 1}: {e}")
+                    if attempt < self.max_retries - 1:
+                        time.sleep(self.retry_delay * (attempt + 1))
+                        continue
+                else:
+                    self.log.error(f"Unexpected error on attempt {attempt + 1}: {e}")
+                if attempt >= self.max_retries - 1:
+                    raise RuntimeError(
+                        f"Failed to generate {style} summary after {self.max_retries} attempts: {last_error}"
+                    )
+        try:
+            perf_stats = self.llm_client.get_performance_stats()
+        except Exception as e:
+            self.log.warning(f"Failed to get performance stats: {e}")
+            perf_stats = {}
+        processing_time_ms = int((time.time() - start_time) * 1000)
+        result = {"text": response.text}
+        if style == "action_items":
+            lines = response.text.strip().split("\n")
+            items = []
+            for line in lines:
+                line = line.strip()
+                if (
+                    line
+                    and not line.lower().startswith("action items:")
+                    and not line.startswith("**Action")
+                ):
+                    items.append(line)
+            if items:
+                result["items"] = items
+        elif style == "participants":
+            if content_type == "email":
+                try:
+                    participants_data = json.loads(response.text)
+                    result.update(participants_data)
+                except (json.JSONDecodeError, ValueError, KeyError):
+                    pass
+            else:
+                lines = response.text.strip().split("\n")
+                participants = []
+                for line in lines:
+                    line = line.strip()
+                    if line and not line.lower().startswith("participants:"):
+                        participants.append(line)
+                if participants:
+                    result["participants"] = participants
+        result["performance"] = {
+            "total_tokens": perf_stats.get("input_tokens", 0)
+            + perf_stats.get("output_tokens", 0),
+            "prompt_tokens": perf_stats.get("input_tokens", 0),
+            "completion_tokens": perf_stats.get("output_tokens", 0),
+            "time_to_first_token_ms": int(
+                perf_stats.get("time_to_first_token", 0) * 1000
+            ),
+            "tokens_per_second": perf_stats.get("tokens_per_second", 0),
+            "processing_time_ms": processing_time_ms,
+        }
+        return result
+
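The action_items post-processing is plain line filtering, so it is easy to check in isolation (the response text below is made up; the filter conditions match the method):

    raw = "Action items:\n- Ship the build (Dana, Friday)\n- Update the docs"
    items = [
        ln.strip()
        for ln in raw.strip().split("\n")
        if ln.strip()
        and not ln.strip().lower().startswith("action items:")
        and not ln.strip().startswith("**Action")
    ]
    print(items)  # ['- Ship the build (Dana, Friday)', '- Update the docs']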
+    def summarize_combined(
+        self, content: str, content_type: str, styles: List[str]
+    ) -> Dict[str, Dict[str, Any]]:
+        start_time = time.time()
+        system_prompt = self._get_system_prompt(content_type)
+        self.chat_sdk.config.system_prompt = system_prompt
+        prompt = self.generate_combined_prompt(content, content_type, styles)
+        response = self.chat_sdk.send(prompt)
+        perf_stats = self.llm_client.get_performance_stats()
+        processing_time_ms = int((time.time() - start_time) * 1000)
+        response_text = response.text
+        results = {}
+        for style in styles:
+            style_upper = style.upper()
+            start_markers = [
+                f"{style_upper}:",
+                f"**{style_upper}**:",
+                f"# {style_upper}",
+                f"## {style_upper}",
+            ]
+            section_start = -1
+            for marker in start_markers:
+                idx = response_text.find(marker)
+                if idx != -1:
+                    section_start = idx + len(marker)
+                    break
+            if section_start == -1:
+                if not results:
+                    results[style] = {"text": response_text.strip()}
+                continue
+            section_end = len(response_text)
+            for next_style in styles:
+                if next_style == style:
+                    continue
+                next_upper = next_style.upper()
+                for marker in [
+                    f"{next_upper}:",
+                    f"**{next_upper}**:",
+                    f"# {next_upper}",
+                    f"## {next_upper}",
+                ]:
+                    idx = response_text.find(marker, section_start)
+                    if idx != -1 and idx < section_end:
+                        section_end = idx
+            section_text = response_text[section_start:section_end].strip()
+            results[style] = {"text": section_text}
+        base_perf = {
+            "total_tokens": perf_stats.get("input_tokens", 0)
+            + perf_stats.get("output_tokens", 0),
+            "prompt_tokens": perf_stats.get("input_tokens", 0),
+            "completion_tokens": perf_stats.get("output_tokens", 0),
+            "time_to_first_token_ms": int(
+                perf_stats.get("time_to_first_token", 0) * 1000
+            ),
+            "tokens_per_second": perf_stats.get("tokens_per_second", 0),
+            "processing_time_ms": processing_time_ms,
+        }
+        style_count = len(styles)
+        for style in results:
+            results[style]["performance"] = {
+                **base_perf,
+                "total_tokens": base_perf["total_tokens"] // style_count,
+                "completion_tokens": base_perf["completion_tokens"] // style_count,
+            }
+        return results
+
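Section recovery is plain substring search over the marker variants; a standalone sketch with a made-up two-style response (note that action_items upper-cases to ACTION_ITEMS):

    response_text = "EXECUTIVE: Revenue grew 12%.\n\nACTION_ITEMS: - Follow up with legal."
    start = response_text.find("EXECUTIVE:") + len("EXECUTIVE:")
    end = response_text.find("ACTION_ITEMS:", start)  # the next style's marker bounds the section
    print(response_text[start:end].strip())  # Revenue grew 12%.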
+    def summarize(
+        self,
+        content: str,
+        input_file: Optional[str] = None,
+        input_type: str = "auto",
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+    ) -> Dict[str, Any]:
+        # Ensure no prior conversation context leaks into this summary
+        try:
+            self.chat_sdk.clear_history()
+        except Exception as e:
+            self.log.warning(f"Failed to clear chat history: {e}")
+        start_time = time.time()
+        content_type = self.detect_content_type(content, input_type)
+        applicable_styles = styles or self.styles.copy()
+        # Early validation: fail fast with clear guidance if a style is unsupported
+        self._validate_styles(applicable_styles)
+        if (
+            combined_prompt if combined_prompt is not None else self.combined_prompt
+        ) and len(applicable_styles) > 1:
+            summaries = self.summarize_combined(
+                content, content_type, applicable_styles
+            )
+        else:
+            summaries = {}
+            for style in applicable_styles:
+                summaries[style] = self.summarize_with_style(
+                    content, content_type, style
+                )
+        total_processing_time = int((time.time() - start_time) * 1000)
+        if len(applicable_styles) == 1:
+            style = applicable_styles[0]
+            output = {
+                "metadata": {
+                    "input_file": input_file or "stdin",
+                    "input_type": content_type,
+                    "model": self.model,
+                    "timestamp": datetime.now().isoformat(),
+                    "processing_time_ms": total_processing_time,
+                    "summary_style": style,
+                },
+                "summary": summaries[style],
+                "performance": summaries[style].get("performance", {}),
+                "original_content": content,
+            }
+        else:
+            output = {
+                "metadata": {
+                    "input_file": input_file or "stdin",
+                    "input_type": content_type,
+                    "model": self.model,
+                    "timestamp": datetime.now().isoformat(),
+                    "processing_time_ms": total_processing_time,
+                    "summary_styles": applicable_styles,
+                },
+                "summaries": summaries,
+                "aggregate_performance": {
+                    "total_tokens": sum(
+                        s.get("performance", {}).get("total_tokens", 0)
+                        for s in summaries.values()
+                    ),
+                    "total_processing_time_ms": total_processing_time,
+                    "model_info": {
+                        "model": self.model,
+                        "use_local": not (self.use_claude or self.use_chatgpt),
+                        "use_claude": self.use_claude,
+                        "use_chatgpt": self.use_chatgpt,
+                    },
+                },
+                "original_content": content,
+            }
+        return output
+
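A minimal end-to-end sketch: this assumes a local Lemonade server is running with the default model available, and uses a hypothetical input file name:

    from pathlib import Path

    from gaia.agents.summarize.agent import SummarizerAgent

    agent = SummarizerAgent(styles=["executive"])
    result = agent.summarize(Path("meeting_notes.txt").read_text(encoding="utf-8"))
    print(result["metadata"]["input_type"])  # auto-detected, e.g. "transcript"
    print(result["summary"]["text"])         # a single style yields a top-level "summary" key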
+    def summarize_stream(
+        self, content: str, input_type: str = "auto", style: str = "brief"
+    ) -> Generator[Dict[str, Any], None, None]:
+        """Stream a single-style summary, using iterative folding for large inputs."""
+        self._validate_styles(style)
+        yield from self._stream_summary_content(content, input_type, style)
+
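Streaming consumption, continuing the sketch above (long_text is any string; the events follow the dict shape yielded by _stream_summary_content):

    for evt in agent.summarize_stream(long_text, style="brief"):
        if evt["is_complete"]:
            stats = evt["performance"]  # prompt/completion tokens, TTFT, tokens/s
        else:
            print(evt["text"], end="", flush=True)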
+    def _ensure_path(self, file_path) -> Path:
+        """Convert file_path to Path object if it's not already."""
+        return file_path if isinstance(file_path, Path) else Path(file_path)
+
+    def get_summary_content_from_file(self, file_path: Path) -> str:
+        """Extract content to be summarized from a file."""
+        file_path = self._ensure_path(file_path)
+        abs_path = str(file_path.absolute())
+        ext = file_path.suffix.lower()
+        if ext == ".pdf":
+            # Check disk cache first
+            test_cache_path = self._resolve_text_cache_paths(abs_path)
+            if test_cache_path and test_cache_path.exists():
+                self.log.info(f"[Cache] Using cached PDF text for {file_path.name}")
+                return test_cache_path.read_text(encoding="utf-8").strip()
+
+            # Extract fresh text
+            pdf_text, _, _ = (
+                self.rag_sdk._extract_text_from_pdf(  # pylint: disable=protected-access
+                    file_path
+                )
+            )
+            text = pdf_text.strip()
+            # Write cache atomically
+            cache_path = test_cache_path or self._resolve_text_cache_paths(abs_path)
+            if cache_path and text:
+                tmp_path = cache_path.with_suffix(".tmp")
+                tmp_path.write_text(text, encoding="utf-8")
+                try:
+                    tmp_path.replace(cache_path)
+                except Exception:
+                    cache_path.write_text(text, encoding="utf-8")
+                self.log.info(f"[Cache] Stored PDF text for {file_path.name}")
+            return text
+        else:
+            # Read as UTF-8, fall back to common encodings
+            try:
+                text = file_path.read_text(encoding="utf-8").strip()
+            except UnicodeDecodeError:
+                for encoding in ["latin-1", "cp1252"]:
+                    try:
+                        text = file_path.read_text(encoding=encoding).strip()
+                        break
+                    except UnicodeDecodeError:
+                        continue
+                else:
+                    text = ""
+
+            # Cache non-PDF text using same content-hash scheme
+            if text:
+                cache_path = self._resolve_text_cache_paths(abs_path)
+                if cache_path:
+                    tmp_path = cache_path.with_suffix(".tmp")
+                    tmp_path.write_text(text, encoding="utf-8")
+                    try:
+                        tmp_path.replace(cache_path)
+                    except Exception:
+                        cache_path.write_text(text, encoding="utf-8")
+                    self.log.info(f"[Cache] Stored text for {file_path.name}")
+            return text
+
+    def _resolve_text_cache_paths(self, file_path: str) -> Optional[Path]:
+        """Return the text-cache path for the given file's content hash, or None.
+
+        Cache entries are stored as '<digest>.txt' in the cache dir; the legacy
+        path-based scheme has been removed.
+        """
+        try:
+            p = Path(file_path)
+            if not p.exists():
+                return None
+            import hashlib
+
+            h = hashlib.sha256()
+            with p.open("rb") as f:
+                while True:
+                    b = f.read(1024 * 1024)
+                    if not b:
+                        break
+                    h.update(b)
+            digest = h.hexdigest()
+            test_cache = self._text_cache_dir.joinpath(f"{digest}.txt")
+            return test_cache
+        except Exception:
+            return None
+
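Because the cache key is a SHA-256 of the file contents, a renamed or moved file still hits the cache while any edit invalidates it. The path computation restated standalone (file name hypothetical):

    import hashlib
    from pathlib import Path

    h = hashlib.sha256()
    with Path("meeting_notes.txt").open("rb") as f:
        for block in iter(lambda: f.read(1024 * 1024), b""):
            h.update(block)
    print(Path(".gaia") / "text_cache" / f"{h.hexdigest()}.txt")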
+    def summarize_file(
+        self,
+        file_path: Path,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> Dict[str, Any]:
+        file_path = self._ensure_path(file_path)
+        self.log.info(f"Summarizing file: {file_path}")
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+        file_size_mb = file_path.stat().st_size / (1024 * 1024)
+        if file_size_mb > 10:
+            self.log.warning(
+                f"Large file ({file_size_mb:.1f}MB) may exceed token limits"
+            )
+        try:
+            content = self.get_summary_content_from_file(file_path)
+            if not content.strip():
+                raise ValueError(f"No extractable text found in {file_path}")
+            return self._summarize_content(
+                content,
+                str(file_path),
+                input_type="pdf" if file_path.suffix.lower() == ".pdf" else input_type,
+                styles=styles,
+                combined_prompt=combined_prompt,
+            )
+        except Exception as e:
+            self.log.error(f"Error processing file {file_path}: {e}")
+            raise
+
+    def summarize_directory(
+        self,
+        dir_path: Path,
+        styles: Optional[List[str]] = None,
+        combined_prompt: Optional[bool] = None,
+        input_type: str = "auto",
+    ) -> List[Dict[str, Any]]:
+        self.log.info(f"Summarizing directory: {dir_path}")
+        if not dir_path.exists():
+            raise FileNotFoundError(f"Directory not found: {dir_path}")
+        if not dir_path.is_dir():
+            raise ValueError(f"Path is not a directory: {dir_path}")
+        results = []
+        errors = []
+        text_extensions = [".txt", ".md", ".log", ".pdf", ".email", ".transcript"]
+        files = []
+        for ext in text_extensions:
+            files.extend(dir_path.glob(f"*{ext}"))
+        if not files:
+            self.log.warning(f"No text files found in {dir_path}")
+            return results
+        self.log.info(f"Found {len(files)} files to process")
+        for i, file_path in enumerate(sorted(files), 1):
+            try:
+                self.log.info(f"Processing file {i}/{len(files)}: {file_path.name}")
+                result = self.summarize_file(
+                    file_path,
+                    styles=styles,
+                    combined_prompt=combined_prompt,
+                    input_type=input_type,
+                )
+                results.append(result)
+            except Exception as e:
+                error_msg = f"Failed to summarize {file_path}: {e}"
+                self.log.error(error_msg)
+                errors.append(error_msg)
+                continue
+        if errors:
+            self.log.warning(
+                f"Completed with {len(errors)} errors:\n" + "\n".join(errors)
+            )
+        return results
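Batch use, continuing the earlier sketch (directory name hypothetical); per-file failures are logged and skipped, so one bad file does not abort the run:

    results = agent.summarize_directory(Path("./inbox"), styles=["action_items"])
    print(f"{len(results)} files summarized")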