abstractcore 2.6.9__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +803 -141
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/__init__.py +2 -2
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +379 -93
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +540 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +116 -30
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +46 -24
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2443 -742
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +894 -159
- abstractcore/tools/registry.py +122 -18
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/METADATA +56 -2
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/RECORD +46 -37
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/top_level.txt +0 -0
abstractcore/processing/basic_summarizer.py

@@ -6,8 +6,10 @@ text processing capabilities with minimal complexity.
 """

 from enum import Enum
-
-
+import json
+import re
+from typing import List, Optional, Tuple
+from pydantic import BaseModel, Field, ValidationError

 from ..core.interface import AbstractCoreInterface
 from ..core.factory import create_llm
@@ -35,6 +37,42 @@ class SummaryLength(Enum):
     COMPREHENSIVE = "comprehensive"  # Full analysis with context


+class CompressionMode(Enum):
+    """Compression aggressiveness for chat history summarization.
+
+    Controls how aggressively the summarizer compresses conversation history:
+    - LIGHT: Keep most information, only remove redundancy
+    - STANDARD: Balanced compression, main points and context
+    - HEAVY: Aggressive compression, only critical information
+    """
+    LIGHT = "light"
+    STANDARD = "standard"
+    HEAVY = "heavy"
+
+
+# Compression mode-specific instructions for summarization prompts
+COMPRESSION_INSTRUCTIONS = {
+    CompressionMode.LIGHT: (
+        "Preserve most details from this conversation while removing only redundancy. "
+        "Keep: all key decisions and outcomes, important context and background, "
+        "specific details/names/numbers/technical terms, all tool calls and results, "
+        "error messages and resolutions. Remove only: repetitive greetings, duplicate information."
+    ),
+    CompressionMode.STANDARD: (
+        "Summarize with balanced compression, keeping main points and essential context. "
+        "Keep: key decisions and rationale, important outcomes, critical context for ongoing work, "
+        "unresolved items and pending tasks. Remove: intermediate reasoning steps, "
+        "exploratory tangents, detailed tool outputs (keep only key findings)."
+    ),
+    CompressionMode.HEAVY: (
+        "Extract only the most critical information. Keep ONLY: final decisions made, "
+        "critical outcomes (success/failure), essential context to continue work, "
+        "blocking issues and hard dependencies. Remove: all exploratory discussion, "
+        "all intermediate steps, all detailed outputs, all background explanations."
+    ),
+}
+
+
 class LLMSummaryOutput(BaseModel):
     """LLM-generated summary output (without word counts)"""
     summary: str = Field(description="The main summary text")
@@ -81,9 +119,10 @@ class BasicSummarizer:
         self,
         llm: Optional[AbstractCoreInterface] = None,
         max_chunk_size: int = 8000,
-        max_tokens: int =
-        max_output_tokens: int =
-        timeout: Optional[float] = None
+        max_tokens: int = -1,
+        max_output_tokens: int = -1,
+        timeout: Optional[float] = None,
+        retry_strategy: Optional[FeedbackRetry] = None,
     ):
         """
         Initialize the summarizer
@@ -91,14 +130,26 @@ class BasicSummarizer:
         Args:
             llm: AbstractCore instance (any provider). If None, attempts to create ollama gemma3:1b-it-qat
             max_chunk_size: Maximum characters per chunk for long documents (default 8000)
-            max_tokens: Maximum total tokens for LLM context (default
-
+            max_tokens: Maximum total tokens for LLM context (default -1 = AUTO).
+                - Use -1 (AUTO): Automatically uses model's context window capability
+                - Use specific value: Hard limit for deployment constraint (GPU/RAM limits)
+                  Example: max_tokens=16000 limits to 16K even if model supports 128K
+            max_output_tokens: Maximum tokens for LLM output generation (default -1 = AUTO).
+                - Use -1 (AUTO): Automatically uses model's output capability
+                - Use specific value: Hard limit for output tokens
             timeout: HTTP request timeout in seconds. None for unlimited timeout (default None)
+            retry_strategy: Custom retry strategy for structured output. If None, uses default (3 attempts)
         """
         if llm is None:
             try:
                 # Default to gemma3:1b-it-qat with configurable token limits
-
+                # Only pass token limits if not using AUTO mode (-1)
+                llm_kwargs = {'timeout': timeout} if timeout is not None else {}
+                if max_tokens != -1:
+                    llm_kwargs['max_tokens'] = max_tokens
+                if max_output_tokens != -1:
+                    llm_kwargs['max_output_tokens'] = max_output_tokens
+                self.llm = create_llm("ollama", model="gemma3:1b-it-qat", **llm_kwargs)
             except Exception as e:
                 error_msg = (
                     f"❌ Failed to initialize default Ollama model 'gemma3:1b-it-qat': {e}\n\n"
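As a quick orientation for the new constructor surface, the sketch below builds the summarizer in AUTO mode and with explicit deployment limits. It is a minimal sketch only: the import paths (abstractcore.processing for BasicSummarizer, abstractcore.structured.retry for FeedbackRetry) are assumptions inferred from the file list above, and the default model still requires a local Ollama install, as the error message in this hunk notes.

# Sketch only: import paths are assumptions, not confirmed by this diff.
from abstractcore.processing import BasicSummarizer
from abstractcore.structured.retry import FeedbackRetry

# AUTO mode: max_tokens / max_output_tokens default to -1, so the budget is
# taken from the model's own context window capability.
auto_summarizer = BasicSummarizer()

# Deployment-constrained mode: cap the context even if the model supports more,
# and allow only a single structured-output attempt for latency-sensitive callers.
constrained_summarizer = BasicSummarizer(
    max_tokens=16000,        # hard input+output budget (e.g. a GPU/RAM limit)
    max_output_tokens=2000,  # hard output cap
    timeout=60.0,
    retry_strategy=FeedbackRetry(max_attempts=1),
)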
@@ -126,9 +177,13 @@ class BasicSummarizer:
         else:
             self.llm = llm
         self.max_chunk_size = max_chunk_size
+        # Store token budgets. -1 means AUTO (use model's capability).
+        # In AbstractCore, `max_tokens` is the total (input + output) context budget.
+        self.max_tokens = max_tokens
+        self.max_output_tokens = max_output_tokens

-        # Default retry strategy with 3 attempts
-        self.retry_strategy = FeedbackRetry(max_attempts=3)
+        # Default retry strategy with 3 attempts (callers may override for latency-sensitive UX).
+        self.retry_strategy = retry_strategy or FeedbackRetry(max_attempts=3)

     def summarize(
         self,
@@ -192,36 +247,29 @@ class BasicSummarizer:
         # Build the prompt based on parameters
         prompt = self._build_prompt(text, focus, style, length)

-
-
-
-
-
-
-
-
-
-
-
-
-        # Debug information for troubleshooting
-        error_msg = f"Failed to generate structured summary output. Response type: {type(response)}"
-        if hasattr(response, 'content'):
-            error_msg += f", Content: {response.content[:200]}..."
-        if hasattr(response, 'structured_output'):
-            error_msg += f", Structured output: {response.structured_output}"
-        raise ValueError(error_msg)
+        llm_result: Optional[LLMSummaryOutput] = None
+        try:
+            # Use AbstractCore's structured output with retry strategy (no word counts in LLM response)
+            response = self.llm.generate(prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+            llm_result = self._extract_summary_structured_output(response, context="summary")
+        except (json.JSONDecodeError, ValidationError) as e:
+            logger.warning(
+                "Structured summary output failed; falling back to marker format",
+                error_type=type(e).__name__,
+                error=str(e),
+            )
+            llm_result = self._summarize_fallback(text=text, focus=focus, style=style, length=length)

         # Compute word counts ourselves (reliable, client-side calculation)
         actual_original_words = len(text.split())
-        actual_summary_words = len(llm_result.summary.split())
+        actual_summary_words = len((llm_result.summary if llm_result else "").split())

         # Create complete result with computed word counts
         return SummaryOutput(
-            summary=llm_result.summary,
-            key_points=llm_result.key_points,
-            confidence=llm_result.confidence,
-            focus_alignment=llm_result.focus_alignment,
+            summary=(llm_result.summary if llm_result else ""),
+            key_points=(llm_result.key_points if llm_result else []),
+            confidence=(llm_result.confidence if llm_result else 0.5),
+            focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
             word_count_original=actual_original_words,
             word_count_summary=actual_summary_words
         )
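The summarize() path above now degrades gracefully instead of raising on malformed structured output. A minimal call sketch, continuing the constructor sketch earlier (SummaryStyle, SummaryLength, and the SummaryOutput fields are taken from this file; the import location is an assumption):

# Sketch: names come from this diff; the import path is assumed.
from abstractcore.processing.basic_summarizer import SummaryStyle, SummaryLength

result = auto_summarizer.summarize(
    "Quarterly report text goes here.",
    focus="key decisions",
    style=SummaryStyle.EXECUTIVE,
    length=SummaryLength.BRIEF,
)

# Word counts are always computed client-side; summary, key_points, confidence
# and focus_alignment come from the structured output, or from the marker-format
# fallback when JSON/validation errors occur.
print(result.word_count_original, result.word_count_summary)
print(result.confidence, result.key_points)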
@@ -261,22 +309,31 @@ class BasicSummarizer:
                 summary: str
                 key_points: List[str] = Field(max_length=5)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                response = self.llm.generate(chunk_prompt, response_model=ChunkSummary, retry_strategy=self.retry_strategy)
+                if isinstance(response, ChunkSummary):
+                    # When structured output succeeds, response is the ChunkSummary object directly
+                    chunk_summaries.append(response)
+                elif hasattr(response, 'structured_output') and response.structured_output:
+                    # Fallback: check for structured_output attribute
+                    chunk_summaries.append(response.structured_output)
+                else:
+                    raise ValueError(f"Unexpected chunk response type: {type(response)}")
+            except (json.JSONDecodeError, ValidationError, ValueError) as e:
+                # If chunk processing fails, create a minimal placeholder (do not fail the whole summary).
+                logger.warning(
+                    "Chunk processing failed, creating fallback",
+                    chunk_number=i + 1,
+                    total_chunks=len(chunks),
+                    error_type=type(e).__name__,
+                    error=str(e),
+                )
+                chunk_summaries.append(
+                    ChunkSummary(
+                        summary=f"Section {i+1} content summary unavailable",
+                        key_points=["Content processing failed"],
+                    )
+                )

         # Step 2: Combine chunk summaries (Reduce phase)
         combined_text = "\n\n".join([
@@ -287,35 +344,28 @@ Create a unified summary that represents the entire document effectively."""
         # Generate final summary from combined summaries
         final_prompt = self._build_final_combination_prompt(combined_text, focus, style, length, len(text))

-
-
-
-
-
-
-
-
-
-
-
-        # Debug information for troubleshooting
-        error_msg = f"Failed to generate final structured summary output. Response type: {type(response)}"
-        if hasattr(response, 'content'):
-            error_msg += f", Content: {response.content[:200]}..."
-        if hasattr(response, 'structured_output'):
-            error_msg += f", Structured output: {response.structured_output}"
-        raise ValueError(error_msg)
+        llm_result: Optional[LLMSummaryOutput] = None
+        try:
+            response = self.llm.generate(final_prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+            llm_result = self._extract_summary_structured_output(response, context="final_summary")
+        except (json.JSONDecodeError, ValidationError) as e:
+            logger.warning(
+                "Structured final summary output failed; falling back to marker format",
+                error_type=type(e).__name__,
+                error=str(e),
+            )
+            llm_result = self._summarize_fallback(text=combined_text, focus=focus, style=style, length=length)

         # Compute word counts ourselves (reliable, client-side calculation)
         actual_original_words = len(text.split())
-        actual_summary_words = len(llm_result.summary.split())
+        actual_summary_words = len((llm_result.summary if llm_result else "").split())

         # Create complete result with computed word counts
         return SummaryOutput(
-            summary=llm_result.summary,
-            key_points=llm_result.key_points,
-            confidence=llm_result.confidence,
-            focus_alignment=llm_result.focus_alignment,
+            summary=(llm_result.summary if llm_result else ""),
+            key_points=(llm_result.key_points if llm_result else []),
+            confidence=(llm_result.confidence if llm_result else 0.5),
+            focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
             word_count_original=actual_original_words,
             word_count_summary=actual_summary_words
         )
@@ -324,6 +374,13 @@ class BasicSummarizer:
         """
         Determine if text should be chunked based on token count.

+        Token budget logic:
+        - max_tokens = -1 (AUTO): Uses model's full context window capability
+        - max_tokens = N: Hard limit (deployment constraint for GPU/RAM)
+
+        This ensures we don't exceed GPU memory constraints even when the model
+        theoretically supports larger contexts.
+
         Uses centralized TokenUtils for accurate token estimation.
         Falls back to character count if model information unavailable.
         """
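To make the budget logic described in this docstring concrete, here is the arithmetic the next hunk performs, using only constants visible in the diff (an 8000-token output reservation when max_output_tokens is AUTO, a 1200-token prompt overhead, and an 8000-token floor). This is an illustration of the calculation, not additional behaviour:

# Case 1: user sets max_tokens=16000, leaves max_output_tokens on AUTO (-1),
# and the provider reports no capability information.
user_max_output = 8000                            # AUTO output reservation
user_max_input = 16000 - user_max_output          # 8000
token_limit = max(8000, user_max_input - 1200)    # 8000, the safety floor wins

# Case 2: AUTO mode (max_tokens=-1) with a model reporting a 128K input window.
provider_max_input = 128000
token_limit_auto = max(8000, provider_max_input - 1200)  # 126800

# The text is chunked only when its estimated token count exceeds the limit.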
@@ -334,18 +391,214 @@ class BasicSummarizer:
         if self.llm and hasattr(self.llm, 'model'):
             model_name = self.llm.model

-        # Estimate tokens using centralized utility
-
+        # Estimate tokens using centralized utility. If estimation fails for any reason,
+        # fall back to character chunking (conservative).
+        try:
+            estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
+        except Exception:
+            return len(text) > self.max_chunk_size

-        #
-        #
-
+        # Determine the effective token budget
+        # Get provider's capabilities
+        provider_max_input = getattr(self.llm, "max_input_tokens", None) if self.llm else None
+        if provider_max_input is None:
+            provider_total = getattr(self.llm, "max_tokens", None) if self.llm else None
+            provider_output = getattr(self.llm, "max_output_tokens", None) if self.llm else None
+            if provider_total is not None and provider_output is not None:
+                try:
+                    provider_max_input = int(provider_total) - int(provider_output)
+                except Exception:
+                    provider_max_input = None

-
-
+        # Determine effective max_input_tokens based on configuration
+        if self.max_tokens == -1:
+            # AUTO mode: Use model's capability
+            if provider_max_input is not None:
+                max_input_tokens = provider_max_input
+            else:
+                # Fallback to safe default if model info unavailable
+                max_input_tokens = 24000  # Conservative default
+        else:
+            # User-specified limit (deployment constraint)
+            user_max_output = self.max_output_tokens if self.max_output_tokens != -1 else 8000
+            user_max_input = self.max_tokens - user_max_output

-
-
+            if provider_max_input is not None:
+                # Respect BOTH user limit AND model capability (take minimum)
+                max_input_tokens = min(provider_max_input, user_max_input)
+            else:
+                # No model info, use user limit
+                max_input_tokens = user_max_input
+
+        # Reserve prompt/formatting overhead (structured output schemas + instructions).
+        # Keep the historical safety floor (8000) for small-context models.
+        try:
+            token_limit = max(8000, int(max_input_tokens) - 1200)
+        except Exception:
+            token_limit = 8000
+
+        logger.debug(
+            "Chunking decision",
+            estimated_tokens=estimated_tokens,
+            token_limit=token_limit,
+            max_tokens_config=self.max_tokens,
+            is_auto_mode=(self.max_tokens == -1),
+            will_chunk=(estimated_tokens > token_limit)
+        )
+
+        return estimated_tokens > token_limit
+
+    def _extract_summary_structured_output(self, response: object, *, context: str) -> LLMSummaryOutput:
+        """Extract structured summary output from AbstractCore responses."""
+        if isinstance(response, LLMSummaryOutput):
+            return response
+        if hasattr(response, "structured_output") and getattr(response, "structured_output"):
+            return response.structured_output
+
+        error_msg = f"Failed to generate structured {context} output. Response type: {type(response)}"
+        if hasattr(response, "content") and getattr(response, "content"):
+            try:
+                error_msg += f", Content: {str(response.content)[:200]}..."
+            except Exception:
+                pass
+        if hasattr(response, "structured_output"):
+            try:
+                error_msg += f", Structured output: {getattr(response, 'structured_output')}"
+            except Exception:
+                pass
+        raise ValueError(error_msg)
+
+    def _summarize_fallback(
+        self,
+        *,
+        text: str,
+        focus: Optional[str],
+        style: SummaryStyle,
+        length: SummaryLength,
+    ) -> LLMSummaryOutput:
+        """Best-effort summary when structured output cannot be produced reliably."""
+        prompt = self._build_fallback_prompt(text=text, focus=focus, style=style, length=length)
+        response = self.llm.generate(prompt)
+        content = getattr(response, "content", None)
+        if content is None:
+            content = str(response)
+        summary, key_points, confidence, focus_alignment = self._parse_fallback_response(str(content))
+        return LLMSummaryOutput(
+            summary=summary,
+            key_points=key_points[:8],
+            confidence=confidence,
+            focus_alignment=focus_alignment,
+        )
+
+    def _build_fallback_prompt(
+        self,
+        *,
+        text: str,
+        focus: Optional[str],
+        style: SummaryStyle,
+        length: SummaryLength,
+    ) -> str:
+        """Build a non-JSON prompt that is easy to parse deterministically."""
+        style_instructions = {
+            SummaryStyle.STRUCTURED: "Present the summary in a clear, organized format with distinct sections or bullet points.",
+            SummaryStyle.NARRATIVE: "Write the summary as a flowing narrative that tells the story of the content.",
+            SummaryStyle.OBJECTIVE: "Maintain a neutral, factual tone without opinions or interpretations.",
+            SummaryStyle.ANALYTICAL: "Provide critical analysis with insights, implications, and deeper understanding.",
+            SummaryStyle.EXECUTIVE: "Focus on actionable insights, business implications, and key decisions.",
+            SummaryStyle.CONVERSATIONAL: "Preserve conversational context, key decisions, ongoing topics, and user intent. Focus on information needed for conversation continuity.",
+        }
+
+        length_instructions = {
+            SummaryLength.BRIEF: "Keep the summary very concise - 2-3 sentences covering only the most essential points.",
+            SummaryLength.STANDARD: "Provide a balanced summary of 1-2 paragraphs covering the main ideas.",
+            SummaryLength.DETAILED: "Create a comprehensive summary with multiple paragraphs covering all important aspects.",
+            SummaryLength.COMPREHENSIVE: "Provide an extensive analysis covering all significant points, context, and implications.",
+        }
+
+        focus_instruction = ""
+        if focus:
+            focus_instruction = f"\nPay special attention to: {focus}\n"
+
+        return f"""Analyze the following text and produce a summary.
+
+{style_instructions[style]}
+{length_instructions[length]}{focus_instruction}
+
+Text to summarize:
+{text}
+
+Return your answer in this EXACT plain-text format (no JSON, no code blocks):
+
+SUMMARY:
+<the main summary text>
+
+KEY POINTS:
+- <point 1>
+- <point 2>
+- <point 3>
+
+CONFIDENCE: <0-1>
+FOCUS_ALIGNMENT: <0-1>
+"""
+
+    @staticmethod
+    def _parse_fallback_response(content: str) -> Tuple[str, List[str], float, float]:
+        """Parse marker-format fallback summaries into structured fields."""
+        text = (content or "").strip()
+        if not text:
+            return "", [], 0.5, 0.5
+
+        def _parse_score(label_re: str, default: float) -> float:
+            m = re.search(rf"(?im)^{label_re}\s*:\s*(.+?)\s*$", text)
+            if not m:
+                return default
+            raw = m.group(1).strip()
+            try:
+                if raw.endswith("%"):
+                    val = float(raw[:-1].strip()) / 100.0
+                else:
+                    val = float(raw)
+            except Exception:
+                return default
+            return max(0.0, min(1.0, val))
+
+        summary = ""
+        m_summary = re.search(r"(?is)summary\s*:\s*(.*?)\n\s*key\s*points\s*:", text)
+        if m_summary:
+            summary = m_summary.group(1).strip()
+        else:
+            # Best-effort: take the first paragraph.
+            summary = text.split("\n\n", 1)[0].strip()
+
+        key_points: List[str] = []
+        m_kp = re.search(
+            r"(?is)key\s*points\s*:\s*(.*?)(?:\n\s*confidence\s*:|\n\s*focus[_ ]alignment\s*:|\Z)",
+            text,
+        )
+        if m_kp:
+            block = m_kp.group(1)
+            for line in block.splitlines():
+                line = line.strip()
+                if not line:
+                    continue
+                if line.startswith(("-", "•", "*")):
+                    line = line.lstrip("-•*").strip()
+                if line:
+                    key_points.append(line)
+        if not key_points:
+            # Fallback: try to extract bullet-like lines anywhere.
+            for line in text.splitlines():
+                line = line.strip()
+                if line.startswith(("-", "•", "*")):
+                    cleaned = line.lstrip("-•*").strip()
+                    if cleaned:
+                        key_points.append(cleaned)
+        key_points = key_points[:8]
+
+        confidence = _parse_score("confidence", 0.6)
+        focus_alignment = _parse_score(r"focus[_ ]alignment", 0.6)
+
+        return summary, key_points, confidence, focus_alignment

     def _split_text_into_chunks(self, text: str, overlap: int = 200) -> List[str]:
         """Split text into overlapping chunks"""
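Because _parse_fallback_response is a @staticmethod, the marker format requested by _build_fallback_prompt can be exercised in isolation. The snippet below is illustrative only; the import path is an assumption and the sample text is invented, showing how percentages are normalised and clamped to [0, 1]:

from abstractcore.processing.basic_summarizer import BasicSummarizer  # assumed path

# Invented sample in the marker format the fallback prompt requests.
sample = """SUMMARY:
The team agreed to ship the release after fixing the streaming regression.

KEY POINTS:
- Streaming regression traced to the provider base class
- Release target moved to Friday

CONFIDENCE: 0.8
FOCUS_ALIGNMENT: 75%
"""

summary, key_points, confidence, focus_alignment = (
    BasicSummarizer._parse_fallback_response(sample)
)
# confidence == 0.8, focus_alignment == 0.75, key_points has two entries;
# missing markers would fall back to the 0.6 defaults shown above.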
@@ -493,7 +746,8 @@ Create a unified summary that represents the entire document effectively."""
         self,
         messages: List[dict],
         preserve_recent: int = 6,
-        focus: Optional[str] = None
+        focus: Optional[str] = None,
+        compression_mode: CompressionMode = CompressionMode.STANDARD
     ) -> SummaryOutput:
         """
         Specialized method for chat history summarization following SOTA 2025 practices
@@ -502,6 +756,7 @@ Create a unified summary that represents the entire document effectively."""
             messages: List of message dicts with 'role' and 'content' keys
             preserve_recent: Number of recent messages to keep intact (default 6)
             focus: Optional focus for summarization (e.g., "key decisions", "technical solutions")
+            compression_mode: How aggressively to compress (LIGHT, STANDARD, HEAVY)

         Returns:
             SummaryOutput: Structured summary optimized for chat history context
@@ -511,36 +766,67 @@ Create a unified summary that represents the entire document effectively."""
         - Focuses on decisions, solutions, and ongoing topics
         - Maintains user intent and assistant responses
         - Optimized for chat continuation rather than standalone summary
+
+        Compression Modes:
+        - LIGHT: Keep most information, only remove redundancy
+        - STANDARD: Balanced compression, main points and context
+        - HEAVY: Aggressive compression, only critical information
         """
+        # Build focus with compression instructions
+        compression_instruction = COMPRESSION_INSTRUCTIONS.get(
+            compression_mode,
+            COMPRESSION_INSTRUCTIONS[CompressionMode.STANDARD]
+        )
+
+        # Combine user focus with compression instruction
+        if focus:
+            effective_focus = f"{compression_instruction} Focus especially on: {focus}"
+        else:
+            effective_focus = compression_instruction
+
+        # Map compression mode to summary length for appropriate output size
+        length_map = {
+            CompressionMode.LIGHT: SummaryLength.DETAILED,
+            CompressionMode.STANDARD: SummaryLength.STANDARD,
+            CompressionMode.HEAVY: SummaryLength.BRIEF,
+        }
+        target_length = length_map.get(compression_mode, SummaryLength.STANDARD)
+
+        logger.debug("Chat history summarization with compression mode",
+                     message_count=len(messages),
+                     preserve_recent=preserve_recent,
+                     compression_mode=compression_mode.value,
+                     target_length=target_length.value)
+
         if len(messages) <= preserve_recent:
             # If short enough, just summarize normally
-            logger.debug("Chat history is short, using standard summarization",
-                         message_count=len(messages),
+            logger.debug("Chat history is short, using standard summarization",
+                         message_count=len(messages),
                          preserve_recent=preserve_recent)
             chat_text = self._format_chat_messages_to_text(messages)
             return self.summarize(
                 chat_text,
-                focus=
+                focus=effective_focus,
                 style=SummaryStyle.CONVERSATIONAL,
-                length=
+                length=target_length
             )

         # Split into older messages (to summarize) and recent messages (to preserve)
         older_messages = messages[:-preserve_recent]
         recent_messages = messages[-preserve_recent:]
-
-        logger.debug("Splitting chat history for summarization",
+
+        logger.debug("Splitting chat history for summarization",
                      total_messages=len(messages),
                      older_messages=len(older_messages),
                      recent_messages=len(recent_messages))

-        # Summarize older messages with conversational focus
+        # Summarize older messages with conversational focus and compression mode
         older_text = self._format_chat_messages_to_text(older_messages)
         older_summary = self.summarize(
             older_text,
-            focus=
+            focus=effective_focus,
             style=SummaryStyle.CONVERSATIONAL,
-            length=
+            length=target_length
         )

         # The summary should ONLY contain the older messages summary
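Finally, a sketch of calling the extended chat-history API from the hunks above. The method name summarize_chat_history is an assumption taken from the docstring ("Specialized method for chat history summarization"); only its signature change is visible in this diff. The message shape follows the 'role'/'content' dicts documented in the Args section, and the import paths are again assumptions:

from abstractcore.processing import BasicSummarizer                   # assumed path
from abstractcore.processing.basic_summarizer import CompressionMode  # assumed path

summarizer = BasicSummarizer()
messages = [
    {"role": "user", "content": "Can we tighten the retry budget?"},
    {"role": "assistant", "content": "Yes, retry_strategy is now configurable."},
    # older turns are summarized; the most recent preserve_recent stay verbatim
]

compact = summarizer.summarize_chat_history(   # method name assumed, see note above
    messages,
    preserve_recent=6,
    focus="key decisions",
    compression_mode=CompressionMode.HEAVY,    # HEAVY maps to SummaryLength.BRIEF
)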
@@ -581,4 +867,4 @@ Create a unified summary that represents the entire document effectively."""
             else:
                 formatted_lines.append(f"[{role.upper()}]: {content}")

-        return "\n\n".join(formatted_lines)
+        return "\n\n".join(formatted_lines)