abstractcore 2.6.8-py3-none-any.whl → 2.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +789 -136
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +300 -83
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +537 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +65 -28
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +5 -4
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2311 -772
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +798 -155
- abstractcore/tools/registry.py +107 -2
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/METADATA +11 -2
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/RECORD +45 -36
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
@@ -6,8 +6,10 @@ text processing capabilities with minimal complexity.
 """
 
 from enum import Enum
-
-
+import json
+import re
+from typing import List, Optional, Tuple
+from pydantic import BaseModel, Field, ValidationError
 
 from ..core.interface import AbstractCoreInterface
 from ..core.factory import create_llm
@@ -117,9 +119,10 @@ class BasicSummarizer:
         self,
         llm: Optional[AbstractCoreInterface] = None,
         max_chunk_size: int = 8000,
-        max_tokens: int =
-        max_output_tokens: int =
-        timeout: Optional[float] = None
+        max_tokens: int = -1,
+        max_output_tokens: int = -1,
+        timeout: Optional[float] = None,
+        retry_strategy: Optional[FeedbackRetry] = None,
     ):
         """
         Initialize the summarizer
@@ -127,14 +130,26 @@ class BasicSummarizer:
         Args:
             llm: AbstractCore instance (any provider). If None, attempts to create ollama gemma3:1b-it-qat
             max_chunk_size: Maximum characters per chunk for long documents (default 8000)
-            max_tokens: Maximum total tokens for LLM context (default
-
+            max_tokens: Maximum total tokens for LLM context (default -1 = AUTO).
+                - Use -1 (AUTO): Automatically uses model's context window capability
+                - Use specific value: Hard limit for deployment constraint (GPU/RAM limits)
+                  Example: max_tokens=16000 limits to 16K even if model supports 128K
+            max_output_tokens: Maximum tokens for LLM output generation (default -1 = AUTO).
+                - Use -1 (AUTO): Automatically uses model's output capability
+                - Use specific value: Hard limit for output tokens
             timeout: HTTP request timeout in seconds. None for unlimited timeout (default None)
+            retry_strategy: Custom retry strategy for structured output. If None, uses default (3 attempts)
         """
         if llm is None:
             try:
                 # Default to gemma3:1b-it-qat with configurable token limits
-
+                # Only pass token limits if not using AUTO mode (-1)
+                llm_kwargs = {'timeout': timeout} if timeout is not None else {}
+                if max_tokens != -1:
+                    llm_kwargs['max_tokens'] = max_tokens
+                if max_output_tokens != -1:
+                    llm_kwargs['max_output_tokens'] = max_output_tokens
+                self.llm = create_llm("ollama", model="gemma3:1b-it-qat", **llm_kwargs)
             except Exception as e:
                 error_msg = (
                     f"❌ Failed to initialize default Ollama model 'gemma3:1b-it-qat': {e}\n\n"
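The AUTO vs. fixed budget behaviour described in this docstring is easiest to see from the caller's side. A minimal usage sketch, assuming the constructor signature shown in this hunk and a module path inferred from the file list (illustrative only, not taken from the package docs):

from abstractcore.processing.basic_summarizer import BasicSummarizer  # path assumed from the file list

# AUTO mode: leaving max_tokens / max_output_tokens at -1 lets the summarizer
# size its budget from whatever context window the underlying model reports.
auto_summarizer = BasicSummarizer()

# Constrained deployment: cap the total context at 16K tokens (and output at 4K)
# even if the model itself advertises a much larger window.
constrained_summarizer = BasicSummarizer(max_tokens=16000, max_output_tokens=4000, timeout=120.0)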
@@ -162,9 +177,13 @@ class BasicSummarizer:
         else:
             self.llm = llm
         self.max_chunk_size = max_chunk_size
+        # Store token budgets. -1 means AUTO (use model's capability).
+        # In AbstractCore, `max_tokens` is the total (input + output) context budget.
+        self.max_tokens = max_tokens
+        self.max_output_tokens = max_output_tokens
 
-        # Default retry strategy with 3 attempts
-        self.retry_strategy = FeedbackRetry(max_attempts=3)
+        # Default retry strategy with 3 attempts (callers may override for latency-sensitive UX).
+        self.retry_strategy = retry_strategy or FeedbackRetry(max_attempts=3)
 
     def summarize(
         self,
@@ -228,36 +247,29 @@ class BasicSummarizer:
         # Build the prompt based on parameters
         prompt = self._build_prompt(text, focus, style, length)
 
-
-
-
-
-
-
-
-
-
-
-
-
-            # Debug information for troubleshooting
-            error_msg = f"Failed to generate structured summary output. Response type: {type(response)}"
-            if hasattr(response, 'content'):
-                error_msg += f", Content: {response.content[:200]}..."
-            if hasattr(response, 'structured_output'):
-                error_msg += f", Structured output: {response.structured_output}"
-            raise ValueError(error_msg)
+        llm_result: Optional[LLMSummaryOutput] = None
+        try:
+            # Use AbstractCore's structured output with retry strategy (no word counts in LLM response)
+            response = self.llm.generate(prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+            llm_result = self._extract_summary_structured_output(response, context="summary")
+        except (json.JSONDecodeError, ValidationError) as e:
+            logger.warning(
+                "Structured summary output failed; falling back to marker format",
+                error_type=type(e).__name__,
+                error=str(e),
+            )
+            llm_result = self._summarize_fallback(text=text, focus=focus, style=style, length=length)
 
         # Compute word counts ourselves (reliable, client-side calculation)
         actual_original_words = len(text.split())
-        actual_summary_words = len(llm_result.summary.split())
+        actual_summary_words = len((llm_result.summary if llm_result else "").split())
 
         # Create complete result with computed word counts
         return SummaryOutput(
-            summary=llm_result.summary,
-            key_points=llm_result.key_points,
-            confidence=llm_result.confidence,
-            focus_alignment=llm_result.focus_alignment,
+            summary=(llm_result.summary if llm_result else ""),
+            key_points=(llm_result.key_points if llm_result else []),
+            confidence=(llm_result.confidence if llm_result else 0.5),
+            focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
             word_count_original=actual_original_words,
            word_count_summary=actual_summary_words
         )
@@ -297,22 +309,31 @@ class BasicSummarizer:
             summary: str
             key_points: List[str] = Field(max_length=5)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                response = self.llm.generate(chunk_prompt, response_model=ChunkSummary, retry_strategy=self.retry_strategy)
+                if isinstance(response, ChunkSummary):
+                    # When structured output succeeds, response is the ChunkSummary object directly
+                    chunk_summaries.append(response)
+                elif hasattr(response, 'structured_output') and response.structured_output:
+                    # Fallback: check for structured_output attribute
+                    chunk_summaries.append(response.structured_output)
+                else:
+                    raise ValueError(f"Unexpected chunk response type: {type(response)}")
+            except (json.JSONDecodeError, ValidationError, ValueError) as e:
+                # If chunk processing fails, create a minimal placeholder (do not fail the whole summary).
+                logger.warning(
+                    "Chunk processing failed, creating fallback",
+                    chunk_number=i + 1,
+                    total_chunks=len(chunks),
+                    error_type=type(e).__name__,
+                    error=str(e),
+                )
+                chunk_summaries.append(
+                    ChunkSummary(
+                        summary=f"Section {i+1} content summary unavailable",
+                        key_points=["Content processing failed"],
+                    )
+                )
 
         # Step 2: Combine chunk summaries (Reduce phase)
         combined_text = "\n\n".join([
@@ -323,35 +344,28 @@ class BasicSummarizer:
         # Generate final summary from combined summaries
         final_prompt = self._build_final_combination_prompt(combined_text, focus, style, length, len(text))
 
-
-
-
-
-
-
-
-
-
-
-
-            # Debug information for troubleshooting
-            error_msg = f"Failed to generate final structured summary output. Response type: {type(response)}"
-            if hasattr(response, 'content'):
-                error_msg += f", Content: {response.content[:200]}..."
-            if hasattr(response, 'structured_output'):
-                error_msg += f", Structured output: {response.structured_output}"
-            raise ValueError(error_msg)
+        llm_result: Optional[LLMSummaryOutput] = None
+        try:
+            response = self.llm.generate(final_prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+            llm_result = self._extract_summary_structured_output(response, context="final_summary")
+        except (json.JSONDecodeError, ValidationError) as e:
+            logger.warning(
+                "Structured final summary output failed; falling back to marker format",
+                error_type=type(e).__name__,
+                error=str(e),
+            )
+            llm_result = self._summarize_fallback(text=combined_text, focus=focus, style=style, length=length)
 
         # Compute word counts ourselves (reliable, client-side calculation)
         actual_original_words = len(text.split())
-        actual_summary_words = len(llm_result.summary.split())
+        actual_summary_words = len((llm_result.summary if llm_result else "").split())
 
         # Create complete result with computed word counts
         return SummaryOutput(
-            summary=llm_result.summary,
-            key_points=llm_result.key_points,
-            confidence=llm_result.confidence,
-            focus_alignment=llm_result.focus_alignment,
+            summary=(llm_result.summary if llm_result else ""),
+            key_points=(llm_result.key_points if llm_result else []),
+            confidence=(llm_result.confidence if llm_result else 0.5),
+            focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
             word_count_original=actual_original_words,
             word_count_summary=actual_summary_words
         )
@@ -360,6 +374,13 @@ class BasicSummarizer:
         """
         Determine if text should be chunked based on token count.
 
+        Token budget logic:
+        - max_tokens = -1 (AUTO): Uses model's full context window capability
+        - max_tokens = N: Hard limit (deployment constraint for GPU/RAM)
+
+        This ensures we don't exceed GPU memory constraints even when the model
+        theoretically supports larger contexts.
+
         Uses centralized TokenUtils for accurate token estimation.
         Falls back to character count if model information unavailable.
         """
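As a worked example of the budget arithmetic implemented in the next hunk (numbers are illustrative; the 8000-token output default, the 1200-token overhead reserve, and the 8000-token floor all come from the code below):

# Illustrative walk-through of the chunking budget, assuming a model that reports a 128K input window.
provider_max_input = 128_000           # capability reported by the provider
user_max_tokens = 16_000               # deployment cap passed as max_tokens
user_max_output = 8_000                # default used when max_output_tokens stays at -1

user_max_input = user_max_tokens - user_max_output           # 8_000
max_input_tokens = min(provider_max_input, user_max_input)   # 8_000 (user cap wins)
token_limit = max(8_000, max_input_tokens - 1_200)           # 8_000 (safety floor applies)
# Text estimated above token_limit tokens is split into chunks; below it, a single pass is used.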
@@ -370,18 +391,214 @@ class BasicSummarizer:
         if self.llm and hasattr(self.llm, 'model'):
             model_name = self.llm.model
 
-        # Estimate tokens using centralized utility
-
+        # Estimate tokens using centralized utility. If estimation fails for any reason,
+        # fall back to character chunking (conservative).
+        try:
+            estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
+        except Exception:
+            return len(text) > self.max_chunk_size
 
-        #
-        #
-
+        # Determine the effective token budget
+        # Get provider's capabilities
+        provider_max_input = getattr(self.llm, "max_input_tokens", None) if self.llm else None
+        if provider_max_input is None:
+            provider_total = getattr(self.llm, "max_tokens", None) if self.llm else None
+            provider_output = getattr(self.llm, "max_output_tokens", None) if self.llm else None
+            if provider_total is not None and provider_output is not None:
+                try:
+                    provider_max_input = int(provider_total) - int(provider_output)
+                except Exception:
+                    provider_max_input = None
 
-
-
+        # Determine effective max_input_tokens based on configuration
+        if self.max_tokens == -1:
+            # AUTO mode: Use model's capability
+            if provider_max_input is not None:
+                max_input_tokens = provider_max_input
+            else:
+                # Fallback to safe default if model info unavailable
+                max_input_tokens = 24000  # Conservative default
+        else:
+            # User-specified limit (deployment constraint)
+            user_max_output = self.max_output_tokens if self.max_output_tokens != -1 else 8000
+            user_max_input = self.max_tokens - user_max_output
 
-
-
+            if provider_max_input is not None:
+                # Respect BOTH user limit AND model capability (take minimum)
+                max_input_tokens = min(provider_max_input, user_max_input)
+            else:
+                # No model info, use user limit
+                max_input_tokens = user_max_input
+
+        # Reserve prompt/formatting overhead (structured output schemas + instructions).
+        # Keep the historical safety floor (8000) for small-context models.
+        try:
+            token_limit = max(8000, int(max_input_tokens) - 1200)
+        except Exception:
+            token_limit = 8000
+
+        logger.debug(
+            "Chunking decision",
+            estimated_tokens=estimated_tokens,
+            token_limit=token_limit,
+            max_tokens_config=self.max_tokens,
+            is_auto_mode=(self.max_tokens == -1),
+            will_chunk=(estimated_tokens > token_limit)
+        )
+
+        return estimated_tokens > token_limit
+
+    def _extract_summary_structured_output(self, response: object, *, context: str) -> LLMSummaryOutput:
+        """Extract structured summary output from AbstractCore responses."""
+        if isinstance(response, LLMSummaryOutput):
+            return response
+        if hasattr(response, "structured_output") and getattr(response, "structured_output"):
+            return response.structured_output
+
+        error_msg = f"Failed to generate structured {context} output. Response type: {type(response)}"
+        if hasattr(response, "content") and getattr(response, "content"):
+            try:
+                error_msg += f", Content: {str(response.content)[:200]}..."
+            except Exception:
+                pass
+        if hasattr(response, "structured_output"):
+            try:
+                error_msg += f", Structured output: {getattr(response, 'structured_output')}"
+            except Exception:
+                pass
+        raise ValueError(error_msg)
+
+    def _summarize_fallback(
+        self,
+        *,
+        text: str,
+        focus: Optional[str],
+        style: SummaryStyle,
+        length: SummaryLength,
+    ) -> LLMSummaryOutput:
+        """Best-effort summary when structured output cannot be produced reliably."""
+        prompt = self._build_fallback_prompt(text=text, focus=focus, style=style, length=length)
+        response = self.llm.generate(prompt)
+        content = getattr(response, "content", None)
+        if content is None:
+            content = str(response)
+        summary, key_points, confidence, focus_alignment = self._parse_fallback_response(str(content))
+        return LLMSummaryOutput(
+            summary=summary,
+            key_points=key_points[:8],
+            confidence=confidence,
+            focus_alignment=focus_alignment,
+        )
+
+    def _build_fallback_prompt(
+        self,
+        *,
+        text: str,
+        focus: Optional[str],
+        style: SummaryStyle,
+        length: SummaryLength,
+    ) -> str:
+        """Build a non-JSON prompt that is easy to parse deterministically."""
+        style_instructions = {
+            SummaryStyle.STRUCTURED: "Present the summary in a clear, organized format with distinct sections or bullet points.",
+            SummaryStyle.NARRATIVE: "Write the summary as a flowing narrative that tells the story of the content.",
+            SummaryStyle.OBJECTIVE: "Maintain a neutral, factual tone without opinions or interpretations.",
+            SummaryStyle.ANALYTICAL: "Provide critical analysis with insights, implications, and deeper understanding.",
+            SummaryStyle.EXECUTIVE: "Focus on actionable insights, business implications, and key decisions.",
+            SummaryStyle.CONVERSATIONAL: "Preserve conversational context, key decisions, ongoing topics, and user intent. Focus on information needed for conversation continuity.",
+        }
+
+        length_instructions = {
+            SummaryLength.BRIEF: "Keep the summary very concise - 2-3 sentences covering only the most essential points.",
+            SummaryLength.STANDARD: "Provide a balanced summary of 1-2 paragraphs covering the main ideas.",
+            SummaryLength.DETAILED: "Create a comprehensive summary with multiple paragraphs covering all important aspects.",
+            SummaryLength.COMPREHENSIVE: "Provide an extensive analysis covering all significant points, context, and implications.",
+        }
+
+        focus_instruction = ""
+        if focus:
+            focus_instruction = f"\nPay special attention to: {focus}\n"
+
+        return f"""Analyze the following text and produce a summary.
+
+{style_instructions[style]}
+{length_instructions[length]}{focus_instruction}
+
+Text to summarize:
+{text}
+
+Return your answer in this EXACT plain-text format (no JSON, no code blocks):
+
+SUMMARY:
+<the main summary text>
+
+KEY POINTS:
+- <point 1>
+- <point 2>
+- <point 3>
+
+CONFIDENCE: <0-1>
+FOCUS_ALIGNMENT: <0-1>
+"""
+
+    @staticmethod
+    def _parse_fallback_response(content: str) -> Tuple[str, List[str], float, float]:
+        """Parse marker-format fallback summaries into structured fields."""
+        text = (content or "").strip()
+        if not text:
+            return "", [], 0.5, 0.5
+
+        def _parse_score(label_re: str, default: float) -> float:
+            m = re.search(rf"(?im)^{label_re}\s*:\s*(.+?)\s*$", text)
+            if not m:
+                return default
+            raw = m.group(1).strip()
+            try:
+                if raw.endswith("%"):
+                    val = float(raw[:-1].strip()) / 100.0
+                else:
+                    val = float(raw)
+            except Exception:
+                return default
+            return max(0.0, min(1.0, val))
+
+        summary = ""
+        m_summary = re.search(r"(?is)summary\s*:\s*(.*?)\n\s*key\s*points\s*:", text)
+        if m_summary:
+            summary = m_summary.group(1).strip()
+        else:
+            # Best-effort: take the first paragraph.
+            summary = text.split("\n\n", 1)[0].strip()
+
+        key_points: List[str] = []
+        m_kp = re.search(
+            r"(?is)key\s*points\s*:\s*(.*?)(?:\n\s*confidence\s*:|\n\s*focus[_ ]alignment\s*:|\Z)",
+            text,
+        )
+        if m_kp:
+            block = m_kp.group(1)
+            for line in block.splitlines():
+                line = line.strip()
+                if not line:
+                    continue
+                if line.startswith(("-", "•", "*")):
+                    line = line.lstrip("-•*").strip()
+                if line:
+                    key_points.append(line)
+        if not key_points:
+            # Fallback: try to extract bullet-like lines anywhere.
+            for line in text.splitlines():
+                line = line.strip()
+                if line.startswith(("-", "•", "*")):
+                    cleaned = line.lstrip("-•*").strip()
+                    if cleaned:
+                        key_points.append(cleaned)
+        key_points = key_points[:8]
+
+        confidence = _parse_score("confidence", 0.6)
+        focus_alignment = _parse_score(r"focus[_ ]alignment", 0.6)
+
+        return summary, key_points, confidence, focus_alignment
 
     def _split_text_into_chunks(self, text: str, overlap: int = 200) -> List[str]:
         """Split text into overlapping chunks"""
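The marker format that `_build_fallback_prompt` requests and `_parse_fallback_response` consumes can be exercised directly, since the parser is a staticmethod. A sketch with made-up input (module path assumed from the file list; values are hypothetical):

from abstractcore.processing.basic_summarizer import BasicSummarizer  # path assumed from the file list

raw = """SUMMARY:
The report covers Q3 revenue growth and the new pricing model.

KEY POINTS:
- Revenue grew 12% quarter over quarter
- Pricing changes take effect in January

CONFIDENCE: 0.8
FOCUS_ALIGNMENT: 75%
"""

summary, key_points, confidence, focus_alignment = BasicSummarizer._parse_fallback_response(raw)
# summary         -> "The report covers Q3 revenue growth and the new pricing model."
# key_points      -> ["Revenue grew 12% quarter over quarter", "Pricing changes take effect in January"]
# confidence      -> 0.8
# focus_alignment -> 0.75  (percentages are normalised into the 0-1 range)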
@@ -650,4 +867,4 @@ Create a unified summary that represents the entire document effectively."""
         else:
             formatted_lines.append(f"[{role.upper()}]: {content}")
 
-        return "\n\n".join(formatted_lines)
+        return "\n\n".join(formatted_lines)
@@ -100,6 +100,30 @@ class AnthropicProvider(BaseProvider):
                     "role": "assistant",
                     "content": msg["content"]
                 })
+            elif role == "tool":
+                # Anthropic Messages API represents tool outputs as `tool_result`
+                # content blocks inside a USER message (there is no `role="tool"`).
+                meta = msg.get("metadata") if isinstance(msg.get("metadata"), dict) else {}
+                tool_use_id = meta.get("call_id") or meta.get("tool_use_id") or meta.get("id")
+                tool_text = msg.get("content", "")
+                tool_text = "" if tool_text is None else str(tool_text)
+
+                if isinstance(tool_use_id, str) and tool_use_id.strip():
+                    api_messages.append(
+                        {
+                            "role": "user",
+                            "content": [
+                                {
+                                    "type": "tool_result",
+                                    "tool_use_id": tool_use_id.strip(),
+                                    "content": tool_text,
+                                }
+                            ],
+                        }
+                    )
+                else:
+                    # Fallback: preserve as plain user text when no tool_use_id is available.
+                    api_messages.append({"role": "user", "content": tool_text})
             else:
                 api_messages.append({
                     "role": "user",
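Concretely, a stored tool message is re-shaped into a `tool_result` block on a user turn. A sketch of the before/after shapes, with hypothetical values (the history-entry shape is inferred from this hunk):

# Hypothetical history entry as the provider receives it:
msg = {
    "role": "tool",
    "content": "72°F and sunny",
    "metadata": {"call_id": "toolu_01A"},
}

# What gets appended to api_messages for the Anthropic Messages API:
api_message = {
    "role": "user",
    "content": [
        {
            "type": "tool_result",
            "tool_use_id": "toolu_01A",
            "content": "72°F and sunny",
        }
    ],
}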
@@ -194,7 +218,9 @@ class AnthropicProvider(BaseProvider):
             call_params["tool_choice"] = {"type": kwargs.get("tool_choice", "auto")}
         else:
             # Add tools as system prompt for prompted models
-
+            system_text = call_params.get("system") if isinstance(call_params.get("system"), str) else ""
+            include_tool_list = "## Tools (session)" not in system_text
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
             if call_params.get("system"):
                 call_params["system"] += f"\n\n{tool_prompt}"
             else:
@@ -213,6 +239,8 @@ class AnthropicProvider(BaseProvider):
             formatted = self._format_response(response)
             # Add generation time to response
             formatted.gen_time = gen_time
+            formatted.metadata = dict(formatted.metadata or {})
+            formatted.metadata["_provider_request"] = {"call_params": call_params}
 
             # Handle tool execution for Anthropic responses
             if tools and (formatted.has_tool_calls() or
@@ -232,7 +260,7 @@ class AnthropicProvider(BaseProvider):
                 error_message = format_model_error("Anthropic", self.model, available_models)
                 raise ModelNotFoundError(error_message)
             else:
-                raise
+                raise
 
     async def _agenerate_internal(self,
                                   prompt: str,
@@ -260,6 +288,30 @@ class AnthropicProvider(BaseProvider):
                     "role": "assistant",
                     "content": msg["content"]
                 })
+            elif role == "tool":
+                # Anthropic Messages API represents tool outputs as `tool_result`
+                # content blocks inside a USER message (there is no `role="tool"`).
+                meta = msg.get("metadata") if isinstance(msg.get("metadata"), dict) else {}
+                tool_use_id = meta.get("call_id") or meta.get("tool_use_id") or meta.get("id")
+                tool_text = msg.get("content", "")
+                tool_text = "" if tool_text is None else str(tool_text)
+
+                if isinstance(tool_use_id, str) and tool_use_id.strip():
+                    api_messages.append(
+                        {
+                            "role": "user",
+                            "content": [
+                                {
+                                    "type": "tool_result",
+                                    "tool_use_id": tool_use_id.strip(),
+                                    "content": tool_text,
+                                }
+                            ],
+                        }
+                    )
+                else:
+                    # Fallback: preserve as plain user text when no tool_use_id is available.
+                    api_messages.append({"role": "user", "content": tool_text})
             else:
                 api_messages.append({
                     "role": "user",
@@ -348,7 +400,9 @@ class AnthropicProvider(BaseProvider):
         elif kwargs.get("tool_choice"):
             call_params["tool_choice"] = {"type": kwargs.get("tool_choice", "auto")}
         else:
-
+            system_text = call_params.get("system") if isinstance(call_params.get("system"), str) else ""
+            include_tool_list = "## Tools (session)" not in system_text
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
             if call_params.get("system"):
                 call_params["system"] += f"\n\n{tool_prompt}"
             else:
@@ -365,6 +419,8 @@ class AnthropicProvider(BaseProvider):
 
             formatted = self._format_response(response)
             formatted.gen_time = gen_time
+            formatted.metadata = dict(formatted.metadata or {})
+            formatted.metadata["_provider_request"] = {"call_params": call_params}
 
             if tools and (formatted.has_tool_calls() or
                           (self.tool_handler.supports_prompted and formatted.content)):
@@ -381,7 +437,7 @@ class AnthropicProvider(BaseProvider):
                 error_message = format_model_error("Anthropic", self.model, available_models)
                 raise ModelNotFoundError(error_message)
             else:
-                raise
+                raise
 
     async def _async_stream_response(self, call_params: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None) -> AsyncIterator[GenerateResponse]:
         """Native async streaming with Anthropic's context manager pattern."""
@@ -397,7 +453,7 @@ class AnthropicProvider(BaseProvider):
                         raw_response=chunk
                     )
         except Exception as e:
-            raise
+            raise
 
     def unload(self) -> None:
         """Close async client if it was created."""
@@ -414,13 +470,38 @@ class AnthropicProvider(BaseProvider):
         """Format tools for Anthropic API format"""
         formatted_tools = []
         for tool in tools:
-            #
+            # Anthropic expects `input_schema` to be a JSON Schema object:
+            # https://platform.claude.com/docs/en/agents-and-tools/tool-use/implement-tool-use
+            #
+            # Our internal tool representation typically uses:
+            #   tool["parameters"] = { "arg": {"type": "...", "default": ...?}, ... }
+            # or, less commonly:
+            #   tool["parameters"] = {"type":"object","properties":{...},"required":[...]}
             params = tool.get("parameters", {})
-
+
+            properties: Dict[str, Any] = {}
+            required: List[str] = []
+
+            if isinstance(params, dict) and "properties" in params:
+                # Treat as already-schema-like.
+                raw_props = params.get("properties") if isinstance(params.get("properties"), dict) else {}
+                properties = dict(raw_props)
+                raw_required = params.get("required")
+                if isinstance(raw_required, list):
+                    required = [str(x) for x in raw_required if isinstance(x, (str, int))]
+            elif isinstance(params, dict):
+                # Treat as compact parameter dict; infer required args by absence of `default`.
+                properties = dict(params)
+                for k, v in params.items():
+                    if isinstance(v, dict) and "default" not in v:
+                        required.append(str(k))
+
+            input_schema: Dict[str, Any] = {
                 "type": "object",
-                "properties":
-                "required": params.get("required", list(params.keys()) if "properties" not in params else [])
+                "properties": properties,
             }
+            if required:
+                input_schema["required"] = required
 
             formatted_tool = {
                 "name": tool.get("name"),
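For the compact parameter form, the conversion infers `required` from the absence of a `default`. A sketch with a hypothetical tool definition, showing the input and the schema the logic above produces:

# Hypothetical compact tool definition (the second shape described in the comment above):
tool = {
    "name": "get_weather",
    "description": "Look up current weather",
    "parameters": {
        "city": {"type": "string"},                        # no default -> required
        "units": {"type": "string", "default": "metric"},  # has default -> optional
    },
}

# input_schema produced by the logic above and sent to Anthropic:
input_schema = {
    "type": "object",
    "properties": {
        "city": {"type": "string"},
        "units": {"type": "string", "default": "metric"},
    },
    "required": ["city"],
}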
@@ -440,7 +521,7 @@ class AnthropicProvider(BaseProvider):
         # Handle different content types
         for content_block in response.content:
             if content_block.type == "text":
-                content
+                content += content_block.text
             elif content_block.type == "tool_use":
                 if tool_calls is None:
                     tool_calls = []