abstractcore 2.6.8__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. abstractcore/apps/summarizer.py +69 -27
  2. abstractcore/architectures/detection.py +190 -25
  3. abstractcore/assets/architecture_formats.json +129 -6
  4. abstractcore/assets/model_capabilities.json +789 -136
  5. abstractcore/config/main.py +2 -2
  6. abstractcore/config/manager.py +3 -1
  7. abstractcore/events/__init__.py +7 -1
  8. abstractcore/mcp/__init__.py +30 -0
  9. abstractcore/mcp/client.py +213 -0
  10. abstractcore/mcp/factory.py +64 -0
  11. abstractcore/mcp/naming.py +28 -0
  12. abstractcore/mcp/stdio_client.py +336 -0
  13. abstractcore/mcp/tool_source.py +164 -0
  14. abstractcore/processing/basic_deepsearch.py +1 -1
  15. abstractcore/processing/basic_summarizer.py +300 -83
  16. abstractcore/providers/anthropic_provider.py +91 -10
  17. abstractcore/providers/base.py +537 -16
  18. abstractcore/providers/huggingface_provider.py +17 -8
  19. abstractcore/providers/lmstudio_provider.py +170 -25
  20. abstractcore/providers/mlx_provider.py +13 -10
  21. abstractcore/providers/ollama_provider.py +42 -26
  22. abstractcore/providers/openai_compatible_provider.py +87 -22
  23. abstractcore/providers/openai_provider.py +12 -9
  24. abstractcore/providers/streaming.py +201 -39
  25. abstractcore/providers/vllm_provider.py +78 -21
  26. abstractcore/server/app.py +65 -28
  27. abstractcore/structured/retry.py +20 -7
  28. abstractcore/tools/__init__.py +5 -4
  29. abstractcore/tools/abstractignore.py +166 -0
  30. abstractcore/tools/arg_canonicalizer.py +61 -0
  31. abstractcore/tools/common_tools.py +2311 -772
  32. abstractcore/tools/core.py +109 -13
  33. abstractcore/tools/handler.py +17 -3
  34. abstractcore/tools/parser.py +798 -155
  35. abstractcore/tools/registry.py +107 -2
  36. abstractcore/tools/syntax_rewriter.py +68 -6
  37. abstractcore/tools/tag_rewriter.py +186 -1
  38. abstractcore/utils/jsonish.py +111 -0
  39. abstractcore/utils/version.py +1 -1
  40. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/METADATA +11 -2
  41. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/RECORD +45 -36
  42. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
  43. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
  44. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
  45. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
@@ -6,8 +6,10 @@ text processing capabilities with minimal complexity.
  """

  from enum import Enum
- from typing import List, Optional
- from pydantic import BaseModel, Field
+ import json
+ import re
+ from typing import List, Optional, Tuple
+ from pydantic import BaseModel, Field, ValidationError

  from ..core.interface import AbstractCoreInterface
  from ..core.factory import create_llm
@@ -117,9 +119,10 @@ class BasicSummarizer:
  self,
  llm: Optional[AbstractCoreInterface] = None,
  max_chunk_size: int = 8000,
- max_tokens: int = 32000,
- max_output_tokens: int = 8000,
- timeout: Optional[float] = None
+ max_tokens: int = -1,
+ max_output_tokens: int = -1,
+ timeout: Optional[float] = None,
+ retry_strategy: Optional[FeedbackRetry] = None,
  ):
  """
  Initialize the summarizer
@@ -127,14 +130,26 @@ class BasicSummarizer:
  Args:
  llm: AbstractCore instance (any provider). If None, attempts to create ollama gemma3:1b-it-qat
  max_chunk_size: Maximum characters per chunk for long documents (default 8000)
- max_tokens: Maximum total tokens for LLM context (default 32000)
- max_output_tokens: Maximum tokens for LLM output generation (default 8000)
+ max_tokens: Maximum total tokens for LLM context (default -1 = AUTO).
+ - Use -1 (AUTO): Automatically uses model's context window capability
+ - Use specific value: Hard limit for deployment constraint (GPU/RAM limits)
+ Example: max_tokens=16000 limits to 16K even if model supports 128K
+ max_output_tokens: Maximum tokens for LLM output generation (default -1 = AUTO).
+ - Use -1 (AUTO): Automatically uses model's output capability
+ - Use specific value: Hard limit for output tokens
  timeout: HTTP request timeout in seconds. None for unlimited timeout (default None)
+ retry_strategy: Custom retry strategy for structured output. If None, uses default (3 attempts)
  """
  if llm is None:
  try:
  # Default to gemma3:1b-it-qat with configurable token limits
- self.llm = create_llm("ollama", model="gemma3:1b-it-qat", max_tokens=max_tokens, max_output_tokens=max_output_tokens, timeout=timeout)
+ # Only pass token limits if not using AUTO mode (-1)
+ llm_kwargs = {'timeout': timeout} if timeout is not None else {}
+ if max_tokens != -1:
+ llm_kwargs['max_tokens'] = max_tokens
+ if max_output_tokens != -1:
+ llm_kwargs['max_output_tokens'] = max_output_tokens
+ self.llm = create_llm("ollama", model="gemma3:1b-it-qat", **llm_kwargs)
  except Exception as e:
  error_msg = (
  f"❌ Failed to initialize default Ollama model 'gemma3:1b-it-qat': {e}\n\n"
@@ -162,9 +177,13 @@ class BasicSummarizer:
  else:
  self.llm = llm
  self.max_chunk_size = max_chunk_size
+ # Store token budgets. -1 means AUTO (use model's capability).
+ # In AbstractCore, `max_tokens` is the total (input + output) context budget.
+ self.max_tokens = max_tokens
+ self.max_output_tokens = max_output_tokens

- # Default retry strategy with 3 attempts
- self.retry_strategy = FeedbackRetry(max_attempts=3)
+ # Default retry strategy with 3 attempts (callers may override for latency-sensitive UX).
+ self.retry_strategy = retry_strategy or FeedbackRetry(max_attempts=3)

  def summarize(
  self,
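A minimal usage sketch of the new constructor defaults follows; the import paths are inferred from the file list above and the FeedbackRetry call shown in this diff, so treat them as assumptions rather than documented API.

# Sketch only: module paths and FeedbackRetry usage are inferred from this diff.
from abstractcore.processing.basic_summarizer import BasicSummarizer
from abstractcore.structured.retry import FeedbackRetry

# AUTO mode (new defaults): max_tokens=-1 and max_output_tokens=-1, so no token
# limits are passed to create_llm() and the model's own capabilities are used.
summarizer = BasicSummarizer()

# Deployment-constrained mode: cap the total context at 16K even if the model
# supports more, and keep structured-output retries short for latency-sensitive callers.
capped = BasicSummarizer(
    max_tokens=16000,
    max_output_tokens=2000,
    retry_strategy=FeedbackRetry(max_attempts=1),
)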
@@ -228,36 +247,29 @@ class BasicSummarizer:
  # Build the prompt based on parameters
  prompt = self._build_prompt(text, focus, style, length)

- # Use AbstractCore's structured output with retry strategy (no word counts in LLM response)
- response = self.llm.generate(prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
-
- # Extract the structured output
- llm_result = None
- if isinstance(response, LLMSummaryOutput):
- # When structured output succeeds, response is the LLMSummaryOutput object directly
- llm_result = response
- elif hasattr(response, 'structured_output') and response.structured_output:
- # Fallback: check for structured_output attribute
- llm_result = response.structured_output
- else:
- # Debug information for troubleshooting
- error_msg = f"Failed to generate structured summary output. Response type: {type(response)}"
- if hasattr(response, 'content'):
- error_msg += f", Content: {response.content[:200]}..."
- if hasattr(response, 'structured_output'):
- error_msg += f", Structured output: {response.structured_output}"
- raise ValueError(error_msg)
+ llm_result: Optional[LLMSummaryOutput] = None
+ try:
+ # Use AbstractCore's structured output with retry strategy (no word counts in LLM response)
+ response = self.llm.generate(prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+ llm_result = self._extract_summary_structured_output(response, context="summary")
+ except (json.JSONDecodeError, ValidationError) as e:
+ logger.warning(
+ "Structured summary output failed; falling back to marker format",
+ error_type=type(e).__name__,
+ error=str(e),
+ )
+ llm_result = self._summarize_fallback(text=text, focus=focus, style=style, length=length)

  # Compute word counts ourselves (reliable, client-side calculation)
  actual_original_words = len(text.split())
- actual_summary_words = len(llm_result.summary.split())
+ actual_summary_words = len((llm_result.summary if llm_result else "").split())

  # Create complete result with computed word counts
  return SummaryOutput(
- summary=llm_result.summary,
- key_points=llm_result.key_points,
- confidence=llm_result.confidence,
- focus_alignment=llm_result.focus_alignment,
+ summary=(llm_result.summary if llm_result else ""),
+ key_points=(llm_result.key_points if llm_result else []),
+ confidence=(llm_result.confidence if llm_result else 0.5),
+ focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
  word_count_original=actual_original_words,
  word_count_summary=actual_summary_words
  )
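With the try/except above, summarize() degrades to the marker-format fallback (added later in this diff) instead of raising, and still returns a complete SummaryOutput with client-computed word counts. Continuing the sketch above (the positional text argument is an assumption):

text = "..."  # any long document
result = summarizer.summarize(text)
print(result.word_count_original, result.word_count_summary)  # always computed from the actual strings
print(result.confidence)  # 0.5 only in the defensive case where neither path produced a result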
@@ -297,22 +309,31 @@ class BasicSummarizer:
  summary: str
  key_points: List[str] = Field(max_length=5)

- response = self.llm.generate(chunk_prompt, response_model=ChunkSummary, retry_strategy=self.retry_strategy)
- if isinstance(response, ChunkSummary):
- # When structured output succeeds, response is the ChunkSummary object directly
- chunk_summaries.append(response)
- elif hasattr(response, 'structured_output') and response.structured_output:
- # Fallback: check for structured_output attribute
- chunk_summaries.append(response.structured_output)
- else:
- # If chunk processing fails, create a fallback summary
- logger.warning("Chunk processing failed, creating fallback",
- chunk_number=i+1,
- total_chunks=len(chunks))
- chunk_summaries.append(ChunkSummary(
- summary=f"Section {i+1} content summary unavailable",
- key_points=["Content processing failed"]
- ))
+ try:
+ response = self.llm.generate(chunk_prompt, response_model=ChunkSummary, retry_strategy=self.retry_strategy)
+ if isinstance(response, ChunkSummary):
+ # When structured output succeeds, response is the ChunkSummary object directly
+ chunk_summaries.append(response)
+ elif hasattr(response, 'structured_output') and response.structured_output:
+ # Fallback: check for structured_output attribute
+ chunk_summaries.append(response.structured_output)
+ else:
+ raise ValueError(f"Unexpected chunk response type: {type(response)}")
+ except (json.JSONDecodeError, ValidationError, ValueError) as e:
+ # If chunk processing fails, create a minimal placeholder (do not fail the whole summary).
+ logger.warning(
+ "Chunk processing failed, creating fallback",
+ chunk_number=i + 1,
+ total_chunks=len(chunks),
+ error_type=type(e).__name__,
+ error=str(e),
+ )
+ chunk_summaries.append(
+ ChunkSummary(
+ summary=f"Section {i+1} content summary unavailable",
+ key_points=["Content processing failed"],
+ )
+ )

  # Step 2: Combine chunk summaries (Reduce phase)
  combined_text = "\n\n".join([
@@ -323,35 +344,28 @@ class BasicSummarizer:
  # Generate final summary from combined summaries
  final_prompt = self._build_final_combination_prompt(combined_text, focus, style, length, len(text))

- response = self.llm.generate(final_prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
-
- # Extract the structured output
- llm_result = None
- if isinstance(response, LLMSummaryOutput):
- # When structured output succeeds, response is the LLMSummaryOutput object directly
- llm_result = response
- elif hasattr(response, 'structured_output') and response.structured_output:
- # Fallback: check for structured_output attribute
- llm_result = response.structured_output
- else:
- # Debug information for troubleshooting
- error_msg = f"Failed to generate final structured summary output. Response type: {type(response)}"
- if hasattr(response, 'content'):
- error_msg += f", Content: {response.content[:200]}..."
- if hasattr(response, 'structured_output'):
- error_msg += f", Structured output: {response.structured_output}"
- raise ValueError(error_msg)
+ llm_result: Optional[LLMSummaryOutput] = None
+ try:
+ response = self.llm.generate(final_prompt, response_model=LLMSummaryOutput, retry_strategy=self.retry_strategy)
+ llm_result = self._extract_summary_structured_output(response, context="final_summary")
+ except (json.JSONDecodeError, ValidationError) as e:
+ logger.warning(
+ "Structured final summary output failed; falling back to marker format",
+ error_type=type(e).__name__,
+ error=str(e),
+ )
+ llm_result = self._summarize_fallback(text=combined_text, focus=focus, style=style, length=length)

  # Compute word counts ourselves (reliable, client-side calculation)
  actual_original_words = len(text.split())
- actual_summary_words = len(llm_result.summary.split())
+ actual_summary_words = len((llm_result.summary if llm_result else "").split())

  # Create complete result with computed word counts
  return SummaryOutput(
- summary=llm_result.summary,
- key_points=llm_result.key_points,
- confidence=llm_result.confidence,
- focus_alignment=llm_result.focus_alignment,
+ summary=(llm_result.summary if llm_result else ""),
+ key_points=(llm_result.key_points if llm_result else []),
+ confidence=(llm_result.confidence if llm_result else 0.5),
+ focus_alignment=(llm_result.focus_alignment if llm_result else 0.5),
  word_count_original=actual_original_words,
  word_count_summary=actual_summary_words
  )
@@ -360,6 +374,13 @@ class BasicSummarizer:
  """
  Determine if text should be chunked based on token count.

+ Token budget logic:
+ - max_tokens = -1 (AUTO): Uses model's full context window capability
+ - max_tokens = N: Hard limit (deployment constraint for GPU/RAM)
+
+ This ensures we don't exceed GPU memory constraints even when the model
+ theoretically supports larger contexts.
+
  Uses centralized TokenUtils for accurate token estimation.
  Falls back to character count if model information unavailable.
  """
@@ -370,18 +391,214 @@ class BasicSummarizer:
  if self.llm and hasattr(self.llm, 'model'):
  model_name = self.llm.model

- # Estimate tokens using centralized utility
- estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
+ # Estimate tokens using centralized utility. If estimation fails for any reason,
+ # fall back to character chunking (conservative).
+ try:
+ estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
+ except Exception:
+ return len(text) > self.max_chunk_size

- # Use a conservative token limit (leaving room for prompt overhead)
- # Most models have 32k+ context nowadays, so 8k tokens for input text is safe
- token_limit = 8000
+ # Determine the effective token budget
+ # Get provider's capabilities
+ provider_max_input = getattr(self.llm, "max_input_tokens", None) if self.llm else None
+ if provider_max_input is None:
+ provider_total = getattr(self.llm, "max_tokens", None) if self.llm else None
+ provider_output = getattr(self.llm, "max_output_tokens", None) if self.llm else None
+ if provider_total is not None and provider_output is not None:
+ try:
+ provider_max_input = int(provider_total) - int(provider_output)
+ except Exception:
+ provider_max_input = None

- if estimated_tokens > token_limit:
- return True
+ # Determine effective max_input_tokens based on configuration
+ if self.max_tokens == -1:
+ # AUTO mode: Use model's capability
+ if provider_max_input is not None:
+ max_input_tokens = provider_max_input
+ else:
+ # Fallback to safe default if model info unavailable
+ max_input_tokens = 24000 # Conservative default
+ else:
+ # User-specified limit (deployment constraint)
+ user_max_output = self.max_output_tokens if self.max_output_tokens != -1 else 8000
+ user_max_input = self.max_tokens - user_max_output

- # Fallback to character-based check for very long texts
- return len(text) > self.max_chunk_size
+
+ if provider_max_input is not None:
+ # Respect BOTH user limit AND model capability (take minimum)
+ max_input_tokens = min(provider_max_input, user_max_input)
+ else:
+ # No model info, use user limit
+ max_input_tokens = user_max_input
+
+ # Reserve prompt/formatting overhead (structured output schemas + instructions).
+ # Keep the historical safety floor (8000) for small-context models.
+ try:
+ token_limit = max(8000, int(max_input_tokens) - 1200)
+ except Exception:
+ token_limit = 8000
+
+ logger.debug(
+ "Chunking decision",
+ estimated_tokens=estimated_tokens,
+ token_limit=token_limit,
+ max_tokens_config=self.max_tokens,
+ is_auto_mode=(self.max_tokens == -1),
+ will_chunk=(estimated_tokens > token_limit)
+ )
+
+ return estimated_tokens > token_limit
+
+ def _extract_summary_structured_output(self, response: object, *, context: str) -> LLMSummaryOutput:
+ """Extract structured summary output from AbstractCore responses."""
+ if isinstance(response, LLMSummaryOutput):
+ return response
+ if hasattr(response, "structured_output") and getattr(response, "structured_output"):
+ return response.structured_output
+
+ error_msg = f"Failed to generate structured {context} output. Response type: {type(response)}"
+ if hasattr(response, "content") and getattr(response, "content"):
+ try:
+ error_msg += f", Content: {str(response.content)[:200]}..."
+ except Exception:
+ pass
+ if hasattr(response, "structured_output"):
+ try:
+ error_msg += f", Structured output: {getattr(response, 'structured_output')}"
+ except Exception:
+ pass
+ raise ValueError(error_msg)
+
+ def _summarize_fallback(
+ self,
+ *,
+ text: str,
+ focus: Optional[str],
+ style: SummaryStyle,
+ length: SummaryLength,
+ ) -> LLMSummaryOutput:
+ """Best-effort summary when structured output cannot be produced reliably."""
+ prompt = self._build_fallback_prompt(text=text, focus=focus, style=style, length=length)
+ response = self.llm.generate(prompt)
+ content = getattr(response, "content", None)
+ if content is None:
+ content = str(response)
+ summary, key_points, confidence, focus_alignment = self._parse_fallback_response(str(content))
+ return LLMSummaryOutput(
+ summary=summary,
+ key_points=key_points[:8],
+ confidence=confidence,
+ focus_alignment=focus_alignment,
+ )
+
+ def _build_fallback_prompt(
+ self,
+ *,
+ text: str,
+ focus: Optional[str],
+ style: SummaryStyle,
+ length: SummaryLength,
+ ) -> str:
+ """Build a non-JSON prompt that is easy to parse deterministically."""
+ style_instructions = {
+ SummaryStyle.STRUCTURED: "Present the summary in a clear, organized format with distinct sections or bullet points.",
+ SummaryStyle.NARRATIVE: "Write the summary as a flowing narrative that tells the story of the content.",
+ SummaryStyle.OBJECTIVE: "Maintain a neutral, factual tone without opinions or interpretations.",
+ SummaryStyle.ANALYTICAL: "Provide critical analysis with insights, implications, and deeper understanding.",
+ SummaryStyle.EXECUTIVE: "Focus on actionable insights, business implications, and key decisions.",
+ SummaryStyle.CONVERSATIONAL: "Preserve conversational context, key decisions, ongoing topics, and user intent. Focus on information needed for conversation continuity.",
+ }
+
+ length_instructions = {
+ SummaryLength.BRIEF: "Keep the summary very concise - 2-3 sentences covering only the most essential points.",
+ SummaryLength.STANDARD: "Provide a balanced summary of 1-2 paragraphs covering the main ideas.",
+ SummaryLength.DETAILED: "Create a comprehensive summary with multiple paragraphs covering all important aspects.",
+ SummaryLength.COMPREHENSIVE: "Provide an extensive analysis covering all significant points, context, and implications.",
+ }
+
+ focus_instruction = ""
+ if focus:
+ focus_instruction = f"\nPay special attention to: {focus}\n"
+
+ return f"""Analyze the following text and produce a summary.
+
+ {style_instructions[style]}
+ {length_instructions[length]}{focus_instruction}
+
+ Text to summarize:
+ {text}
+
+ Return your answer in this EXACT plain-text format (no JSON, no code blocks):
+
+ SUMMARY:
+ <the main summary text>
+
+ KEY POINTS:
+ - <point 1>
+ - <point 2>
+ - <point 3>
+
+ CONFIDENCE: <0-1>
+ FOCUS_ALIGNMENT: <0-1>
+ """
+
+ @staticmethod
+ def _parse_fallback_response(content: str) -> Tuple[str, List[str], float, float]:
+ """Parse marker-format fallback summaries into structured fields."""
+ text = (content or "").strip()
+ if not text:
+ return "", [], 0.5, 0.5
+
+ def _parse_score(label_re: str, default: float) -> float:
+ m = re.search(rf"(?im)^{label_re}\s*:\s*(.+?)\s*$", text)
+ if not m:
+ return default
+ raw = m.group(1).strip()
+ try:
+ if raw.endswith("%"):
+ val = float(raw[:-1].strip()) / 100.0
+ else:
+ val = float(raw)
+ except Exception:
+ return default
+ return max(0.0, min(1.0, val))
+
+ summary = ""
+ m_summary = re.search(r"(?is)summary\s*:\s*(.*?)\n\s*key\s*points\s*:", text)
+ if m_summary:
+ summary = m_summary.group(1).strip()
+ else:
+ # Best-effort: take the first paragraph.
+ summary = text.split("\n\n", 1)[0].strip()
+
+ key_points: List[str] = []
+ m_kp = re.search(
+ r"(?is)key\s*points\s*:\s*(.*?)(?:\n\s*confidence\s*:|\n\s*focus[_ ]alignment\s*:|\Z)",
+ text,
+ )
+ if m_kp:
+ block = m_kp.group(1)
+ for line in block.splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith(("-", "•", "*")):
+ line = line.lstrip("-•*").strip()
+ if line:
+ key_points.append(line)
+ if not key_points:
+ # Fallback: try to extract bullet-like lines anywhere.
+ for line in text.splitlines():
+ line = line.strip()
+ if line.startswith(("-", "•", "*")):
+ cleaned = line.lstrip("-•*").strip()
+ if cleaned:
+ key_points.append(cleaned)
+ key_points = key_points[:8]
+
+ confidence = _parse_score("confidence", 0.6)
+ focus_alignment = _parse_score(r"focus[_ ]alignment", 0.6)
+
+ return summary, key_points, confidence, focus_alignment

  def _split_text_into_chunks(self, text: str, overlap: int = 200) -> List[str]:
  """Split text into overlapping chunks"""
@@ -650,4 +867,4 @@ Create a unified summary that represents the entire document effectively."""
  else:
  formatted_lines.append(f"[{role.upper()}]: {content}")

- return "\n\n".join(formatted_lines)
+ return "\n\n".join(formatted_lines)
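The fallback path asks the model for a fixed plain-text marker format and parses it deterministically with _parse_fallback_response(). An illustrative round-trip through the parser added above (the sample text is made up; BasicSummarizer import as in the earlier sketch):

sample = """SUMMARY:
The report describes Q3 revenue growth driven by the new subscription tier.

KEY POINTS:
- Revenue grew 12% quarter over quarter
- Churn fell below 2%

CONFIDENCE: 0.8
FOCUS_ALIGNMENT: 75%
"""

# _parse_fallback_response is a @staticmethod, so it can be exercised directly.
summary, key_points, confidence, focus_alignment = BasicSummarizer._parse_fallback_response(sample)
# summary         -> "The report describes Q3 revenue growth driven by the new subscription tier."
# key_points      -> ["Revenue grew 12% quarter over quarter", "Churn fell below 2%"]
# confidence      -> 0.8
# focus_alignment -> 0.75  (percent values are normalized into the 0-1 range)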
@@ -100,6 +100,30 @@ class AnthropicProvider(BaseProvider):
  "role": "assistant",
  "content": msg["content"]
  })
+ elif role == "tool":
+ # Anthropic Messages API represents tool outputs as `tool_result`
+ # content blocks inside a USER message (there is no `role="tool"`).
+ meta = msg.get("metadata") if isinstance(msg.get("metadata"), dict) else {}
+ tool_use_id = meta.get("call_id") or meta.get("tool_use_id") or meta.get("id")
+ tool_text = msg.get("content", "")
+ tool_text = "" if tool_text is None else str(tool_text)
+
+ if isinstance(tool_use_id, str) and tool_use_id.strip():
+ api_messages.append(
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "tool_result",
+ "tool_use_id": tool_use_id.strip(),
+ "content": tool_text,
+ }
+ ],
+ }
+ )
+ else:
+ # Fallback: preserve as plain user text when no tool_use_id is available.
+ api_messages.append({"role": "user", "content": tool_text})
  else:
  api_messages.append({
  "role": "user",
@@ -194,7 +218,9 @@ class AnthropicProvider(BaseProvider):
  call_params["tool_choice"] = {"type": kwargs.get("tool_choice", "auto")}
  else:
  # Add tools as system prompt for prompted models
- tool_prompt = self.tool_handler.format_tools_prompt(tools)
+ system_text = call_params.get("system") if isinstance(call_params.get("system"), str) else ""
+ include_tool_list = "## Tools (session)" not in system_text
+ tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
  if call_params.get("system"):
  call_params["system"] += f"\n\n{tool_prompt}"
  else:
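The new role == "tool" branch above maps AbstractCore tool-result messages onto Anthropic's content-block format, since the Messages API has no tool role. An illustrative before/after with made-up values:

# Illustrative input/output of the role == "tool" handling added above.
abstractcore_msg = {
    "role": "tool",
    "content": "72°F and sunny",
    "metadata": {"call_id": "toolu_01A"},
}

# ...becomes the following entry in api_messages (Anthropic has no role="tool"):
anthropic_msg = {
    "role": "user",
    "content": [
        {
            "type": "tool_result",
            "tool_use_id": "toolu_01A",
            "content": "72°F and sunny",
        }
    ],
}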
@@ -213,6 +239,8 @@ class AnthropicProvider(BaseProvider):
  formatted = self._format_response(response)
  # Add generation time to response
  formatted.gen_time = gen_time
+ formatted.metadata = dict(formatted.metadata or {})
+ formatted.metadata["_provider_request"] = {"call_params": call_params}

  # Handle tool execution for Anthropic responses
  if tools and (formatted.has_tool_calls() or
@@ -232,7 +260,7 @@ class AnthropicProvider(BaseProvider):
  error_message = format_model_error("Anthropic", self.model, available_models)
  raise ModelNotFoundError(error_message)
  else:
- raise ProviderAPIError(f"Anthropic API error: {str(e)}")
+ raise

  async def _agenerate_internal(self,
  prompt: str,
@@ -260,6 +288,30 @@ class AnthropicProvider(BaseProvider):
  "role": "assistant",
  "content": msg["content"]
  })
+ elif role == "tool":
+ # Anthropic Messages API represents tool outputs as `tool_result`
+ # content blocks inside a USER message (there is no `role="tool"`).
+ meta = msg.get("metadata") if isinstance(msg.get("metadata"), dict) else {}
+ tool_use_id = meta.get("call_id") or meta.get("tool_use_id") or meta.get("id")
+ tool_text = msg.get("content", "")
+ tool_text = "" if tool_text is None else str(tool_text)
+
+ if isinstance(tool_use_id, str) and tool_use_id.strip():
+ api_messages.append(
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "tool_result",
+ "tool_use_id": tool_use_id.strip(),
+ "content": tool_text,
+ }
+ ],
+ }
+ )
+ else:
+ # Fallback: preserve as plain user text when no tool_use_id is available.
+ api_messages.append({"role": "user", "content": tool_text})
  else:
  api_messages.append({
  "role": "user",
@@ -348,7 +400,9 @@ class AnthropicProvider(BaseProvider):
  elif kwargs.get("tool_choice"):
  call_params["tool_choice"] = {"type": kwargs.get("tool_choice", "auto")}
  else:
- tool_prompt = self.tool_handler.format_tools_prompt(tools)
+ system_text = call_params.get("system") if isinstance(call_params.get("system"), str) else ""
+ include_tool_list = "## Tools (session)" not in system_text
+ tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
  if call_params.get("system"):
  call_params["system"] += f"\n\n{tool_prompt}"
  else:
@@ -365,6 +419,8 @@ class AnthropicProvider(BaseProvider):

  formatted = self._format_response(response)
  formatted.gen_time = gen_time
+ formatted.metadata = dict(formatted.metadata or {})
+ formatted.metadata["_provider_request"] = {"call_params": call_params}

  if tools and (formatted.has_tool_calls() or
  (self.tool_handler.supports_prompted and formatted.content)):
@@ -381,7 +437,7 @@ class AnthropicProvider(BaseProvider):
  error_message = format_model_error("Anthropic", self.model, available_models)
  raise ModelNotFoundError(error_message)
  else:
- raise ProviderAPIError(f"Anthropic API error: {str(e)}")
+ raise

  async def _async_stream_response(self, call_params: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None) -> AsyncIterator[GenerateResponse]:
  """Native async streaming with Anthropic's context manager pattern."""
@@ -397,7 +453,7 @@ class AnthropicProvider(BaseProvider):
  raw_response=chunk
  )
  except Exception as e:
- raise ProviderAPIError(f"Anthropic streaming error: {str(e)}")
+ raise

  def unload(self) -> None:
  """Close async client if it was created."""
@@ -414,13 +470,38 @@ class AnthropicProvider(BaseProvider):
  """Format tools for Anthropic API format"""
  formatted_tools = []
  for tool in tools:
- # Get parameters and ensure proper JSON schema format
+ # Anthropic expects `input_schema` to be a JSON Schema object:
+ # https://platform.claude.com/docs/en/agents-and-tools/tool-use/implement-tool-use
+ #
+ # Our internal tool representation typically uses:
+ # tool["parameters"] = { "arg": {"type": "...", "default": ...?}, ... }
+ # or, less commonly:
+ # tool["parameters"] = {"type":"object","properties":{...},"required":[...]}
  params = tool.get("parameters", {})
- input_schema = {
+
+ properties: Dict[str, Any] = {}
+ required: List[str] = []
+
+ if isinstance(params, dict) and "properties" in params:
+ # Treat as already-schema-like.
+ raw_props = params.get("properties") if isinstance(params.get("properties"), dict) else {}
+ properties = dict(raw_props)
+ raw_required = params.get("required")
+ if isinstance(raw_required, list):
+ required = [str(x) for x in raw_required if isinstance(x, (str, int))]
+ elif isinstance(params, dict):
+ # Treat as compact parameter dict; infer required args by absence of `default`.
+ properties = dict(params)
+ for k, v in params.items():
+ if isinstance(v, dict) and "default" not in v:
+ required.append(str(k))
+
+ input_schema: Dict[str, Any] = {
  "type": "object",
- "properties": params.get("properties", params), # Handle both formats
- "required": params.get("required", list(params.keys()) if "properties" not in params else [])
+ "properties": properties,
  }
+ if required:
+ input_schema["required"] = required

  formatted_tool = {
  "name": tool.get("name"),
@@ -440,7 +521,7 @@ class AnthropicProvider(BaseProvider):
  # Handle different content types
  for content_block in response.content:
  if content_block.type == "text":
- content = content_block.text
+ content += content_block.text
  elif content_block.type == "tool_use":
  if tool_calls is None:
  tool_calls = []
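The tools-formatting hunk above normalizes both compact parameter dicts and already-schema-like dicts into a JSON Schema input_schema, inferring required arguments from the absence of a default. An illustrative before/after (the tool definition is made up, and only the name and input_schema parts of the formatted tool are shown):

# Illustrative input/output of the schema normalization above.
compact_tool = {
    "name": "get_weather",
    "parameters": {
        "city": {"type": "string"},                   # no default -> inferred as required
        "units": {"type": "string", "default": "c"},  # has default -> optional
    },
}

# The formatter above would emit an Anthropic tool roughly like:
anthropic_tool = {
    "name": "get_weather",
    "input_schema": {
        "type": "object",
        "properties": {
            "city": {"type": "string"},
            "units": {"type": "string", "default": "c"},
        },
        "required": ["city"],
    },
}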