fast-agent-mcp 0.2.57__py3-none-any.whl → 0.2.58__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of fast-agent-mcp might be problematic.

@@ -1,8 +1,10 @@
  import json
  import os
  import re
- from enum import Enum
- from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union
+ import sys
+ from dataclasses import dataclass
+ from enum import Enum, auto
+ from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union, cast

  from mcp.types import ContentBlock, TextContent
  from rich.text import Text
@@ -29,10 +31,6 @@ except ImportError:
      ClientError = Exception
      NoCredentialsError = Exception

- try:
-     from anthropic.types import ToolParam
- except ImportError:
-     ToolParam = None

  from mcp.types import (
      CallToolRequest,
@@ -41,6 +39,26 @@ from mcp.types import (

  DEFAULT_BEDROCK_MODEL = "amazon.nova-lite-v1:0"

+
+ # Local ReasoningEffort enum to avoid circular imports
+ class ReasoningEffort(Enum):
+     """Reasoning effort levels for Bedrock models"""
+
+     MINIMAL = "minimal"
+     LOW = "low"
+     MEDIUM = "medium"
+     HIGH = "high"
+
+
+ # Reasoning effort to token budget mapping
+ # Based on AWS recommendations: start with 1024 minimum, increment reasonably
+ REASONING_EFFORT_BUDGETS = {
+     ReasoningEffort.MINIMAL: 0,  # Disabled
+     ReasoningEffort.LOW: 512,  # Light reasoning
+     ReasoningEffort.MEDIUM: 1024,  # AWS minimum recommendation
+     ReasoningEffort.HIGH: 2048,  # Higher reasoning
+ }
+
  # Bedrock message format types
  BedrockMessage = Dict[str, Any]  # Bedrock message format
  BedrockMessageParam = Dict[str, Any]  # Bedrock message parameter format
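
Editor's note, not part of the package diff: a minimal sketch of how the reasoning-effort table added above is meant to be consumed. An effort level (for example the `reasoning_effort` value read from the Bedrock config later in this diff) resolves to a thinking-token budget, with MINIMAL disabling reasoning entirely.

    # Illustrative only; mirrors the REASONING_EFFORT_BUDGETS mapping introduced in this release.
    effort = ReasoningEffort("high")                  # e.g. bedrock.reasoning_effort = "high" in config
    budget = REASONING_EFFORT_BUDGETS.get(effort, 0)  # -> 2048; ReasoningEffort.MINIMAL -> 0 (disabled)
    enable_reasoning = budget > 0
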
@@ -49,9 +67,54 @@ BedrockMessageParam = Dict[str, Any]  # Bedrock message parameter format
  class ToolSchemaType(Enum):
      """Enum for different tool schema formats used by different model families."""

-     DEFAULT = "default"  # Default toolSpec format used by most models (formerly Nova)
-     SYSTEM_PROMPT = "system_prompt"  # System prompt-based tool calling format
-     ANTHROPIC = "anthropic"  # Native Anthropic tool calling format
+     DEFAULT = auto()  # Default toolSpec format used by most models (formerly Nova)
+     SYSTEM_PROMPT = auto()  # System prompt-based tool calling format
+     ANTHROPIC = auto()  # Native Anthropic tool calling format
+     NONE = auto()  # Schema fallback failed, avoid retries
+
+
+ class SystemMode(Enum):
+     """System message handling modes."""
+
+     SYSTEM = auto()  # Use native system parameter
+     INJECT = auto()  # Inject into user message
+
+
+ class StreamPreference(Enum):
+     """Streaming preference with tools."""
+
+     STREAM_OK = auto()  # Model can stream with tools
+     NON_STREAM = auto()  # Model requires non-streaming for tools
+
+
+ class ToolNamePolicy(Enum):
+     """Tool name transformation policy."""
+
+     PRESERVE = auto()  # Keep original tool names
+     UNDERSCORES = auto()  # Convert to underscore format
+
+
+ class StructuredStrategy(Enum):
+     """Structured output generation strategy."""
+
+     STRICT_SCHEMA = auto()  # Use full JSON schema
+     SIMPLIFIED_SCHEMA = auto()  # Use simplified schema
+
+
+ @dataclass
+ class ModelCapabilities:
+     """Unified per-model capability cache to avoid scattered caches.
+
+     Uses proper enums and types to prevent typos and improve type safety.
+     """
+
+     schema: ToolSchemaType | None = None
+     system_mode: SystemMode | None = None
+     stream_with_tools: StreamPreference | None = None
+     tool_name_policy: ToolNamePolicy | None = None
+     structured_strategy: StructuredStrategy | None = None
+     reasoning_support: bool | None = None  # True=supported, False=unsupported, None=unknown
+     supports_tools: bool | None = None  # True=yes, False=no, None=unknown


  class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
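
Editor's note, illustrative sketch only and not part of the package: the `capabilities` dictionary added to `BedrockAugmentedLLM` in the next hunk holds one `ModelCapabilities` record per model id, so a behaviour discovered at runtime (for example that a model must fall back to the non-streaming API when tools are present) is cached and reused on later turns.

    # Hypothetical usage of the class-level cache; the model id is just an example.
    caps = BedrockAugmentedLLM.capabilities.get("amazon.nova-lite-v1:0") or ModelCapabilities()
    caps.stream_with_tools = StreamPreference.NON_STREAM  # learned from a streaming failure
    BedrockAugmentedLLM.capabilities["amazon.nova-lite-v1:0"] = caps
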
@@ -60,38 +123,58 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
      Supports all Bedrock models including Nova, Claude, Meta, etc.
      """

-     # Bedrock-specific parameter exclusions
-     BEDROCK_EXCLUDE_FIELDS = {
-         AugmentedLLM.PARAM_MESSAGES,
-         AugmentedLLM.PARAM_MODEL,
-         AugmentedLLM.PARAM_SYSTEM_PROMPT,
-         AugmentedLLM.PARAM_STOP_SEQUENCES,
-         AugmentedLLM.PARAM_MAX_TOKENS,
-         AugmentedLLM.PARAM_METADATA,
-         AugmentedLLM.PARAM_USE_HISTORY,
-         AugmentedLLM.PARAM_MAX_ITERATIONS,
-         AugmentedLLM.PARAM_PARALLEL_TOOL_CALLS,
-         AugmentedLLM.PARAM_TEMPLATE_VARS,
-     }
+     # Class-level capabilities cache shared across all instances
+     capabilities: Dict[str, ModelCapabilities] = {}
+
+     @classmethod
+     def debug_cache(cls) -> None:
+         """Print human-readable JSON representation of the capabilities cache.
+
+         Useful for debugging and understanding what capabilities have been
+         discovered and cached for each model. Uses sys.stdout to bypass
+         any logging hijacking.
+         """
+         if not cls.capabilities:
+             sys.stdout.write("{}\n")
+             sys.stdout.flush()
+             return
+
+         cache_dict = {}
+         for model, caps in cls.capabilities.items():
+             cache_dict[model] = {
+                 "schema": caps.schema.name if caps.schema else None,
+                 "system_mode": caps.system_mode.name if caps.system_mode else None,
+                 "stream_with_tools": caps.stream_with_tools.name
+                 if caps.stream_with_tools
+                 else None,
+                 "tool_name_policy": caps.tool_name_policy.name if caps.tool_name_policy else None,
+                 "structured_strategy": caps.structured_strategy.name
+                 if caps.structured_strategy
+                 else None,
+                 "reasoning_support": caps.reasoning_support,
+                 "supports_tools": caps.supports_tools,
+             }
+
+         output = json.dumps(cache_dict, indent=2, sort_keys=True)
+         sys.stdout.write(f"{output}\n")
+         sys.stdout.flush()

      @classmethod
      def matches_model_pattern(cls, model_name: str) -> bool:
-         """Check if a model name matches Bedrock model patterns."""
-         # Bedrock model patterns
-         bedrock_patterns = [
-             r"^amazon\.nova.*",  # Amazon Nova models
-             r"^anthropic\.claude.*",  # Anthropic Claude models
-             r"^meta\.llama.*",  # Meta Llama models
-             r"^mistral\..*",  # Mistral models
-             r"^cohere\..*",  # Cohere models
-             r"^ai21\..*",  # AI21 models
-             r"^stability\..*",  # Stability AI models
-             r"^openai\..*",  # OpenAI models
-         ]
+         """Return True if model_name exists in the Bedrock model list loaded at init.

-         import re
+         Uses the centralized discovery in bedrock_utils; no regex, no fallbacks.
+         Gracefully handles environments without AWS access by returning False.
+         """
+         from mcp_agent.llm.providers.bedrock_utils import all_bedrock_models

-         return any(re.match(pattern, model_name) for pattern in bedrock_patterns)
+         try:
+             available = set(all_bedrock_models(prefix=""))
+             return model_name in available
+         except Exception:
+             # If AWS calls fail (no credentials, region not configured, etc.),
+             # assume this is not a Bedrock model
+             return False

      def __init__(self, *args, **kwargs) -> None:
          """Initialize the Bedrock LLM with AWS credentials and region."""
@@ -131,22 +214,41 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          self._bedrock_client = None
          self._bedrock_runtime_client = None

+         # One-shot hint to force non-streaming on next completion (used by structured outputs)
+         self._force_non_streaming_once: bool = False
+
+         # Set up reasoning-related attributes
+         self._reasoning_effort = kwargs.get("reasoning_effort", None)
+         if (
+             self._reasoning_effort is None
+             and self.context
+             and self.context.config
+             and self.context.config.bedrock
+         ):
+             if hasattr(self.context.config.bedrock, "reasoning_effort"):
+                 self._reasoning_effort = self.context.config.bedrock.reasoning_effort
+
      def _initialize_default_params(self, kwargs: dict) -> RequestParams:
          """Initialize Bedrock-specific default parameters"""
          # Get base defaults from parent (includes ModelDatabase lookup)
          base_params = super()._initialize_default_params(kwargs)

-         # Override with Bedrock-specific settings
+         # Override with Bedrock-specific settings - ensure we always have a model
          chosen_model = kwargs.get("model", DEFAULT_BEDROCK_MODEL)
          base_params.model = chosen_model

          return base_params

+     @property
+     def model(self) -> str:
+         """Get the model name, guaranteed to be set."""
+         return self.default_request_params.model
+
      def _get_bedrock_client(self):
          """Get or create Bedrock client."""
          if self._bedrock_client is None:
              try:
-                 session = boto3.Session(profile_name=self.aws_profile)
+                 session = boto3.Session(profile_name=self.aws_profile)  # type: ignore[union-attr]
                  self._bedrock_client = session.client("bedrock", region_name=self.aws_region)
              except NoCredentialsError as e:
                  raise ProviderKeyError(
@@ -159,7 +261,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          """Get or create Bedrock Runtime client."""
          if self._bedrock_runtime_client is None:
              try:
-                 session = boto3.Session(profile_name=self.aws_profile)
+                 session = boto3.Session(profile_name=self.aws_profile)  # type: ignore[union-attr]
                  self._bedrock_runtime_client = session.client(
                      "bedrock-runtime", region_name=self.aws_region
                  )
@@ -170,161 +272,33 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                  ) from e
          return self._bedrock_runtime_client

-     def _get_tool_schema_type(self, model_id: str) -> ToolSchemaType:
-         """
-         Determine which tool schema format to use based on model family.
-
-         Args:
-             model_id: The model ID (e.g., "bedrock.meta.llama3-1-8b-instruct-v1:0")
+     def _build_tool_name_mapping(
+         self, tools: "ListToolsResult", name_policy: ToolNamePolicy
+     ) -> Dict[str, str]:
+         """Build tool name mapping based on schema type and name policy.

-         Returns:
-             ToolSchemaType indicating which format to use
+         Returns dict mapping from converted_name -> original_name for tool execution.
          """
-         # Remove any "bedrock." prefix for pattern matching
-         clean_model = model_id.replace("bedrock.", "")
+         mapping = {}

-         # Anthropic models use native Anthropic format
-         if re.search(r"anthropic\.claude", clean_model):
-             self.logger.debug(
-                 f"Model {model_id} detected as Anthropic - using native Anthropic format"
-             )
-             return ToolSchemaType.ANTHROPIC
-
-         # Scout models use SYSTEM_PROMPT format
-         if re.search(r"meta\.llama4-scout", clean_model):
-             self.logger.debug(f"Model {model_id} detected as Scout - using SYSTEM_PROMPT format")
-             return ToolSchemaType.SYSTEM_PROMPT
-
-         # Other Llama 4 models use default toolConfig format
-         if re.search(r"meta\.llama4", clean_model):
-             self.logger.debug(
-                 f"Model {model_id} detected as Llama 4 (non-Scout) - using default toolConfig format"
-             )
-             return ToolSchemaType.DEFAULT
-
-         # Llama 3.x models use system prompt format
-         if re.search(r"meta\.llama3", clean_model):
-             self.logger.debug(
-                 f"Model {model_id} detected as Llama 3.x - using system prompt format"
-             )
-             return ToolSchemaType.SYSTEM_PROMPT
-
-         # Future: Add other model-specific formats here
-         # if re.search(r"mistral\.", clean_model):
-         #     return ToolSchemaType.MISTRAL
-
-         # Default to default format for all other models
-         self.logger.debug(f"Model {model_id} using default tool format")
-         return ToolSchemaType.DEFAULT
-
-     def _supports_streaming_with_tools(self, model: str) -> bool:
-         """
-         Check if a model supports streaming with tools.
-
-         Some models (like AI21 Jamba) support tools but not in streaming mode.
-         This method uses regex patterns to identify such models.
-
-         Args:
-             model: The model name (e.g., "ai21.jamba-1-5-mini-v1:0")
-
-         Returns:
-             False if the model requires non-streaming for tools, True otherwise
-         """
-         # Remove any "bedrock." prefix for pattern matching
-         clean_model = model.replace("bedrock.", "")
-
-         # Models that don't support streaming with tools
-         non_streaming_patterns = [
-             r"ai21\.jamba",  # All AI21 Jamba models
-             r"meta\.llama",  # All Meta Llama models
-             r"mistral\.",  # All Mistral models
-             r"amazon\.titan",  # All Amazon Titan models
-             r"cohere\.command",  # All Cohere Command models
-             r"anthropic\.claude-instant",  # Anthropic Claude Instant models
-             r"anthropic\.claude-v2",  # Anthropic Claude v2 models
-             r"deepseek\.",  # All DeepSeek models
-         ]
-
-         for pattern in non_streaming_patterns:
-             if re.search(pattern, clean_model, re.IGNORECASE):
-                 self.logger.debug(
-                     f"Model {model} detected as non-streaming for tools (pattern: {pattern})"
-                 )
-                 return False
-
-         return True
-
-     def _supports_tool_use(self, model_id: str) -> bool:
-         """
-         Determine if a model supports tool use at all.
-         Some models don't support tools in any form.
-         Based on AWS Bedrock documentation: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html
-         """
-         # Models that don't support tool use at all
-         no_tool_use_patterns = [
-             r"ai21\.jamba-instruct",  # AI21 Jamba-Instruct (but not jamba 1.5)
-             r"ai21\..*jurassic",  # AI21 Labs Jurassic-2 models
-             r"amazon\.titan",  # All Amazon Titan models
-             r"anthropic\.claude-v2",  # Anthropic Claude v2 models
-             r"anthropic\.claude-instant",  # Anthropic Claude Instant models
-             r"cohere\.command(?!-r)",  # Cohere Command (but not Command R/R+)
-             r"cohere\.command-light",  # Cohere Command Light
-             r"deepseek\.",  # All DeepSeek models
-             r"meta\.llama[23](?![-.])",  # Meta Llama 2 and 3 (but not 3.1+, 3.2+, etc.)
-             r"meta\.llama3-1-8b",  # Meta Llama 3.1 8b - doesn't support tool calls
-             r"meta\.llama3-2-[13]b",  # Meta Llama 3.2 1b and 3b (but not 11b/90b)
-             r"meta\.llama3-2-11b",  # Meta Llama 3.2 11b - doesn't support tool calls
-             r"mistral\..*-instruct",  # Mistral AI Instruct (but not Mistral Large)
-         ]
-
-         for pattern in no_tool_use_patterns:
-             if re.search(pattern, model_id):
-                 self.logger.info(f"Model {model_id} does not support tool use")
-                 return False
-
-         return True
-
-     def _supports_system_messages(self, model: str) -> bool:
-         """
-         Check if a model supports system messages.
-
-         Some models (like Titan and Cohere embedding models) don't support system messages.
-         This method uses regex patterns to identify such models.
-
-         Args:
-             model: The model name (e.g., "amazon.titan-embed-text-v1")
-
-         Returns:
-             False if the model doesn't support system messages, True otherwise
-         """
-         # Remove any "bedrock." prefix for pattern matching
-         clean_model = model.replace("bedrock.", "")
-
-         # DEBUG: Print the model names for debugging
-         self.logger.info(
-             f"DEBUG: Checking system message support for model='{model}', clean_model='{clean_model}'"
-         )
-
-         # Models that don't support system messages (reverse logic as suggested)
-         no_system_message_patterns = [
-             r"amazon\.titan",  # All Amazon Titan models
-             r"cohere\.command.*-text",  # Cohere command text models (command-text-v14, command-light-text-v14)
-             r"mistral.*mixtral.*8x7b",  # Mistral Mixtral 8x7b models
-             r"mistral.mistral-7b-instruct",  # Mistral 7b instruct models
-             r"meta\.llama3-2-11b-instruct",  # Specific Meta Llama3 model
-         ]
-
-         for pattern in no_system_message_patterns:
-             if re.search(pattern, clean_model, re.IGNORECASE):
-                 self.logger.info(
-                     f"DEBUG: Model {model} detected as NOT supporting system messages (pattern: {pattern})"
-                 )
-                 return False
+         if name_policy == ToolNamePolicy.PRESERVE:
+             # Identity mapping for preserve policy
+             for tool in tools.tools:
+                 mapping[tool.name] = tool.name
+         else:
+             # Nova-style cleaning for underscores policy
+             for tool in tools.tools:
+                 clean_name = re.sub(r"[^a-zA-Z0-9_]", "_", tool.name)
+                 clean_name = re.sub(r"_+", "_", clean_name).strip("_")
+                 if not clean_name:
+                     clean_name = f"tool_{hash(tool.name) % 10000}"
+                 mapping[clean_name] = tool.name

-         self.logger.info(f"DEBUG: Model {model} detected as supporting system messages")
-         return True
+         return mapping

-     def _convert_tools_nova_format(self, tools: "ListToolsResult") -> List[Dict[str, Any]]:
+     def _convert_tools_nova_format(
+         self, tools: "ListToolsResult", tool_name_mapping: Dict[str, str]
+     ) -> List[Dict[str, Any]]:
          """Convert MCP tools to Nova-specific toolSpec format.

          Note: Nova models have VERY strict JSON schema requirements:
@@ -381,12 +355,12 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
              ):
                  nova_schema["required"] = input_schema["required"]

-             # IMPORTANT: Nova tool name compatibility fix
-             # Problem: Amazon Nova models fail with "Model produced invalid sequence as part of ToolUse"
-             # when tool names contain hyphens (e.g., "utils-get_current_date_information")
-             # Solution: Replace hyphens with underscores for Nova (e.g., "utils_get_current_date_information")
-             # Note: Underscores work fine, simple names work fine, but hyphens cause tool calling to fail
-             clean_name = tool.name.replace("-", "_")
+             # Apply tool name policy (e.g., Nova requires hyphen→underscore)
+             policy = getattr(self, "_tool_name_policy_for_conversion", "preserve")
+             if policy == "replace_hyphens_with_underscores":
+                 clean_name = tool.name.replace("-", "_")
+             else:
+                 clean_name = tool.name

              # Store mapping from cleaned name back to original MCP name
              # This is needed because:
@@ -409,132 +383,62 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          self.logger.debug(f"Converted {len(bedrock_tools)} tools for Nova format")
          return bedrock_tools

-     def _convert_tools_system_prompt_format(self, tools: "ListToolsResult") -> str:
-         """Convert MCP tools to system prompt format.
-
-         Uses different formats based on the model:
-         - Scout models: Comprehensive system prompt format
-         - Other models: Minimal format
-         """
+     def _convert_tools_system_prompt_format(
+         self, tools: "ListToolsResult", tool_name_mapping: Dict[str, str]
+     ) -> str:
+         """Convert MCP tools to system prompt format."""
          if not tools.tools:
              return ""

-         # Create mapping from tool names to original names (no cleaning needed for Llama)
-         self.tool_name_mapping = {}
-
-         self.logger.debug(
-             f"Converting {len(tools.tools)} MCP tools to Llama native system prompt format"
-         )
+         self.logger.debug(f"Converting {len(tools.tools)} MCP tools to system prompt format")

-         # Check if this is a Scout model
-         model_id = self.default_request_params.model or DEFAULT_BEDROCK_MODEL
-         clean_model = model_id.replace("bedrock.", "")
-         is_scout = re.search(r"meta\.llama4-scout", clean_model)
-
-         if is_scout:
-             # Use comprehensive system prompt format for Scout models
-             prompt_parts = [
-                 "You are a helpful assistant with access to the following functions. Use them if required:",
-                 "",
-             ]
-
-             # Add each tool definition in JSON format
-             for tool in tools.tools:
-                 self.logger.debug(f"Converting MCP tool: {tool.name}")
+         prompt_parts = [
+             "You have the following tools available to help answer the user's request. You can call one or more functions at a time. The functions are described here in JSON-schema format:",
+             "",
+         ]

-                 # Use original tool name (no hyphen replacement for Llama)
-                 tool_name = tool.name
+         # Add each tool definition in JSON format
+         for tool in tools.tools:
+             self.logger.debug(f"Converting MCP tool: {tool.name}")

-                 # Store mapping (identity mapping since no name cleaning)
-                 self.tool_name_mapping[tool_name] = tool.name
+             # Use original tool name (no hyphen replacement)
+             tool_name = tool.name

-                 # Create tool definition in the format Llama expects
-                 tool_def = {
-                     "type": "function",
-                     "function": {
-                         "name": tool_name,
-                         "description": tool.description or f"Tool: {tool.name}",
-                         "parameters": tool.inputSchema or {"type": "object", "properties": {}},
-                     },
-                 }
+             # Create tool definition
+             tool_def = {
+                 "type": "function",
+                 "function": {
+                     "name": tool_name,
+                     "description": tool.description or f"Tool: {tool.name}",
+                     "parameters": tool.inputSchema or {"type": "object", "properties": {}},
+                 },
+             }

-                 prompt_parts.append(json.dumps(tool_def))
+             prompt_parts.append(json.dumps(tool_def))

-             # Add comprehensive response format instructions for Scout
-             prompt_parts.extend(
-                 [
-                     "",
-                     "## Rules for Function Calling:",
-                     "1. When you need to call a function, use the following format:",
-                     "   [function_name(arguments)]",
-                     "2. You can call multiple functions in a single response if needed",
-                     "3. Always provide the function results in your response to the user",
-                     "4. If a function call fails, explain the error and try an alternative approach",
-                     "5. Only call functions when necessary to answer the user's question",
-                     "",
-                     "## Response Rules:",
-                     "- Always provide a complete answer to the user's question",
-                     "- Include function results in your response",
-                     "- Be helpful and informative",
-                     "- If you cannot answer without calling a function, call the appropriate function first",
-                     "",
-                     "## Boundaries:",
-                     "- Only call functions that are explicitly provided above",
-                     "- Do not make up function names or parameters",
-                     "- Follow the exact function signature provided",
-                     "- Always validate your function calls before making them",
-                 ]
-             )
-         else:
-             # Use minimal format for other Llama models
-             prompt_parts = [
-                 "You have the following tools available to help answer the user's request. You can call one or more functions at a time. The functions are described here in JSON-schema format:",
+         # Add the response format instructions
+         prompt_parts.extend(
+             [
+                 "",
+                 "To call one or more tools, provide the tool calls on a new line as a JSON-formatted array. Explain your steps in a neutral tone. Then, only call the tools you can for the first step, then end your turn. If you previously received an error, you can try to call the tool again. Give up after 3 errors.",
                  "",
+                 "Conform precisely to the single-line format of this example:",
+                 "Tool Call:",
+                 '[{"name": "SampleTool", "arguments": {"foo": "bar"}},{"name": "SampleTool", "arguments": {"foo": "other"}}]',
+                 "",
+                 "When calling a tool you must supply valid JSON with both 'name' and 'arguments' keys with the function name and function arguments respectively. Do not add any preamble, labels or extra text, just the single JSON string in one of the specified formats",
              ]
-
-             # Add each tool definition in JSON format
-             for tool in tools.tools:
-                 self.logger.debug(f"Converting MCP tool: {tool.name}")
-
-                 # Use original tool name (no hyphen replacement for Llama)
-                 tool_name = tool.name
-
-                 # Store mapping (identity mapping since no name cleaning)
-                 self.tool_name_mapping[tool_name] = tool.name
-
-                 # Create tool definition in the format Llama expects
-                 tool_def = {
-                     "type": "function",
-                     "function": {
-                         "name": tool_name,
-                         "description": tool.description or f"Tool: {tool.name}",
-                         "parameters": tool.inputSchema or {"type": "object", "properties": {}},
-                     },
-                 }
-
-                 prompt_parts.append(json.dumps(tool_def))
-
-             # Add the response format instructions based on community best practices
-             prompt_parts.extend(
-                 [
-                     "",
-                     "To call one or more tools, provide the tool calls on a new line as a JSON-formatted array. Explain your steps in a neutral tone. Then, only call the tools you can for the first step, then end your turn. If you previously received an error, you can try to call the tool again. Give up after 3 errors.",
-                     "",
-                     "Conform precisely to the single-line format of this example:",
-                     "Tool Call:",
-                     '[{"name": "SampleTool", "arguments": {"foo": "bar"}},{"name": "SampleTool", "arguments": {"foo": "other"}}]',
-                 ]
-             )
+         )

          system_prompt = "\n".join(prompt_parts)
          self.logger.debug(f"Generated Llama native system prompt: {system_prompt}")

          return system_prompt

-     def _convert_tools_anthropic_format(self, tools: "ListToolsResult") -> List[Dict[str, Any]]:
+     def _convert_tools_anthropic_format(
+         self, tools: "ListToolsResult", tool_name_mapping: Dict[str, str]
+     ) -> List[Dict[str, Any]]:
          """Convert MCP tools to Anthropic format wrapped in Bedrock toolSpec - preserves raw schema."""
-         # No tool name mapping needed for Anthropic (uses original names)
-         self.tool_name_mapping = {}

          self.logger.debug(
              f"Converting {len(tools.tools)} MCP tools to Anthropic format with toolSpec wrapper"
@@ -544,9 +448,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          for tool in tools.tools:
              self.logger.debug(f"Converting MCP tool: {tool.name}")

-             # Store identity mapping (no name cleaning for Anthropic)
-             self.tool_name_mapping[tool.name] = tool.name
-
              # Use raw MCP schema (like native Anthropic provider) - no cleaning
              input_schema = tool.inputSchema or {"type": "object", "properties": {}}

@@ -567,71 +468,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          )
          return bedrock_tools

-     def _convert_mcp_tools_to_bedrock(
-         self, tools: "ListToolsResult"
-     ) -> Union[List[Dict[str, Any]], str]:
-         """Convert MCP tools to appropriate Bedrock format based on model type."""
-         model_id = self.default_request_params.model or DEFAULT_BEDROCK_MODEL
-         schema_type = self._get_tool_schema_type(model_id)
-
-         if schema_type == ToolSchemaType.SYSTEM_PROMPT:
-             system_prompt = self._convert_tools_system_prompt_format(tools)
-             # Store the system prompt for later use in system message
-             self._system_prompt_tools = system_prompt
-             return system_prompt
-         elif schema_type == ToolSchemaType.ANTHROPIC:
-             return self._convert_tools_anthropic_format(tools)
-         else:
-             return self._convert_tools_nova_format(tools)
-
-     def _add_tools_to_request(
-         self,
-         converse_args: Dict[str, Any],
-         available_tools: Union[List[Dict[str, Any]], str],
-         model_id: str,
-     ) -> None:
-         """Add tools to the request in the appropriate format based on model type."""
-         schema_type = self._get_tool_schema_type(model_id)
-
-         if schema_type == ToolSchemaType.SYSTEM_PROMPT:
-             # System prompt models expect tools in the system prompt, not as API parameters
-             # Tools are already handled in the system prompt generation
-             self.logger.debug("System prompt tools handled in system prompt")
-         elif schema_type == ToolSchemaType.ANTHROPIC:
-             # Anthropic models expect toolConfig with tools array (like native provider)
-             converse_args["toolConfig"] = {"tools": available_tools}
-             self.logger.debug(
-                 f"Added {len(available_tools)} tools to Anthropic request in toolConfig format"
-             )
-         else:
-             # Nova models expect toolConfig with toolSpec format
-             converse_args["toolConfig"] = {"tools": available_tools}
-             self.logger.debug(
-                 f"Added {len(available_tools)} tools to Nova request in toolConfig format"
-             )
-
-     def _parse_nova_tool_response(self, processed_response: Dict[str, Any]) -> List[Dict[str, Any]]:
-         """Parse Nova-format tool response (toolUse format)."""
-         tool_uses = [
-             content_item
-             for content_item in processed_response.get("content", [])
-             if "toolUse" in content_item
-         ]
-
-         parsed_tools = []
-         for tool_use_item in tool_uses:
-             tool_use = tool_use_item["toolUse"]
-             parsed_tools.append(
-                 {
-                     "type": "nova",
-                     "name": tool_use["name"],
-                     "arguments": tool_use["input"],
-                     "id": tool_use["toolUseId"],
-                 }
-             )
-
-         return parsed_tools
-
      def _parse_system_prompt_tool_response(
          self, processed_response: Dict[str, Any]
      ) -> List[Dict[str, Any]]:
@@ -672,7 +508,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

              tool_calls.append(
                  {
-                     "type": "system_prompt",
+                     "type": "system_prompt_tool",
                      "name": func_name,
                      "arguments": arguments,
                      "id": f"system_prompt_{func_name}_{i}",
@@ -693,7 +529,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                  if isinstance(call, dict) and "name" in call:
                      tool_calls.append(
                          {
-                             "type": "system_prompt",
+                             "type": "system_prompt_tool",
                              "name": call["name"],
                              "arguments": call.get("arguments", {}),
                              "id": f"system_prompt_{call['name']}_{i}",
@@ -703,8 +539,9 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          except json.JSONDecodeError as e:
              self.logger.warning(f"Failed to parse Tool Call JSON array: {json_str} - {e}")

-         # Fallback: try to parse any JSON array in the text
-         array_match = re.search(r"\[.*?\]", text_content, re.DOTALL)
+         # Fallback: try to parse JSON arrays that look like tool calls
+         # Look for arrays containing objects with "name" fields - avoid simple citations
+         array_match = re.search(r'\[.*?\{.*?"name".*?\}.*?\]', text_content, re.DOTALL)
          if array_match:
              json_str = array_match.group(0)
              try:
@@ -714,7 +551,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                      if isinstance(call, dict) and "name" in call:
                          tool_calls.append(
                              {
-                                 "type": "system_prompt",
+                                 "type": "system_prompt_tool",
                                  "name": call["name"],
                                  "arguments": call.get("arguments", {}),
                                  "id": f"system_prompt_{call['name']}_{i}",
@@ -722,7 +559,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                          )
                  return tool_calls
              except json.JSONDecodeError as e:
-                 self.logger.warning(f"Failed to parse JSON array: {json_str} - {e}")
+                 self.logger.debug(f"Failed to parse JSON array: {json_str} - {e}")

          # Fallback: try to parse as single JSON object (backward compatibility)
          try:
@@ -734,7 +571,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
              if "name" in function_call:
                  return [
                      {
-                         "type": "system_prompt",
+                         "type": "system_prompt_tool",
                          "name": function_call["name"],
                          "arguments": function_call.get("arguments", {}),
                          "id": f"system_prompt_{function_call['name']}",
@@ -758,7 +595,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
              function_args = json.loads(function_args_json)
              return [
                  {
-                     "type": "system_prompt",
+                     "type": "system_prompt_tool",
                      "name": function_name,
                      "arguments": function_args,
                      "id": f"system_prompt_{function_name}",
@@ -783,7 +620,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                  tool_use = content_item["toolUse"]
                  tool_uses.append(
                      {
-                         "type": "anthropic",
+                         "type": "anthropic_tool",
                          "name": tool_use["name"],
                          "arguments": tool_use["input"],
                          "id": tool_use["toolUseId"],
@@ -793,17 +630,74 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          return tool_uses

      def _parse_tool_response(
-         self, processed_response: Dict[str, Any], model_id: str
+         self, processed_response: Dict[str, Any], model: str
      ) -> List[Dict[str, Any]]:
-         """Parse tool response based on model type."""
-         schema_type = self._get_tool_schema_type(model_id)
+         """Parse tool responses using cached schema, without model/family heuristics."""
+         caps = self.capabilities.get(model) or ModelCapabilities()
+         schema = caps.schema

-         if schema_type == ToolSchemaType.SYSTEM_PROMPT:
+         # Choose parser strictly by cached schema
+         if schema == ToolSchemaType.SYSTEM_PROMPT:
              return self._parse_system_prompt_tool_response(processed_response)
-         elif schema_type == ToolSchemaType.ANTHROPIC:
+         if schema == ToolSchemaType.ANTHROPIC:
              return self._parse_anthropic_tool_response(processed_response)
-         else:
-             return self._parse_nova_tool_response(processed_response)
+
+         # Default/Nova: detect toolUse objects
+         tool_uses = [
+             c
+             for c in processed_response.get("content", [])
+             if isinstance(c, dict) and "toolUse" in c
+         ]
+         if tool_uses:
+             parsed_tools: List[Dict[str, Any]] = []
+             for item in tool_uses:
+                 tu = item.get("toolUse", {})
+                 if not isinstance(tu, dict):
+                     continue
+                 parsed_tools.append(
+                     {
+                         "type": "nova_tool",
+                         "name": tu.get("name"),
+                         "arguments": tu.get("input", {}),
+                         "id": tu.get("toolUseId"),
+                     }
+                 )
+             if parsed_tools:
+                 return parsed_tools
+
+         # Family-agnostic fallback: parse JSON array embedded in text
+         try:
+             text_content = ""
+             for content_item in processed_response.get("content", []):
+                 if isinstance(content_item, dict) and "text" in content_item:
+                     text_content += content_item["text"]
+             if text_content:
+                 import json as _json
+                 import re as _re
+
+                 match = _re.search(r"\[(?:.|\n)*?\]", text_content)
+                 if match:
+                     arr = _json.loads(match.group(0))
+                     if isinstance(arr, list) and arr and isinstance(arr[0], dict):
+                         parsed_calls = []
+                         for i, call in enumerate(arr):
+                             name = call.get("name")
+                             args = call.get("arguments", {})
+                             if name:
+                                 parsed_calls.append(
+                                     {
+                                         "type": "system_prompt_tool",
+                                         "name": name,
+                                         "arguments": args,
+                                         "id": f"system_prompt_{name}_{i}",
+                                     }
+                                 )
+                         if parsed_calls:
+                             return parsed_calls
+         except Exception:
+             pass
+
+         return []

      def _convert_messages_to_bedrock(
          self, messages: List[BedrockMessageParam]
@@ -1094,208 +988,427 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1094
988
  messages.extend(self.history.get(include_completion_history=params.use_history))
1095
989
  messages.append(message_param)
1096
990
 
1097
- # Get available tools - but only if model supports tool use
1098
- available_tools = []
991
+ # Get available tools (no resolver gating; fallback logic will decide wiring)
1099
992
  tool_list = None
1100
- model_to_check = self.default_request_params.model or DEFAULT_BEDROCK_MODEL
1101
-
1102
- if self._supports_tool_use(model_to_check):
1103
- try:
1104
- tool_list = await self.aggregator.list_tools()
1105
- self.logger.debug(f"Found {len(tool_list.tools)} MCP tools")
1106
993
 
1107
- available_tools = self._convert_mcp_tools_to_bedrock(tool_list)
1108
- self.logger.debug(
1109
- f"Successfully converted {len(available_tools)} tools for Bedrock"
1110
- )
994
+ try:
995
+ tool_list = await self.aggregator.list_tools()
996
+ self.logger.debug(f"Found {len(tool_list.tools)} MCP tools")
997
+ except Exception as e:
998
+ self.logger.error(f"Error fetching MCP tools: {e}")
999
+ import traceback
1111
1000
 
1112
- except Exception as e:
1113
- self.logger.error(f"Error fetching or converting MCP tools: {e}")
1114
- import traceback
1115
-
1116
- self.logger.debug(f"Traceback: {traceback.format_exc()}")
1117
- available_tools = []
1118
- tool_list = None
1119
- else:
1120
- self.logger.info(
1121
- f"Model {model_to_check} does not support tool use - skipping tool preparation"
1122
- )
1001
+ self.logger.debug(f"Traceback: {traceback.format_exc()}")
1002
+ tool_list = None
1123
1003
 
1124
1004
  responses: List[ContentBlock] = []
1005
+ tool_result_responses: List[ContentBlock] = []
1125
1006
  model = self.default_request_params.model
1007
+ # Loop guard for repeated identical tool calls (system-prompt parsing path)
1008
+ last_tool_signature: str | None = None
1009
+ repeated_tool_calls_count: int = 0
1010
+ max_repeated_tool_calls: int = 3
1126
1011
 
1127
1012
  for i in range(params.max_iterations):
1128
1013
  self._log_chat_progress(self.chat_turn(), model=model)
1129
1014
 
1130
- # Process tools BEFORE message conversion for Llama native format
1131
- model_to_check = model or DEFAULT_BEDROCK_MODEL
1132
- schema_type = self._get_tool_schema_type(model_to_check)
1133
-
1134
- # For Llama native format, we need to store tools before message conversion
1135
- if schema_type == ToolSchemaType.SYSTEM_PROMPT and available_tools:
1136
- has_tools = bool(available_tools) and (
1137
- (isinstance(available_tools, list) and len(available_tools) > 0)
1138
- or (isinstance(available_tools, str) and available_tools.strip())
1139
- )
1140
-
1141
- if has_tools:
1142
- self._add_tools_to_request({}, available_tools, model_to_check)
1143
- self.logger.debug("Pre-processed Llama native tools for message injection")
1015
+ # Resolver-free: schema type inferred by runtime fallback below
1144
1016
 
1145
1017
  # Convert messages to Bedrock format
1146
1018
  bedrock_messages = self._convert_messages_to_bedrock(messages)
1147
1019
 
1148
- # Prepare Bedrock Converse API arguments
1149
- converse_args = {
1150
- "modelId": model,
1151
- "messages": bedrock_messages,
1152
- }
1020
+ # Base system text
1021
+ base_system_text = self.instruction or params.systemPrompt
1153
1022
 
1154
- # Add system prompt if available and supported by the model
1155
- system_text = self.instruction or params.systemPrompt
1156
-
1157
- # For Llama native format, inject tools into system prompt
1158
- if (
1159
- schema_type == ToolSchemaType.SYSTEM_PROMPT
1160
- and hasattr(self, "_system_prompt_tools")
1161
- and self._system_prompt_tools
1162
- ):
1163
- # Combine system prompt with tools for Llama native format
1164
- if system_text:
1165
- system_text = f"{system_text}\n\n{self._system_prompt_tools}"
1166
- else:
1167
- system_text = self._system_prompt_tools
1168
- self.logger.debug("Combined system prompt with system prompt tools")
1169
- elif hasattr(self, "_system_prompt_tools") and self._system_prompt_tools:
1170
- # For other formats, combine system prompt with tools
1171
- if system_text:
1172
- system_text = f"{system_text}\n\n{self._system_prompt_tools}"
1023
+ # Determine tool schema fallback order and caches
1024
+ caps = self.capabilities.get(model) or ModelCapabilities()
1025
+ if caps.schema and caps.schema != ToolSchemaType.NONE:
1026
+ schema_order = [caps.schema]
1027
+ else:
1028
+ # Restore original fallback order: Anthropic models try anthropic first, others skip it
1029
+ if model.startswith("anthropic."):
1030
+ schema_order = [
1031
+ ToolSchemaType.ANTHROPIC,
1032
+ ToolSchemaType.DEFAULT,
1033
+ ToolSchemaType.SYSTEM_PROMPT,
1034
+ ]
1173
1035
  else:
1174
- system_text = self._system_prompt_tools
1175
- self.logger.debug("Combined system prompt with tools system prompt")
1036
+ schema_order = [
1037
+ ToolSchemaType.DEFAULT,
1038
+ ToolSchemaType.SYSTEM_PROMPT,
1039
+ ]
1176
1040
 
1177
- self.logger.info(
1178
- f"DEBUG: BEFORE CHECK - model='{model_to_check}', has_system_text={bool(system_text)}"
1179
- )
1180
- self.logger.info(
1181
- f"DEBUG: self.instruction='{self.instruction}', params.systemPrompt='{params.systemPrompt}'"
1182
- )
1041
+ # Track whether we changed system mode cache this turn
1042
+ tried_system_fallback = False
1183
1043
 
1184
- supports_system = self._supports_system_messages(model_to_check)
1185
- self.logger.info(f"DEBUG: supports_system={supports_system}")
1044
+ processed_response = None # type: ignore[assignment]
1045
+ last_error_msg = None
1186
1046
 
1187
- if system_text and supports_system:
1188
- converse_args["system"] = [{"text": system_text}]
1189
- self.logger.info(f"DEBUG: Added system prompt to {model_to_check} request")
1190
- elif system_text:
1191
- # For models that don't support system messages, inject system prompt into the first user message
1192
- self.logger.info(
1193
- f"DEBUG: Injecting system prompt into first user message for {model_to_check} (doesn't support system messages)"
1194
- )
1195
- if bedrock_messages and bedrock_messages[0].get("role") == "user":
1196
- first_message = bedrock_messages[0]
1197
- if first_message.get("content") and len(first_message["content"]) > 0:
1198
- # Prepend system instruction to the first user message
1199
- original_text = first_message["content"][0].get("text", "")
1200
- first_message["content"][0]["text"] = (
1201
- f"System: {system_text}\n\nUser: {original_text}"
1202
- )
1203
- self.logger.info("DEBUG: Injected system prompt into first user message")
1204
- else:
1205
- self.logger.info(f"DEBUG: No system text provided for {model_to_check}")
1047
+ for schema_choice in schema_order:
1048
+ # Fresh messages per attempt
1049
+ converse_args = {"modelId": model, "messages": [dict(m) for m in bedrock_messages]}
1206
1050
 
1207
- # Add tools if available - format depends on model type (skip for Llama native as already processed)
1208
- if schema_type != ToolSchemaType.SYSTEM_PROMPT:
1209
- has_tools = bool(available_tools) and (
1210
- (isinstance(available_tools, list) and len(available_tools) > 0)
1211
- or (isinstance(available_tools, str) and available_tools.strip())
1212
- )
1051
+ # Build tools representation for this schema
1052
+ tools_payload: Union[List[Dict[str, Any]], str, None] = None
1053
+ if tool_list and tool_list.tools:
1054
+ # Build tool name mapping once per schema attempt
1055
+ name_policy = (
1056
+ self.capabilities.get(model) or ModelCapabilities()
1057
+ ).tool_name_policy or ToolNamePolicy.PRESERVE
1058
+ tool_name_mapping = self._build_tool_name_mapping(tool_list, name_policy)
1213
1059
 
1214
- if has_tools:
1215
- self._add_tools_to_request(converse_args, available_tools, model_to_check)
1216
- else:
1217
- self.logger.debug(
1218
- "No tools available - omitting tool configuration from request"
1219
- )
1060
+ # Store mapping for tool execution
1061
+ self.tool_name_mapping = tool_name_mapping
1220
1062
 
1221
- # Add inference configuration
1222
- inference_config = {}
1223
- if params.maxTokens is not None:
1224
- inference_config["maxTokens"] = params.maxTokens
1225
- if params.stopSequences:
1226
- inference_config["stopSequences"] = params.stopSequences
1227
-
1228
- # Nova-specific recommended settings for tool calling
1229
- if model and "nova" in model.lower():
1230
- inference_config["topP"] = 1.0
1231
- inference_config["temperature"] = 1.0
1232
- # Add additionalModelRequestFields for topK
1233
- converse_args["additionalModelRequestFields"] = {"inferenceConfig": {"topK": 1}}
1234
-
1235
- if inference_config:
1236
- converse_args["inferenceConfig"] = inference_config
1237
-
1238
- self.logger.debug(f"Bedrock converse args: {converse_args}")
1239
-
1240
- # Debug: Print the actual messages being sent to Bedrock for Llama models
1241
- schema_type = self._get_tool_schema_type(model_to_check)
1242
- if schema_type == ToolSchemaType.SYSTEM_PROMPT:
1243
- self.logger.info("=== SYSTEM PROMPT DEBUG ===")
1244
- self.logger.info("Messages being sent to Bedrock:")
1245
- for i, msg in enumerate(converse_args.get("messages", [])):
1246
- self.logger.info(f"Message {i} ({msg.get('role', 'unknown')}):")
1247
- for j, content in enumerate(msg.get("content", [])):
1248
- if "text" in content:
1249
- self.logger.info(f" Content {j}: {content['text'][:500]}...")
1250
- self.logger.info("=== END SYSTEM PROMPT DEBUG ===")
1251
-
1252
- # Debug: Print the full tool config being sent
1253
- if "toolConfig" in converse_args:
1254
- self.logger.debug(
1255
- f"Tool config being sent to Bedrock: {json.dumps(converse_args['toolConfig'], indent=2)}"
1256
- )
1063
+ if schema_choice == ToolSchemaType.ANTHROPIC:
1064
+ tools_payload = self._convert_tools_anthropic_format(
1065
+ tool_list, tool_name_mapping
1066
+ )
1067
+ elif schema_choice == ToolSchemaType.DEFAULT:
1068
+ # Set tool name policy for Nova conversion
1069
+ self._tool_name_policy_for_conversion = (
1070
+ "replace_hyphens_with_underscores"
1071
+ if name_policy == ToolNamePolicy.UNDERSCORES
1072
+ else "preserve"
1073
+ )
1074
+ tools_payload = self._convert_tools_nova_format(
1075
+ tool_list, tool_name_mapping
1076
+ )
1077
+ elif schema_choice == ToolSchemaType.SYSTEM_PROMPT:
1078
+ tools_payload = self._convert_tools_system_prompt_format(
1079
+ tool_list, tool_name_mapping
1080
+ )
1257
1081
 
1258
- try:
1259
- # Choose streaming vs non-streaming based on model capabilities and tool presence
1260
- # Logic: Only use non-streaming when BOTH conditions are true:
1261
- # 1. Tools are available (available_tools is not empty)
1262
- # 2. Model doesn't support streaming with tools
1263
- # Otherwise, always prefer streaming for better UX
1264
- has_tools = bool(available_tools) and (
1265
- (isinstance(available_tools, list) and len(available_tools) > 0)
1266
- or (isinstance(available_tools, str) and available_tools.strip())
1267
- )
1082
+ # System prompt handling with cache
1083
+ system_mode = (
1084
+ self.capabilities.get(model) or ModelCapabilities()
1085
+ ).system_mode or SystemMode.SYSTEM
1086
+ system_text = base_system_text
1268
1087
 
1269
- if has_tools and not self._supports_streaming_with_tools(
1270
- model or DEFAULT_BEDROCK_MODEL
1088
+ if (
1089
+ schema_choice == ToolSchemaType.SYSTEM_PROMPT
1090
+ and isinstance(tools_payload, str)
1091
+ and tools_payload
1271
1092
  ):
1272
- # Use non-streaming API: model requires it for tool calls
1273
- self.logger.debug(
1274
- f"Using non-streaming API for {model} with tools (model limitation)"
1093
+ system_text = (
1094
+ f"{system_text}\n\n{tools_payload}" if system_text else tools_payload
1275
1095
  )
1276
- response = client.converse(**converse_args)
1277
- processed_response = self._process_non_streaming_response(
1278
- response, model or DEFAULT_BEDROCK_MODEL
1279
- )
1280
- else:
1281
- # Use streaming API: either no tools OR model supports streaming with tools
1282
- streaming_reason = (
1283
- "no tools present"
1284
- if not has_tools
1285
- else "model supports streaming with tools"
1096
+
1097
+ if system_text:
1098
+ if system_mode == SystemMode.SYSTEM:
1099
+ converse_args["system"] = [{"text": system_text}]
1100
+ self.logger.debug(
1101
+ f"Attempting with system param for {model} and schema={schema_choice}"
1102
+ )
1103
+ else:
1104
+ # inject
1105
+ if (
1106
+ converse_args["messages"]
1107
+ and converse_args["messages"][0].get("role") == "user"
1108
+ ):
1109
+ first_message = converse_args["messages"][0]
1110
+ if first_message.get("content") and len(first_message["content"]) > 0:
1111
+ original_text = first_message["content"][0].get("text", "")
1112
+ first_message["content"][0]["text"] = (
1113
+ f"System: {system_text}\n\nUser: {original_text}"
1114
+ )
1115
+ self.logger.debug(
1116
+ "Injected system prompt into first user message (cached mode)"
1117
+ )
1118
+
1119
+ # Tools wiring
1120
+ if (
1121
+ schema_choice in (ToolSchemaType.ANTHROPIC, ToolSchemaType.DEFAULT)
1122
+ and isinstance(tools_payload, list)
1123
+ and tools_payload
1124
+ ):
1125
+ converse_args["toolConfig"] = {"tools": tools_payload}
1126
+
1127
+ # Inference configuration and overrides
1128
+ inference_config: Dict[str, Any] = {}
1129
+ if params.maxTokens is not None:
1130
+ inference_config["maxTokens"] = params.maxTokens
1131
+ if params.stopSequences:
1132
+ inference_config["stopSequences"] = params.stopSequences
1133
+
1134
+ # Check if reasoning should be enabled
1135
+ reasoning_budget = 0
1136
+ if self._reasoning_effort and self._reasoning_effort != ReasoningEffort.MINIMAL:
1137
+ # Convert string to enum if needed
1138
+ if isinstance(self._reasoning_effort, str):
1139
+ try:
1140
+ effort_enum = ReasoningEffort(self._reasoning_effort)
1141
+ except ValueError:
1142
+ effort_enum = ReasoningEffort.MINIMAL
1143
+ else:
1144
+ effort_enum = self._reasoning_effort
1145
+
1146
+ if effort_enum != ReasoningEffort.MINIMAL:
1147
+ reasoning_budget = REASONING_EFFORT_BUDGETS.get(effort_enum, 0)
1148
+
1149
+ # Handle temperature and reasoning configuration
1150
+ # AWS docs: "Thinking isn't compatible with temperature, top_p, or top_k modifications"
1151
+ reasoning_enabled = False
1152
+ if reasoning_budget > 0:
1153
+ # Check if this model supports reasoning (with caching)
1154
+ cached_reasoning = (
1155
+ self.capabilities.get(model) or ModelCapabilities()
1156
+ ).reasoning_support
1157
+ if cached_reasoning == "supported":
1158
+ # We know this model supports reasoning
1159
+ converse_args["performanceConfig"] = {
1160
+ "reasoning": {"maxReasoningTokens": reasoning_budget}
1161
+ }
1162
+ reasoning_enabled = True
1163
+ elif cached_reasoning != "unsupported":
1164
+ # Unknown - we'll try reasoning and fallback if needed
1165
+ converse_args["performanceConfig"] = {
1166
+ "reasoning": {"maxReasoningTokens": reasoning_budget}
1167
+ }
1168
+ reasoning_enabled = True
1169
+
1170
+ if not reasoning_enabled:
1171
+ # No reasoning - apply temperature if provided
1172
+ if params.temperature is not None:
1173
+ inference_config["temperature"] = params.temperature
1174
+
1175
+ # Nova-specific recommendations (when not using reasoning)
1176
+ if model and "nova" in (model or "").lower() and reasoning_budget == 0:
1177
+ inference_config.setdefault("topP", 1.0)
1178
+ # Merge/attach additionalModelRequestFields for topK
1179
+ existing_amrf = converse_args.get("additionalModelRequestFields", {})
1180
+ merged_amrf = {**existing_amrf, **{"inferenceConfig": {"topK": 1}}}
1181
+ converse_args["additionalModelRequestFields"] = merged_amrf
1182
+
1183
+ # Note: resolver default inference overrides removed; keep minimal Nova heuristic above.
1184
+
1185
+ if inference_config:
1186
+ converse_args["inferenceConfig"] = inference_config
1187
+
1188
+ # Decide streaming vs non-streaming (resolver-free with runtime detection + cache)
1189
+ has_tools: bool = False
1190
+ try:
1191
+ has_tools = bool(tools_payload) and bool(
1192
+ (isinstance(tools_payload, list) and len(tools_payload) > 0)
1193
+ or (isinstance(tools_payload, str) and tools_payload.strip())
1286
1194
  )
1287
- self.logger.debug(f"Using streaming API for {model} ({streaming_reason})")
1288
- response = client.converse_stream(**converse_args)
1289
- processed_response = await self._process_stream(
1290
- response, model or DEFAULT_BEDROCK_MODEL
1195
+
1196
+ # Force non-streaming for structured-output flows (one-shot)
1197
+ force_non_streaming = False
1198
+ if self._force_non_streaming_once:
1199
+ force_non_streaming = True
1200
+ self._force_non_streaming_once = False
1201
+
1202
+ # Evaluate cache for streaming-with-tools
1203
+ cache_pref = (
1204
+ self.capabilities.get(model) or ModelCapabilities()
1205
+ ).stream_with_tools
1206
+ use_streaming = True
1207
+ attempted_streaming = False
1208
+
1209
+ if force_non_streaming:
1210
+ use_streaming = False
1211
+ elif has_tools:
1212
+ if cache_pref == StreamPreference.NON_STREAM:
1213
+ use_streaming = False
1214
+ elif cache_pref == StreamPreference.STREAM_OK:
1215
+ use_streaming = True
1216
+ else:
1217
+ # Unknown: try streaming first, fallback on error
1218
+ use_streaming = True
1219
+ else:
1220
+ use_streaming = True
1221
+
1222
+ # Try API call with reasoning fallback
1223
+ try:
1224
+ if not use_streaming:
1225
+ self.logger.debug(
1226
+ f"Using non-streaming API for {model} (schema={schema_choice})"
1227
+ )
1228
+ response = client.converse(**converse_args)
1229
+ processed_response = self._process_non_streaming_response(
1230
+ response, model
1231
+ )
1232
+ else:
1233
+ self.logger.debug(
1234
+ f"Using streaming API for {model} (schema={schema_choice})"
1235
+ )
1236
+ attempted_streaming = True
1237
+ response = client.converse_stream(**converse_args)
1238
+ processed_response = await self._process_stream(response, model)
1239
+ except (ClientError, BotoCoreError) as e:
1240
+ # Check if this is a reasoning-related error
1241
+ if reasoning_budget > 0 and (
1242
+ "reasoning" in str(e).lower() or "performance" in str(e).lower()
1243
+ ):
1244
+ self.logger.debug(
1245
+ f"Model {model} doesn't support reasoning, retrying without: {e}"
1246
+ )
1247
+ caps.reasoning_support = False
1248
+ self.capabilities[model] = caps
1249
+
1250
+ # Remove reasoning and retry
1251
+ if "performanceConfig" in converse_args:
1252
+ del converse_args["performanceConfig"]
1253
+
1254
+ # Apply temperature now that reasoning is disabled
1255
+ if params.temperature is not None:
1256
+ if "inferenceConfig" not in converse_args:
1257
+ converse_args["inferenceConfig"] = {}
1258
+ converse_args["inferenceConfig"]["temperature"] = params.temperature
1259
+
1260
+ # Retry the API call
1261
+ if not use_streaming:
1262
+ response = client.converse(**converse_args)
1263
+ processed_response = self._process_non_streaming_response(
1264
+ response, model
1265
+ )
1266
+ else:
1267
+ response = client.converse_stream(**converse_args)
1268
+ processed_response = await self._process_stream(response, model)
1269
+ else:
1270
+ # Not a reasoning error, re-raise
1271
+ raise
1272
+
1273
+ # Success: cache the working schema choice if not already cached
1274
+ # Only cache schema when tools are present - no tools doesn't predict tool behavior
1275
+ if not caps.schema and has_tools:
1276
+ caps.schema = ToolSchemaType(schema_choice)
1277
+
1278
+ # Cache successful reasoning if we tried it
1279
+ if reasoning_budget > 0 and caps.reasoning_support is not True:
1280
+ caps.reasoning_support = True
1281
+
1282
+ # If Nova/default worked and we used preserve but server complains, flip cache for next time
1283
+ if (
1284
+ schema_choice == ToolSchemaType.DEFAULT
1285
+ and getattr(self, "_tool_name_policy_for_conversion", "preserve")
1286
+ == "preserve"
1287
+ ):
1288
+ # Heuristic: if tool names include '-', prefer underscores next time
1289
+ try:
1290
+ if any("-" in t.name for t in (tool_list.tools if tool_list else [])):
1291
+ caps.tool_name_policy = ToolNamePolicy.UNDERSCORES
1292
+ except Exception:
1293
+ pass
1294
+ # Cache streaming-with-tools behavior on success
1295
+ if has_tools and attempted_streaming:
1296
+ caps.stream_with_tools = StreamPreference.STREAM_OK
1297
+ self.capabilities[model] = caps
1298
+ break
1299
+ except (ClientError, BotoCoreError) as e:
1300
+ error_msg = str(e)
1301
+ last_error_msg = error_msg
1302
+ self.logger.debug(f"Bedrock API error (schema={schema_choice}): {error_msg}")
1303
+
1304
+ # If streaming with tools failed and cache undecided, fallback to non-streaming and cache
1305
+ if has_tools and (caps.stream_with_tools is None):
1306
+ try:
1307
+ self.logger.debug(
1308
+ f"Falling back to non-streaming API for {model} after streaming error"
1309
+ )
1310
+ response = client.converse(**converse_args)
1311
+ processed_response = self._process_non_streaming_response(
1312
+ response, model
1313
+ )
1314
+ caps.stream_with_tools = StreamPreference.NON_STREAM
1315
+ if not caps.schema:
1316
+ caps.schema = ToolSchemaType(schema_choice)
1317
+ self.capabilities[model] = caps
1318
+ break
1319
+ except (ClientError, BotoCoreError) as e_fallback:
1320
+ last_error_msg = str(e_fallback)
1321
+ self.logger.debug(
1322
+ f"Bedrock API error after non-streaming fallback: {last_error_msg}"
1323
+ )
1324
+ # continue to other fallbacks (e.g., system inject or next schema)
1325
+
1326
+ # System parameter fallback once per call if system message unsupported
1327
+ if (
1328
+ not tried_system_fallback
1329
+ and system_text
1330
+ and system_mode == SystemMode.SYSTEM
1331
+ and (
1332
+ "system message" in error_msg.lower()
1333
+ or "system messages" in error_msg.lower()
1334
+ )
1335
+ ):
1336
+ tried_system_fallback = True
1337
+ caps.system_mode = SystemMode.INJECT
1338
+ self.capabilities[model] = caps
1339
+ self.logger.info(
1340
+ f"Switching system mode to inject for {model} and retrying same schema"
1341
+ )
1342
+ # Retry the same schema immediately in inject mode
1343
+ try:
1344
+ # Rebuild messages for inject
1345
+ converse_args = {
1346
+ "modelId": model,
1347
+ "messages": [dict(m) for m in bedrock_messages],
1348
+ }
1349
+ # Inject the system text into the first user message
1350
+ if (
1351
+ converse_args["messages"]
1352
+ and converse_args["messages"][0].get("role") == "user"
1353
+ ):
1354
+ fm = converse_args["messages"][0]
1355
+ if fm.get("content") and len(fm["content"]) > 0:
1356
+ original_text = fm["content"][0].get("text", "")
1357
+ fm["content"][0]["text"] = (
1358
+ f"System: {system_text}\n\nUser: {original_text}"
1359
+ )
1360
+
1361
+ # Re-add tools
1362
+ if (
1363
+ schema_choice
1364
+ in (ToolSchemaType.ANTHROPIC.value, ToolSchemaType.DEFAULT.value)
1365
+ and isinstance(tools_payload, list)
1366
+ and tools_payload
1367
+ ):
1368
+ converse_args["toolConfig"] = {"tools": tools_payload}
1369
+
1370
+ # Same streaming decision using cache
1371
+ has_tools = bool(tools_payload) and bool(
1372
+ (isinstance(tools_payload, list) and len(tools_payload) > 0)
1373
+ or (isinstance(tools_payload, str) and tools_payload.strip())
1374
+ )
1375
+ cache_pref = (
1376
+ self.capabilities.get(model) or ModelCapabilities()
1377
+ ).stream_with_tools
1378
+ if cache_pref == StreamPreference.NON_STREAM or not has_tools:
1379
+ response = client.converse(**converse_args)
1380
+ processed_response = self._process_non_streaming_response(
1381
+ response, model
1382
+ )
1383
+ else:
1384
+ response = client.converse_stream(**converse_args)
1385
+ processed_response = await self._process_stream(response, model)
1386
+ if not caps.schema and has_tools:
1387
+ caps.schema = ToolSchemaType(schema_choice)
1388
+ self.capabilities[model] = caps
1389
+ break
1390
+ except (ClientError, BotoCoreError) as e2:
1391
+ last_error_msg = str(e2)
1392
+ self.logger.debug(
1393
+ f"Bedrock API error after system inject fallback: {last_error_msg}"
1394
+ )
1395
+ # Fall through to next schema
1396
+ continue
1397
+
1398
+ # For any other error (including tool format errors), continue to next schema
1399
+ self.logger.debug(
1400
+ f"Continuing to next schema after error with {schema_choice}: {error_msg}"
1291
1401
  )
1292
- except (ClientError, BotoCoreError) as e:
1293
- error_msg = str(e)
1294
- self.logger.error(f"Bedrock API error: {error_msg}")
1402
+ continue
1295
1403
 
1296
- # Create error response
1404
+ if processed_response is None:
1405
+ # All attempts failed; mark the schema as NONE to avoid repeated retries for the rest of this process
1406
+ caps.schema = ToolSchemaType.NONE
1407
+ self.capabilities[model] = caps
1297
1408
  processed_response = {
1298
- "content": [{"text": f"Error during generation: {error_msg}"}],
1409
+ "content": [
1410
+ {"text": f"Error during generation: {last_error_msg or 'Unknown error'}"}
1411
+ ],
1299
1412
  "stop_reason": "error",
1300
1413
  "usage": {"input_tokens": 0, "output_tokens": 0},
1301
1414
  "model": model,
@@ -1312,8 +1425,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1312
1425
  input_tokens=usage.get("input_tokens", 0),
1313
1426
  output_tokens=usage.get("output_tokens", 0),
1314
1427
  total_tokens=usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
1315
- cache_creation_input_tokens=0,
1316
- cache_read_input_tokens=0,
1317
1428
  raw_usage=usage,
1318
1429
  )
1319
1430
  self.usage_accumulator.add_turn(turn_usage)
@@ -1335,14 +1446,66 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1335
1446
  # Handle different stop reasons
1336
1447
  stop_reason = processed_response.get("stop_reason", "end_turn")
1337
1448
 
1338
- # For Llama native format, check for tool calls even if stop_reason is "end_turn"
1339
- schema_type = self._get_tool_schema_type(model or DEFAULT_BEDROCK_MODEL)
1340
- if schema_type == ToolSchemaType.SYSTEM_PROMPT and stop_reason == "end_turn":
1449
+ # Determine if we should parse for system-prompt tool calls (unified capabilities)
1450
+ caps_tmp = self.capabilities.get(model) or ModelCapabilities()
1451
+ sys_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT
1452
+
1453
+ if sys_prompt_schema and stop_reason == "end_turn":
1454
+ # Only parse for tools if the text contains an actual function-call structure
1455
+ message_text = ""
1456
+ for content_item in processed_response.get("content", []):
1457
+ if isinstance(content_item, dict) and content_item.get("type") == "text":
1458
+ message_text += content_item.get("text", "")
1459
+
1341
1460
  # Check if there's a tool call in the response
1342
- parsed_tools = self._parse_tool_response(
1343
- processed_response, model or DEFAULT_BEDROCK_MODEL
1344
- )
1461
+ parsed_tools = self._parse_tool_response(processed_response, model)
1345
1462
  if parsed_tools:
1463
+ # Loop guard: if the same single tool call repeats > N times in system-prompt mode, stop
1464
+ if len(parsed_tools) == 1:
1465
+ # Determine normalized tool name as we would use for execution
1466
+ candidate_name = parsed_tools[0]["name"]
1467
+ # Map to canonical name if available
1468
+ canonical = self.tool_name_mapping.get(candidate_name)
1469
+ if not canonical:
1470
+ lowered = candidate_name.lower().replace("_", "-")
1471
+ for key, original in self.tool_name_mapping.items():
1472
+ if lowered == key.lower().replace("_", "-"):
1473
+ canonical = original
1474
+ break
1475
+ normalized_name = canonical or candidate_name
1476
+ try:
1477
+ args_signature = json.dumps(
1478
+ parsed_tools[0].get("arguments", {}), sort_keys=True
1479
+ )
1480
+ except Exception:
1481
+ args_signature = str(parsed_tools[0].get("arguments", {}))
1482
+ current_signature = f"{normalized_name}|{args_signature}"
1483
+
1484
+ # Identify system-prompt schema mode via unified capabilities
1485
+ caps_loop = self.capabilities.get(model) or ModelCapabilities()
1486
+ is_system_prompt_schema_loop = (
1487
+ caps_loop.schema == ToolSchemaType.SYSTEM_PROMPT
1488
+ )
1489
+
1490
+ if is_system_prompt_schema_loop:
1491
+ if current_signature == last_tool_signature:
1492
+ repeated_tool_calls_count += 1
1493
+ else:
1494
+ repeated_tool_calls_count = 1
1495
+ last_tool_signature = current_signature
1496
+
1497
+ if repeated_tool_calls_count > max_repeated_tool_calls:
1498
+ # Return the last tool result content to avoid infinite loops
1499
+ if tool_result_responses:
1500
+ return cast(
1501
+ "List[ContentBlock | CallToolRequestParams]",
1502
+ tool_result_responses,
1503
+ )
1504
+ # Fallback: return a minimal text indicating no content
1505
+ return cast(
1506
+ "List[ContentBlock | CallToolRequestParams]",
1507
+ [TextContent(text="[No content in tool result]")],
1508
+ )
1346
1509
  # Override stop_reason to handle as tool_use
1347
1510
  stop_reason = "tool_use"
1348
1511
  self.logger.debug(
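The loop guard added in the hunk above stops system-prompt models that keep emitting the identical tool call. It keys on a signature built from the normalized tool name plus the arguments serialized with sorted keys. A small self-contained illustration of that signature idea (call_signature is a made-up helper for this sketch, not package code):

import json


def call_signature(name: str, arguments: dict) -> str:
    # Stable signature: canonical name plus arguments serialized with sorted keys,
    # so semantically identical calls compare equal regardless of key order.
    try:
        args = json.dumps(arguments, sort_keys=True)
    except TypeError:
        args = str(arguments)
    return f"{name}|{args}"


# Both argument orderings produce the same signature, so a repeat is detected.
assert call_signature("get-weather", {"city": "Oslo", "units": "C"}) == \
       call_signature("get-weather", {"units": "C", "city": "Oslo"})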
@@ -1385,22 +1548,10 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1385
1548
 
1386
1549
  # Parse tool calls using model-specific method
1387
1550
  self.logger.info(f"DEBUG: About to parse tool response: {processed_response}")
1388
- parsed_tools = self._parse_tool_response(
1389
- processed_response, model or DEFAULT_BEDROCK_MODEL
1390
- )
1551
+ parsed_tools = self._parse_tool_response(processed_response, model)
1391
1552
  self.logger.info(f"DEBUG: Parsed tools: {parsed_tools}")
1392
1553
 
1393
1554
  if parsed_tools:
1394
- # We will comment out showing the assistant's intermediate message
1395
- # to make the output less chatty, as requested by the user.
1396
- # if not message_text:
1397
- # message_text = Text(
1398
- # "the assistant requested tool calls",
1399
- # style="dim green italic",
1400
- # )
1401
- #
1402
- # await self.show_assistant_message(message_text)
1403
-
1404
1555
  # Process tool calls and collect results
1405
1556
  tool_results_for_batch = []
1406
1557
  for tool_idx, parsed_tool in enumerate(parsed_tools):
@@ -1413,7 +1564,9 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1413
1564
  tool_args = parsed_tool["arguments"]
1414
1565
  tool_use_id = parsed_tool["id"]
1415
1566
 
1416
- self.show_tool_call(tool_list.tools, tool_name, tool_args)
1567
+ self.show_tool_call(
1568
+ tool_list.tools if tool_list else [], tool_name, tool_args
1569
+ )
1417
1570
 
1418
1571
  tool_call_request = CallToolRequest(
1419
1572
  method="tools/call",
@@ -1431,15 +1584,17 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1431
1584
  tool_results_for_batch.append((tool_use_id, result, tool_name))
1432
1585
  responses.extend(result.content)
1433
1586
 
1434
- # After processing all tool calls for a turn, clear the intermediate
1435
- # responses. This ensures that the final returned value only contains
1436
- # the model's last message, not the reasoning or raw tool output.
1587
+ # Store tool results temporarily - we'll clear responses only if the model
1588
+ # generates a follow-up message. This ensures tool results are preserved
1589
+ # if the model doesn't generate any follow-up content (like Claude Haiku).
1590
+ tool_result_responses = responses.copy()
1437
1591
  responses.clear()
1438
1592
 
1439
- # Now, create the message with tool results based on the model's schema type.
1440
- schema_type = self._get_tool_schema_type(model or DEFAULT_BEDROCK_MODEL)
1593
+ # Decide result formatting based on unified capabilities
1594
+ caps_tmp = self.capabilities.get(model) or ModelCapabilities()
1595
+ is_system_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT
1441
1596
 
1442
- if schema_type == ToolSchemaType.SYSTEM_PROMPT:
1597
+ if is_system_prompt_schema:
1443
1598
  # For system prompt models (like Llama), format results as a simple text message.
1444
1599
  # The model expects to see the results in a human-readable format to continue.
1445
1600
  tool_result_parts = []
@@ -1540,6 +1695,12 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1540
1695
 
1541
1696
  self.history.set(new_messages)
1542
1697
 
1698
+ # If we have no responses but had tool results, restore the tool results
1699
+ # This handles cases like Claude Haiku where the model calls tools but doesn't generate follow-up text
1700
+ if not responses and tool_result_responses:
1701
+ responses = tool_result_responses
1702
+ self.logger.debug("Restored tool results as no follow-up content was generated")
1703
+
1543
1704
  # Strip leading whitespace from the *last* non-empty text block of the final response
1544
1705
  # to ensure the output is clean.
1545
1706
  if responses:
@@ -1548,7 +1709,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1548
1709
  item.text = item.text.lstrip()
1549
1710
  break
1550
1711
 
1551
- return responses
1712
+ return cast("List[ContentBlock | CallToolRequestParams]", responses)
1552
1713
 
1553
1714
  async def generate_messages(
1554
1715
  self,
@@ -1606,7 +1767,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1606
1767
  if isinstance(content_item, TextContent):
1607
1768
  message_param["content"].append({"type": "text", "text": content_item.text})
1608
1769
 
1609
- # Generate response
1770
+ # Generate response (structured paths set a one-shot non-streaming hint)
1771
+ self._force_non_streaming_once = True
1610
1772
  return await self.generate_messages(message_param, request_params)
1611
1773
 
1612
1774
  def _generate_simplified_schema(self, model: Type[ModelT]) -> str:
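The next hunk reworks structured output so the caller's message history is never mutated: the last user message is deep-copied, the JSON-schema prompt is appended to the copy only, and just that copy is passed to the provider-specific path. A toy illustration of that copy-then-append pattern using a minimal pydantic model (Msg is invented for this sketch; the package uses PromptMessageMultipart):

from typing import List

from pydantic import BaseModel


class Msg(BaseModel):
    role: str
    content: List[str]

    def add_text(self, text: str) -> None:
        self.content.append(text)


history = [Msg(role="user", content=["What is the weather in Oslo?"])]
temp_last = history[-1].model_copy(deep=True)  # pydantic v2 deep copy
temp_last.add_text("You are a JSON generator. Respond with JSON only.")
assert len(history[-1].content) == 1  # the original message is untouched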
@@ -1677,49 +1839,169 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1677
1839
  request_params: RequestParams | None = None,
1678
1840
  ) -> Tuple[ModelT | None, PromptMessageMultipart]:
1679
1841
  """Apply structured output for Bedrock using prompt engineering with a simplified schema."""
1842
+ # Short-circuit: if the last message is already an assistant JSON payload,
1843
+ # parse it directly without invoking the model. This restores pre-regression behavior
1844
+ # for tests that seed assistant JSON as the last turn.
1845
+ try:
1846
+ if multipart_messages and multipart_messages[-1].role == "assistant":
1847
+ parsed_model, parsed_mp = self._structured_from_multipart(
1848
+ multipart_messages[-1], model
1849
+ )
1850
+ if parsed_model is not None:
1851
+ return parsed_model, parsed_mp
1852
+ except Exception:
1853
+ # Fall through to normal generation path
1854
+ pass
1855
+
1680
1856
  request_params = self.get_request_params(request_params)
1681
1857
 
1682
- # Generate a simplified, human-readable schema
1683
- simplified_schema = self._generate_simplified_schema(model)
1858
+ # For structured outputs: disable reasoning entirely and set temperature=0 for deterministic JSON
1859
+ # This avoids conflicts between reasoning (requires temperature=1) and structured output (wants temperature=0)
1860
+ original_reasoning_effort = self._reasoning_effort
1861
+ self._reasoning_effort = ReasoningEffort.MINIMAL # Temporarily disable reasoning
1862
+
1863
+ # Override temperature for structured outputs
1864
+ if request_params:
1865
+ request_params = request_params.model_copy(update={"temperature": 0.0})
1866
+ else:
1867
+ request_params = RequestParams(temperature=0.0)
1868
+
1869
+ # Select the schema strategy, preferring the runtime cache over the resolver
1870
+ caps_struct = self.capabilities.get(self.model) or ModelCapabilities()
1871
+ strategy = caps_struct.structured_strategy or StructuredStrategy.STRICT_SCHEMA
1872
+
1873
+ if strategy == StructuredStrategy.SIMPLIFIED_SCHEMA:
1874
+ schema_text = self._generate_simplified_schema(model)
1875
+ else:
1876
+ schema_text = AugmentedLLM.model_to_schema_str(model)
1684
1877
 
1685
1878
  # Build the new simplified prompt
1686
1879
  prompt_parts = [
1687
1880
  "You are a JSON generator. Respond with JSON that strictly follows the provided schema. Do not add any commentary or explanation.",
1688
1881
  "",
1689
1882
  "JSON Schema:",
1690
- simplified_schema,
1883
+ schema_text,
1691
1884
  "",
1692
1885
  "IMPORTANT RULES:",
1693
1886
  "- You MUST respond with only raw JSON data. No other text, commentary, or markdown is allowed.",
1694
1887
  "- All field names and enum values are case-sensitive and must match the schema exactly.",
1695
1888
  "- Do not add any extra fields to the JSON response. Only include the fields specified in the schema.",
1889
+ "- Do not use code fences or backticks (no ```json and no ```).",
1890
+ "- Your output must start with '{' and end with '}'.",
1696
1891
  "- Valid JSON requires double quotes for all field names and string values. Other types (int, float, boolean, etc.) should not be quoted.",
1697
1892
  "",
1698
1893
  "Now, generate the valid JSON response for the following request:",
1699
1894
  ]
1700
1895
 
1701
- # Add the new prompt to the last user message
1702
- multipart_messages[-1].add_text("\n".join(prompt_parts))
1896
+ # IMPORTANT: Do NOT mutate the caller's messages. Create a deep copy of the last
1897
+ # user message, append the schema to the copy only, and pass just that copy into
1898
+ # the provider-specific path. This prevents contamination of routed messages.
1899
+ try:
1900
+ temp_last = multipart_messages[-1].model_copy(deep=True)
1901
+ except Exception:
1902
+ # Fallback: construct a minimal copy if model_copy is unavailable
1903
+ temp_last = PromptMessageMultipart(
1904
+ role=multipart_messages[-1].role, content=list(multipart_messages[-1].content)
1905
+ )
1703
1906
 
1704
- self.logger.info(f"DEBUG: Prompt messages: {multipart_messages[-1].content}")
1907
+ temp_last.add_text("\n".join(prompt_parts))
1705
1908
 
1706
- result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
1707
- multipart_messages, request_params
1909
+ self.logger.debug(
1910
+ "DEBUG: Using copied last message for structured schema; original left untouched"
1708
1911
  )
1709
- return self._structured_from_multipart(result, model)
1912
+
1913
+ try:
1914
+ result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
1915
+ [temp_last], request_params
1916
+ )
1917
+ try:
1918
+ parsed_model, _ = self._structured_from_multipart(result, model)
1919
+ # If parsing returned None (no model instance), trigger the retry path
1920
+ if parsed_model is None:
1921
+ raise ValueError("structured parse returned None; triggering retry")
1922
+ return parsed_model, result
1923
+ except Exception:
1924
+ # One retry with stricter JSON-only guidance and simplified schema
1925
+ strict_parts = [
1926
+ "STRICT MODE:",
1927
+ "Return ONLY a single JSON object that matches the schema.",
1928
+ "Do not include any prose, explanations, code fences, or extra characters.",
1929
+ "Start with '{' and end with '}'.",
1930
+ "",
1931
+ "JSON Schema (simplified):",
1932
+ ]
1933
+ try:
1934
+ simplified_schema_text = self._generate_simplified_schema(model)
1935
+ except Exception:
1936
+ simplified_schema_text = AugmentedLLM.model_to_schema_str(model)
1937
+ try:
1938
+ temp_last_retry = multipart_messages[-1].model_copy(deep=True)
1939
+ except Exception:
1940
+ temp_last_retry = PromptMessageMultipart(
1941
+ role=multipart_messages[-1].role,
1942
+ content=list(multipart_messages[-1].content),
1943
+ )
1944
+ temp_last_retry.add_text("\n".join(strict_parts + [simplified_schema_text]))
1945
+
1946
+ retry_result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
1947
+ [temp_last_retry], request_params
1948
+ )
1949
+ return self._structured_from_multipart(retry_result, model)
1950
+ finally:
1951
+ # Restore original reasoning effort
1952
+ self._reasoning_effort = original_reasoning_effort
1710
1953
 
1711
1954
  def _clean_json_response(self, text: str) -> str:
1712
- """Clean up JSON response by removing text before first { and after last }."""
1955
+ """Clean up JSON response by removing text before first { and after last }.
1956
+
1957
+ Also handles cases where models wrap the response in an extra layer like:
1958
+ {"FormattedResponse": {"thinking": "...", "message": "..."}}
1959
+ """
1713
1960
  if not text:
1714
1961
  return text
1715
1962
 
1963
+ # Strip common code fences (```json ... ``` or ``` ... ```), anywhere in the text
1964
+ try:
1965
+ import re as _re
1966
+
1967
+ fence_match = _re.search(r"```(?:json)?\s*([\s\S]*?)```", text)
1968
+ if fence_match:
1969
+ text = fence_match.group(1)
1970
+ except Exception:
1971
+ pass
1972
+
1716
1973
  # Find the first { and last }
1717
1974
  first_brace = text.find("{")
1718
1975
  last_brace = text.rfind("}")
1719
1976
 
1720
1977
  # If we found both braces, extract just the JSON part
1721
1978
  if first_brace != -1 and last_brace != -1 and first_brace < last_brace:
1722
- return text[first_brace : last_brace + 1]
1979
+ json_part = text[first_brace : last_brace + 1]
1980
+
1981
+ # Check if the JSON is wrapped in an extra layer (common model behavior)
1982
+ try:
1983
+ import json
1984
+
1985
+ parsed = json.loads(json_part)
1986
+
1987
+ # If it's a dict with a single key that matches the model class name,
1988
+ # unwrap it (e.g., {"FormattedResponse": {...}} -> {...})
1989
+ if isinstance(parsed, dict) and len(parsed) == 1:
1990
+ key = list(parsed.keys())[0]
1991
+ # Common wrapper patterns: class name, "response", "result", etc.
1992
+ if key in [
1993
+ "FormattedResponse",
1994
+ "WeatherResponse",
1995
+ "SimpleResponse",
1996
+ ] or key.endswith("Response"):
1997
+ inner_value = parsed[key]
1998
+ if isinstance(inner_value, dict):
1999
+ return json.dumps(inner_value)
2000
+
2001
+ return json_part
2002
+ except json.JSONDecodeError:
2003
+ # If parsing fails, return the original JSON part
2004
+ return json_part
1723
2005
 
1724
2006
  # Otherwise return the original text
1725
2007
  return text
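Taken together, the additions to _clean_json_response above strip code fences, keep only the outermost braces, and unwrap a single wrapper key such as "FormattedResponse". An illustrative standalone version of that behaviour (simplified for this sketch; it is not the package's exact implementation):

import json
import re


def clean_json_response(text: str) -> str:
    # 1) Drop code fences, 2) keep only the outermost {...}, 3) unwrap a single
    # "...Response" wrapper key if the payload is nested one level deep.
    fence = re.search(r"```(?:json)?\s*([\s\S]*?)```", text)
    if fence:
        text = fence.group(1)
    first, last = text.find("{"), text.rfind("}")
    if first == -1 or last <= first:
        return text
    candidate = text[first : last + 1]
    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        return candidate
    if isinstance(parsed, dict) and len(parsed) == 1:
        key = next(iter(parsed))
        if key.endswith("Response") and isinstance(parsed[key], dict):
            return json.dumps(parsed[key])
    return candidate


# Example: a fenced, wrapped reply is reduced to the inner object.
raw = '```json\n{"FormattedResponse": {"message": "hi"}}\n```'
print(clean_json_response(raw))  # {"message": "hi"}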
@@ -1744,8 +2026,14 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1744
2026
  else:
1745
2027
  cleaned_multipart = message
1746
2028
 
1747
- # Use the parent class method with the cleaned multipart
1748
- return super()._structured_from_multipart(cleaned_multipart, model)
2029
+ # Parse using cleaned multipart first
2030
+ model_instance, parsed_multipart = super()._structured_from_multipart(
2031
+ cleaned_multipart, model
2032
+ )
2033
+ if model_instance is not None:
2034
+ return model_instance, parsed_multipart
2035
+ # Fallback: if parsing failed (e.g., assistant-provided JSON already valid), try original
2036
+ return super()._structured_from_multipart(message, model)
1749
2037
 
1750
2038
  @classmethod
1751
2039
  def convert_message_to_message_param(