fast-agent-mcp 0.2.57__py3-none-any.whl → 0.2.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.2.58.dist-info}/METADATA +2 -2
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.2.58.dist-info}/RECORD +14 -13
- mcp_agent/config.py +3 -0
- mcp_agent/human_input/elicitation_form.py +45 -33
- mcp_agent/llm/augmented_llm.py +1 -0
- mcp_agent/llm/providers/augmented_llm_anthropic.py +1 -0
- mcp_agent/llm/providers/augmented_llm_bedrock.py +890 -602
- mcp_agent/llm/providers/augmented_llm_google_native.py +1 -0
- mcp_agent/llm/providers/augmented_llm_openai.py +1 -0
- mcp_agent/llm/providers/bedrock_utils.py +216 -0
- mcp_agent/resources/examples/mcp/elicitations/elicitation_forms_server.py +25 -3
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.2.58.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.2.58.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.57.dist-info → fast_agent_mcp-0.2.58.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,10 @@
 import json
 import os
 import re
-
-from
+import sys
+from dataclasses import dataclass
+from enum import Enum, auto
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union, cast

 from mcp.types import ContentBlock, TextContent
 from rich.text import Text
@@ -29,10 +31,6 @@ except ImportError:
     ClientError = Exception
     NoCredentialsError = Exception

-try:
-    from anthropic.types import ToolParam
-except ImportError:
-    ToolParam = None

 from mcp.types import (
     CallToolRequest,
@@ -41,6 +39,26 @@ from mcp.types import (

 DEFAULT_BEDROCK_MODEL = "amazon.nova-lite-v1:0"

+
+# Local ReasoningEffort enum to avoid circular imports
+class ReasoningEffort(Enum):
+    """Reasoning effort levels for Bedrock models"""
+
+    MINIMAL = "minimal"
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
+# Reasoning effort to token budget mapping
+# Based on AWS recommendations: start with 1024 minimum, increment reasonably
+REASONING_EFFORT_BUDGETS = {
+    ReasoningEffort.MINIMAL: 0,  # Disabled
+    ReasoningEffort.LOW: 512,  # Light reasoning
+    ReasoningEffort.MEDIUM: 1024,  # AWS minimum recommendation
+    ReasoningEffort.HIGH: 2048,  # Higher reasoning
+}
+
 # Bedrock message format types
 BedrockMessage = Dict[str, Any]  # Bedrock message format
 BedrockMessageParam = Dict[str, Any]  # Bedrock message parameter format
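The hunk above introduces the effort-to-budget mapping used later in the request path. A minimal sketch of that lookup, using only the names added in this hunk (the string-to-enum coercion mirrors what the provider does further down in this diff; the helper name itself is illustrative):

    def budget_for(effort) -> int:
        # Hypothetical helper: coerce a configured string such as "high" to the enum,
        # then look up the token budget; unknown values fall back to MINIMAL (disabled).
        if isinstance(effort, str):
            try:
                effort = ReasoningEffort(effort)
            except ValueError:
                effort = ReasoningEffort.MINIMAL
        return REASONING_EFFORT_BUDGETS.get(effort, 0)

    assert budget_for("medium") == 1024  # the AWS minimum recommendation noted above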
@@ -49,9 +67,54 @@ BedrockMessageParam = Dict[str, Any]  # Bedrock message parameter format
 class ToolSchemaType(Enum):
     """Enum for different tool schema formats used by different model families."""

-    DEFAULT =
-    SYSTEM_PROMPT =
-    ANTHROPIC =
+    DEFAULT = auto()  # Default toolSpec format used by most models (formerly Nova)
+    SYSTEM_PROMPT = auto()  # System prompt-based tool calling format
+    ANTHROPIC = auto()  # Native Anthropic tool calling format
+    NONE = auto()  # Schema fallback failed, avoid retries
+
+
+class SystemMode(Enum):
+    """System message handling modes."""
+
+    SYSTEM = auto()  # Use native system parameter
+    INJECT = auto()  # Inject into user message
+
+
+class StreamPreference(Enum):
+    """Streaming preference with tools."""
+
+    STREAM_OK = auto()  # Model can stream with tools
+    NON_STREAM = auto()  # Model requires non-streaming for tools
+
+
+class ToolNamePolicy(Enum):
+    """Tool name transformation policy."""
+
+    PRESERVE = auto()  # Keep original tool names
+    UNDERSCORES = auto()  # Convert to underscore format
+
+
+class StructuredStrategy(Enum):
+    """Structured output generation strategy."""
+
+    STRICT_SCHEMA = auto()  # Use full JSON schema
+    SIMPLIFIED_SCHEMA = auto()  # Use simplified schema
+
+
+@dataclass
+class ModelCapabilities:
+    """Unified per-model capability cache to avoid scattered caches.
+
+    Uses proper enums and types to prevent typos and improve type safety.
+    """
+
+    schema: ToolSchemaType | None = None
+    system_mode: SystemMode | None = None
+    stream_with_tools: StreamPreference | None = None
+    tool_name_policy: ToolNamePolicy | None = None
+    structured_strategy: StructuredStrategy | None = None
+    reasoning_support: bool | None = None  # True=supported, False=unsupported, None=unknown
+    supports_tools: bool | None = None  # True=yes, False=no, None=unknown


 class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
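ModelCapabilities becomes the single per-model cache that later hunks read with the `capabilities.get(model) or ModelCapabilities()` pattern. A rough sketch of how one entry is populated and re-read (the model id is just an example; values are normally discovered at runtime, not set by hand):

    caps = BedrockAugmentedLLM.capabilities.get("amazon.nova-lite-v1:0") or ModelCapabilities()
    if caps.schema is None:
        caps.schema = ToolSchemaType.DEFAULT              # learned from a successful call
        caps.stream_with_tools = StreamPreference.STREAM_OK
    BedrockAugmentedLLM.capabilities["amazon.nova-lite-v1:0"] = caps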
@@ -60,38 +123,58 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
     Supports all Bedrock models including Nova, Claude, Meta, etc.
     """

-    #
-
-
-
-
-
-
-
-
-
-
-
-
+    # Class-level capabilities cache shared across all instances
+    capabilities: Dict[str, ModelCapabilities] = {}
+
+    @classmethod
+    def debug_cache(cls) -> None:
+        """Print human-readable JSON representation of the capabilities cache.
+
+        Useful for debugging and understanding what capabilities have been
+        discovered and cached for each model. Uses sys.stdout to bypass
+        any logging hijacking.
+        """
+        if not cls.capabilities:
+            sys.stdout.write("{}\n")
+            sys.stdout.flush()
+            return
+
+        cache_dict = {}
+        for model, caps in cls.capabilities.items():
+            cache_dict[model] = {
+                "schema": caps.schema.name if caps.schema else None,
+                "system_mode": caps.system_mode.name if caps.system_mode else None,
+                "stream_with_tools": caps.stream_with_tools.name
+                if caps.stream_with_tools
+                else None,
+                "tool_name_policy": caps.tool_name_policy.name if caps.tool_name_policy else None,
+                "structured_strategy": caps.structured_strategy.name
+                if caps.structured_strategy
+                else None,
+                "reasoning_support": caps.reasoning_support,
+                "supports_tools": caps.supports_tools,
+            }
+
+        output = json.dumps(cache_dict, indent=2, sort_keys=True)
+        sys.stdout.write(f"{output}\n")
+        sys.stdout.flush()

     @classmethod
     def matches_model_pattern(cls, model_name: str) -> bool:
-        """
-        # Bedrock model patterns
-        bedrock_patterns = [
-            r"^amazon\.nova.*",  # Amazon Nova models
-            r"^anthropic\.claude.*",  # Anthropic Claude models
-            r"^meta\.llama.*",  # Meta Llama models
-            r"^mistral\..*",  # Mistral models
-            r"^cohere\..*",  # Cohere models
-            r"^ai21\..*",  # AI21 models
-            r"^stability\..*",  # Stability AI models
-            r"^openai\..*",  # OpenAI models
-        ]
+        """Return True if model_name exists in the Bedrock model list loaded at init.

-
+        Uses the centralized discovery in bedrock_utils; no regex, no fallbacks.
+        Gracefully handles environments without AWS access by returning False.
+        """
+        from mcp_agent.llm.providers.bedrock_utils import all_bedrock_models

-
+        try:
+            available = set(all_bedrock_models(prefix=""))
+            return model_name in available
+        except Exception:
+            # If AWS calls fail (no credentials, region not configured, etc.),
+            # assume this is not a Bedrock model
+            return False

     def __init__(self, *args, **kwargs) -> None:
         """Initialize the Bedrock LLM with AWS credentials and region."""
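matches_model_pattern now defers to bedrock_utils.all_bedrock_models instead of regex lists, and debug_cache dumps the shared capability cache as JSON on stdout. A hedged usage sketch (without AWS credentials the membership check simply returns False):

    from mcp_agent.llm.providers.augmented_llm_bedrock import BedrockAugmentedLLM

    print(BedrockAugmentedLLM.matches_model_pattern("amazon.nova-lite-v1:0"))
    BedrockAugmentedLLM.debug_cache()  # prints "{}" until any capabilities have been learned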
@@ -131,22 +214,41 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         self._bedrock_client = None
         self._bedrock_runtime_client = None

+        # One-shot hint to force non-streaming on next completion (used by structured outputs)
+        self._force_non_streaming_once: bool = False
+
+        # Set up reasoning-related attributes
+        self._reasoning_effort = kwargs.get("reasoning_effort", None)
+        if (
+            self._reasoning_effort is None
+            and self.context
+            and self.context.config
+            and self.context.config.bedrock
+        ):
+            if hasattr(self.context.config.bedrock, "reasoning_effort"):
+                self._reasoning_effort = self.context.config.bedrock.reasoning_effort
+
     def _initialize_default_params(self, kwargs: dict) -> RequestParams:
         """Initialize Bedrock-specific default parameters"""
         # Get base defaults from parent (includes ModelDatabase lookup)
         base_params = super()._initialize_default_params(kwargs)

-        # Override with Bedrock-specific settings
+        # Override with Bedrock-specific settings - ensure we always have a model
         chosen_model = kwargs.get("model", DEFAULT_BEDROCK_MODEL)
         base_params.model = chosen_model

         return base_params

+    @property
+    def model(self) -> str:
+        """Get the model name, guaranteed to be set."""
+        return self.default_request_params.model
+
     def _get_bedrock_client(self):
         """Get or create Bedrock client."""
         if self._bedrock_client is None:
             try:
-                session = boto3.Session(profile_name=self.aws_profile)
+                session = boto3.Session(profile_name=self.aws_profile)  # type: ignore[union-attr]
                 self._bedrock_client = session.client("bedrock", region_name=self.aws_region)
             except NoCredentialsError as e:
                 raise ProviderKeyError(
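The constructor resolves reasoning effort with an explicit precedence: a `reasoning_effort` kwarg wins, otherwise the `bedrock` config section is consulted (this is also why config.py gains three lines in this release). A standalone restatement of that precedence, assuming nothing beyond what the hunk shows (the function name is hypothetical):

    def resolve_reasoning_effort(kwargs: dict, bedrock_settings) -> "str | None":
        effort = kwargs.get("reasoning_effort", None)
        if effort is None and bedrock_settings is not None:
            # mirrors the hasattr check on context.config.bedrock above
            effort = getattr(bedrock_settings, "reasoning_effort", None)
        return effort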
@@ -159,7 +261,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         """Get or create Bedrock Runtime client."""
         if self._bedrock_runtime_client is None:
             try:
-                session = boto3.Session(profile_name=self.aws_profile)
+                session = boto3.Session(profile_name=self.aws_profile)  # type: ignore[union-attr]
                 self._bedrock_runtime_client = session.client(
                     "bedrock-runtime", region_name=self.aws_region
                 )
@@ -170,161 +272,33 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
             ) from e
         return self._bedrock_runtime_client

-    def
-        ""
-
-
-        Args:
-            model_id: The model ID (e.g., "bedrock.meta.llama3-1-8b-instruct-v1:0")
+    def _build_tool_name_mapping(
+        self, tools: "ListToolsResult", name_policy: ToolNamePolicy
+    ) -> Dict[str, str]:
+        """Build tool name mapping based on schema type and name policy.

-        Returns
-            ToolSchemaType indicating which format to use
+        Returns dict mapping from converted_name -> original_name for tool execution.
         """
-
-        clean_model = model_id.replace("bedrock.", "")
+        mapping = {}

-
-
-
-
-
-
-
-
-
-
-
-
-        # Other Llama 4 models use default toolConfig format
-        if re.search(r"meta\.llama4", clean_model):
-            self.logger.debug(
-                f"Model {model_id} detected as Llama 4 (non-Scout) - using default toolConfig format"
-            )
-            return ToolSchemaType.DEFAULT
-
-        # Llama 3.x models use system prompt format
-        if re.search(r"meta\.llama3", clean_model):
-            self.logger.debug(
-                f"Model {model_id} detected as Llama 3.x - using system prompt format"
-            )
-            return ToolSchemaType.SYSTEM_PROMPT
-
-        # Future: Add other model-specific formats here
-        # if re.search(r"mistral\.", clean_model):
-        # return ToolSchemaType.MISTRAL
-
-        # Default to default format for all other models
-        self.logger.debug(f"Model {model_id} using default tool format")
-        return ToolSchemaType.DEFAULT
-
-    def _supports_streaming_with_tools(self, model: str) -> bool:
-        """
-        Check if a model supports streaming with tools.
-
-        Some models (like AI21 Jamba) support tools but not in streaming mode.
-        This method uses regex patterns to identify such models.
-
-        Args:
-            model: The model name (e.g., "ai21.jamba-1-5-mini-v1:0")
-
-        Returns:
-            False if the model requires non-streaming for tools, True otherwise
-        """
-        # Remove any "bedrock." prefix for pattern matching
-        clean_model = model.replace("bedrock.", "")
-
-        # Models that don't support streaming with tools
-        non_streaming_patterns = [
-            r"ai21\.jamba",  # All AI21 Jamba models
-            r"meta\.llama",  # All Meta Llama models
-            r"mistral\.",  # All Mistral models
-            r"amazon\.titan",  # All Amazon Titan models
-            r"cohere\.command",  # All Cohere Command models
-            r"anthropic\.claude-instant",  # Anthropic Claude Instant models
-            r"anthropic\.claude-v2",  # Anthropic Claude v2 models
-            r"deepseek\.",  # All DeepSeek models
-        ]
-
-        for pattern in non_streaming_patterns:
-            if re.search(pattern, clean_model, re.IGNORECASE):
-                self.logger.debug(
-                    f"Model {model} detected as non-streaming for tools (pattern: {pattern})"
-                )
-                return False
-
-        return True
-
-    def _supports_tool_use(self, model_id: str) -> bool:
-        """
-        Determine if a model supports tool use at all.
-        Some models don't support tools in any form.
-        Based on AWS Bedrock documentation: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html
-        """
-        # Models that don't support tool use at all
-        no_tool_use_patterns = [
-            r"ai21\.jamba-instruct",  # AI21 Jamba-Instruct (but not jamba 1.5)
-            r"ai21\..*jurassic",  # AI21 Labs Jurassic-2 models
-            r"amazon\.titan",  # All Amazon Titan models
-            r"anthropic\.claude-v2",  # Anthropic Claude v2 models
-            r"anthropic\.claude-instant",  # Anthropic Claude Instant models
-            r"cohere\.command(?!-r)",  # Cohere Command (but not Command R/R+)
-            r"cohere\.command-light",  # Cohere Command Light
-            r"deepseek\.",  # All DeepSeek models
-            r"meta\.llama[23](?![-.])",  # Meta Llama 2 and 3 (but not 3.1+, 3.2+, etc.)
-            r"meta\.llama3-1-8b",  # Meta Llama 3.1 8b - doesn't support tool calls
-            r"meta\.llama3-2-[13]b",  # Meta Llama 3.2 1b and 3b (but not 11b/90b)
-            r"meta\.llama3-2-11b",  # Meta Llama 3.2 11b - doesn't support tool calls
-            r"mistral\..*-instruct",  # Mistral AI Instruct (but not Mistral Large)
-        ]
-
-        for pattern in no_tool_use_patterns:
-            if re.search(pattern, model_id):
-                self.logger.info(f"Model {model_id} does not support tool use")
-                return False
-
-        return True
-
-    def _supports_system_messages(self, model: str) -> bool:
-        """
-        Check if a model supports system messages.
-
-        Some models (like Titan and Cohere embedding models) don't support system messages.
-        This method uses regex patterns to identify such models.
-
-        Args:
-            model: The model name (e.g., "amazon.titan-embed-text-v1")
-
-        Returns:
-            False if the model doesn't support system messages, True otherwise
-        """
-        # Remove any "bedrock." prefix for pattern matching
-        clean_model = model.replace("bedrock.", "")
-
-        # DEBUG: Print the model names for debugging
-        self.logger.info(
-            f"DEBUG: Checking system message support for model='{model}', clean_model='{clean_model}'"
-        )
-
-        # Models that don't support system messages (reverse logic as suggested)
-        no_system_message_patterns = [
-            r"amazon\.titan",  # All Amazon Titan models
-            r"cohere\.command.*-text",  # Cohere command text models (command-text-v14, command-light-text-v14)
-            r"mistral.*mixtral.*8x7b",  # Mistral Mixtral 8x7b models
-            r"mistral.mistral-7b-instruct",  # Mistral 7b instruct models
-            r"meta\.llama3-2-11b-instruct",  # Specific Meta Llama3 model
-        ]
-
-        for pattern in no_system_message_patterns:
-            if re.search(pattern, clean_model, re.IGNORECASE):
-                self.logger.info(
-                    f"DEBUG: Model {model} detected as NOT supporting system messages (pattern: {pattern})"
-                )
-                return False
+        if name_policy == ToolNamePolicy.PRESERVE:
+            # Identity mapping for preserve policy
+            for tool in tools.tools:
+                mapping[tool.name] = tool.name
+        else:
+            # Nova-style cleaning for underscores policy
+            for tool in tools.tools:
+                clean_name = re.sub(r"[^a-zA-Z0-9_]", "_", tool.name)
+                clean_name = re.sub(r"_+", "_", clean_name).strip("_")
+                if not clean_name:
+                    clean_name = f"tool_{hash(tool.name) % 10000}"
+                mapping[clean_name] = tool.name

-
-        return True
+        return mapping

-    def _convert_tools_nova_format(
+    def _convert_tools_nova_format(
+        self, tools: "ListToolsResult", tool_name_mapping: Dict[str, str]
+    ) -> List[Dict[str, Any]]:
         """Convert MCP tools to Nova-specific toolSpec format.

         Note: Nova models have VERY strict JSON schema requirements:
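Under the UNDERSCORES policy, _build_tool_name_mapping applies the two re.sub passes shown above and keeps a reverse map to the original MCP name. The same cleaning on a hypothetical tool name:

    import re

    name = "filesystem/read-file"  # hypothetical MCP tool name
    clean = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    clean = re.sub(r"_+", "_", clean).strip("_")
    print(clean)  # filesystem_read_file, mapped back to "filesystem/read-file"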
@@ -381,12 +355,12 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
             ):
                 nova_schema["required"] = input_schema["required"]

-            #
-
-
-
-
-
+            # Apply tool name policy (e.g., Nova requires hyphen→underscore)
+            policy = getattr(self, "_tool_name_policy_for_conversion", "preserve")
+            if policy == "replace_hyphens_with_underscores":
+                clean_name = tool.name.replace("-", "_")
+            else:
+                clean_name = tool.name

             # Store mapping from cleaned name back to original MCP name
             # This is needed because:
@@ -409,132 +383,62 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         self.logger.debug(f"Converted {len(bedrock_tools)} tools for Nova format")
         return bedrock_tools

-    def _convert_tools_system_prompt_format(
-
-
-
-        - Scout models: Comprehensive system prompt format
-        - Other models: Minimal format
-        """
+    def _convert_tools_system_prompt_format(
+        self, tools: "ListToolsResult", tool_name_mapping: Dict[str, str]
+    ) -> str:
+        """Convert MCP tools to system prompt format."""
         if not tools.tools:
             return ""

-
-        self.tool_name_mapping = {}
-
-        self.logger.debug(
-            f"Converting {len(tools.tools)} MCP tools to Llama native system prompt format"
-        )
+        self.logger.debug(f"Converting {len(tools.tools)} MCP tools to system prompt format")

-
-
-
-
-
-        if is_scout:
-            # Use comprehensive system prompt format for Scout models
-            prompt_parts = [
-                "You are a helpful assistant with access to the following functions. Use them if required:",
-                "",
-            ]
-
-            # Add each tool definition in JSON format
-            for tool in tools.tools:
-                self.logger.debug(f"Converting MCP tool: {tool.name}")
+        prompt_parts = [
+            "You have the following tools available to help answer the user's request. You can call one or more functions at a time. The functions are described here in JSON-schema format:",
+            "",
+        ]

-
-
+        # Add each tool definition in JSON format
+        for tool in tools.tools:
+            self.logger.debug(f"Converting MCP tool: {tool.name}")

-
-
+            # Use original tool name (no hyphen replacement)
+            tool_name = tool.name

-
-
-
-
-
-
-
-
-
+            # Create tool definition
+            tool_def = {
+                "type": "function",
+                "function": {
+                    "name": tool_name,
+                    "description": tool.description or f"Tool: {tool.name}",
+                    "parameters": tool.inputSchema or {"type": "object", "properties": {}},
+                },
+            }

-
+            prompt_parts.append(json.dumps(tool_def))

-
-
-
-
-
-                "1. When you need to call a function, use the following format:",
-                "   [function_name(arguments)]",
-                "2. You can call multiple functions in a single response if needed",
-                "3. Always provide the function results in your response to the user",
-                "4. If a function call fails, explain the error and try an alternative approach",
-                "5. Only call functions when necessary to answer the user's question",
-                "",
-                "## Response Rules:",
-                "- Always provide a complete answer to the user's question",
-                "- Include function results in your response",
-                "- Be helpful and informative",
-                "- If you cannot answer without calling a function, call the appropriate function first",
-                "",
-                "## Boundaries:",
-                "- Only call functions that are explicitly provided above",
-                "- Do not make up function names or parameters",
-                "- Follow the exact function signature provided",
-                "- Always validate your function calls before making them",
-            ]
-        )
-        else:
-            # Use minimal format for other Llama models
-            prompt_parts = [
-                "You have the following tools available to help answer the user's request. You can call one or more functions at a time. The functions are described here in JSON-schema format:",
+        # Add the response format instructions
+        prompt_parts.extend(
+            [
+                "",
+                "To call one or more tools, provide the tool calls on a new line as a JSON-formatted array. Explain your steps in a neutral tone. Then, only call the tools you can for the first step, then end your turn. If you previously received an error, you can try to call the tool again. Give up after 3 errors.",
                 "",
+                "Conform precisely to the single-line format of this example:",
+                "Tool Call:",
+                '[{"name": "SampleTool", "arguments": {"foo": "bar"}},{"name": "SampleTool", "arguments": {"foo": "other"}}]',
+                "",
+                "When calling a tool you must supply valid JSON with both 'name' and 'arguments' keys with the function name and function arguments respectively. Do not add any preamble, labels or extra text, just the single JSON string in one of the specified formats",
             ]
-
-            # Add each tool definition in JSON format
-            for tool in tools.tools:
-                self.logger.debug(f"Converting MCP tool: {tool.name}")
-
-                # Use original tool name (no hyphen replacement for Llama)
-                tool_name = tool.name
-
-                # Store mapping (identity mapping since no name cleaning)
-                self.tool_name_mapping[tool_name] = tool.name
-
-                # Create tool definition in the format Llama expects
-                tool_def = {
-                    "type": "function",
-                    "function": {
-                        "name": tool_name,
-                        "description": tool.description or f"Tool: {tool.name}",
-                        "parameters": tool.inputSchema or {"type": "object", "properties": {}},
-                    },
-                }
-
-                prompt_parts.append(json.dumps(tool_def))
-
-                # Add the response format instructions based on community best practices
-                prompt_parts.extend(
-                    [
-                        "",
-                        "To call one or more tools, provide the tool calls on a new line as a JSON-formatted array. Explain your steps in a neutral tone. Then, only call the tools you can for the first step, then end your turn. If you previously received an error, you can try to call the tool again. Give up after 3 errors.",
-                        "",
-                        "Conform precisely to the single-line format of this example:",
-                        "Tool Call:",
-                        '[{"name": "SampleTool", "arguments": {"foo": "bar"}},{"name": "SampleTool", "arguments": {"foo": "other"}}]',
-                    ]
-                )
+        )

         system_prompt = "\n".join(prompt_parts)
         self.logger.debug(f"Generated Llama native system prompt: {system_prompt}")

         return system_prompt

-    def _convert_tools_anthropic_format(
+    def _convert_tools_anthropic_format(
+        self, tools: "ListToolsResult", tool_name_mapping: Dict[str, str]
+    ) -> List[Dict[str, Any]]:
         """Convert MCP tools to Anthropic format wrapped in Bedrock toolSpec - preserves raw schema."""
-        # No tool name mapping needed for Anthropic (uses original names)
-        self.tool_name_mapping = {}

         self.logger.debug(
             f"Converting {len(tools.tools)} MCP tools to Anthropic format with toolSpec wrapper"
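The system-prompt schema asks the model to answer with a single-line "Tool Call:" JSON array; the example string embedded in the prompt is itself valid JSON, so a quick sanity check of the expected shape is just:

    import json

    sample = '[{"name": "SampleTool", "arguments": {"foo": "bar"}},{"name": "SampleTool", "arguments": {"foo": "other"}}]'
    for call in json.loads(sample):
        print(call["name"], call.get("arguments", {}))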
@@ -544,9 +448,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         for tool in tools.tools:
             self.logger.debug(f"Converting MCP tool: {tool.name}")

-            # Store identity mapping (no name cleaning for Anthropic)
-            self.tool_name_mapping[tool.name] = tool.name
-
             # Use raw MCP schema (like native Anthropic provider) - no cleaning
             input_schema = tool.inputSchema or {"type": "object", "properties": {}}

@@ -567,71 +468,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         )
         return bedrock_tools

-    def _convert_mcp_tools_to_bedrock(
-        self, tools: "ListToolsResult"
-    ) -> Union[List[Dict[str, Any]], str]:
-        """Convert MCP tools to appropriate Bedrock format based on model type."""
-        model_id = self.default_request_params.model or DEFAULT_BEDROCK_MODEL
-        schema_type = self._get_tool_schema_type(model_id)
-
-        if schema_type == ToolSchemaType.SYSTEM_PROMPT:
-            system_prompt = self._convert_tools_system_prompt_format(tools)
-            # Store the system prompt for later use in system message
-            self._system_prompt_tools = system_prompt
-            return system_prompt
-        elif schema_type == ToolSchemaType.ANTHROPIC:
-            return self._convert_tools_anthropic_format(tools)
-        else:
-            return self._convert_tools_nova_format(tools)
-
-    def _add_tools_to_request(
-        self,
-        converse_args: Dict[str, Any],
-        available_tools: Union[List[Dict[str, Any]], str],
-        model_id: str,
-    ) -> None:
-        """Add tools to the request in the appropriate format based on model type."""
-        schema_type = self._get_tool_schema_type(model_id)
-
-        if schema_type == ToolSchemaType.SYSTEM_PROMPT:
-            # System prompt models expect tools in the system prompt, not as API parameters
-            # Tools are already handled in the system prompt generation
-            self.logger.debug("System prompt tools handled in system prompt")
-        elif schema_type == ToolSchemaType.ANTHROPIC:
-            # Anthropic models expect toolConfig with tools array (like native provider)
-            converse_args["toolConfig"] = {"tools": available_tools}
-            self.logger.debug(
-                f"Added {len(available_tools)} tools to Anthropic request in toolConfig format"
-            )
-        else:
-            # Nova models expect toolConfig with toolSpec format
-            converse_args["toolConfig"] = {"tools": available_tools}
-            self.logger.debug(
-                f"Added {len(available_tools)} tools to Nova request in toolConfig format"
-            )
-
-    def _parse_nova_tool_response(self, processed_response: Dict[str, Any]) -> List[Dict[str, Any]]:
-        """Parse Nova-format tool response (toolUse format)."""
-        tool_uses = [
-            content_item
-            for content_item in processed_response.get("content", [])
-            if "toolUse" in content_item
-        ]
-
-        parsed_tools = []
-        for tool_use_item in tool_uses:
-            tool_use = tool_use_item["toolUse"]
-            parsed_tools.append(
-                {
-                    "type": "nova",
-                    "name": tool_use["name"],
-                    "arguments": tool_use["input"],
-                    "id": tool_use["toolUseId"],
-                }
-            )
-
-        return parsed_tools
-
     def _parse_system_prompt_tool_response(
         self, processed_response: Dict[str, Any]
     ) -> List[Dict[str, Any]]:
@@ -672,7 +508,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

                 tool_calls.append(
                     {
-                        "type": "
+                        "type": "system_prompt_tool",
                         "name": func_name,
                         "arguments": arguments,
                         "id": f"system_prompt_{func_name}_{i}",
@@ -693,7 +529,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                 if isinstance(call, dict) and "name" in call:
                     tool_calls.append(
                         {
-                            "type": "
+                            "type": "system_prompt_tool",
                             "name": call["name"],
                             "arguments": call.get("arguments", {}),
                             "id": f"system_prompt_{call['name']}_{i}",
@@ -703,8 +539,9 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         except json.JSONDecodeError as e:
             self.logger.warning(f"Failed to parse Tool Call JSON array: {json_str} - {e}")

-            # Fallback: try to parse
-
+            # Fallback: try to parse JSON arrays that look like tool calls
+            # Look for arrays containing objects with "name" fields - avoid simple citations
+            array_match = re.search(r'\[.*?\{.*?"name".*?\}.*?\]', text_content, re.DOTALL)
             if array_match:
                 json_str = array_match.group(0)
                 try:
@@ -714,7 +551,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                         if isinstance(call, dict) and "name" in call:
                             tool_calls.append(
                                 {
-                                    "type": "
+                                    "type": "system_prompt_tool",
                                     "name": call["name"],
                                     "arguments": call.get("arguments", {}),
                                     "id": f"system_prompt_{call['name']}_{i}",
@@ -722,7 +559,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                             )
                     return tool_calls
                 except json.JSONDecodeError as e:
-                    self.logger.
+                    self.logger.debug(f"Failed to parse JSON array: {json_str} - {e}")

         # Fallback: try to parse as single JSON object (backward compatibility)
         try:
@@ -734,7 +571,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                 if "name" in function_call:
                     return [
                         {
-                            "type": "
+                            "type": "system_prompt_tool",
                             "name": function_call["name"],
                             "arguments": function_call.get("arguments", {}),
                             "id": f"system_prompt_{function_call['name']}",
@@ -758,7 +595,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                 function_args = json.loads(function_args_json)
                 return [
                     {
-                        "type": "
+                        "type": "system_prompt_tool",
                         "name": function_name,
                         "arguments": function_args,
                         "id": f"system_prompt_{function_name}",
@@ -783,7 +620,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                 tool_use = content_item["toolUse"]
                 tool_uses.append(
                     {
-                        "type": "
+                        "type": "anthropic_tool",
                         "name": tool_use["name"],
                         "arguments": tool_use["input"],
                         "id": tool_use["toolUseId"],
@@ -793,17 +630,74 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         return tool_uses

     def _parse_tool_response(
-        self, processed_response: Dict[str, Any],
+        self, processed_response: Dict[str, Any], model: str
     ) -> List[Dict[str, Any]]:
-        """Parse tool
-
+        """Parse tool responses using cached schema, without model/family heuristics."""
+        caps = self.capabilities.get(model) or ModelCapabilities()
+        schema = caps.schema

-
+        # Choose parser strictly by cached schema
+        if schema == ToolSchemaType.SYSTEM_PROMPT:
             return self._parse_system_prompt_tool_response(processed_response)
-
+        if schema == ToolSchemaType.ANTHROPIC:
             return self._parse_anthropic_tool_response(processed_response)
-
-
+
+        # Default/Nova: detect toolUse objects
+        tool_uses = [
+            c
+            for c in processed_response.get("content", [])
+            if isinstance(c, dict) and "toolUse" in c
+        ]
+        if tool_uses:
+            parsed_tools: List[Dict[str, Any]] = []
+            for item in tool_uses:
+                tu = item.get("toolUse", {})
+                if not isinstance(tu, dict):
+                    continue
+                parsed_tools.append(
+                    {
+                        "type": "nova_tool",
+                        "name": tu.get("name"),
+                        "arguments": tu.get("input", {}),
+                        "id": tu.get("toolUseId"),
+                    }
+                )
+            if parsed_tools:
+                return parsed_tools
+
+        # Family-agnostic fallback: parse JSON array embedded in text
+        try:
+            text_content = ""
+            for content_item in processed_response.get("content", []):
+                if isinstance(content_item, dict) and "text" in content_item:
+                    text_content += content_item["text"]
+            if text_content:
+                import json as _json
+                import re as _re
+
+                match = _re.search(r"\[(?:.|\n)*?\]", text_content)
+                if match:
+                    arr = _json.loads(match.group(0))
+                    if isinstance(arr, list) and arr and isinstance(arr[0], dict):
+                        parsed_calls = []
+                        for i, call in enumerate(arr):
+                            name = call.get("name")
+                            args = call.get("arguments", {})
+                            if name:
+                                parsed_calls.append(
+                                    {
+                                        "type": "system_prompt_tool",
+                                        "name": name,
+                                        "arguments": args,
+                                        "id": f"system_prompt_{name}_{i}",
+                                    }
+                                )
+                        if parsed_calls:
+                            return parsed_calls
+        except Exception:
+            pass
+
+        return []

     def _convert_messages_to_bedrock(
         self, messages: List[BedrockMessageParam]
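For the default/Nova path, _parse_tool_response scans content blocks for "toolUse" entries and normalizes them to dicts with type "nova_tool". A sketch against a hand-written response (the toolUseId value is made up):

    processed_response = {
        "content": [
            {"text": "Calling the tool now."},
            {"toolUse": {"name": "SampleTool", "input": {"foo": "bar"}, "toolUseId": "tooluse_123"}},
        ]
    }
    parsed = [
        {
            "type": "nova_tool",
            "name": c["toolUse"].get("name"),
            "arguments": c["toolUse"].get("input", {}),
            "id": c["toolUse"].get("toolUseId"),
        }
        for c in processed_response.get("content", [])
        if isinstance(c, dict) and "toolUse" in c
    ]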
@@ -1094,208 +988,427 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
             messages.extend(self.history.get(include_completion_history=params.use_history))
         messages.append(message_param)

-        # Get available tools
-        available_tools = []
+        # Get available tools (no resolver gating; fallback logic will decide wiring)
         tool_list = None
-        model_to_check = self.default_request_params.model or DEFAULT_BEDROCK_MODEL
-
-        if self._supports_tool_use(model_to_check):
-            try:
-                tool_list = await self.aggregator.list_tools()
-                self.logger.debug(f"Found {len(tool_list.tools)} MCP tools")

-
-
-
-
+        try:
+            tool_list = await self.aggregator.list_tools()
+            self.logger.debug(f"Found {len(tool_list.tools)} MCP tools")
+        except Exception as e:
+            self.logger.error(f"Error fetching MCP tools: {e}")
+            import traceback

-
-
-                import traceback
-
-                self.logger.debug(f"Traceback: {traceback.format_exc()}")
-                available_tools = []
-                tool_list = None
-        else:
-            self.logger.info(
-                f"Model {model_to_check} does not support tool use - skipping tool preparation"
-            )
+            self.logger.debug(f"Traceback: {traceback.format_exc()}")
+            tool_list = None

         responses: List[ContentBlock] = []
+        tool_result_responses: List[ContentBlock] = []
         model = self.default_request_params.model
+        # Loop guard for repeated identical tool calls (system-prompt parsing path)
+        last_tool_signature: str | None = None
+        repeated_tool_calls_count: int = 0
+        max_repeated_tool_calls: int = 3

         for i in range(params.max_iterations):
             self._log_chat_progress(self.chat_turn(), model=model)

-            #
-            model_to_check = model or DEFAULT_BEDROCK_MODEL
-            schema_type = self._get_tool_schema_type(model_to_check)
-
-            # For Llama native format, we need to store tools before message conversion
-            if schema_type == ToolSchemaType.SYSTEM_PROMPT and available_tools:
-                has_tools = bool(available_tools) and (
-                    (isinstance(available_tools, list) and len(available_tools) > 0)
-                    or (isinstance(available_tools, str) and available_tools.strip())
-                )
-
-                if has_tools:
-                    self._add_tools_to_request({}, available_tools, model_to_check)
-                    self.logger.debug("Pre-processed Llama native tools for message injection")
+            # Resolver-free: schema type inferred by runtime fallback below

             # Convert messages to Bedrock format
             bedrock_messages = self._convert_messages_to_bedrock(messages)

-            #
-
-                "modelId": model,
-                "messages": bedrock_messages,
-            }
+            # Base system text
+            base_system_text = self.instruction or params.systemPrompt

-            #
-
-
-
-
-
-
-
-
-
-
-
-            else:
-                system_text = self._system_prompt_tools
-                self.logger.debug("Combined system prompt with system prompt tools")
-            elif hasattr(self, "_system_prompt_tools") and self._system_prompt_tools:
-                # For other formats, combine system prompt with tools
-                if system_text:
-                    system_text = f"{system_text}\n\n{self._system_prompt_tools}"
+            # Determine tool schema fallback order and caches
+            caps = self.capabilities.get(model) or ModelCapabilities()
+            if caps.schema and caps.schema != ToolSchemaType.NONE:
+                schema_order = [caps.schema]
+            else:
+                # Restore original fallback order: Anthropic models try anthropic first, others skip it
+                if model.startswith("anthropic."):
+                    schema_order = [
+                        ToolSchemaType.ANTHROPIC,
+                        ToolSchemaType.DEFAULT,
+                        ToolSchemaType.SYSTEM_PROMPT,
+                    ]
                 else:
-
-
+                    schema_order = [
+                        ToolSchemaType.DEFAULT,
+                        ToolSchemaType.SYSTEM_PROMPT,
+                    ]

-
-
-            )
-            self.logger.info(
-                f"DEBUG: self.instruction='{self.instruction}', params.systemPrompt='{params.systemPrompt}'"
-            )
+            # Track whether we changed system mode cache this turn
+            tried_system_fallback = False

-
-
+            processed_response = None  # type: ignore[assignment]
+            last_error_msg = None

-
-
-
-            elif system_text:
-                # For models that don't support system messages, inject system prompt into the first user message
-                self.logger.info(
-                    f"DEBUG: Injecting system prompt into first user message for {model_to_check} (doesn't support system messages)"
-                )
-                if bedrock_messages and bedrock_messages[0].get("role") == "user":
-                    first_message = bedrock_messages[0]
-                    if first_message.get("content") and len(first_message["content"]) > 0:
-                        # Prepend system instruction to the first user message
-                        original_text = first_message["content"][0].get("text", "")
-                        first_message["content"][0]["text"] = (
-                            f"System: {system_text}\n\nUser: {original_text}"
-                        )
-                        self.logger.info("DEBUG: Injected system prompt into first user message")
-            else:
-                self.logger.info(f"DEBUG: No system text provided for {model_to_check}")
+            for schema_choice in schema_order:
+                # Fresh messages per attempt
+                converse_args = {"modelId": model, "messages": [dict(m) for m in bedrock_messages]}

-
-
-
-
-
-
+                # Build tools representation for this schema
+                tools_payload: Union[List[Dict[str, Any]], str, None] = None
+                if tool_list and tool_list.tools:
+                    # Build tool name mapping once per schema attempt
+                    name_policy = (
+                        self.capabilities.get(model) or ModelCapabilities()
+                    ).tool_name_policy or ToolNamePolicy.PRESERVE
+                    tool_name_mapping = self._build_tool_name_mapping(tool_list, name_policy)

-
-                    self.
-                else:
-                    self.logger.debug(
-                        "No tools available - omitting tool configuration from request"
-                    )
+                    # Store mapping for tool execution
+                    self.tool_name_mapping = tool_name_mapping

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # Debug: Print the actual messages being sent to Bedrock for Llama models
-            schema_type = self._get_tool_schema_type(model_to_check)
-            if schema_type == ToolSchemaType.SYSTEM_PROMPT:
-                self.logger.info("=== SYSTEM PROMPT DEBUG ===")
-                self.logger.info("Messages being sent to Bedrock:")
-                for i, msg in enumerate(converse_args.get("messages", [])):
-                    self.logger.info(f"Message {i} ({msg.get('role', 'unknown')}):")
-                    for j, content in enumerate(msg.get("content", [])):
-                        if "text" in content:
-                            self.logger.info(f"  Content {j}: {content['text'][:500]}...")
-                self.logger.info("=== END SYSTEM PROMPT DEBUG ===")
-
-            # Debug: Print the full tool config being sent
-            if "toolConfig" in converse_args:
-                self.logger.debug(
-                    f"Tool config being sent to Bedrock: {json.dumps(converse_args['toolConfig'], indent=2)}"
-                )
+                    if schema_choice == ToolSchemaType.ANTHROPIC:
+                        tools_payload = self._convert_tools_anthropic_format(
+                            tool_list, tool_name_mapping
+                        )
+                    elif schema_choice == ToolSchemaType.DEFAULT:
+                        # Set tool name policy for Nova conversion
+                        self._tool_name_policy_for_conversion = (
+                            "replace_hyphens_with_underscores"
+                            if name_policy == ToolNamePolicy.UNDERSCORES
+                            else "preserve"
+                        )
+                        tools_payload = self._convert_tools_nova_format(
+                            tool_list, tool_name_mapping
+                        )
+                    elif schema_choice == ToolSchemaType.SYSTEM_PROMPT:
+                        tools_payload = self._convert_tools_system_prompt_format(
+                            tool_list, tool_name_mapping
+                        )

-
-
-
-
-
-            # Otherwise, always prefer streaming for better UX
-            has_tools = bool(available_tools) and (
-                (isinstance(available_tools, list) and len(available_tools) > 0)
-                or (isinstance(available_tools, str) and available_tools.strip())
-            )
+                # System prompt handling with cache
+                system_mode = (
+                    self.capabilities.get(model) or ModelCapabilities()
+                ).system_mode or SystemMode.SYSTEM
+                system_text = base_system_text

-            if
-
+                if (
+                    schema_choice == ToolSchemaType.SYSTEM_PROMPT
+                    and isinstance(tools_payload, str)
+                    and tools_payload
                 ):
-
-
-                        f"Using non-streaming API for {model} with tools (model limitation)"
+                    system_text = (
+                        f"{system_text}\n\n{tools_payload}" if system_text else tools_payload
                     )
-
-
-
-
-
-
-
-
-
-
+
+                if system_text:
+                    if system_mode == SystemMode.SYSTEM:
+                        converse_args["system"] = [{"text": system_text}]
+                        self.logger.debug(
+                            f"Attempting with system param for {model} and schema={schema_choice}"
+                        )
+                    else:
+                        # inject
+                        if (
+                            converse_args["messages"]
+                            and converse_args["messages"][0].get("role") == "user"
+                        ):
+                            first_message = converse_args["messages"][0]
+                            if first_message.get("content") and len(first_message["content"]) > 0:
+                                original_text = first_message["content"][0].get("text", "")
+                                first_message["content"][0]["text"] = (
+                                    f"System: {system_text}\n\nUser: {original_text}"
+                                )
+                                self.logger.debug(
+                                    "Injected system prompt into first user message (cached mode)"
+                                )
+
+                # Tools wiring
+                if (
+                    schema_choice in (ToolSchemaType.ANTHROPIC, ToolSchemaType.DEFAULT)
+                    and isinstance(tools_payload, list)
+                    and tools_payload
+                ):
+                    converse_args["toolConfig"] = {"tools": tools_payload}
+
+                # Inference configuration and overrides
+                inference_config: Dict[str, Any] = {}
+                if params.maxTokens is not None:
+                    inference_config["maxTokens"] = params.maxTokens
+                if params.stopSequences:
+                    inference_config["stopSequences"] = params.stopSequences
+
+                # Check if reasoning should be enabled
+                reasoning_budget = 0
+                if self._reasoning_effort and self._reasoning_effort != ReasoningEffort.MINIMAL:
+                    # Convert string to enum if needed
+                    if isinstance(self._reasoning_effort, str):
+                        try:
+                            effort_enum = ReasoningEffort(self._reasoning_effort)
+                        except ValueError:
+                            effort_enum = ReasoningEffort.MINIMAL
+                    else:
+                        effort_enum = self._reasoning_effort
+
+                    if effort_enum != ReasoningEffort.MINIMAL:
+                        reasoning_budget = REASONING_EFFORT_BUDGETS.get(effort_enum, 0)
+
+                # Handle temperature and reasoning configuration
+                # AWS docs: "Thinking isn't compatible with temperature, top_p, or top_k modifications"
+                reasoning_enabled = False
+                if reasoning_budget > 0:
+                    # Check if this model supports reasoning (with caching)
+                    cached_reasoning = (
+                        self.capabilities.get(model) or ModelCapabilities()
+                    ).reasoning_support
+                    if cached_reasoning == "supported":
+                        # We know this model supports reasoning
+                        converse_args["performanceConfig"] = {
+                            "reasoning": {"maxReasoningTokens": reasoning_budget}
+                        }
+                        reasoning_enabled = True
+                    elif cached_reasoning != "unsupported":
+                        # Unknown - we'll try reasoning and fallback if needed
+                        converse_args["performanceConfig"] = {
+                            "reasoning": {"maxReasoningTokens": reasoning_budget}
+                        }
+                        reasoning_enabled = True
+
+                if not reasoning_enabled:
+                    # No reasoning - apply temperature if provided
+                    if params.temperature is not None:
+                        inference_config["temperature"] = params.temperature
+
+                # Nova-specific recommendations (when not using reasoning)
+                if model and "nova" in (model or "").lower() and reasoning_budget == 0:
+                    inference_config.setdefault("topP", 1.0)
+                    # Merge/attach additionalModelRequestFields for topK
+                    existing_amrf = converse_args.get("additionalModelRequestFields", {})
+                    merged_amrf = {**existing_amrf, **{"inferenceConfig": {"topK": 1}}}
+                    converse_args["additionalModelRequestFields"] = merged_amrf
+
+                # Note: resolver default inference overrides removed; keep minimal Nova heuristic above.
+
+                if inference_config:
+                    converse_args["inferenceConfig"] = inference_config
+
+                # Decide streaming vs non-streaming (resolver-free with runtime detection + cache)
+                has_tools: bool = False
+                try:
+                    has_tools = bool(tools_payload) and bool(
+                        (isinstance(tools_payload, list) and len(tools_payload) > 0)
+                        or (isinstance(tools_payload, str) and tools_payload.strip())
                     )
-
-
-
+
+                    # Force non-streaming for structured-output flows (one-shot)
+                    force_non_streaming = False
+                    if self._force_non_streaming_once:
+                        force_non_streaming = True
+                        self._force_non_streaming_once = False
+
+                    # Evaluate cache for streaming-with-tools
+                    cache_pref = (
+                        self.capabilities.get(model) or ModelCapabilities()
+                    ).stream_with_tools
+                    use_streaming = True
+                    attempted_streaming = False
+
+                    if force_non_streaming:
+                        use_streaming = False
+                    elif has_tools:
+                        if cache_pref == StreamPreference.NON_STREAM:
+                            use_streaming = False
+                        elif cache_pref == StreamPreference.STREAM_OK:
+                            use_streaming = True
+                        else:
+                            # Unknown: try streaming first, fallback on error
+                            use_streaming = True
+                    else:
+                        use_streaming = True
+
+                    # Try API call with reasoning fallback
+                    try:
+                        if not use_streaming:
+                            self.logger.debug(
+                                f"Using non-streaming API for {model} (schema={schema_choice})"
+                            )
+                            response = client.converse(**converse_args)
+                            processed_response = self._process_non_streaming_response(
+                                response, model
+                            )
+                        else:
+                            self.logger.debug(
+                                f"Using streaming API for {model} (schema={schema_choice})"
+                            )
+                            attempted_streaming = True
+                            response = client.converse_stream(**converse_args)
+                            processed_response = await self._process_stream(response, model)
+                    except (ClientError, BotoCoreError) as e:
+                        # Check if this is a reasoning-related error
+                        if reasoning_budget > 0 and (
+                            "reasoning" in str(e).lower() or "performance" in str(e).lower()
+                        ):
+                            self.logger.debug(
+                                f"Model {model} doesn't support reasoning, retrying without: {e}"
+                            )
+                            caps.reasoning_support = False
+                            self.capabilities[model] = caps
+
+                            # Remove reasoning and retry
+                            if "performanceConfig" in converse_args:
+                                del converse_args["performanceConfig"]
+
+                            # Apply temperature now that reasoning is disabled
+                            if params.temperature is not None:
+                                if "inferenceConfig" not in converse_args:
+                                    converse_args["inferenceConfig"] = {}
+                                converse_args["inferenceConfig"]["temperature"] = params.temperature
+
+                            # Retry the API call
+                            if not use_streaming:
+                                response = client.converse(**converse_args)
+                                processed_response = self._process_non_streaming_response(
+                                    response, model
+                                )
+                            else:
+                                response = client.converse_stream(**converse_args)
+                                processed_response = await self._process_stream(response, model)
+                        else:
+                            # Not a reasoning error, re-raise
+                            raise
+
+                    # Success: cache the working schema choice if not already cached
+                    # Only cache schema when tools are present - no tools doesn't predict tool behavior
+                    if not caps.schema and has_tools:
+                        caps.schema = ToolSchemaType(schema_choice)
+
+                    # Cache successful reasoning if we tried it
+                    if reasoning_budget > 0 and caps.reasoning_support is not True:
+                        caps.reasoning_support = True
+
+                    # If Nova/default worked and we used preserve but server complains, flip cache for next time
+                    if (
+                        schema_choice == ToolSchemaType.DEFAULT
+                        and getattr(self, "_tool_name_policy_for_conversion", "preserve")
+                        == "preserve"
+                    ):
+                        # Heuristic: if tool names include '-', prefer underscores next time
+                        try:
+                            if any("-" in t.name for t in (tool_list.tools if tool_list else [])):
+                                caps.tool_name_policy = ToolNamePolicy.UNDERSCORES
+                        except Exception:
+                            pass
+                    # Cache streaming-with-tools behavior on success
+                    if has_tools and attempted_streaming:
+                        caps.stream_with_tools = StreamPreference.STREAM_OK
+                    self.capabilities[model] = caps
+                    break
+                except (ClientError, BotoCoreError) as e:
+                    error_msg = str(e)
+                    last_error_msg = error_msg
+                    self.logger.debug(f"Bedrock API error (schema={schema_choice}): {error_msg}")
+
+                    # If streaming with tools failed and cache undecided, fallback to non-streaming and cache
+                    if has_tools and (caps.stream_with_tools is None):
+                        try:
+                            self.logger.debug(
+                                f"Falling back to non-streaming API for {model} after streaming error"
+                            )
+                            response = client.converse(**converse_args)
+                            processed_response = self._process_non_streaming_response(
+                                response, model
+                            )
+                            caps.stream_with_tools = StreamPreference.NON_STREAM
+                            if not caps.schema:
+                                caps.schema = ToolSchemaType(schema_choice)
+                            self.capabilities[model] = caps
+                            break
+                        except (ClientError, BotoCoreError) as e_fallback:
+                            last_error_msg = str(e_fallback)
+                            self.logger.debug(
+                                f"Bedrock API error after non-streaming fallback: {last_error_msg}"
+                            )
+                            # continue to other fallbacks (e.g., system inject or next schema)
+
+                    # System parameter fallback once per call if system message unsupported
+                    if (
+                        not tried_system_fallback
+                        and system_text
+                        and system_mode == SystemMode.SYSTEM
+                        and (
+                            "system message" in error_msg.lower()
+                            or "system messages" in error_msg.lower()
+                        )
+                    ):
+                        tried_system_fallback = True
+                        caps.system_mode = SystemMode.INJECT
+                        self.capabilities[model] = caps
+                        self.logger.info(
+                            f"Switching system mode to inject for {model} and retrying same schema"
+                        )
+                        # Retry the same schema immediately in inject mode
+                        try:
+                            # Rebuild messages for inject
+                            converse_args = {
+                                "modelId": model,
+                                "messages": [dict(m) for m in bedrock_messages],
+                            }
+                            # inject system into first user
+                            if (
+                                converse_args["messages"]
+                                and converse_args["messages"][0].get("role") == "user"
|
|
1353
|
+
):
|
|
1354
|
+
fm = converse_args["messages"][0]
|
|
1355
|
+
if fm.get("content") and len(fm["content"]) > 0:
|
|
1356
|
+
original_text = fm["content"][0].get("text", "")
|
|
1357
|
+
fm["content"][0]["text"] = (
|
|
1358
|
+
f"System: {system_text}\n\nUser: {original_text}"
|
|
1359
|
+
)
|
|
1360
|
+
|
|
1361
|
+
# Re-add tools
|
|
1362
|
+
if (
|
|
1363
|
+
schema_choice
|
|
1364
|
+
in (ToolSchemaType.ANTHROPIC.value, ToolSchemaType.DEFAULT.value)
|
|
1365
|
+
and isinstance(tools_payload, list)
|
|
1366
|
+
and tools_payload
|
|
1367
|
+
):
|
|
1368
|
+
converse_args["toolConfig"] = {"tools": tools_payload}
|
|
1369
|
+
|
|
1370
|
+
# Same streaming decision using cache
|
|
1371
|
+
has_tools = bool(tools_payload) and bool(
|
|
1372
|
+
(isinstance(tools_payload, list) and len(tools_payload) > 0)
|
|
1373
|
+
or (isinstance(tools_payload, str) and tools_payload.strip())
|
|
1374
|
+
)
|
|
1375
|
+
cache_pref = (
|
|
1376
|
+
self.capabilities.get(model) or ModelCapabilities()
|
|
1377
|
+
).stream_with_tools
|
|
1378
|
+
if cache_pref == StreamPreference.NON_STREAM or not has_tools:
|
|
1379
|
+
response = client.converse(**converse_args)
|
|
1380
|
+
processed_response = self._process_non_streaming_response(
|
|
1381
|
+
response, model
|
|
1382
|
+
)
|
|
1383
|
+
else:
|
|
1384
|
+
response = client.converse_stream(**converse_args)
|
|
1385
|
+
processed_response = await self._process_stream(response, model)
|
|
1386
|
+
if not caps.schema and has_tools:
|
|
1387
|
+
caps.schema = ToolSchemaType(schema_choice)
|
|
1388
|
+
self.capabilities[model] = caps
|
|
1389
|
+
break
|
|
1390
|
+
except (ClientError, BotoCoreError) as e2:
|
|
1391
|
+
last_error_msg = str(e2)
|
|
1392
|
+
self.logger.debug(
|
|
1393
|
+
f"Bedrock API error after system inject fallback: {last_error_msg}"
|
|
1394
|
+
)
|
|
1395
|
+
# Fall through to next schema
|
|
1396
|
+
continue
|
|
1397
|
+
|
|
1398
|
+
# For any other error (including tool format errors), continue to next schema
|
|
1399
|
+
self.logger.debug(
|
|
1400
|
+
f"Continuing to next schema after error with {schema_choice}: {error_msg}"
|
|
1291
1401
|
)
|
|
1292
|
-
|
|
1293
|
-
error_msg = str(e)
|
|
1294
|
-
self.logger.error(f"Bedrock API error: {error_msg}")
|
|
1402
|
+
continue
|
|
1295
1403
|
|
|
1296
|
-
|
|
1404
|
+
if processed_response is None:
|
|
1405
|
+
# All attempts failed; mark schema as none to avoid repeated retries this process
|
|
1406
|
+
caps.schema = ToolSchemaType.NONE
|
|
1407
|
+
self.capabilities[model] = caps
|
|
1297
1408
|
processed_response = {
|
|
1298
|
-
"content": [
|
|
1409
|
+
"content": [
|
|
1410
|
+
{"text": f"Error during generation: {last_error_msg or 'Unknown error'}"}
|
|
1411
|
+
],
|
|
1299
1412
|
"stop_reason": "error",
|
|
1300
1413
|
"usage": {"input_tokens": 0, "output_tokens": 0},
|
|
1301
1414
|
"model": model,
|
|
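The capability cache that drives the hunk above is easier to see outside the retry loop: consult a cached per-model StreamPreference before choosing converse_stream vs. converse, and record whichever path succeeded so the next turn skips the failing one. The sketch below is illustrative only; ModelCapabilities, StreamPreference, and the call_* callables are simplified stand-ins for this note, not fast-agent's actual classes.

from dataclasses import dataclass
from enum import Enum, auto
from typing import Callable, Dict, Optional


class StreamPreference(Enum):
    STREAM_OK = auto()
    NON_STREAM = auto()


@dataclass
class ModelCapabilities:
    stream_with_tools: Optional[StreamPreference] = None


capabilities: Dict[str, ModelCapabilities] = {}


def invoke(model: str, has_tools: bool,
           call_streaming: Callable[[], dict],
           call_non_streaming: Callable[[], dict]) -> dict:
    """Try streaming first unless the cache says otherwise; cache the outcome."""
    caps = capabilities.setdefault(model, ModelCapabilities())
    if has_tools and caps.stream_with_tools == StreamPreference.NON_STREAM:
        # A previous turn already learned this model can't stream with tools.
        return call_non_streaming()
    try:
        result = call_streaming()
        if has_tools:
            caps.stream_with_tools = StreamPreference.STREAM_OK
        return result
    except Exception:
        # Fall back once, and remember the preference for the next turn.
        result = call_non_streaming()
        if has_tools:
            caps.stream_with_tools = StreamPreference.NON_STREAM
        return result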
@@ -1312,8 +1425,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  input_tokens=usage.get("input_tokens", 0),
  output_tokens=usage.get("output_tokens", 0),
  total_tokens=usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
- cache_creation_input_tokens=0,
- cache_read_input_tokens=0,
  raw_usage=usage,
  )
  self.usage_accumulator.add_turn(turn_usage)
@@ -1335,14 +1446,66 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  # Handle different stop reasons
  stop_reason = processed_response.get("stop_reason", "end_turn")

- #
-
-
+ # Determine if we should parse for system-prompt tool calls (unified capabilities)
+ caps_tmp = self.capabilities.get(model) or ModelCapabilities()
+ sys_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT
+
+ if sys_prompt_schema and stop_reason == "end_turn":
+ # Only parse for tools if text contains actual function call structure
+ message_text = ""
+ for content_item in processed_response.get("content", []):
+ if isinstance(content_item, dict) and content_item.get("type") == "text":
+ message_text += content_item.get("text", "")
+
  # Check if there's a tool call in the response
- parsed_tools = self._parse_tool_response(
- processed_response, model or DEFAULT_BEDROCK_MODEL
- )
+ parsed_tools = self._parse_tool_response(processed_response, model)
  if parsed_tools:
+ # Loop guard: if the same single tool call repeats > N times in system-prompt mode, stop
+ if len(parsed_tools) == 1:
+ # Determine normalized tool name as we would use for execution
+ candidate_name = parsed_tools[0]["name"]
+ # Map to canonical name if available
+ canonical = self.tool_name_mapping.get(candidate_name)
+ if not canonical:
+ lowered = candidate_name.lower().replace("_", "-")
+ for key, original in self.tool_name_mapping.items():
+ if lowered == key.lower().replace("_", "-"):
+ canonical = original
+ break
+ normalized_name = canonical or candidate_name
+ try:
+ args_signature = json.dumps(
+ parsed_tools[0].get("arguments", {}), sort_keys=True
+ )
+ except Exception:
+ args_signature = str(parsed_tools[0].get("arguments", {}))
+ current_signature = f"{normalized_name}|{args_signature}"
+
+ # Identify system-prompt schema mode via unified capabilities
+ caps_loop = self.capabilities.get(model) or ModelCapabilities()
+ is_system_prompt_schema_loop = (
+ caps_loop.schema == ToolSchemaType.SYSTEM_PROMPT
+ )
+
+ if is_system_prompt_schema_loop:
+ if current_signature == last_tool_signature:
+ repeated_tool_calls_count += 1
+ else:
+ repeated_tool_calls_count = 1
+ last_tool_signature = current_signature
+
+ if repeated_tool_calls_count > max_repeated_tool_calls:
+ # Return the last tool result content to avoid infinite loops
+ if tool_result_responses:
+ return cast(
+ "List[ContentBlock | CallToolRequestParams]",
+ tool_result_responses,
+ )
+ # Fallback: return a minimal text indicating no content
+ return cast(
+ "List[ContentBlock | CallToolRequestParams]",
+ [TextContent(text="[No content in tool result]")],
+ )
  # Override stop_reason to handle as tool_use
  stop_reason = "tool_use"
  self.logger.debug(
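The loop guard added in this hunk keys off a fingerprint of the tool call: the normalized tool name plus a sorted-key JSON dump of its arguments, counted across consecutive turns. A minimal standalone sketch of that fingerprinting follows; tool_signature and LoopGuard are invented names used only for illustration.

import json
from typing import Optional


def tool_signature(name: str, arguments: dict) -> str:
    """Build a stable fingerprint: normalized name plus sorted-key JSON of the args."""
    normalized = name.lower().replace("_", "-")
    try:
        args = json.dumps(arguments, sort_keys=True)
    except TypeError:
        args = str(arguments)
    return f"{normalized}|{args}"


class LoopGuard:
    """Count consecutive identical tool calls and trip after max_repeats."""

    def __init__(self, max_repeats: int = 3) -> None:
        self.max_repeats = max_repeats
        self.last_signature: Optional[str] = None
        self.count = 0

    def should_stop(self, name: str, arguments: dict) -> bool:
        sig = tool_signature(name, arguments)
        self.count = self.count + 1 if sig == self.last_signature else 1
        self.last_signature = sig
        return self.count > self.max_repeats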
@@ -1385,22 +1548,10 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

  # Parse tool calls using model-specific method
  self.logger.info(f"DEBUG: About to parse tool response: {processed_response}")
- parsed_tools = self._parse_tool_response(
- processed_response, model or DEFAULT_BEDROCK_MODEL
- )
+ parsed_tools = self._parse_tool_response(processed_response, model)
  self.logger.info(f"DEBUG: Parsed tools: {parsed_tools}")

  if parsed_tools:
- # We will comment out showing the assistant's intermediate message
- # to make the output less chatty, as requested by the user.
- # if not message_text:
- # message_text = Text(
- # "the assistant requested tool calls",
- # style="dim green italic",
- # )
- #
- # await self.show_assistant_message(message_text)
-
  # Process tool calls and collect results
  tool_results_for_batch = []
  for tool_idx, parsed_tool in enumerate(parsed_tools):
@@ -1413,7 +1564,9 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  tool_args = parsed_tool["arguments"]
  tool_use_id = parsed_tool["id"]

- self.show_tool_call(
+ self.show_tool_call(
+ tool_list.tools if tool_list else [], tool_name, tool_args
+ )

  tool_call_request = CallToolRequest(
  method="tools/call",
@@ -1431,15 +1584,17 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  tool_results_for_batch.append((tool_use_id, result, tool_name))
  responses.extend(result.content)

- #
- #
- # the model'
+ # Store tool results temporarily - we'll clear responses only if the model
+ # generates a follow-up message. This ensures tool results are preserved
+ # if the model doesn't generate any follow-up content (like Claude Haiku).
+ tool_result_responses = responses.copy()
  responses.clear()

- #
-
+ # Decide result formatting based on unified capabilities
+ caps_tmp = self.capabilities.get(model) or ModelCapabilities()
+ is_system_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT

- if
+ if is_system_prompt_schema:
  # For system prompt models (like Llama), format results as a simple text message.
  # The model expects to see the results in a human-readable format to continue.
  tool_result_parts = []
@@ -1540,6 +1695,12 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

  self.history.set(new_messages)

+ # If we have no responses but had tool results, restore the tool results
+ # This handles cases like Claude Haiku where the model calls tools but doesn't generate follow-up text
+ if not responses and tool_result_responses:
+ responses = tool_result_responses
+ self.logger.debug("Restored tool results as no follow-up content was generated")
+
  # Strip leading whitespace from the *last* non-empty text block of the final response
  # to ensure the output is clean.
  if responses:
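This hunk pairs with the +1584 changes above: tool results are copied aside before responses is cleared, then restored at the end of the turn if the model produced no follow-up text. Stripped of the surrounding state, the store/clear/restore sequence looks roughly like the sketch below; run_turn is a hypothetical helper, not part of the provider.

def run_turn(tool_results: list, follow_up: list) -> list:
    """Mimic the store/clear/restore dance around a tool-calling turn."""
    responses = list(tool_results)   # results collected from tool execution
    saved = responses.copy()         # stash them before clearing
    responses.clear()                # make room for the model's follow-up text
    responses.extend(follow_up)      # may be empty for some models
    if not responses and saved:      # e.g. a model that stops after the tool call
        responses = saved
    return responses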
@@ -1548,7 +1709,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  item.text = item.text.lstrip()
  break

- return responses
+ return cast("List[ContentBlock | CallToolRequestParams]", responses)

  async def generate_messages(
  self,
@@ -1606,7 +1767,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  if isinstance(content_item, TextContent):
  message_param["content"].append({"type": "text", "text": content_item.text})

- # Generate response
+ # Generate response (structured paths set a one-shot non-streaming hint)
+ self._force_non_streaming_once = True
  return await self.generate_messages(message_param, request_params)

  def _generate_simplified_schema(self, model: Type[ModelT]) -> str:
@@ -1677,49 +1839,169 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  request_params: RequestParams | None = None,
  ) -> Tuple[ModelT | None, PromptMessageMultipart]:
  """Apply structured output for Bedrock using prompt engineering with a simplified schema."""
+ # Short-circuit: if the last message is already an assistant JSON payload,
+ # parse it directly without invoking the model. This restores pre-regression behavior
+ # for tests that seed assistant JSON as the last turn.
+ try:
+ if multipart_messages and multipart_messages[-1].role == "assistant":
+ parsed_model, parsed_mp = self._structured_from_multipart(
+ multipart_messages[-1], model
+ )
+ if parsed_model is not None:
+ return parsed_model, parsed_mp
+ except Exception:
+ # Fall through to normal generation path
+ pass
+
  request_params = self.get_request_params(request_params)

- #
-
+ # For structured outputs: disable reasoning entirely and set temperature=0 for deterministic JSON
+ # This avoids conflicts between reasoning (requires temperature=1) and structured output (wants temperature=0)
+ original_reasoning_effort = self._reasoning_effort
+ self._reasoning_effort = ReasoningEffort.MINIMAL  # Temporarily disable reasoning
+
+ # Override temperature for structured outputs
+ if request_params:
+ request_params = request_params.model_copy(update={"temperature": 0.0})
+ else:
+ request_params = RequestParams(temperature=0.0)
+
+ # Select schema strategy, prefer runtime cache over resolver
+ caps_struct = self.capabilities.get(self.model) or ModelCapabilities()
+ strategy = caps_struct.structured_strategy or StructuredStrategy.STRICT_SCHEMA
+
+ if strategy == StructuredStrategy.SIMPLIFIED_SCHEMA:
+ schema_text = self._generate_simplified_schema(model)
+ else:
+ schema_text = AugmentedLLM.model_to_schema_str(model)

  # Build the new simplified prompt
  prompt_parts = [
  "You are a JSON generator. Respond with JSON that strictly follows the provided schema. Do not add any commentary or explanation.",
  "",
  "JSON Schema:",
-
+ schema_text,
  "",
  "IMPORTANT RULES:",
  "- You MUST respond with only raw JSON data. No other text, commentary, or markdown is allowed.",
  "- All field names and enum values are case-sensitive and must match the schema exactly.",
  "- Do not add any extra fields to the JSON response. Only include the fields specified in the schema.",
+ "- Do not use code fences or backticks (no ```json and no ```).",
+ "- Your output must start with '{' and end with '}'.",
  "- Valid JSON requires double quotes for all field names and string values. Other types (int, float, boolean, etc.) should not be quoted.",
  "",
  "Now, generate the valid JSON response for the following request:",
  ]

- #
-
+ # IMPORTANT: Do NOT mutate the caller's messages. Create a deep copy of the last
+ # user message, append the schema to the copy only, and pass just that copy into
+ # the provider-specific path. This prevents contamination of routed messages.
+ try:
+ temp_last = multipart_messages[-1].model_copy(deep=True)
+ except Exception:
+ # Fallback: construct a minimal copy if model_copy is unavailable
+ temp_last = PromptMessageMultipart(
+ role=multipart_messages[-1].role, content=list(multipart_messages[-1].content)
+ )

-
+ temp_last.add_text("\n".join(prompt_parts))

-
-
+ self.logger.debug(
+ "DEBUG: Using copied last message for structured schema; original left untouched"
  )
-
+
+ try:
+ result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
+ [temp_last], request_params
+ )
+ try:
+ parsed_model, _ = self._structured_from_multipart(result, model)
+ # If parsing returned None (no model instance) we should trigger the retry path
+ if parsed_model is None:
+ raise ValueError("structured parse returned None; triggering retry")
+ return parsed_model, result
+ except Exception:
+ # One retry with stricter JSON-only guidance and simplified schema
+ strict_parts = [
+ "STRICT MODE:",
+ "Return ONLY a single JSON object that matches the schema.",
+ "Do not include any prose, explanations, code fences, or extra characters.",
+ "Start with '{' and end with '}'.",
+ "",
+ "JSON Schema (simplified):",
+ ]
+ try:
+ simplified_schema_text = self._generate_simplified_schema(model)
+ except Exception:
+ simplified_schema_text = AugmentedLLM.model_to_schema_str(model)
+ try:
+ temp_last_retry = multipart_messages[-1].model_copy(deep=True)
+ except Exception:
+ temp_last_retry = PromptMessageMultipart(
+ role=multipart_messages[-1].role,
+ content=list(multipart_messages[-1].content),
+ )
+ temp_last_retry.add_text("\n".join(strict_parts + [simplified_schema_text]))
+
+ retry_result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
+ [temp_last_retry], request_params
+ )
+ return self._structured_from_multipart(retry_result, model)
+ finally:
+ # Restore original reasoning effort
+ self._reasoning_effort = original_reasoning_effort

  def _clean_json_response(self, text: str) -> str:
- """Clean up JSON response by removing text before first { and after last }.
+ """Clean up JSON response by removing text before first { and after last }.
+
+ Also handles cases where models wrap the response in an extra layer like:
+ {"FormattedResponse": {"thinking": "...", "message": "..."}}
+ """
  if not text:
  return text

+ # Strip common code fences (```json ... ``` or ``` ... ```), anywhere in the text
+ try:
+ import re as _re
+
+ fence_match = _re.search(r"```(?:json)?\s*([\s\S]*?)```", text)
+ if fence_match:
+ text = fence_match.group(1)
+ except Exception:
+ pass
+
  # Find the first { and last }
  first_brace = text.find("{")
  last_brace = text.rfind("}")

  # If we found both braces, extract just the JSON part
  if first_brace != -1 and last_brace != -1 and first_brace < last_brace:
-
+ json_part = text[first_brace : last_brace + 1]
+
+ # Check if the JSON is wrapped in an extra layer (common model behavior)
+ try:
+ import json
+
+ parsed = json.loads(json_part)
+
+ # If it's a dict with a single key that matches the model class name,
+ # unwrap it (e.g., {"FormattedResponse": {...}} -> {...})
+ if isinstance(parsed, dict) and len(parsed) == 1:
+ key = list(parsed.keys())[0]
+ # Common wrapper patterns: class name, "response", "result", etc.
+ if key in [
+ "FormattedResponse",
+ "WeatherResponse",
+ "SimpleResponse",
+ ] or key.endswith("Response"):
+ inner_value = parsed[key]
+ if isinstance(inner_value, dict):
+ return json.dumps(inner_value)
+
+ return json_part
+ except json.JSONDecodeError:
+ # If parsing fails, return the original JSON part
+ return json_part

  # Otherwise return the original text
  return text
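The reworked _clean_json_response combines two heuristics before validation: pull the payload out of a ```json fence if one slipped through despite the prompt rules, then unwrap single-key "*Response" wrapper objects. A self-contained approximation is sketched below; clean_json is an invented name, while the regex and wrapper heuristic mirror the diff.

import json
import re


def clean_json(text: str) -> str:
    """Best-effort cleanup of a model's 'JSON' reply before validation."""
    # 1. Prefer the contents of a ```json ... ``` fence if one is present.
    fence = re.search(r"```(?:json)?\s*([\s\S]*?)```", text)
    if fence:
        text = fence.group(1)
    # 2. Keep only the outermost {...} span.
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end == -1 or start >= end:
        return text
    candidate = text[start : end + 1]
    # 3. Unwrap {"SomethingResponse": {...}} style wrappers.
    try:
        parsed = json.loads(candidate)
        if isinstance(parsed, dict) and len(parsed) == 1:
            (key, value), = parsed.items()
            if key.endswith("Response") and isinstance(value, dict):
                return json.dumps(value)
    except json.JSONDecodeError:
        pass
    return candidate


print(clean_json('```json\n{"WeatherResponse": {"temp": 21}}\n```'))  # -> {"temp": 21}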
@@ -1744,8 +2026,14 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  else:
  cleaned_multipart = message

- #
-
+ # Parse using cleaned multipart first
+ model_instance, parsed_multipart = super()._structured_from_multipart(
+ cleaned_multipart, model
+ )
+ if model_instance is not None:
+ return model_instance, parsed_multipart
+ # Fallback: if parsing failed (e.g., assistant-provided JSON already valid), try original
+ return super()._structured_from_multipart(message, model)

  @classmethod
  def convert_message_to_message_param(