fast-agent-mcp 0.2.58__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of fast-agent-mcp has been flagged as potentially problematic.
- fast_agent/__init__.py +127 -0
- fast_agent/agents/__init__.py +36 -0
- {mcp_agent/core → fast_agent/agents}/agent_types.py +2 -1
- fast_agent/agents/llm_agent.py +217 -0
- fast_agent/agents/llm_decorator.py +486 -0
- mcp_agent/agents/base_agent.py → fast_agent/agents/mcp_agent.py +377 -385
- fast_agent/agents/tool_agent.py +168 -0
- {mcp_agent → fast_agent}/agents/workflow/chain_agent.py +43 -33
- {mcp_agent → fast_agent}/agents/workflow/evaluator_optimizer.py +31 -35
- {mcp_agent → fast_agent}/agents/workflow/iterative_planner.py +56 -47
- {mcp_agent → fast_agent}/agents/workflow/orchestrator_models.py +4 -4
- {mcp_agent → fast_agent}/agents/workflow/parallel_agent.py +34 -41
- {mcp_agent → fast_agent}/agents/workflow/router_agent.py +54 -39
- {mcp_agent → fast_agent}/cli/__main__.py +5 -3
- {mcp_agent → fast_agent}/cli/commands/check_config.py +95 -66
- {mcp_agent → fast_agent}/cli/commands/go.py +20 -11
- {mcp_agent → fast_agent}/cli/commands/quickstart.py +4 -4
- {mcp_agent → fast_agent}/cli/commands/server_helpers.py +1 -1
- {mcp_agent → fast_agent}/cli/commands/setup.py +64 -134
- {mcp_agent → fast_agent}/cli/commands/url_parser.py +9 -8
- {mcp_agent → fast_agent}/cli/main.py +36 -16
- {mcp_agent → fast_agent}/cli/terminal.py +2 -2
- {mcp_agent → fast_agent}/config.py +10 -2
- fast_agent/constants.py +8 -0
- {mcp_agent → fast_agent}/context.py +24 -19
- {mcp_agent → fast_agent}/context_dependent.py +9 -5
- fast_agent/core/__init__.py +17 -0
- {mcp_agent → fast_agent}/core/agent_app.py +39 -36
- fast_agent/core/core_app.py +135 -0
- {mcp_agent → fast_agent}/core/direct_decorators.py +12 -26
- {mcp_agent → fast_agent}/core/direct_factory.py +95 -73
- {mcp_agent → fast_agent/core}/executor/executor.py +4 -5
- {mcp_agent → fast_agent}/core/fastagent.py +32 -32
- fast_agent/core/logging/__init__.py +5 -0
- {mcp_agent → fast_agent/core}/logging/events.py +3 -3
- {mcp_agent → fast_agent/core}/logging/json_serializer.py +1 -1
- {mcp_agent → fast_agent/core}/logging/listeners.py +85 -7
- {mcp_agent → fast_agent/core}/logging/logger.py +7 -7
- {mcp_agent → fast_agent/core}/logging/transport.py +10 -11
- fast_agent/core/prompt.py +9 -0
- {mcp_agent → fast_agent}/core/validation.py +4 -4
- fast_agent/event_progress.py +61 -0
- fast_agent/history/history_exporter.py +44 -0
- {mcp_agent → fast_agent}/human_input/__init__.py +9 -12
- {mcp_agent → fast_agent}/human_input/elicitation_handler.py +26 -8
- {mcp_agent → fast_agent}/human_input/elicitation_state.py +7 -7
- {mcp_agent → fast_agent}/human_input/simple_form.py +6 -4
- {mcp_agent → fast_agent}/human_input/types.py +1 -18
- fast_agent/interfaces.py +228 -0
- fast_agent/llm/__init__.py +9 -0
- mcp_agent/llm/augmented_llm.py → fast_agent/llm/fastagent_llm.py +127 -218
- fast_agent/llm/internal/passthrough.py +137 -0
- mcp_agent/llm/augmented_llm_playback.py → fast_agent/llm/internal/playback.py +29 -25
- mcp_agent/llm/augmented_llm_silent.py → fast_agent/llm/internal/silent.py +10 -17
- fast_agent/llm/internal/slow.py +38 -0
- {mcp_agent → fast_agent}/llm/memory.py +40 -30
- {mcp_agent → fast_agent}/llm/model_database.py +35 -2
- {mcp_agent → fast_agent}/llm/model_factory.py +103 -77
- fast_agent/llm/model_info.py +126 -0
- {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/anthropic_utils.py +7 -7
- fast_agent/llm/provider/anthropic/llm_anthropic.py +603 -0
- {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/multipart_converter_anthropic.py +79 -86
- {mcp_agent/llm/providers → fast_agent/llm/provider/bedrock}/bedrock_utils.py +3 -1
- mcp_agent/llm/providers/augmented_llm_bedrock.py → fast_agent/llm/provider/bedrock/llm_bedrock.py +833 -717
- {mcp_agent/llm/providers → fast_agent/llm/provider/google}/google_converter.py +66 -14
- fast_agent/llm/provider/google/llm_google_native.py +431 -0
- mcp_agent/llm/providers/augmented_llm_aliyun.py → fast_agent/llm/provider/openai/llm_aliyun.py +6 -7
- mcp_agent/llm/providers/augmented_llm_azure.py → fast_agent/llm/provider/openai/llm_azure.py +4 -4
- mcp_agent/llm/providers/augmented_llm_deepseek.py → fast_agent/llm/provider/openai/llm_deepseek.py +10 -11
- mcp_agent/llm/providers/augmented_llm_generic.py → fast_agent/llm/provider/openai/llm_generic.py +4 -4
- mcp_agent/llm/providers/augmented_llm_google_oai.py → fast_agent/llm/provider/openai/llm_google_oai.py +4 -4
- mcp_agent/llm/providers/augmented_llm_groq.py → fast_agent/llm/provider/openai/llm_groq.py +14 -16
- mcp_agent/llm/providers/augmented_llm_openai.py → fast_agent/llm/provider/openai/llm_openai.py +133 -207
- mcp_agent/llm/providers/augmented_llm_openrouter.py → fast_agent/llm/provider/openai/llm_openrouter.py +6 -6
- mcp_agent/llm/providers/augmented_llm_tensorzero_openai.py → fast_agent/llm/provider/openai/llm_tensorzero_openai.py +17 -16
- mcp_agent/llm/providers/augmented_llm_xai.py → fast_agent/llm/provider/openai/llm_xai.py +6 -6
- {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/multipart_converter_openai.py +125 -63
- {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_multipart.py +12 -12
- {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_utils.py +18 -16
- {mcp_agent → fast_agent}/llm/provider_key_manager.py +2 -2
- {mcp_agent → fast_agent}/llm/provider_types.py +2 -0
- {mcp_agent → fast_agent}/llm/sampling_converter.py +15 -12
- {mcp_agent → fast_agent}/llm/usage_tracking.py +23 -5
- fast_agent/mcp/__init__.py +43 -0
- {mcp_agent → fast_agent}/mcp/elicitation_factory.py +3 -3
- {mcp_agent → fast_agent}/mcp/elicitation_handlers.py +19 -10
- {mcp_agent → fast_agent}/mcp/gen_client.py +3 -3
- fast_agent/mcp/helpers/__init__.py +36 -0
- fast_agent/mcp/helpers/content_helpers.py +183 -0
- {mcp_agent → fast_agent}/mcp/helpers/server_config_helpers.py +8 -8
- {mcp_agent → fast_agent}/mcp/hf_auth.py +25 -23
- fast_agent/mcp/interfaces.py +93 -0
- {mcp_agent → fast_agent}/mcp/logger_textio.py +4 -4
- {mcp_agent → fast_agent}/mcp/mcp_agent_client_session.py +49 -44
- {mcp_agent → fast_agent}/mcp/mcp_aggregator.py +66 -115
- {mcp_agent → fast_agent}/mcp/mcp_connection_manager.py +16 -23
- {mcp_agent/core → fast_agent/mcp}/mcp_content.py +23 -15
- {mcp_agent → fast_agent}/mcp/mime_utils.py +39 -0
- fast_agent/mcp/prompt.py +159 -0
- mcp_agent/mcp/prompt_message_multipart.py → fast_agent/mcp/prompt_message_extended.py +27 -20
- {mcp_agent → fast_agent}/mcp/prompt_render.py +21 -19
- {mcp_agent → fast_agent}/mcp/prompt_serialization.py +46 -46
- fast_agent/mcp/prompts/__main__.py +7 -0
- {mcp_agent → fast_agent}/mcp/prompts/prompt_helpers.py +31 -30
- {mcp_agent → fast_agent}/mcp/prompts/prompt_load.py +8 -8
- {mcp_agent → fast_agent}/mcp/prompts/prompt_server.py +11 -19
- {mcp_agent → fast_agent}/mcp/prompts/prompt_template.py +18 -18
- {mcp_agent → fast_agent}/mcp/resource_utils.py +1 -1
- {mcp_agent → fast_agent}/mcp/sampling.py +31 -26
- {mcp_agent/mcp_server → fast_agent/mcp/server}/__init__.py +1 -1
- {mcp_agent/mcp_server → fast_agent/mcp/server}/agent_server.py +5 -6
- fast_agent/mcp/ui_agent.py +48 -0
- fast_agent/mcp/ui_mixin.py +209 -0
- fast_agent/mcp_server_registry.py +90 -0
- {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis-campaign.py +5 -4
- {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/forms_demo.py +3 -3
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character.py +2 -2
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character_handler.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/tool_call.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_one.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_two.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/researcher/researcher-eval.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/researcher/researcher-imp.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/researcher/researcher.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/tensorzero/agent.py +2 -2
- {mcp_agent → fast_agent}/resources/examples/tensorzero/image_demo.py +3 -3
- {mcp_agent → fast_agent}/resources/examples/tensorzero/simple_agent.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/workflows/chaining.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/workflows/evaluator.py +3 -3
- {mcp_agent → fast_agent}/resources/examples/workflows/human_input.py +5 -3
- {mcp_agent → fast_agent}/resources/examples/workflows/orchestrator.py +1 -1
- {mcp_agent → fast_agent}/resources/examples/workflows/parallel.py +2 -2
- {mcp_agent → fast_agent}/resources/examples/workflows/router.py +5 -2
- fast_agent/resources/setup/.gitignore +24 -0
- fast_agent/resources/setup/agent.py +18 -0
- fast_agent/resources/setup/fastagent.config.yaml +44 -0
- fast_agent/resources/setup/fastagent.secrets.yaml.example +38 -0
- fast_agent/tools/elicitation.py +369 -0
- fast_agent/types/__init__.py +32 -0
- fast_agent/types/llm_stop_reason.py +77 -0
- fast_agent/ui/__init__.py +38 -0
- fast_agent/ui/console_display.py +1005 -0
- {mcp_agent/human_input → fast_agent/ui}/elicitation_form.py +17 -12
- mcp_agent/human_input/elicitation_forms.py → fast_agent/ui/elicitation_style.py +1 -1
- {mcp_agent/core → fast_agent/ui}/enhanced_prompt.py +96 -25
- {mcp_agent/core → fast_agent/ui}/interactive_prompt.py +330 -125
- fast_agent/ui/mcp_ui_utils.py +224 -0
- {mcp_agent → fast_agent/ui}/progress_display.py +2 -2
- {mcp_agent/logging → fast_agent/ui}/rich_progress.py +4 -4
- {mcp_agent/core → fast_agent/ui}/usage_display.py +3 -8
- {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.0.dist-info}/METADATA +7 -7
- fast_agent_mcp-0.3.0.dist-info/RECORD +202 -0
- fast_agent_mcp-0.3.0.dist-info/entry_points.txt +5 -0
- fast_agent_mcp-0.2.58.dist-info/RECORD +0 -193
- fast_agent_mcp-0.2.58.dist-info/entry_points.txt +0 -6
- mcp_agent/__init__.py +0 -114
- mcp_agent/agents/agent.py +0 -92
- mcp_agent/agents/workflow/__init__.py +0 -1
- mcp_agent/agents/workflow/orchestrator_agent.py +0 -597
- mcp_agent/app.py +0 -175
- mcp_agent/core/__init__.py +0 -26
- mcp_agent/core/prompt.py +0 -191
- mcp_agent/event_progress.py +0 -134
- mcp_agent/human_input/handler.py +0 -81
- mcp_agent/llm/__init__.py +0 -2
- mcp_agent/llm/augmented_llm_passthrough.py +0 -232
- mcp_agent/llm/augmented_llm_slow.py +0 -53
- mcp_agent/llm/providers/__init__.py +0 -8
- mcp_agent/llm/providers/augmented_llm_anthropic.py +0 -718
- mcp_agent/llm/providers/augmented_llm_google_native.py +0 -496
- mcp_agent/llm/providers/sampling_converter_anthropic.py +0 -57
- mcp_agent/llm/providers/sampling_converter_openai.py +0 -26
- mcp_agent/llm/sampling_format_converter.py +0 -37
- mcp_agent/logging/__init__.py +0 -0
- mcp_agent/mcp/__init__.py +0 -50
- mcp_agent/mcp/helpers/__init__.py +0 -25
- mcp_agent/mcp/helpers/content_helpers.py +0 -187
- mcp_agent/mcp/interfaces.py +0 -266
- mcp_agent/mcp/prompts/__init__.py +0 -0
- mcp_agent/mcp/prompts/__main__.py +0 -10
- mcp_agent/mcp_server_registry.py +0 -343
- mcp_agent/tools/tool_definition.py +0 -14
- mcp_agent/ui/console_display.py +0 -790
- mcp_agent/ui/console_display_legacy.py +0 -401
- {mcp_agent → fast_agent}/agents/workflow/orchestrator_prompts.py +0 -0
- {mcp_agent/agents → fast_agent/cli}/__init__.py +0 -0
- {mcp_agent → fast_agent}/cli/constants.py +0 -0
- {mcp_agent → fast_agent}/core/error_handling.py +0 -0
- {mcp_agent → fast_agent}/core/exceptions.py +0 -0
- {mcp_agent/cli → fast_agent/core/executor}/__init__.py +0 -0
- {mcp_agent → fast_agent/core}/executor/task_registry.py +0 -0
- {mcp_agent → fast_agent/core}/executor/workflow_signal.py +0 -0
- {mcp_agent → fast_agent}/human_input/form_fields.py +0 -0
- {mcp_agent → fast_agent}/llm/prompt_utils.py +0 -0
- {mcp_agent/core → fast_agent/llm}/request_params.py +0 -0
- {mcp_agent → fast_agent}/mcp/common.py +0 -0
- {mcp_agent/executor → fast_agent/mcp/prompts}/__init__.py +0 -0
- {mcp_agent → fast_agent}/mcp/prompts/prompt_constants.py +0 -0
- {mcp_agent → fast_agent}/py.typed +0 -0
- {mcp_agent → fast_agent}/resources/examples/data-analysis/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/data-analysis/mount-point/WA_Fn-UseC_-HR-Employee-Attrition.csv +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_account_server.py +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_forms_server.py +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_game_server.py +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.secrets.yaml.example +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.secrets.yaml.example +0 -0
- {mcp_agent → fast_agent}/resources/examples/researcher/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/.env.sample +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/Makefile +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/README.md +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/clam.jpg +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/crab.png +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/shrimp.png +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/docker-compose.yml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/Dockerfile +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/entrypoint.sh +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/mcp_server.py +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/pyproject.toml +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_schema.json +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_template.minijinja +0 -0
- {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/tensorzero.toml +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/fastagent.config.yaml +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/graded_report.md +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/short_story.md +0 -0
- {mcp_agent → fast_agent}/resources/examples/workflows/short_story.txt +0 -0
- {mcp_agent → fast_agent/ui}/console.py +0 -0
- {mcp_agent/core → fast_agent/ui}/mermaid_utils.py +0 -0
- {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.0.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
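The bulk of this release is the package rename from mcp_agent to fast_agent, with provider implementations regrouped under fast_agent/llm/provider/. A minimal sketch of what the import migration might look like for downstream code, assuming the renamed modules keep exporting the classes visible in the diff below (the old-side import is reconstructed from the previous file layout and is illustrative only):

# Before (0.2.58): hypothetical import against the old mcp_agent layout
# from mcp_agent.llm.providers.augmented_llm_bedrock import BedrockAugmentedLLM

# After (0.3.0): module path taken from the rename list above; the class name
# BedrockLLM is the one that appears in the llm_bedrock.py diff below
from fast_agent.llm.provider.bedrock.llm_bedrock import BedrockLLM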
mcp_agent/llm/providers/augmented_llm_bedrock.py → fast_agent/llm/provider/bedrock/llm_bedrock.py
RENAMED
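Two recurring ideas in the rewritten provider below are (a) mapping Bedrock Converse stop reasons onto the new LlmStopReason enum and (b) returning tool calls for external execution instead of running a tool loop inside the provider. A standalone sketch of the stop-reason mapping, using a stand-in enum so it runs outside the package (only the member and string names mirror the diff; the enum values and helper name are assumptions):

from enum import Enum

class LlmStopReason(Enum):
    # Stand-in for fast_agent.types.llm_stop_reason.LlmStopReason; values are guesses
    END_TURN = "endTurn"
    STOP_SEQUENCE = "stopSequence"
    MAX_TOKENS = "maxTokens"
    TOOL_USE = "toolUse"

# Mirrors the BEDROCK_TO_MCP_STOP_REASON table and _map_bedrock_stop_reason in the diff
_BEDROCK_TO_STOP_REASON = {
    "end_turn": LlmStopReason.END_TURN,
    "stop_sequence": LlmStopReason.STOP_SEQUENCE,
    "max_tokens": LlmStopReason.MAX_TOKENS,
    "tool_use": LlmStopReason.TOOL_USE,
}

def map_bedrock_stop_reason(raw: str) -> LlmStopReason:
    # Unknown reasons fall back to END_TURN, as the provider does
    return _BEDROCK_TO_STOP_REASON.get(raw, LlmStopReason.END_TURN)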
|
@@ -4,20 +4,32 @@ import re
|
|
|
4
4
|
import sys
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from enum import Enum, auto
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union
|
|
8
8
|
|
|
9
|
-
from mcp
|
|
10
|
-
from
|
|
9
|
+
from mcp import Tool
|
|
10
|
+
from mcp.types import (
|
|
11
|
+
CallToolRequest,
|
|
12
|
+
CallToolRequestParams,
|
|
13
|
+
ContentBlock,
|
|
14
|
+
TextContent,
|
|
15
|
+
)
|
|
11
16
|
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
17
|
+
from fast_agent.core.exceptions import ProviderKeyError
|
|
18
|
+
from fast_agent.core.logging.logger import get_logger
|
|
19
|
+
from fast_agent.event_progress import ProgressAction
|
|
20
|
+
from fast_agent.interfaces import ModelT
|
|
21
|
+
from fast_agent.llm.fastagent_llm import FastAgentLLM
|
|
22
|
+
from fast_agent.llm.provider_types import Provider
|
|
23
|
+
from fast_agent.llm.usage_tracking import TurnUsage
|
|
24
|
+
from fast_agent.types import PromptMessageExtended, RequestParams
|
|
25
|
+
from fast_agent.types.llm_stop_reason import LlmStopReason
|
|
26
|
+
|
|
27
|
+
# Mapping from Bedrock's snake_case stop reasons to MCP's camelCase
|
|
28
|
+
BEDROCK_TO_MCP_STOP_REASON = {
|
|
29
|
+
"end_turn": LlmStopReason.END_TURN.value,
|
|
30
|
+
"stop_sequence": LlmStopReason.STOP_SEQUENCE.value,
|
|
31
|
+
"max_tokens": LlmStopReason.MAX_TOKENS.value,
|
|
32
|
+
}
|
|
21
33
|
|
|
22
34
|
if TYPE_CHECKING:
|
|
23
35
|
from mcp import ListToolsResult
|
|
@@ -32,11 +44,6 @@ except ImportError:
|
|
|
32
44
|
NoCredentialsError = Exception
|
|
33
45
|
|
|
34
46
|
|
|
35
|
-
from mcp.types import (
|
|
36
|
-
CallToolRequest,
|
|
37
|
-
CallToolRequestParams,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
47
|
DEFAULT_BEDROCK_MODEL = "amazon.nova-lite-v1:0"
|
|
41
48
|
|
|
42
49
|
|
|
@@ -117,7 +124,7 @@ class ModelCapabilities:
|
|
|
117
124
|
supports_tools: bool | None = None # True=yes, False=no, None=unknown
|
|
118
125
|
|
|
119
126
|
|
|
120
|
-
class
|
|
127
|
+
class BedrockLLM(FastAgentLLM[BedrockMessageParam, BedrockMessage]):
|
|
121
128
|
"""
|
|
122
129
|
AWS Bedrock implementation of AugmentedLLM using the Converse API.
|
|
123
130
|
Supports all Bedrock models including Nova, Claude, Meta, etc.
|
|
@@ -166,7 +173,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
166
173
|
Uses the centralized discovery in bedrock_utils; no regex, no fallbacks.
|
|
167
174
|
Gracefully handles environments without AWS access by returning False.
|
|
168
175
|
"""
|
|
169
|
-
from
|
|
176
|
+
from fast_agent.llm.provider.bedrock.bedrock_utils import all_bedrock_models
|
|
170
177
|
|
|
171
178
|
try:
|
|
172
179
|
available = set(all_bedrock_models(prefix=""))
|
|
@@ -310,9 +317,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
310
317
|
"""
|
|
311
318
|
bedrock_tools = []
|
|
312
319
|
|
|
313
|
-
# Create mapping from cleaned names to original names for tool execution
|
|
314
|
-
self.tool_name_mapping = {}
|
|
315
|
-
|
|
316
320
|
self.logger.debug(f"Converting {len(tools.tools)} MCP tools to Nova format")
|
|
317
321
|
|
|
318
322
|
for tool in tools.tools:
|
|
@@ -355,20 +359,20 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
355
359
|
):
|
|
356
360
|
nova_schema["required"] = input_schema["required"]
|
|
357
361
|
|
|
358
|
-
#
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
362
|
+
# Use the tool name mapping that was already built in _bedrock_completion
|
|
363
|
+
# This ensures consistent transformation logic across the codebase
|
|
364
|
+
clean_name = None
|
|
365
|
+
for mapped_name, original_name in tool_name_mapping.items():
|
|
366
|
+
if original_name == tool.name:
|
|
367
|
+
clean_name = mapped_name
|
|
368
|
+
break
|
|
364
369
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
self.tool_name_mapping[clean_name] = tool.name
|
|
370
|
+
if clean_name is None:
|
|
371
|
+
# Fallback if mapping not found (shouldn't happen)
|
|
372
|
+
clean_name = tool.name
|
|
373
|
+
self.logger.warning(
|
|
374
|
+
f"Tool name mapping not found for {tool.name}, using original name"
|
|
375
|
+
)
|
|
372
376
|
|
|
373
377
|
bedrock_tool = {
|
|
374
378
|
"toolSpec": {
|
|
@@ -469,7 +473,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
469
473
|
return bedrock_tools
|
|
470
474
|
|
|
471
475
|
def _parse_system_prompt_tool_response(
|
|
472
|
-
self, processed_response: Dict[str, Any]
|
|
476
|
+
self, processed_response: Dict[str, Any], model: str
|
|
473
477
|
) -> List[Dict[str, Any]]:
|
|
474
478
|
"""Parse system prompt tool response format: function calls in text."""
|
|
475
479
|
# Extract text content from the response
|
|
@@ -518,7 +522,53 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
518
522
|
if tool_calls:
|
|
519
523
|
return tool_calls
|
|
520
524
|
|
|
521
|
-
# Second try: find the "
|
|
525
|
+
# Second try: find the "Action:" format (commonly used by Nova models)
|
|
526
|
+
action_pattern = r"Action:\s*([^(]+)\(([^)]*)\)"
|
|
527
|
+
action_matches = re.findall(action_pattern, text_content)
|
|
528
|
+
if action_matches:
|
|
529
|
+
for i, (func_name, args_str) in enumerate(action_matches):
|
|
530
|
+
func_name = func_name.strip()
|
|
531
|
+
args_str = args_str.strip()
|
|
532
|
+
|
|
533
|
+
# Parse arguments - handle quoted strings and key=value pairs
|
|
534
|
+
arguments = {}
|
|
535
|
+
if args_str:
|
|
536
|
+
try:
|
|
537
|
+
# Handle key=value format like location="London"
|
|
538
|
+
if "=" in args_str:
|
|
539
|
+
# Split by comma, then by = for each part
|
|
540
|
+
for arg_part in args_str.split(","):
|
|
541
|
+
if "=" in arg_part:
|
|
542
|
+
key, value = arg_part.split("=", 1)
|
|
543
|
+
key = key.strip()
|
|
544
|
+
value = value.strip().strip("\"'") # Remove quotes
|
|
545
|
+
arguments[key] = value
|
|
546
|
+
else:
|
|
547
|
+
# Single value argument - try to map to appropriate parameter name
|
|
548
|
+
value = args_str.strip("\"'") if args_str else ""
|
|
549
|
+
# Handle common single-parameter functions
|
|
550
|
+
if func_name == "check_weather":
|
|
551
|
+
arguments = {"location": value}
|
|
552
|
+
else:
|
|
553
|
+
# Generic fallback
|
|
554
|
+
arguments = {"value": value}
|
|
555
|
+
except Exception as e:
|
|
556
|
+
self.logger.warning(f"Failed to parse Action arguments: {args_str} - {e}")
|
|
557
|
+
arguments = {"value": args_str}
|
|
558
|
+
|
|
559
|
+
tool_calls.append(
|
|
560
|
+
{
|
|
561
|
+
"type": "system_prompt_tool",
|
|
562
|
+
"name": func_name,
|
|
563
|
+
"arguments": arguments,
|
|
564
|
+
"id": f"system_prompt_{func_name}_{i}",
|
|
565
|
+
}
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
if tool_calls:
|
|
569
|
+
return tool_calls
|
|
570
|
+
|
|
571
|
+
# Third try: find the "Tool Call:" format
|
|
522
572
|
tool_call_match = re.search(r"Tool Call:\s*(\[.*?\])", text_content, re.DOTALL)
|
|
523
573
|
if tool_call_match:
|
|
524
574
|
json_str = tool_call_match.group(1)
|
|
@@ -606,6 +656,49 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
606
656
|
f"Failed to parse fallback custom tag format: {function_args_json}"
|
|
607
657
|
)
|
|
608
658
|
|
|
659
|
+
# Third try: find direct function call format like "function_name(args)"
|
|
660
|
+
direct_call_pattern = r"^([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]*)\)$"
|
|
661
|
+
direct_call_match = re.search(direct_call_pattern, text_content.strip())
|
|
662
|
+
if direct_call_match:
|
|
663
|
+
func_name, args_str = direct_call_match.groups()
|
|
664
|
+
func_name = func_name.strip()
|
|
665
|
+
args_str = args_str.strip()
|
|
666
|
+
|
|
667
|
+
# Parse arguments
|
|
668
|
+
arguments = {}
|
|
669
|
+
if args_str:
|
|
670
|
+
try:
|
|
671
|
+
# Handle key=value format like location="London"
|
|
672
|
+
if "=" in args_str:
|
|
673
|
+
# Split by comma, then by = for each part
|
|
674
|
+
for arg_part in args_str.split(","):
|
|
675
|
+
if "=" in arg_part:
|
|
676
|
+
key, value = arg_part.split("=", 1)
|
|
677
|
+
key = key.strip()
|
|
678
|
+
value = value.strip().strip("\"'") # Remove quotes
|
|
679
|
+
arguments[key] = value
|
|
680
|
+
else:
|
|
681
|
+
# Single value argument - try to map to appropriate parameter name
|
|
682
|
+
value = args_str.strip("\"'") if args_str else ""
|
|
683
|
+
# Handle common single-parameter functions
|
|
684
|
+
if func_name == "check_weather":
|
|
685
|
+
arguments = {"location": value}
|
|
686
|
+
else:
|
|
687
|
+
# Generic fallback
|
|
688
|
+
arguments = {"value": value}
|
|
689
|
+
except Exception as e:
|
|
690
|
+
self.logger.warning(f"Failed to parse direct call arguments: {args_str} - {e}")
|
|
691
|
+
arguments = {"value": args_str}
|
|
692
|
+
|
|
693
|
+
return [
|
|
694
|
+
{
|
|
695
|
+
"type": "system_prompt_tool",
|
|
696
|
+
"name": func_name,
|
|
697
|
+
"arguments": arguments,
|
|
698
|
+
"id": f"system_prompt_{func_name}_0",
|
|
699
|
+
}
|
|
700
|
+
]
|
|
701
|
+
|
|
609
702
|
return []
|
|
610
703
|
|
|
611
704
|
def _parse_anthropic_tool_response(
|
|
@@ -638,7 +731,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
638
731
|
|
|
639
732
|
# Choose parser strictly by cached schema
|
|
640
733
|
if schema == ToolSchemaType.SYSTEM_PROMPT:
|
|
641
|
-
return self._parse_system_prompt_tool_response(processed_response)
|
|
734
|
+
return self._parse_system_prompt_tool_response(processed_response, model)
|
|
642
735
|
if schema == ToolSchemaType.ANTHROPIC:
|
|
643
736
|
return self._parse_anthropic_tool_response(processed_response)
|
|
644
737
|
|
|
@@ -697,8 +790,145 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
697
790
|
except Exception:
|
|
698
791
|
pass
|
|
699
792
|
|
|
793
|
+
# Final fallback: try system prompt parsing regardless of cached schema
|
|
794
|
+
# This handles cases where native tool calling failed but model generated system prompt format
|
|
795
|
+
try:
|
|
796
|
+
return self._parse_system_prompt_tool_response(processed_response, model)
|
|
797
|
+
except Exception:
|
|
798
|
+
pass
|
|
799
|
+
|
|
700
800
|
return []
|
|
701
801
|
|
|
802
|
+
def _build_tool_calls_dict(
|
|
803
|
+
self, parsed_tools: List[Dict[str, Any]]
|
|
804
|
+
) -> Dict[str, CallToolRequest]:
|
|
805
|
+
"""
|
|
806
|
+
Convert parsed tools to CallToolRequest dict for external execution.
|
|
807
|
+
|
|
808
|
+
Args:
|
|
809
|
+
parsed_tools: List of parsed tool dictionaries from _parse_tool_response()
|
|
810
|
+
|
|
811
|
+
Returns:
|
|
812
|
+
Dictionary mapping tool_use_id to CallToolRequest objects
|
|
813
|
+
"""
|
|
814
|
+
tool_calls = {}
|
|
815
|
+
for parsed_tool in parsed_tools:
|
|
816
|
+
# Use tool name directly, but map back to original if a mapping is available
|
|
817
|
+
tool_name = parsed_tool["name"]
|
|
818
|
+
try:
|
|
819
|
+
mapping = getattr(self, "tool_name_mapping", None)
|
|
820
|
+
if isinstance(mapping, dict):
|
|
821
|
+
tool_name = mapping.get(tool_name, tool_name)
|
|
822
|
+
except Exception:
|
|
823
|
+
pass
|
|
824
|
+
|
|
825
|
+
# Create CallToolRequest
|
|
826
|
+
tool_call = CallToolRequest(
|
|
827
|
+
method="tools/call",
|
|
828
|
+
params=CallToolRequestParams(
|
|
829
|
+
name=tool_name, arguments=parsed_tool.get("arguments", {})
|
|
830
|
+
),
|
|
831
|
+
)
|
|
832
|
+
tool_calls[parsed_tool["id"]] = tool_call
|
|
833
|
+
return tool_calls
|
|
834
|
+
|
|
835
|
+
def _map_bedrock_stop_reason(self, bedrock_stop_reason: str) -> LlmStopReason:
|
|
836
|
+
"""
|
|
837
|
+
Map Bedrock stop reasons to LlmStopReason enum.
|
|
838
|
+
|
|
839
|
+
Args:
|
|
840
|
+
bedrock_stop_reason: Stop reason from Bedrock API
|
|
841
|
+
|
|
842
|
+
Returns:
|
|
843
|
+
Corresponding LlmStopReason enum value
|
|
844
|
+
"""
|
|
845
|
+
if bedrock_stop_reason == "tool_use":
|
|
846
|
+
return LlmStopReason.TOOL_USE
|
|
847
|
+
elif bedrock_stop_reason == "end_turn":
|
|
848
|
+
return LlmStopReason.END_TURN
|
|
849
|
+
elif bedrock_stop_reason == "stop_sequence":
|
|
850
|
+
return LlmStopReason.STOP_SEQUENCE
|
|
851
|
+
elif bedrock_stop_reason == "max_tokens":
|
|
852
|
+
return LlmStopReason.MAX_TOKENS
|
|
853
|
+
else:
|
|
854
|
+
# Default to END_TURN for unknown stop reasons, but log for debugging
|
|
855
|
+
self.logger.warning(
|
|
856
|
+
f"Unknown Bedrock stop reason: {bedrock_stop_reason}, defaulting to END_TURN"
|
|
857
|
+
)
|
|
858
|
+
return LlmStopReason.END_TURN
|
|
859
|
+
|
|
860
|
+
def _convert_multipart_to_bedrock_message(
|
|
861
|
+
self, msg: PromptMessageExtended
|
|
862
|
+
) -> BedrockMessageParam:
|
|
863
|
+
"""
|
|
864
|
+
Convert a PromptMessageExtended to Bedrock message parameter format.
|
|
865
|
+
Handles tool results and regular content.
|
|
866
|
+
|
|
867
|
+
Args:
|
|
868
|
+
msg: PromptMessageExtended message to convert
|
|
869
|
+
|
|
870
|
+
Returns:
|
|
871
|
+
Bedrock message parameter dictionary
|
|
872
|
+
"""
|
|
873
|
+
bedrock_msg = {"role": msg.role, "content": []}
|
|
874
|
+
|
|
875
|
+
# Handle tool results first (if present)
|
|
876
|
+
if msg.tool_results:
|
|
877
|
+
# Get the cached schema type to determine result formatting
|
|
878
|
+
caps = self.capabilities.get(self.model) or ModelCapabilities()
|
|
879
|
+
# Check if any tool ID indicates system prompt format
|
|
880
|
+
has_system_prompt_tools = any(
|
|
881
|
+
tool_id.startswith("system_prompt_") for tool_id in msg.tool_results.keys()
|
|
882
|
+
)
|
|
883
|
+
is_system_prompt_schema = (
|
|
884
|
+
caps.schema == ToolSchemaType.SYSTEM_PROMPT or has_system_prompt_tools
|
|
885
|
+
)
|
|
886
|
+
|
|
887
|
+
if is_system_prompt_schema:
|
|
888
|
+
# For system prompt models: format as human-readable text
|
|
889
|
+
tool_result_parts = []
|
|
890
|
+
for tool_id, tool_result in msg.tool_results.items():
|
|
891
|
+
result_text = "".join(
|
|
892
|
+
part.text for part in tool_result.content if isinstance(part, TextContent)
|
|
893
|
+
)
|
|
894
|
+
result_payload = {
|
|
895
|
+
"tool_name": tool_id, # Use tool_id as name for system prompt
|
|
896
|
+
"status": "error" if tool_result.isError else "success",
|
|
897
|
+
"result": result_text,
|
|
898
|
+
}
|
|
899
|
+
tool_result_parts.append(json.dumps(result_payload))
|
|
900
|
+
|
|
901
|
+
if tool_result_parts:
|
|
902
|
+
full_result_text = f"Tool Results:\n{', '.join(tool_result_parts)}"
|
|
903
|
+
bedrock_msg["content"].append({"type": "text", "text": full_result_text})
|
|
904
|
+
else:
|
|
905
|
+
# For Nova/Anthropic models: use structured tool_result format
|
|
906
|
+
for tool_id, tool_result in msg.tool_results.items():
|
|
907
|
+
result_content_blocks = []
|
|
908
|
+
if tool_result.content:
|
|
909
|
+
for part in tool_result.content:
|
|
910
|
+
if isinstance(part, TextContent):
|
|
911
|
+
result_content_blocks.append({"text": part.text})
|
|
912
|
+
|
|
913
|
+
if not result_content_blocks:
|
|
914
|
+
result_content_blocks.append({"text": "[No content in tool result]"})
|
|
915
|
+
|
|
916
|
+
bedrock_msg["content"].append(
|
|
917
|
+
{
|
|
918
|
+
"type": "tool_result",
|
|
919
|
+
"tool_use_id": tool_id,
|
|
920
|
+
"content": result_content_blocks,
|
|
921
|
+
"status": "error" if tool_result.isError else "success",
|
|
922
|
+
}
|
|
923
|
+
)
|
|
924
|
+
|
|
925
|
+
# Handle regular content
|
|
926
|
+
for content_item in msg.content:
|
|
927
|
+
if isinstance(content_item, TextContent):
|
|
928
|
+
bedrock_msg["content"].append({"type": "text", "text": content_item.text})
|
|
929
|
+
|
|
930
|
+
return bedrock_msg
|
|
931
|
+
|
|
702
932
|
def _convert_messages_to_bedrock(
|
|
703
933
|
self, messages: List[BedrockMessageParam]
|
|
704
934
|
) -> List[Dict[str, Any]]:
|
|
@@ -846,8 +1076,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
846
1076
|
self.logger.warning(
|
|
847
1077
|
f"Failed to parse accumulated input as JSON: {accumulated_input} - {e}"
|
|
848
1078
|
)
|
|
849
|
-
# If it's not valid JSON,
|
|
850
|
-
tool_use["toolUse"]["input"] = accumulated_input
|
|
1079
|
+
# If it's not valid JSON, wrap it as a dict to avoid downstream errors
|
|
1080
|
+
tool_use["toolUse"]["input"] = {"value": accumulated_input}
|
|
851
1081
|
# Clean up the accumulator
|
|
852
1082
|
del tool_use["toolUse"]["_input_accumulator"]
|
|
853
1083
|
continue
|
|
@@ -913,8 +1143,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
913
1143
|
self.logger.warning(
|
|
914
1144
|
f"Failed to parse final accumulated input as JSON: {accumulated_input} - {e}"
|
|
915
1145
|
)
|
|
916
|
-
# If it's not valid JSON,
|
|
917
|
-
tool_use["toolUse"]["input"] = accumulated_input
|
|
1146
|
+
# If it's not valid JSON, wrap it as a dict to avoid downstream errors
|
|
1147
|
+
tool_use["toolUse"]["input"] = {"value": accumulated_input}
|
|
918
1148
|
# Clean up the accumulator
|
|
919
1149
|
del tool_use["toolUse"]["_input_accumulator"]
|
|
920
1150
|
|
|
@@ -961,9 +1191,11 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
961
1191
|
self,
|
|
962
1192
|
message_param: BedrockMessageParam,
|
|
963
1193
|
request_params: RequestParams | None = None,
|
|
964
|
-
|
|
1194
|
+
tools: List[Tool] | None = None,
|
|
1195
|
+
) -> PromptMessageExtended:
|
|
965
1196
|
"""
|
|
966
1197
|
Process a query using Bedrock and available tools.
|
|
1198
|
+
Returns PromptMessageExtended with tool calls for external execution.
|
|
967
1199
|
"""
|
|
968
1200
|
client = self._get_bedrock_runtime_client()
|
|
969
1201
|
|
|
@@ -1001,684 +1233,591 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
|
|
|
1001
1233
|
self.logger.debug(f"Traceback: {traceback.format_exc()}")
|
|
1002
1234
|
tool_list = None
|
|
1003
1235
|
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1236
|
+
# Use tools parameter if provided, otherwise get from aggregator
|
|
1237
|
+
if tools is None:
|
|
1238
|
+
tools = tool_list.tools if tool_list else []
|
|
1239
|
+
elif tool_list is None and tools:
|
|
1240
|
+
# Create a ListToolsResult from the provided tools for conversion
|
|
1241
|
+
from mcp.types import ListToolsResult
|
|
1242
|
+
|
|
1243
|
+
tool_list = ListToolsResult(tools=tools)
|
|
1011
1244
|
|
|
1012
|
-
|
|
1013
|
-
|
|
1245
|
+
response_content_blocks: List[ContentBlock] = []
|
|
1246
|
+
model = self.default_request_params.model
|
|
1014
1247
|
|
|
1015
|
-
|
|
1248
|
+
# Single API call - no tool execution loop
|
|
1249
|
+
self._log_chat_progress(self.chat_turn(), model=model)
|
|
1016
1250
|
|
|
1017
|
-
|
|
1018
|
-
|
|
1251
|
+
# Convert messages to Bedrock format
|
|
1252
|
+
bedrock_messages = self._convert_messages_to_bedrock(messages)
|
|
1019
1253
|
|
|
1020
|
-
|
|
1021
|
-
|
|
1254
|
+
# Base system text
|
|
1255
|
+
base_system_text = self.instruction or params.systemPrompt
|
|
1022
1256
|
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1257
|
+
# Determine tool schema fallback order and caches
|
|
1258
|
+
caps = self.capabilities.get(model) or ModelCapabilities()
|
|
1259
|
+
if caps.schema and caps.schema != ToolSchemaType.NONE:
|
|
1260
|
+
# Special case: Force Mistral 7B to try SYSTEM_PROMPT instead of cached DEFAULT
|
|
1261
|
+
if (
|
|
1262
|
+
model == "mistral.mistral-7b-instruct-v0:2"
|
|
1263
|
+
and caps.schema == ToolSchemaType.DEFAULT
|
|
1264
|
+
):
|
|
1265
|
+
print(
|
|
1266
|
+
f"🔧 FORCING SYSTEM_PROMPT for {model} (was cached as DEFAULT)",
|
|
1267
|
+
file=sys.stderr,
|
|
1268
|
+
flush=True,
|
|
1269
|
+
)
|
|
1270
|
+
schema_order = [ToolSchemaType.SYSTEM_PROMPT, ToolSchemaType.DEFAULT]
|
|
1271
|
+
else:
|
|
1026
1272
|
schema_order = [caps.schema]
|
|
1273
|
+
else:
|
|
1274
|
+
# Restore original fallback order: Anthropic models try anthropic first, others skip it
|
|
1275
|
+
if model.startswith("anthropic."):
|
|
1276
|
+
schema_order = [
|
|
1277
|
+
ToolSchemaType.ANTHROPIC,
|
|
1278
|
+
ToolSchemaType.DEFAULT,
|
|
1279
|
+
ToolSchemaType.SYSTEM_PROMPT,
|
|
1280
|
+
]
|
|
1281
|
+
elif model == "mistral.mistral-7b-instruct-v0:2":
|
|
1282
|
+
# Force Mistral 7B to try SYSTEM_PROMPT first (it doesn't work well with DEFAULT)
|
|
1283
|
+
schema_order = [
|
|
1284
|
+
ToolSchemaType.SYSTEM_PROMPT,
|
|
1285
|
+
ToolSchemaType.DEFAULT,
|
|
1286
|
+
]
|
|
1027
1287
|
else:
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
ToolSchemaType.DEFAULT,
|
|
1033
|
-
ToolSchemaType.SYSTEM_PROMPT,
|
|
1034
|
-
]
|
|
1035
|
-
else:
|
|
1036
|
-
schema_order = [
|
|
1037
|
-
ToolSchemaType.DEFAULT,
|
|
1038
|
-
ToolSchemaType.SYSTEM_PROMPT,
|
|
1039
|
-
]
|
|
1040
|
-
|
|
1041
|
-
# Track whether we changed system mode cache this turn
|
|
1042
|
-
tried_system_fallback = False
|
|
1288
|
+
schema_order = [
|
|
1289
|
+
ToolSchemaType.DEFAULT,
|
|
1290
|
+
ToolSchemaType.SYSTEM_PROMPT,
|
|
1291
|
+
]
|
|
1043
1292
|
|
|
1044
|
-
|
|
1045
|
-
|
|
1293
|
+
# Track whether we changed system mode cache this turn
|
|
1294
|
+
tried_system_fallback = False
|
|
1046
1295
|
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
converse_args = {"modelId": model, "messages": [dict(m) for m in bedrock_messages]}
|
|
1296
|
+
processed_response = None # type: ignore[assignment]
|
|
1297
|
+
last_error_msg = None
|
|
1050
1298
|
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
# Build tool name mapping once per schema attempt
|
|
1055
|
-
name_policy = (
|
|
1056
|
-
self.capabilities.get(model) or ModelCapabilities()
|
|
1057
|
-
).tool_name_policy or ToolNamePolicy.PRESERVE
|
|
1058
|
-
tool_name_mapping = self._build_tool_name_mapping(tool_list, name_policy)
|
|
1299
|
+
for schema_choice in schema_order:
|
|
1300
|
+
# Fresh messages per attempt
|
|
1301
|
+
converse_args = {"modelId": model, "messages": [dict(m) for m in bedrock_messages]}
|
|
1059
1302
|
|
|
1060
|
-
|
|
1061
|
-
|
|
1303
|
+
# Build tools representation for this schema
|
|
1304
|
+
tools_payload: Union[List[Dict[str, Any]], str, None] = None
|
|
1305
|
+
# Get tool name policy (needed even when no tools for cache logic)
|
|
1306
|
+
name_policy = (
|
|
1307
|
+
self.capabilities.get(model) or ModelCapabilities()
|
|
1308
|
+
).tool_name_policy or ToolNamePolicy.PRESERVE
|
|
1062
1309
|
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
)
|
|
1067
|
-
elif schema_choice == ToolSchemaType.DEFAULT:
|
|
1068
|
-
# Set tool name policy for Nova conversion
|
|
1069
|
-
self._tool_name_policy_for_conversion = (
|
|
1070
|
-
"replace_hyphens_with_underscores"
|
|
1071
|
-
if name_policy == ToolNamePolicy.UNDERSCORES
|
|
1072
|
-
else "preserve"
|
|
1073
|
-
)
|
|
1074
|
-
tools_payload = self._convert_tools_nova_format(
|
|
1075
|
-
tool_list, tool_name_mapping
|
|
1076
|
-
)
|
|
1077
|
-
elif schema_choice == ToolSchemaType.SYSTEM_PROMPT:
|
|
1078
|
-
tools_payload = self._convert_tools_system_prompt_format(
|
|
1079
|
-
tool_list, tool_name_mapping
|
|
1080
|
-
)
|
|
1310
|
+
if tool_list and tool_list.tools:
|
|
1311
|
+
# Build tool name mapping once per schema attempt
|
|
1312
|
+
tool_name_mapping = self._build_tool_name_mapping(tool_list, name_policy)
|
|
1081
1313
|
|
|
1082
|
-
#
|
|
1083
|
-
|
|
1084
|
-
self.capabilities.get(model) or ModelCapabilities()
|
|
1085
|
-
).system_mode or SystemMode.SYSTEM
|
|
1086
|
-
system_text = base_system_text
|
|
1314
|
+
# Store mapping for tool execution
|
|
1315
|
+
self.tool_name_mapping = tool_name_mapping
|
|
1087
1316
|
|
|
1088
|
-
if
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1317
|
+
if schema_choice == ToolSchemaType.ANTHROPIC:
|
|
1318
|
+
tools_payload = self._convert_tools_anthropic_format(
|
|
1319
|
+
tool_list, tool_name_mapping
|
|
1320
|
+
)
|
|
1321
|
+
elif schema_choice == ToolSchemaType.DEFAULT:
|
|
1322
|
+
tools_payload = self._convert_tools_nova_format(tool_list, tool_name_mapping)
|
|
1323
|
+
elif schema_choice == ToolSchemaType.SYSTEM_PROMPT:
|
|
1324
|
+
tools_payload = self._convert_tools_system_prompt_format(
|
|
1325
|
+
tool_list, tool_name_mapping
|
|
1095
1326
|
)
|
|
1096
1327
|
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
)
|
|
1103
|
-
else:
|
|
1104
|
-
# inject
|
|
1105
|
-
if (
|
|
1106
|
-
converse_args["messages"]
|
|
1107
|
-
and converse_args["messages"][0].get("role") == "user"
|
|
1108
|
-
):
|
|
1109
|
-
first_message = converse_args["messages"][0]
|
|
1110
|
-
if first_message.get("content") and len(first_message["content"]) > 0:
|
|
1111
|
-
original_text = first_message["content"][0].get("text", "")
|
|
1112
|
-
first_message["content"][0]["text"] = (
|
|
1113
|
-
f"System: {system_text}\n\nUser: {original_text}"
|
|
1114
|
-
)
|
|
1115
|
-
self.logger.debug(
|
|
1116
|
-
"Injected system prompt into first user message (cached mode)"
|
|
1117
|
-
)
|
|
1118
|
-
|
|
1119
|
-
# Tools wiring
|
|
1120
|
-
if (
|
|
1121
|
-
schema_choice in (ToolSchemaType.ANTHROPIC, ToolSchemaType.DEFAULT)
|
|
1122
|
-
and isinstance(tools_payload, list)
|
|
1123
|
-
and tools_payload
|
|
1124
|
-
):
|
|
1125
|
-
converse_args["toolConfig"] = {"tools": tools_payload}
|
|
1126
|
-
|
|
1127
|
-
# Inference configuration and overrides
|
|
1128
|
-
inference_config: Dict[str, Any] = {}
|
|
1129
|
-
if params.maxTokens is not None:
|
|
1130
|
-
inference_config["maxTokens"] = params.maxTokens
|
|
1131
|
-
if params.stopSequences:
|
|
1132
|
-
inference_config["stopSequences"] = params.stopSequences
|
|
1133
|
-
|
|
1134
|
-
# Check if reasoning should be enabled
|
|
1135
|
-
reasoning_budget = 0
|
|
1136
|
-
if self._reasoning_effort and self._reasoning_effort != ReasoningEffort.MINIMAL:
|
|
1137
|
-
# Convert string to enum if needed
|
|
1138
|
-
if isinstance(self._reasoning_effort, str):
|
|
1139
|
-
try:
|
|
1140
|
-
effort_enum = ReasoningEffort(self._reasoning_effort)
|
|
1141
|
-
except ValueError:
|
|
1142
|
-
effort_enum = ReasoningEffort.MINIMAL
|
|
1143
|
-
else:
|
|
1144
|
-
effort_enum = self._reasoning_effort
|
|
1145
|
-
|
|
1146
|
-
if effort_enum != ReasoningEffort.MINIMAL:
|
|
1147
|
-
reasoning_budget = REASONING_EFFORT_BUDGETS.get(effort_enum, 0)
|
|
1148
|
-
|
|
1149
|
-
# Handle temperature and reasoning configuration
|
|
1150
|
-
# AWS docs: "Thinking isn't compatible with temperature, top_p, or top_k modifications"
|
|
1151
|
-
reasoning_enabled = False
|
|
1152
|
-
if reasoning_budget > 0:
|
|
1153
|
-
# Check if this model supports reasoning (with caching)
|
|
1154
|
-
cached_reasoning = (
|
|
1155
|
-
self.capabilities.get(model) or ModelCapabilities()
|
|
1156
|
-
).reasoning_support
|
|
1157
|
-
if cached_reasoning == "supported":
|
|
1158
|
-
# We know this model supports reasoning
|
|
1159
|
-
converse_args["performanceConfig"] = {
|
|
1160
|
-
"reasoning": {"maxReasoningTokens": reasoning_budget}
|
|
1161
|
-
}
|
|
1162
|
-
reasoning_enabled = True
|
|
1163
|
-
elif cached_reasoning != "unsupported":
|
|
1164
|
-
# Unknown - we'll try reasoning and fallback if needed
|
|
1165
|
-
converse_args["performanceConfig"] = {
|
|
1166
|
-
"reasoning": {"maxReasoningTokens": reasoning_budget}
|
|
1167
|
-
}
|
|
1168
|
-
reasoning_enabled = True
|
|
1328
|
+
# System prompt handling with cache
|
|
1329
|
+
system_mode = (
|
|
1330
|
+
self.capabilities.get(model) or ModelCapabilities()
|
|
1331
|
+
).system_mode or SystemMode.SYSTEM
|
|
1332
|
+
system_text = base_system_text
|
|
1169
1333
|
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1334
|
+
if (
|
|
1335
|
+
schema_choice == ToolSchemaType.SYSTEM_PROMPT
|
|
1336
|
+
and isinstance(tools_payload, str)
|
|
1337
|
+
and tools_payload
|
|
1338
|
+
):
|
|
1339
|
+
system_text = f"{system_text}\n\n{tools_payload}" if system_text else tools_payload
|
|
1174
1340
|
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1341
|
+
# Cohere-specific nudge: force exact echo of tool result text on final answer
|
|
1342
|
+
if (
|
|
1343
|
+
schema_choice == ToolSchemaType.SYSTEM_PROMPT
|
|
1344
|
+
and isinstance(model, str)
|
|
1345
|
+
and model.startswith("cohere.")
|
|
1346
|
+
):
|
|
1347
|
+
cohere_nudge = (
|
|
1348
|
+
"FINAL ANSWER RULES (STRICT):\n"
|
|
1349
|
+
"- When a tool result is provided, your final answer MUST be exactly the raw tool result text.\n"
|
|
1350
|
+
"- Do not add any extra words, punctuation, qualifiers, or phrases (e.g., 'according to the tool').\n"
|
|
1351
|
+
"- Example: If tool result text is 'It"
|
|
1352
|
+
"s sunny in London', your final answer must be exactly: It"
|
|
1353
|
+
"s sunny in London\n"
|
|
1354
|
+
)
|
|
1355
|
+
system_text = f"{system_text}\n\n{cohere_nudge}" if system_text else cohere_nudge
|
|
1182
1356
|
|
|
1183
|
-
|
|
1357
|
+
# Llama3-specific nudge: prevent paraphrasing and extra tool calls
|
|
1358
|
+
if (
|
|
1359
|
+
schema_choice == ToolSchemaType.SYSTEM_PROMPT
|
|
1360
|
+
and isinstance(model, str)
|
|
1361
|
+
and model.startswith("meta.llama3")
|
|
1362
|
+
):
|
|
1363
|
+
llama_nudge = (
|
|
1364
|
+
"TOOL RESPONSE RULES:\n"
|
|
1365
|
+
"- After receiving a tool result, immediately output ONLY the exact tool result text.\n"
|
|
1366
|
+
"- Do not call additional tools or add commentary.\n"
|
|
1367
|
+
"- Do not paraphrase or modify the tool result in any way."
|
|
1368
|
+
)
|
|
1369
|
+
system_text = f"{system_text}\n\n{llama_nudge}" if system_text else llama_nudge
|
|
1184
1370
|
|
|
1185
|
-
|
|
1186
|
-
|
|
1371
|
+
# Mistral-specific nudge: prevent tool calling loops and accept tool results
|
|
1372
|
+
if (
|
|
1373
|
+
schema_choice == ToolSchemaType.SYSTEM_PROMPT
|
|
1374
|
+
and isinstance(model, str)
|
|
1375
|
+
and model.startswith("mistral.")
|
|
1376
|
+
):
|
|
1377
|
+
mistral_nudge = (
|
|
1378
|
+
"TOOL EXECUTION RULES:\n"
|
|
1379
|
+
"- Call each tool only ONCE per conversation turn.\n"
|
|
1380
|
+
"- Accept and trust all tool results - do not question or retry them.\n"
|
|
1381
|
+
"- After receiving a tool result, provide a direct answer based on that result.\n"
|
|
1382
|
+
"- Do not call the same tool multiple times or call additional tools unless specifically requested.\n"
|
|
1383
|
+
"- Tool results are always valid - do not attempt to validate or correct them."
|
|
1384
|
+
)
|
|
1385
|
+
system_text = f"{system_text}\n\n{mistral_nudge}" if system_text else mistral_nudge
|
|
1187
1386
|
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
or (isinstance(tools_payload, str) and tools_payload.strip())
|
|
1387
|
+
if system_text:
|
|
1388
|
+
if system_mode == SystemMode.SYSTEM:
|
|
1389
|
+
converse_args["system"] = [{"text": system_text}]
|
|
1390
|
+
self.logger.debug(
|
|
1391
|
+
f"Attempting with system param for {model} and schema={schema_choice}"
|
|
1194
1392
|
)
|
|
1393
|
+
else:
|
|
1394
|
+
# inject
|
|
1395
|
+
if (
|
|
1396
|
+
converse_args["messages"]
|
|
1397
|
+
and converse_args["messages"][0].get("role") == "user"
|
|
1398
|
+
):
|
|
1399
|
+
first_message = converse_args["messages"][0]
|
|
1400
|
+
if first_message.get("content") and len(first_message["content"]) > 0:
|
|
1401
|
+
original_text = first_message["content"][0].get("text", "")
|
|
1402
|
+
first_message["content"][0]["text"] = (
|
|
1403
|
+
f"System: {system_text}\n\nUser: {original_text}"
|
|
1404
|
+
)
|
|
1405
|
+
self.logger.debug(
|
|
1406
|
+
"Injected system prompt into first user message (cached mode)"
|
|
1407
|
+
)
|
|
1408
|
+
|
|
1409
|
+
# Tools wiring
|
|
1410
|
+
# Always include toolConfig if we have tools OR if there are tool results in the conversation
|
|
1411
|
+
has_tool_results = False
|
|
1412
|
+
for msg in bedrock_messages:
|
|
1413
|
+
if isinstance(msg, dict) and msg.get("content"):
|
|
1414
|
+
for content in msg["content"]:
|
|
1415
|
+
if isinstance(content, dict) and "toolResult" in content:
|
|
1416
|
+
has_tool_results = True
|
|
1417
|
+
break
|
|
1418
|
+
if has_tool_results:
|
|
1419
|
+
break
|
|
1195
1420
|
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1421
|
+
if (
|
|
1422
|
+
schema_choice in (ToolSchemaType.ANTHROPIC, ToolSchemaType.DEFAULT)
|
|
1423
|
+
and isinstance(tools_payload, list)
|
|
1424
|
+
and tools_payload
|
|
1425
|
+
):
|
|
1426
|
+
# Include tools only when we have actual tools to provide
|
|
1427
|
+
converse_args["toolConfig"] = {"tools": tools_payload}
|
|
1428
|
+
|
|
1429
|
+
# Inference configuration and overrides
|
|
1430
|
+
inference_config: Dict[str, Any] = {}
|
|
1431
|
+
if params.maxTokens is not None:
|
|
1432
|
+
inference_config["maxTokens"] = params.maxTokens
|
|
1433
|
+
if params.stopSequences:
|
|
1434
|
+
inference_config["stopSequences"] = params.stopSequences
|
|
1435
|
+
|
|
1436
|
+
# Check if reasoning should be enabled
|
|
1437
|
+
reasoning_budget = 0
|
|
1438
|
+
if self._reasoning_effort and self._reasoning_effort != ReasoningEffort.MINIMAL:
|
|
1439
|
+
# Convert string to enum if needed
|
|
1440
|
+
if isinstance(self._reasoning_effort, str):
|
|
1441
|
+
try:
|
|
1442
|
+
effort_enum = ReasoningEffort(self._reasoning_effort)
|
|
1443
|
+
except ValueError:
|
|
1444
|
+
effort_enum = ReasoningEffort.MINIMAL
|
|
1445
|
+
else:
|
|
1446
|
+
effort_enum = self._reasoning_effort
|
|
1447
|
+
|
|
1448
|
+
if effort_enum != ReasoningEffort.MINIMAL:
|
|
1449
|
+
reasoning_budget = REASONING_EFFORT_BUDGETS.get(effort_enum, 0)
|
|
1201
1450
|
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1451
|
+
# Handle temperature and reasoning configuration
|
|
1452
|
+
# AWS docs: "Thinking isn't compatible with temperature, top_p, or top_k modifications"
|
|
1453
|
+
reasoning_enabled = False
|
|
1454
|
+
if reasoning_budget > 0:
|
|
1455
|
+
# Check if this model supports reasoning (with caching)
|
|
1456
|
+
cached_reasoning = (
|
|
1457
|
+
self.capabilities.get(model) or ModelCapabilities()
|
|
1458
|
+
).reasoning_support
|
|
1459
|
+
if cached_reasoning == "supported":
|
|
1460
|
+
# We know this model supports reasoning
|
|
1461
|
+
converse_args["performanceConfig"] = {
|
|
1462
|
+
"reasoning": {"maxReasoningTokens": reasoning_budget}
|
|
1463
|
+
}
|
|
1464
|
+
reasoning_enabled = True
|
|
1465
|
+
elif cached_reasoning != "unsupported":
|
|
1466
|
+
# Unknown - we'll try reasoning and fallback if needed
|
|
1467
|
+
converse_args["performanceConfig"] = {
|
|
1468
|
+
"reasoning": {"maxReasoningTokens": reasoning_budget}
|
|
1469
|
+
}
|
|
1470
|
+
reasoning_enabled = True
|
|
1471
|
+
|
|
1472
|
+
if not reasoning_enabled:
|
|
1473
|
+
# No reasoning - apply temperature if provided
|
|
1474
|
+
if params.temperature is not None:
|
|
1475
|
+
inference_config["temperature"] = params.temperature
|
|
1476
|
+
|
|
1477
|
+
# Nova-specific recommendations (when not using reasoning)
|
|
1478
|
+
if model and "nova" in (model or "").lower() and reasoning_budget == 0:
|
|
1479
|
+
inference_config.setdefault("topP", 1.0)
|
|
1480
|
+
# Merge/attach additionalModelRequestFields for topK
|
|
1481
|
+
existing_amrf = converse_args.get("additionalModelRequestFields", {})
|
|
1482
|
+
merged_amrf = {**existing_amrf, **{"inferenceConfig": {"topK": 1}}}
|
|
1483
|
+
converse_args["additionalModelRequestFields"] = merged_amrf
|
|
1484
|
+
|
|
1485
|
+
if inference_config:
|
|
1486
|
+
converse_args["inferenceConfig"] = inference_config
|
|
1487
|
+
|
|
1488
|
+
# Decide streaming vs non-streaming (resolver-free with runtime detection + cache)
|
|
1489
|
+
has_tools: bool = False
|
|
1490
|
+
try:
|
|
1491
|
+
has_tools = bool(tools_payload) and bool(
|
|
1492
|
+
(isinstance(tools_payload, list) and len(tools_payload) > 0)
|
|
1493
|
+
or (isinstance(tools_payload, str) and tools_payload.strip())
|
|
1494
|
+
)
|
|
1208
1495
|
|
|
1209
|
-
|
|
1496
|
+
# Force non-streaming for structured-output flows (one-shot)
|
|
1497
|
+
force_non_streaming = False
|
|
1498
|
+
if self._force_non_streaming_once:
|
|
1499
|
+
force_non_streaming = True
|
|
1500
|
+
self._force_non_streaming_once = False
|
|
1501
|
+
|
|
1502
|
+
# Evaluate cache for streaming-with-tools
|
|
1503
|
+
cache_pref = (self.capabilities.get(model) or ModelCapabilities()).stream_with_tools
|
|
1504
|
+
use_streaming = True
|
|
1505
|
+
attempted_streaming = False
|
|
1506
|
+
|
|
1507
|
+
if force_non_streaming:
|
|
1508
|
+
use_streaming = False
|
|
1509
|
+
elif has_tools:
|
|
1510
|
+
if cache_pref == StreamPreference.NON_STREAM:
|
|
1210
1511
|
use_streaming = False
|
|
1211
|
-
elif
|
|
1212
|
-
|
|
1213
|
-
use_streaming = False
|
|
1214
|
-
elif cache_pref == StreamPreference.STREAM_OK:
|
|
1215
|
-
use_streaming = True
|
|
1216
|
-
else:
|
|
1217
|
-
# Unknown: try streaming first, fallback on error
|
|
1218
|
-
use_streaming = True
|
|
1512
|
+
elif cache_pref == StreamPreference.STREAM_OK:
|
|
1513
|
+
use_streaming = True
|
|
1219
1514
|
else:
|
|
1515
|
+
# Unknown: try streaming first, fallback on error
|
|
1220
1516
|
use_streaming = True
|
|
1221
1517
|
|
|
1222
|
-
|
|
1223
|
-
|
|
1518
|
+
# NEW: For Anthropic schema, when tool results are present in the conversation,
|
|
1519
|
+
# force non-streaming on this second turn to avoid empty streamed replies.
|
|
1520
|
+
if schema_choice == ToolSchemaType.ANTHROPIC and has_tool_results:
|
|
1521
|
+
use_streaming = False
|
|
1522
|
+
self.logger.debug(
|
|
1523
|
+
"Forcing non-streaming for Anthropic second turn with tool results"
|
|
1524
|
+
)
|
|
1525
|
+
|
|
1526
|
+
+                # Try API call with reasoning fallback
+                try:
+                    if not use_streaming:
+                        self.logger.debug(
+                            f"Using non-streaming API for {model} (schema={schema_choice})"
+                        )
+                        response = client.converse(**converse_args)
+                        processed_response = self._process_non_streaming_response(response, model)
+                    else:
+                        self.logger.debug(
+                            f"Using streaming API for {model} (schema={schema_choice})"
+                        )
+                        attempted_streaming = True
+                        response = client.converse_stream(**converse_args)
+                        processed_response = await self._process_stream(response, model)
+                except (ClientError, BotoCoreError) as e:
+                    # Check if this is a reasoning-related error
+                    if reasoning_budget > 0 and (
+                        "reasoning" in str(e).lower() or "performance" in str(e).lower()
+                    ):
+                        self.logger.debug(
+                            f"Model {model} doesn't support reasoning, retrying without: {e}"
+                        )
+                        caps.reasoning_support = False
+                        self.capabilities[model] = caps
+
+                        # Remove reasoning and retry
+                        if "performanceConfig" in converse_args:
+                            del converse_args["performanceConfig"]
+
+                        # Apply temperature now that reasoning is disabled
+                        if params.temperature is not None:
+                            if "inferenceConfig" not in converse_args:
+                                converse_args["inferenceConfig"] = {}
+                            converse_args["inferenceConfig"]["temperature"] = params.temperature
+
+                        # Retry the API call
                         if not use_streaming:
-                            self.logger.debug(
-                                f"Using non-streaming API for {model} (schema={schema_choice})"
-                            )
                             response = client.converse(**converse_args)
                             processed_response = self._process_non_streaming_response(
                                 response, model
                             )
                         else:
-                            self.logger.debug(
-                                f"Using streaming API for {model} (schema={schema_choice})"
-                            )
-                            attempted_streaming = True
                             response = client.converse_stream(**converse_args)
                             processed_response = await self._process_stream(response, model)
-
-                #
-
-                        "reasoning" in str(e).lower() or "performance" in str(e).lower()
-                    ):
-                        self.logger.debug(
-                            f"Model {model} doesn't support reasoning, retrying without: {e}"
-                        )
-                        caps.reasoning_support = False
-                        self.capabilities[model] = caps
-
-                        # Remove reasoning and retry
-                        if "performanceConfig" in converse_args:
-                            del converse_args["performanceConfig"]
-
-                        # Apply temperature now that reasoning is disabled
-                        if params.temperature is not None:
-                            if "inferenceConfig" not in converse_args:
-                                converse_args["inferenceConfig"] = {}
-                            converse_args["inferenceConfig"]["temperature"] = params.temperature
-
-                        # Retry the API call
-                        if not use_streaming:
-                            response = client.converse(**converse_args)
-                            processed_response = self._process_non_streaming_response(
-                                response, model
-                            )
-                        else:
-                            response = client.converse_stream(**converse_args)
-                            processed_response = await self._process_stream(response, model)
-                    else:
-                        # Not a reasoning error, re-raise
-                        raise
-
-                # Success: cache the working schema choice if not already cached
-                # Only cache schema when tools are present - no tools doesn't predict tool behavior
-                if not caps.schema and has_tools:
-                    caps.schema = ToolSchemaType(schema_choice)
+                    else:
+                        # Not a reasoning error, re-raise
+                        raise

-    …
+                # Success: cache the working schema choice if not already cached
+                # Only cache schema when tools are present - no tools doesn't predict tool behavior
+                if not caps.schema and has_tools:
+                    caps.schema = ToolSchemaType(schema_choice)

-    …
-                    and getattr(self, "_tool_name_policy_for_conversion", "preserve")
-                    == "preserve"
-                ):
-                    # Heuristic: if tool names include '-', prefer underscores next time
-                    try:
-                        if any("-" in t.name for t in (tool_list.tools if tool_list else [])):
-                            caps.tool_name_policy = ToolNamePolicy.UNDERSCORES
-                    except Exception:
-                        pass
-                # Cache streaming-with-tools behavior on success
-                if has_tools and attempted_streaming:
-                    caps.stream_with_tools = StreamPreference.STREAM_OK
-                self.capabilities[model] = caps
-                break
-            except (ClientError, BotoCoreError) as e:
-                error_msg = str(e)
-                last_error_msg = error_msg
-                self.logger.debug(f"Bedrock API error (schema={schema_choice}): {error_msg}")
+                # Cache successful reasoning if we tried it
+                if reasoning_budget > 0 and caps.reasoning_support is not True:
+                    caps.reasoning_support = True
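The added block above makes one Converse attempt with the optional reasoning/performance configuration, and if the model rejects it, strips that configuration, records the capability, and retries. A minimal standalone sketch of the same retry pattern (not the package's code; it assumes a boto3 "bedrock-runtime" client and a plain dict of Converse arguments):

    import boto3
    from botocore.exceptions import BotoCoreError, ClientError

    client = boto3.client("bedrock-runtime")

    def converse_with_reasoning_fallback(converse_args: dict, reasoning_budget: int) -> dict:
        """Call Converse once; if the reasoning/performance config is rejected, retry without it."""
        try:
            return client.converse(**converse_args)
        except (ClientError, BotoCoreError) as exc:
            message = str(exc).lower()
            if reasoning_budget > 0 and ("reasoning" in message or "performance" in message):
                # Drop the optional configuration and retry once.
                converse_args.pop("performanceConfig", None)
                return client.converse(**converse_args)
            raise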

-    …
-                    # continue to other fallbacks (e.g., system inject or next schema)
+                # If Nova/default worked and we used preserve but server complains, flip cache for next time
+                if (
+                    schema_choice == ToolSchemaType.DEFAULT
+                    and name_policy == ToolNamePolicy.PRESERVE
+                ):
+                    # Heuristic: if tool names include '-', prefer underscores next time
+                    try:
+                        if any("-" in t.name for t in (tool_list.tools if tool_list else [])):
+                            caps.tool_name_policy = ToolNamePolicy.UNDERSCORES
+                    except Exception:
+                        pass
+                # Cache streaming-with-tools behavior on success
+                if has_tools and attempted_streaming:
+                    caps.stream_with_tools = StreamPreference.STREAM_OK
+                self.capabilities[model] = caps
+                break
+            except (ClientError, BotoCoreError) as e:
+                error_msg = str(e)
+                last_error_msg = error_msg
+                self.logger.debug(f"Bedrock API error (schema={schema_choice}): {error_msg}")

-    …
-                    and (
-                        "system message" in error_msg.lower()
-                        or "system messages" in error_msg.lower()
+                # If streaming with tools failed and cache undecided, fallback to non-streaming and cache
+                if has_tools and (caps.stream_with_tools is None):
+                    try:
+                        self.logger.debug(
+                            f"Falling back to non-streaming API for {model} after streaming error"
                         )
-
-
-                    caps.
+                        response = client.converse(**converse_args)
+                        processed_response = self._process_non_streaming_response(response, model)
+                        caps.stream_with_tools = StreamPreference.NON_STREAM
+                        if not caps.schema:
+                            caps.schema = ToolSchemaType(schema_choice)
                         self.capabilities[model] = caps
-
-
+                        break
+                    except (ClientError, BotoCoreError) as e_fallback:
+                        last_error_msg = str(e_fallback)
+                        self.logger.debug(
+                            f"Bedrock API error after non-streaming fallback: {last_error_msg}"
                         )
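Several of the added branches record what they learn at runtime (working schema, streaming-with-tools behaviour, reasoning support) in a per-model capability cache so the next call skips fallbacks that already failed. A rough sketch of that caching idea, with field and enum names loosely mirroring the diff (they are assumptions, not the package's public API):

    from dataclasses import dataclass
    from enum import Enum

    class StreamPreference(Enum):
        STREAM_OK = "stream_ok"
        NON_STREAM = "non_stream"

    @dataclass
    class ModelCapabilities:
        schema: str | None = None
        stream_with_tools: StreamPreference | None = None
        reasoning_support: bool | None = None

    capabilities: dict[str, ModelCapabilities] = {}

    def record_stream_result(model: str, streamed_ok: bool) -> None:
        # Remember whether streaming with tools worked so later calls choose the right API.
        caps = capabilities.setdefault(model, ModelCapabilities())
        caps.stream_with_tools = (
            StreamPreference.STREAM_OK if streamed_ok else StreamPreference.NON_STREAM
        )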
-                #
-                try:
-                    # Rebuild messages for inject
-                    converse_args = {
-                        "modelId": model,
-                        "messages": [dict(m) for m in bedrock_messages],
-                    }
-                    # inject system into first user
-                    if (
-                        converse_args["messages"]
-                        and converse_args["messages"][0].get("role") == "user"
-                    ):
-                        fm = converse_args["messages"][0]
-                        if fm.get("content") and len(fm["content"]) > 0:
-                            original_text = fm["content"][0].get("text", "")
-                            fm["content"][0]["text"] = (
-                                f"System: {system_text}\n\nUser: {original_text}"
-                            )
-
-                    # Re-add tools
-                    if (
-                        schema_choice
-                        in (ToolSchemaType.ANTHROPIC.value, ToolSchemaType.DEFAULT.value)
-                        and isinstance(tools_payload, list)
-                        and tools_payload
-                    ):
-                        converse_args["toolConfig"] = {"tools": tools_payload}
-
-                    # Same streaming decision using cache
-                    has_tools = bool(tools_payload) and bool(
-                        (isinstance(tools_payload, list) and len(tools_payload) > 0)
-                        or (isinstance(tools_payload, str) and tools_payload.strip())
-                    )
-                    cache_pref = (
-                        self.capabilities.get(model) or ModelCapabilities()
-                    ).stream_with_tools
-                    if cache_pref == StreamPreference.NON_STREAM or not has_tools:
-                        response = client.converse(**converse_args)
-                        processed_response = self._process_non_streaming_response(
-                            response, model
-                        )
-                    else:
-                        response = client.converse_stream(**converse_args)
-                        processed_response = await self._process_stream(response, model)
-                    if not caps.schema and has_tools:
-                        caps.schema = ToolSchemaType(schema_choice)
-                    self.capabilities[model] = caps
-                    break
-                except (ClientError, BotoCoreError) as e2:
-                    last_error_msg = str(e2)
-                    self.logger.debug(
-                        f"Bedrock API error after system inject fallback: {last_error_msg}"
-                    )
-                    # Fall through to next schema
-                    continue
+                        # continue to other fallbacks (e.g., system inject or next schema)

-    …
+                # System parameter fallback once per call if system message unsupported
+                if (
+                    not tried_system_fallback
+                    and system_text
+                    and system_mode == SystemMode.SYSTEM
+                    and (
+                        "system message" in error_msg.lower()
+                        or "system messages" in error_msg.lower()
                     )
-    …
-            processed_response = {
-                "content": [
-                    {"text": f"Error during generation: {last_error_msg or 'Unknown error'}"}
-                ],
-                "stop_reason": "error",
-                "usage": {"input_tokens": 0, "output_tokens": 0},
-                "model": model,
-                "role": "assistant",
-            }
-
-            # Track usage
-            if processed_response.get("usage"):
-                try:
-                    usage = processed_response["usage"]
-                    turn_usage = TurnUsage(
-                        provider=Provider.BEDROCK.value,
-                        model=model,
-                        input_tokens=usage.get("input_tokens", 0),
-                        output_tokens=usage.get("output_tokens", 0),
-                        total_tokens=usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
-                        raw_usage=usage,
+                ):
+                    tried_system_fallback = True
+                    caps.system_mode = SystemMode.INJECT
+                    self.capabilities[model] = caps
+                    self.logger.info(
+                        f"Switching system mode to inject for {model} and retrying same schema"
                     )
-    …
-            stop_reason = processed_response.get("stop_reason", "end_turn")
-
-            # Determine if we should parse for system-prompt tool calls (unified capabilities)
-            caps_tmp = self.capabilities.get(model) or ModelCapabilities()
-            sys_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT
-
-            if sys_prompt_schema and stop_reason == "end_turn":
-                # Only parse for tools if text contains actual function call structure
-                message_text = ""
-                for content_item in processed_response.get("content", []):
-                    if isinstance(content_item, dict) and content_item.get("type") == "text":
-                        message_text += content_item.get("text", "")
-
-                # Check if there's a tool call in the response
-                parsed_tools = self._parse_tool_response(processed_response, model)
-                if parsed_tools:
-                    # Loop guard: if the same single tool call repeats > N times in system-prompt mode, stop
-                    if len(parsed_tools) == 1:
-                        # Determine normalized tool name as we would use for execution
-                        candidate_name = parsed_tools[0]["name"]
-                        # Map to canonical name if available
-                        canonical = self.tool_name_mapping.get(candidate_name)
-                        if not canonical:
-                            lowered = candidate_name.lower().replace("_", "-")
-                            for key, original in self.tool_name_mapping.items():
-                                if lowered == key.lower().replace("_", "-"):
-                                    canonical = original
-                                    break
-                        normalized_name = canonical or candidate_name
-                        try:
-                            args_signature = json.dumps(
-                                parsed_tools[0].get("arguments", {}), sort_keys=True
-                            )
-                        except Exception:
-                            args_signature = str(parsed_tools[0].get("arguments", {}))
-                        current_signature = f"{normalized_name}|{args_signature}"
-
-                        # Identify system-prompt schema mode via unified capabilities
-                        caps_loop = self.capabilities.get(model) or ModelCapabilities()
-                        is_system_prompt_schema_loop = (
-                            caps_loop.schema == ToolSchemaType.SYSTEM_PROMPT
-                        )
-
-                        if is_system_prompt_schema_loop:
-                            if current_signature == last_tool_signature:
-                                repeated_tool_calls_count += 1
-                            else:
-                                repeated_tool_calls_count = 1
-                                last_tool_signature = current_signature
-
-                            if repeated_tool_calls_count > max_repeated_tool_calls:
-                                # Return the last tool result content to avoid infinite loops
-                                if tool_result_responses:
-                                    return cast(
-                                        "List[ContentBlock | CallToolRequestParams]",
-                                        tool_result_responses,
-                                    )
-                                # Fallback: return a minimal text indicating no content
-                                return cast(
-                                    "List[ContentBlock | CallToolRequestParams]",
-                                    [TextContent(text="[No content in tool result]")],
+                    # Retry the same schema immediately in inject mode
+                    try:
+                        # Rebuild messages for inject
+                        converse_args = {
+                            "modelId": model,
+                            "messages": [dict(m) for m in bedrock_messages],
+                        }
+                        # inject system into first user
+                        if (
+                            converse_args["messages"]
+                            and converse_args["messages"][0].get("role") == "user"
+                        ):
+                            fm = converse_args["messages"][0]
+                            if fm.get("content") and len(fm["content"]) > 0:
+                                original_text = fm["content"][0].get("text", "")
+                                fm["content"][0]["text"] = (
+                                    f"System: {system_text}\n\nUser: {original_text}"
                                 )
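When a model reports that it does not accept system messages, the new code flips the cached system mode to "inject" and rebuilds the request with the system text folded into the first user message. The same idea in isolation (a sketch over Converse-style message dicts, not the package's helper):

    def inject_system_into_first_user(messages: list[dict], system_text: str) -> list[dict]:
        """Prepend the system text to the first user message instead of using a system field."""
        patched = [dict(m) for m in messages]
        if patched and patched[0].get("role") == "user":
            content = patched[0].get("content") or []
            if content and isinstance(content[0], dict) and "text" in content[0]:
                content[0]["text"] = f"System: {system_text}\n\nUser: {content[0]['text']}"
        return patched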
-                    # Override stop_reason to handle as tool_use
-                    stop_reason = "tool_use"
-                    self.logger.debug(
-                        "Detected system prompt tool call, overriding stop_reason to 'tool_use'"
-                    )
-
-            if stop_reason == "end_turn":
-                # Extract text for display
-                message_text = ""
-                for content_item in processed_response.get("content", []):
-                    if content_item.get("text"):
-                        message_text += content_item["text"]
-
-                await self.show_assistant_message(message_text)
-                self.logger.debug(f"Iteration {i}: Stopping because stop_reason is 'end_turn'")
-                break
-            elif stop_reason == "stop_sequence":
-                self.logger.debug(f"Iteration {i}: Stopping because stop_reason is 'stop_sequence'")
-                break
-            elif stop_reason == "max_tokens":
-                self.logger.debug(f"Iteration {i}: Stopping because stop_reason is 'max_tokens'")
-                if params.maxTokens is not None:
-                    message_text = Text(
-                        f"the assistant has reached the maximum token limit ({params.maxTokens})",
-                        style="dim green italic",
-                    )
-                else:
-                    message_text = Text(
-                        "the assistant has reached the maximum token limit",
-                        style="dim green italic",
-                    )
-                await self.show_assistant_message(message_text)
-                break
-            elif stop_reason in ["tool_use", "tool_calls"]:
-                # Handle tool use/calls - format depends on model type
-                message_text = ""
-                for content_item in processed_response.get("content", []):
-                    if content_item.get("text"):
-                        message_text += content_item["text"]
-
-                # Parse tool calls using model-specific method
-                self.logger.info(f"DEBUG: About to parse tool response: {processed_response}")
-                parsed_tools = self._parse_tool_response(processed_response, model)
-                self.logger.info(f"DEBUG: Parsed tools: {parsed_tools}")
-
-                if parsed_tools:
-                    # Process tool calls and collect results
-                    tool_results_for_batch = []
-                    for tool_idx, parsed_tool in enumerate(parsed_tools):
-                        # The original name is needed to call the tool, which is in tool_name_mapping.
-                        tool_name_from_model = parsed_tool["name"]
-                        tool_name = self.tool_name_mapping.get(
-                            tool_name_from_model, tool_name_from_model
-                        )

-
-
+                        # Re-add tools
+                        if (
+                            schema_choice
+                            in (ToolSchemaType.ANTHROPIC.value, ToolSchemaType.DEFAULT.value)
+                            and isinstance(tools_payload, list)
+                            and tools_payload
+                        ):
+                            converse_args["toolConfig"] = {"tools": tools_payload}

-
-
+                        # Same streaming decision using cache
+                        has_tools = bool(tools_payload) and bool(
+                            (isinstance(tools_payload, list) and len(tools_payload) > 0)
+                            or (isinstance(tools_payload, str) and tools_payload.strip())
                         )
-    …
+                        cache_pref = (
+                            self.capabilities.get(model) or ModelCapabilities()
+                        ).stream_with_tools
+                        if cache_pref == StreamPreference.NON_STREAM or not has_tools:
+                            response = client.converse(**converse_args)
+                            processed_response = self._process_non_streaming_response(
+                                response, model
+                            )
+                        else:
+                            response = client.converse_stream(**converse_args)
+                            processed_response = await self._process_stream(response, model)
+                        if not caps.schema and has_tools:
+                            caps.schema = ToolSchemaType(schema_choice)
+                        self.capabilities[model] = caps
+                        break
+                    except (ClientError, BotoCoreError) as e2:
+                        last_error_msg = str(e2)
+                        self.logger.debug(
+                            f"Bedrock API error after system inject fallback: {last_error_msg}"
                         )
+                        # Fall through to next schema
+                        continue
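The retry path chooses between `converse` and `converse_stream` based on the cached streaming preference. For reference, draining a ConverseStream response into text looks roughly like this (a sketch against the boto3 bedrock-runtime event shape, not code taken from the package):

    def collect_stream_text(stream_response: dict) -> str:
        """Concatenate text deltas from a boto3 converse_stream() response."""
        chunks: list[str] = []
        for event in stream_response.get("stream", []):
            delta = event.get("contentBlockDelta", {}).get("delta", {})
            if "text" in delta:
                chunks.append(delta["text"])
        return "".join(chunks)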

-    …
-                    is_system_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT
-
-                    if is_system_prompt_schema:
-                        # For system prompt models (like Llama), format results as a simple text message.
-                        # The model expects to see the results in a human-readable format to continue.
-                        tool_result_parts = []
-                        for _, tool_result, tool_name in tool_results_for_batch:
-                            result_text = "".join(
-                                [
-                                    part.text
-                                    for part in tool_result.content
-                                    if isinstance(part, TextContent)
-                                ]
-                            )
+                # For any other error (including tool format errors), continue to next schema
+                self.logger.debug(
+                    f"Continuing to next schema after error with {schema_choice}: {error_msg}"
+                )
+                continue
+
+        if processed_response is None:
+            # All attempts failed; mark schema as none to avoid repeated retries this process
+            caps.schema = ToolSchemaType.NONE
+            self.capabilities[model] = caps
+            processed_response = {
+                "content": [
+                    {"text": f"Error during generation: {last_error_msg or 'Unknown error'}"}
+                ],
+                "stop_reason": "error",
+                "usage": {"input_tokens": 0, "output_tokens": 0},
+                "model": model,
+                "role": "assistant",
+            }

-    …
+        # Track usage
+        if processed_response.get("usage"):
+            try:
+                usage = processed_response["usage"]
+                turn_usage = TurnUsage(
+                    provider=Provider.BEDROCK.value,
+                    model=model,
+                    input_tokens=usage.get("input_tokens", 0),
+                    output_tokens=usage.get("output_tokens", 0),
+                    total_tokens=usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
+                    raw_usage=usage,
+                )
+                self.usage_accumulator.add_turn(turn_usage)
+            except Exception as e:
+                self.logger.warning(f"Failed to track usage: {e}")
+
+        self.logger.debug(f"{model} response:", data=processed_response)
+
+        # Convert response to message param and add to messages
+        response_message_param = self.convert_message_to_message_param(processed_response)
+        messages.append(response_message_param)
+
+        # Extract text content for responses
+        if processed_response.get("content"):
+            for content_item in processed_response["content"]:
+                if content_item.get("text"):
+                    response_content_blocks.append(
+                        TextContent(type="text", text=content_item["text"])
+                    )
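The new usage-tracking block turns the response's token counts into a per-turn record and hands it to an accumulator. A stripped-down sketch of that bookkeeping (the `TurnUsage` shape here is an assumption for illustration):

    from dataclasses import dataclass

    @dataclass
    class TurnUsage:
        model: str
        input_tokens: int
        output_tokens: int

        @property
        def total_tokens(self) -> int:
            return self.input_tokens + self.output_tokens

    def usage_from_response(model: str, processed_response: dict) -> TurnUsage:
        # Pull token counts out of the processed response, defaulting to zero.
        usage = processed_response.get("usage", {}) or {}
        return TurnUsage(
            model=model,
            input_tokens=usage.get("input_tokens", 0),
            output_tokens=usage.get("output_tokens", 0),
        )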

-    …
-            #
-    …
+        # Fallback: if no content returned and the last input contained tool results,
+        # synthesize the assistant reply using the tool result text to preserve behavior.
+        if not response_content_blocks:
+            try:
+                # messages currently includes the appended assistant response; inspect the prior user message
+                last_index = len(messages) - 2 if len(messages) >= 2 else (len(messages) - 1)
+                last_input = messages[last_index] if last_index >= 0 else None
+                if isinstance(last_input, dict):
+                    contents = last_input.get("content", []) or []
+                    for c in contents:
+                        # Handle parameter-level representation
+                        if isinstance(c, dict) and c.get("type") == "tool_result":
+                            tr_content = c.get("content", []) or []
+                            fallback_text = " ".join(
+                                part.get("text", "")
+                                for part in tr_content
+                                if isinstance(part, dict)
+                            ).strip()
+                            if fallback_text:
+                                response_content_blocks.append(
+                                    TextContent(type="text", text=fallback_text)
+                                )
+                                break
+                        # Handle bedrock-level representation
+                        if isinstance(c, dict) and "toolResult" in c:
+                            tr = c["toolResult"]
+                            tr_content = tr.get("content", []) or []
+                            fallback_text = " ".join(
+                                part.get("text", "")
+                                for part in tr_content
+                                if isinstance(part, dict)
+                            ).strip()
+                            if fallback_text:
+                                response_content_blocks.append(
+                                    TextContent(type="text", text=fallback_text)
                                 )
+                                break
+            except Exception:
+                pass
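The fallback above covers turns where the model returns no text after a tool call: it lifts the text parts out of the previous message's tool results so the caller still sees something. Extracted as a small helper (a sketch over Converse-style tool-result blocks, handling both representations the diff checks for):

    def fallback_text_from_tool_results(message: dict) -> str | None:
        """Return joined text from the first tool-result block found in a message, if any."""
        for block in message.get("content", []) or []:
            if not isinstance(block, dict):
                continue
            tool_result = block.get("toolResult") or (
                block if block.get("type") == "tool_result" else None
            )
            if tool_result:
                text = " ".join(
                    part.get("text", "")
                    for part in tool_result.get("content", []) or []
                    if isinstance(part, dict)
                ).strip()
                if text:
                    return text
        return None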

-
-
-                            {
-                                "type": "tool_result",
-                                "tool_use_id": tool_id,
-                                "content": result_content_blocks,
-                                "status": "error" if tool_result.isError else "success",
-                            }
-                        )
+        # Handle different stop reasons
+        stop_reason = processed_response.get("stop_reason", "end_turn")

-
-
-                    messages.append(
-                        {
-                            "role": "user",
-                            "content": tool_result_blocks,
-                        }
-                    )
+        # Determine if we should parse for system-prompt tool calls (unified capabilities)
+        caps_tmp = self.capabilities.get(model) or ModelCapabilities()

-    …
+        # Try to parse system prompt tool calls if we have an end_turn with tools available
+        # This handles cases where native tool calling failed but model generates system prompt format
+        if stop_reason == "end_turn" and tools:
+            # Only parse for tools if text contains actual function call structure
+            message_text = ""
+            for content_item in processed_response.get("content", []):
+                if isinstance(content_item, dict) and "text" in content_item:
+                    message_text += content_item.get("text", "")
+
+            # Check if there's a tool call in the response
+            parsed_tools = self._parse_tool_response(processed_response, model)
+            if parsed_tools:
+                # Override stop_reason to handle as tool_use
+                stop_reason = "tool_use"
+                # Update capabilities cache to reflect successful system prompt tool calling
+                if not caps_tmp.schema:
+                    caps_tmp.schema = ToolSchemaType.SYSTEM_PROMPT
+                    self.capabilities[model] = caps_tmp
+
+        # NEW: Handle tool calls without execution - return them for external handling
+        tool_calls: Dict[str, CallToolRequest] | None = None
+        if stop_reason in ["tool_use", "tool_calls"]:
+            parsed_tools = self._parse_tool_response(processed_response, model)
+            if parsed_tools:
+                tool_calls = self._build_tool_calls_dict(parsed_tools)
+
+        # Map stop reason to LlmStopReason
+        mapped_stop_reason = self._map_bedrock_stop_reason(stop_reason)

         # Update history
         if params.use_history:
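The biggest behavioural change in this hunk is that tool calls are no longer executed inside the provider loop; they are parsed, keyed, and handed back to the caller together with the mapped stop reason. A sketch of that hand-off shape (`AssistantTurn` is a hypothetical stand-in for the extended prompt message the provider now returns):

    from dataclasses import dataclass, field

    @dataclass
    class AssistantTurn:
        text_blocks: list[str]
        stop_reason: str = "end_turn"
        tool_calls: dict[str, dict] = field(default_factory=dict)

    def build_turn(text_blocks: list[str], parsed_tools: list[dict]) -> AssistantTurn:
        # Key each parsed call so the caller can correlate results when it executes them.
        calls = {f"tool_{i}": tool for i, tool in enumerate(parsed_tools)}
        return AssistantTurn(
            text_blocks=text_blocks,
            stop_reason="tool_use" if calls else "end_turn",
            tool_calls=calls,
        )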
@@ -1695,63 +1834,39 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

             self.history.set(new_messages)

-
-        # This handles cases like Claude Haiku where the model calls tools but doesn't generate follow-up text
-        if not responses and tool_result_responses:
-            responses = tool_result_responses
-            self.logger.debug("Restored tool results as no follow-up content was generated")
-
-        # Strip leading whitespace from the *last* non-empty text block of the final response
-        # to ensure the output is clean.
-        if responses:
-            for item in reversed(responses):
-                if isinstance(item, TextContent) and item.text:
-                    item.text = item.text.lstrip()
-                    break
+        self._log_chat_finished(model=model)

-
+        # Return PromptMessageExtended with tool calls for external execution
+        from fast_agent.core.prompt import Prompt

-    …
-        request_params: RequestParams | None = None,
-    ) -> PromptMessageMultipart:
-        """Generate messages using Bedrock."""
-        responses = await self._bedrock_completion(message_param, request_params)
-
-        # Convert responses to PromptMessageMultipart
-        content_list = []
-        for response in responses:
-            if isinstance(response, TextContent):
-                content_list.append(response)
-
-        return PromptMessageMultipart(role="assistant", content=content_list)
+        return Prompt.assistant(
+            *response_content_blocks, stop_reason=mapped_stop_reason, tool_calls=tool_calls
+        )

     async def _apply_prompt_provider_specific(
         self,
-        multipart_messages: List[
+        multipart_messages: List[PromptMessageExtended],
         request_params: RequestParams | None = None,
+        tools: List[Tool] | None = None,
         is_template: bool = False,
-    ) ->
+    ) -> PromptMessageExtended:
         """Apply Bedrock-specific prompt formatting."""
         if not multipart_messages:
-            return
+            return PromptMessageExtended(role="user", content=[])

         # Check the last message role
         last_message = multipart_messages[-1]

         # Add all previous messages to history (or all messages if last is from assistant)
         # if the last message is a "user" inference is required
+        # if the last message is a "user" inference is required
         messages_to_add = (
             multipart_messages[:-1] if last_message.role == "user" else multipart_messages
         )
         converted = []
         for msg in messages_to_add:
             # Convert each message to Bedrock message parameter format
-            bedrock_msg =
-            for content_item in msg.content:
-                if isinstance(content_item, TextContent):
-                    bedrock_msg["content"].append({"type": "text", "text": content_item.text})
+            bedrock_msg = self._convert_multipart_to_bedrock_message(msg)
             converted.append(bedrock_msg)

         # Add messages to history
@@ -1761,15 +1876,16 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
             # For assistant messages: Return the last message (no completion needed)
             return last_message

-        #
-    …
+        # For user messages with tool_results, we need to add the tool result message to the conversation
+        if last_message.tool_results:
+            # Convert the tool result message and use it as the final input
+            message_param = self._convert_multipart_to_bedrock_message(last_message)
+        else:
+            # Convert the last user message to Bedrock message parameter format
+            message_param = self._convert_multipart_to_bedrock_message(last_message)

-        #
-        self.
-        return await self.generate_messages(message_param, request_params)
+        # Call the refactored completion method directly
+        return await self._bedrock_completion(message_param, request_params, tools)
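With the refactor, every multipart message, including the tool-result message that may end the conversation, goes through a single `_convert_multipart_to_bedrock_message` helper before the completion call. The Converse message shape that such a conversion targets looks like this (a sketch, assuming plain text content only):

    def text_message_to_bedrock(role: str, text: str) -> dict:
        """Build a minimal Converse-API message dict for a plain text turn."""
        return {"role": role, "content": [{"text": text}]}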

     def _generate_simplified_schema(self, model: Type[ModelT]) -> str:
         """Generates a simplified, human-readable schema with inline enum constraints."""
@@ -1834,10 +1950,10 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

     async def _apply_prompt_provider_specific_structured(
         self,
-        multipart_messages: List[
+        multipart_messages: List[PromptMessageExtended],
         model: Type[ModelT],
         request_params: RequestParams | None = None,
-    ) -> Tuple[ModelT | None,
+    ) -> Tuple[ModelT | None, PromptMessageExtended]:
         """Apply structured output for Bedrock using prompt engineering with a simplified schema."""
         # Short-circuit: if the last message is already an assistant JSON payload,
         # parse it directly without invoking the model. This restores pre-regression behavior
@@ -1873,7 +1989,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         if strategy == StructuredStrategy.SIMPLIFIED_SCHEMA:
             schema_text = self._generate_simplified_schema(model)
         else:
-            schema_text =
+            schema_text = FastAgentLLM.model_to_schema_str(model)

         # Build the new simplified prompt
         prompt_parts = [
@@ -1900,7 +2016,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
             temp_last = multipart_messages[-1].model_copy(deep=True)
         except Exception:
             # Fallback: construct a minimal copy if model_copy is unavailable
-            temp_last =
+            temp_last = PromptMessageExtended(
                 role=multipart_messages[-1].role, content=list(multipart_messages[-1].content)
             )

@@ -1911,7 +2027,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         )

         try:
-            result:
+            result: PromptMessageExtended = await self._apply_prompt_provider_specific(
                 [temp_last], request_params
             )
             try:
@@ -1933,17 +2049,17 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
             try:
                 simplified_schema_text = self._generate_simplified_schema(model)
             except Exception:
-                simplified_schema_text =
+                simplified_schema_text = FastAgentLLM.model_to_schema_str(model)
             try:
                 temp_last_retry = multipart_messages[-1].model_copy(deep=True)
             except Exception:
-                temp_last_retry =
+                temp_last_retry = PromptMessageExtended(
                     role=multipart_messages[-1].role,
                     content=list(multipart_messages[-1].content),
                 )
             temp_last_retry.add_text("\n".join(strict_parts + [simplified_schema_text]))

-            retry_result:
+            retry_result: PromptMessageExtended = await self._apply_prompt_provider_specific(
                 [temp_last_retry], request_params
             )
             return self._structured_from_multipart(retry_result, model)
@@ -2007,8 +2123,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         return text

     def _structured_from_multipart(
-        self, message:
-    ) -> Tuple[ModelT | None,
+        self, message: PromptMessageExtended, model: Type[ModelT]
+    ) -> Tuple[ModelT | None, PromptMessageExtended]:
         """Override to apply JSON cleaning before parsing."""
         # Get the text from the multipart message
         text = message.all_text()
@@ -2020,7 +2136,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
         if cleaned_text != text:
             from mcp.types import TextContent

-            cleaned_multipart =
+            cleaned_multipart = PromptMessageExtended(
                 role=message.role, content=[TextContent(type="text", text=cleaned_text)]
             )
         else:
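The structured-output path keeps prompting with a schema (simplified or full) and then parses the assistant's reply into the target model after cleaning the text. A condensed sketch of that final parse step, assuming a Pydantic model class as the diff's `ModelT` suggests (this is an illustration of the general approach, not the package's implementation):

    import json

    from pydantic import BaseModel, ValidationError

    def parse_structured_reply(text: str, model_cls: type[BaseModel]) -> BaseModel | None:
        """Clean an assistant reply and validate it against the target model; None on failure."""
        cleaned = text.strip()
        if cleaned.startswith("```"):
            # Strip a Markdown code fence if the model wrapped its JSON in one.
            cleaned = cleaned.strip("`").removeprefix("json").strip()
        try:
            return model_cls.model_validate(json.loads(cleaned))
        except (json.JSONDecodeError, ValidationError):
            return None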