fast-agent-mcp 0.2.58__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

Files changed (233)
  1. fast_agent/__init__.py +127 -0
  2. fast_agent/agents/__init__.py +36 -0
  3. {mcp_agent/core → fast_agent/agents}/agent_types.py +2 -1
  4. fast_agent/agents/llm_agent.py +217 -0
  5. fast_agent/agents/llm_decorator.py +486 -0
  6. mcp_agent/agents/base_agent.py → fast_agent/agents/mcp_agent.py +377 -385
  7. fast_agent/agents/tool_agent.py +168 -0
  8. {mcp_agent → fast_agent}/agents/workflow/chain_agent.py +43 -33
  9. {mcp_agent → fast_agent}/agents/workflow/evaluator_optimizer.py +31 -35
  10. {mcp_agent → fast_agent}/agents/workflow/iterative_planner.py +56 -47
  11. {mcp_agent → fast_agent}/agents/workflow/orchestrator_models.py +4 -4
  12. {mcp_agent → fast_agent}/agents/workflow/parallel_agent.py +34 -41
  13. {mcp_agent → fast_agent}/agents/workflow/router_agent.py +54 -39
  14. {mcp_agent → fast_agent}/cli/__main__.py +5 -3
  15. {mcp_agent → fast_agent}/cli/commands/check_config.py +95 -66
  16. {mcp_agent → fast_agent}/cli/commands/go.py +20 -11
  17. {mcp_agent → fast_agent}/cli/commands/quickstart.py +4 -4
  18. {mcp_agent → fast_agent}/cli/commands/server_helpers.py +1 -1
  19. {mcp_agent → fast_agent}/cli/commands/setup.py +64 -134
  20. {mcp_agent → fast_agent}/cli/commands/url_parser.py +9 -8
  21. {mcp_agent → fast_agent}/cli/main.py +36 -16
  22. {mcp_agent → fast_agent}/cli/terminal.py +2 -2
  23. {mcp_agent → fast_agent}/config.py +10 -2
  24. fast_agent/constants.py +8 -0
  25. {mcp_agent → fast_agent}/context.py +24 -19
  26. {mcp_agent → fast_agent}/context_dependent.py +9 -5
  27. fast_agent/core/__init__.py +17 -0
  28. {mcp_agent → fast_agent}/core/agent_app.py +39 -36
  29. fast_agent/core/core_app.py +135 -0
  30. {mcp_agent → fast_agent}/core/direct_decorators.py +12 -26
  31. {mcp_agent → fast_agent}/core/direct_factory.py +95 -73
  32. {mcp_agent → fast_agent/core}/executor/executor.py +4 -5
  33. {mcp_agent → fast_agent}/core/fastagent.py +32 -32
  34. fast_agent/core/logging/__init__.py +5 -0
  35. {mcp_agent → fast_agent/core}/logging/events.py +3 -3
  36. {mcp_agent → fast_agent/core}/logging/json_serializer.py +1 -1
  37. {mcp_agent → fast_agent/core}/logging/listeners.py +85 -7
  38. {mcp_agent → fast_agent/core}/logging/logger.py +7 -7
  39. {mcp_agent → fast_agent/core}/logging/transport.py +10 -11
  40. fast_agent/core/prompt.py +9 -0
  41. {mcp_agent → fast_agent}/core/validation.py +4 -4
  42. fast_agent/event_progress.py +61 -0
  43. fast_agent/history/history_exporter.py +44 -0
  44. {mcp_agent → fast_agent}/human_input/__init__.py +9 -12
  45. {mcp_agent → fast_agent}/human_input/elicitation_handler.py +26 -8
  46. {mcp_agent → fast_agent}/human_input/elicitation_state.py +7 -7
  47. {mcp_agent → fast_agent}/human_input/simple_form.py +6 -4
  48. {mcp_agent → fast_agent}/human_input/types.py +1 -18
  49. fast_agent/interfaces.py +228 -0
  50. fast_agent/llm/__init__.py +9 -0
  51. mcp_agent/llm/augmented_llm.py → fast_agent/llm/fastagent_llm.py +127 -218
  52. fast_agent/llm/internal/passthrough.py +137 -0
  53. mcp_agent/llm/augmented_llm_playback.py → fast_agent/llm/internal/playback.py +29 -25
  54. mcp_agent/llm/augmented_llm_silent.py → fast_agent/llm/internal/silent.py +10 -17
  55. fast_agent/llm/internal/slow.py +38 -0
  56. {mcp_agent → fast_agent}/llm/memory.py +40 -30
  57. {mcp_agent → fast_agent}/llm/model_database.py +35 -2
  58. {mcp_agent → fast_agent}/llm/model_factory.py +103 -77
  59. fast_agent/llm/model_info.py +126 -0
  60. {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/anthropic_utils.py +7 -7
  61. fast_agent/llm/provider/anthropic/llm_anthropic.py +603 -0
  62. {mcp_agent/llm/providers → fast_agent/llm/provider/anthropic}/multipart_converter_anthropic.py +79 -86
  63. {mcp_agent/llm/providers → fast_agent/llm/provider/bedrock}/bedrock_utils.py +3 -1
  64. mcp_agent/llm/providers/augmented_llm_bedrock.py → fast_agent/llm/provider/bedrock/llm_bedrock.py +833 -717
  65. {mcp_agent/llm/providers → fast_agent/llm/provider/google}/google_converter.py +66 -14
  66. fast_agent/llm/provider/google/llm_google_native.py +431 -0
  67. mcp_agent/llm/providers/augmented_llm_aliyun.py → fast_agent/llm/provider/openai/llm_aliyun.py +6 -7
  68. mcp_agent/llm/providers/augmented_llm_azure.py → fast_agent/llm/provider/openai/llm_azure.py +4 -4
  69. mcp_agent/llm/providers/augmented_llm_deepseek.py → fast_agent/llm/provider/openai/llm_deepseek.py +10 -11
  70. mcp_agent/llm/providers/augmented_llm_generic.py → fast_agent/llm/provider/openai/llm_generic.py +4 -4
  71. mcp_agent/llm/providers/augmented_llm_google_oai.py → fast_agent/llm/provider/openai/llm_google_oai.py +4 -4
  72. mcp_agent/llm/providers/augmented_llm_groq.py → fast_agent/llm/provider/openai/llm_groq.py +14 -16
  73. mcp_agent/llm/providers/augmented_llm_openai.py → fast_agent/llm/provider/openai/llm_openai.py +133 -207
  74. mcp_agent/llm/providers/augmented_llm_openrouter.py → fast_agent/llm/provider/openai/llm_openrouter.py +6 -6
  75. mcp_agent/llm/providers/augmented_llm_tensorzero_openai.py → fast_agent/llm/provider/openai/llm_tensorzero_openai.py +17 -16
  76. mcp_agent/llm/providers/augmented_llm_xai.py → fast_agent/llm/provider/openai/llm_xai.py +6 -6
  77. {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/multipart_converter_openai.py +125 -63
  78. {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_multipart.py +12 -12
  79. {mcp_agent/llm/providers → fast_agent/llm/provider/openai}/openai_utils.py +18 -16
  80. {mcp_agent → fast_agent}/llm/provider_key_manager.py +2 -2
  81. {mcp_agent → fast_agent}/llm/provider_types.py +2 -0
  82. {mcp_agent → fast_agent}/llm/sampling_converter.py +15 -12
  83. {mcp_agent → fast_agent}/llm/usage_tracking.py +23 -5
  84. fast_agent/mcp/__init__.py +43 -0
  85. {mcp_agent → fast_agent}/mcp/elicitation_factory.py +3 -3
  86. {mcp_agent → fast_agent}/mcp/elicitation_handlers.py +19 -10
  87. {mcp_agent → fast_agent}/mcp/gen_client.py +3 -3
  88. fast_agent/mcp/helpers/__init__.py +36 -0
  89. fast_agent/mcp/helpers/content_helpers.py +183 -0
  90. {mcp_agent → fast_agent}/mcp/helpers/server_config_helpers.py +8 -8
  91. {mcp_agent → fast_agent}/mcp/hf_auth.py +25 -23
  92. fast_agent/mcp/interfaces.py +93 -0
  93. {mcp_agent → fast_agent}/mcp/logger_textio.py +4 -4
  94. {mcp_agent → fast_agent}/mcp/mcp_agent_client_session.py +49 -44
  95. {mcp_agent → fast_agent}/mcp/mcp_aggregator.py +66 -115
  96. {mcp_agent → fast_agent}/mcp/mcp_connection_manager.py +16 -23
  97. {mcp_agent/core → fast_agent/mcp}/mcp_content.py +23 -15
  98. {mcp_agent → fast_agent}/mcp/mime_utils.py +39 -0
  99. fast_agent/mcp/prompt.py +159 -0
  100. mcp_agent/mcp/prompt_message_multipart.py → fast_agent/mcp/prompt_message_extended.py +27 -20
  101. {mcp_agent → fast_agent}/mcp/prompt_render.py +21 -19
  102. {mcp_agent → fast_agent}/mcp/prompt_serialization.py +46 -46
  103. fast_agent/mcp/prompts/__main__.py +7 -0
  104. {mcp_agent → fast_agent}/mcp/prompts/prompt_helpers.py +31 -30
  105. {mcp_agent → fast_agent}/mcp/prompts/prompt_load.py +8 -8
  106. {mcp_agent → fast_agent}/mcp/prompts/prompt_server.py +11 -19
  107. {mcp_agent → fast_agent}/mcp/prompts/prompt_template.py +18 -18
  108. {mcp_agent → fast_agent}/mcp/resource_utils.py +1 -1
  109. {mcp_agent → fast_agent}/mcp/sampling.py +31 -26
  110. {mcp_agent/mcp_server → fast_agent/mcp/server}/__init__.py +1 -1
  111. {mcp_agent/mcp_server → fast_agent/mcp/server}/agent_server.py +5 -6
  112. fast_agent/mcp/ui_agent.py +48 -0
  113. fast_agent/mcp/ui_mixin.py +209 -0
  114. fast_agent/mcp_server_registry.py +90 -0
  115. {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis-campaign.py +5 -4
  116. {mcp_agent → fast_agent}/resources/examples/data-analysis/analysis.py +1 -1
  117. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/forms_demo.py +3 -3
  118. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character.py +2 -2
  119. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/game_character_handler.py +1 -1
  120. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/tool_call.py +1 -1
  121. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_one.py +1 -1
  122. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/agent_two.py +1 -1
  123. {mcp_agent → fast_agent}/resources/examples/researcher/researcher-eval.py +1 -1
  124. {mcp_agent → fast_agent}/resources/examples/researcher/researcher-imp.py +1 -1
  125. {mcp_agent → fast_agent}/resources/examples/researcher/researcher.py +1 -1
  126. {mcp_agent → fast_agent}/resources/examples/tensorzero/agent.py +2 -2
  127. {mcp_agent → fast_agent}/resources/examples/tensorzero/image_demo.py +3 -3
  128. {mcp_agent → fast_agent}/resources/examples/tensorzero/simple_agent.py +1 -1
  129. {mcp_agent → fast_agent}/resources/examples/workflows/chaining.py +1 -1
  130. {mcp_agent → fast_agent}/resources/examples/workflows/evaluator.py +3 -3
  131. {mcp_agent → fast_agent}/resources/examples/workflows/human_input.py +5 -3
  132. {mcp_agent → fast_agent}/resources/examples/workflows/orchestrator.py +1 -1
  133. {mcp_agent → fast_agent}/resources/examples/workflows/parallel.py +2 -2
  134. {mcp_agent → fast_agent}/resources/examples/workflows/router.py +5 -2
  135. fast_agent/resources/setup/.gitignore +24 -0
  136. fast_agent/resources/setup/agent.py +18 -0
  137. fast_agent/resources/setup/fastagent.config.yaml +44 -0
  138. fast_agent/resources/setup/fastagent.secrets.yaml.example +38 -0
  139. fast_agent/tools/elicitation.py +369 -0
  140. fast_agent/types/__init__.py +32 -0
  141. fast_agent/types/llm_stop_reason.py +77 -0
  142. fast_agent/ui/__init__.py +38 -0
  143. fast_agent/ui/console_display.py +1005 -0
  144. {mcp_agent/human_input → fast_agent/ui}/elicitation_form.py +17 -12
  145. mcp_agent/human_input/elicitation_forms.py → fast_agent/ui/elicitation_style.py +1 -1
  146. {mcp_agent/core → fast_agent/ui}/enhanced_prompt.py +96 -25
  147. {mcp_agent/core → fast_agent/ui}/interactive_prompt.py +330 -125
  148. fast_agent/ui/mcp_ui_utils.py +224 -0
  149. {mcp_agent → fast_agent/ui}/progress_display.py +2 -2
  150. {mcp_agent/logging → fast_agent/ui}/rich_progress.py +4 -4
  151. {mcp_agent/core → fast_agent/ui}/usage_display.py +3 -8
  152. {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.0.dist-info}/METADATA +7 -7
  153. fast_agent_mcp-0.3.0.dist-info/RECORD +202 -0
  154. fast_agent_mcp-0.3.0.dist-info/entry_points.txt +5 -0
  155. fast_agent_mcp-0.2.58.dist-info/RECORD +0 -193
  156. fast_agent_mcp-0.2.58.dist-info/entry_points.txt +0 -6
  157. mcp_agent/__init__.py +0 -114
  158. mcp_agent/agents/agent.py +0 -92
  159. mcp_agent/agents/workflow/__init__.py +0 -1
  160. mcp_agent/agents/workflow/orchestrator_agent.py +0 -597
  161. mcp_agent/app.py +0 -175
  162. mcp_agent/core/__init__.py +0 -26
  163. mcp_agent/core/prompt.py +0 -191
  164. mcp_agent/event_progress.py +0 -134
  165. mcp_agent/human_input/handler.py +0 -81
  166. mcp_agent/llm/__init__.py +0 -2
  167. mcp_agent/llm/augmented_llm_passthrough.py +0 -232
  168. mcp_agent/llm/augmented_llm_slow.py +0 -53
  169. mcp_agent/llm/providers/__init__.py +0 -8
  170. mcp_agent/llm/providers/augmented_llm_anthropic.py +0 -718
  171. mcp_agent/llm/providers/augmented_llm_google_native.py +0 -496
  172. mcp_agent/llm/providers/sampling_converter_anthropic.py +0 -57
  173. mcp_agent/llm/providers/sampling_converter_openai.py +0 -26
  174. mcp_agent/llm/sampling_format_converter.py +0 -37
  175. mcp_agent/logging/__init__.py +0 -0
  176. mcp_agent/mcp/__init__.py +0 -50
  177. mcp_agent/mcp/helpers/__init__.py +0 -25
  178. mcp_agent/mcp/helpers/content_helpers.py +0 -187
  179. mcp_agent/mcp/interfaces.py +0 -266
  180. mcp_agent/mcp/prompts/__init__.py +0 -0
  181. mcp_agent/mcp/prompts/__main__.py +0 -10
  182. mcp_agent/mcp_server_registry.py +0 -343
  183. mcp_agent/tools/tool_definition.py +0 -14
  184. mcp_agent/ui/console_display.py +0 -790
  185. mcp_agent/ui/console_display_legacy.py +0 -401
  186. {mcp_agent → fast_agent}/agents/workflow/orchestrator_prompts.py +0 -0
  187. {mcp_agent/agents → fast_agent/cli}/__init__.py +0 -0
  188. {mcp_agent → fast_agent}/cli/constants.py +0 -0
  189. {mcp_agent → fast_agent}/core/error_handling.py +0 -0
  190. {mcp_agent → fast_agent}/core/exceptions.py +0 -0
  191. {mcp_agent/cli → fast_agent/core/executor}/__init__.py +0 -0
  192. {mcp_agent → fast_agent/core}/executor/task_registry.py +0 -0
  193. {mcp_agent → fast_agent/core}/executor/workflow_signal.py +0 -0
  194. {mcp_agent → fast_agent}/human_input/form_fields.py +0 -0
  195. {mcp_agent → fast_agent}/llm/prompt_utils.py +0 -0
  196. {mcp_agent/core → fast_agent/llm}/request_params.py +0 -0
  197. {mcp_agent → fast_agent}/mcp/common.py +0 -0
  198. {mcp_agent/executor → fast_agent/mcp/prompts}/__init__.py +0 -0
  199. {mcp_agent → fast_agent}/mcp/prompts/prompt_constants.py +0 -0
  200. {mcp_agent → fast_agent}/py.typed +0 -0
  201. {mcp_agent → fast_agent}/resources/examples/data-analysis/fastagent.config.yaml +0 -0
  202. {mcp_agent → fast_agent}/resources/examples/data-analysis/mount-point/WA_Fn-UseC_-HR-Employee-Attrition.csv +0 -0
  203. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_account_server.py +0 -0
  204. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_forms_server.py +0 -0
  205. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/elicitation_game_server.py +0 -0
  206. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.config.yaml +0 -0
  207. {mcp_agent → fast_agent}/resources/examples/mcp/elicitations/fastagent.secrets.yaml.example +0 -0
  208. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.config.yaml +0 -0
  209. {mcp_agent → fast_agent}/resources/examples/mcp/state-transfer/fastagent.secrets.yaml.example +0 -0
  210. {mcp_agent → fast_agent}/resources/examples/researcher/fastagent.config.yaml +0 -0
  211. {mcp_agent → fast_agent}/resources/examples/tensorzero/.env.sample +0 -0
  212. {mcp_agent → fast_agent}/resources/examples/tensorzero/Makefile +0 -0
  213. {mcp_agent → fast_agent}/resources/examples/tensorzero/README.md +0 -0
  214. {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/clam.jpg +0 -0
  215. {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/crab.png +0 -0
  216. {mcp_agent → fast_agent}/resources/examples/tensorzero/demo_images/shrimp.png +0 -0
  217. {mcp_agent → fast_agent}/resources/examples/tensorzero/docker-compose.yml +0 -0
  218. {mcp_agent → fast_agent}/resources/examples/tensorzero/fastagent.config.yaml +0 -0
  219. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/Dockerfile +0 -0
  220. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/entrypoint.sh +0 -0
  221. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/mcp_server.py +0 -0
  222. {mcp_agent → fast_agent}/resources/examples/tensorzero/mcp_server/pyproject.toml +0 -0
  223. {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_schema.json +0 -0
  224. {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/system_template.minijinja +0 -0
  225. {mcp_agent → fast_agent}/resources/examples/tensorzero/tensorzero_config/tensorzero.toml +0 -0
  226. {mcp_agent → fast_agent}/resources/examples/workflows/fastagent.config.yaml +0 -0
  227. {mcp_agent → fast_agent}/resources/examples/workflows/graded_report.md +0 -0
  228. {mcp_agent → fast_agent}/resources/examples/workflows/short_story.md +0 -0
  229. {mcp_agent → fast_agent}/resources/examples/workflows/short_story.txt +0 -0
  230. {mcp_agent → fast_agent/ui}/console.py +0 -0
  231. {mcp_agent/core → fast_agent/ui}/mermaid_utils.py +0 -0
  232. {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.0.dist-info}/WHEEL +0 -0
  233. {fast_agent_mcp-0.2.58.dist-info → fast_agent_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
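Note on the rename: the listing above maps every mcp_agent module to its fast_agent location. A minimal sketch of the corresponding import update, using only paths that appear in the rename map and in the import hunk below (the surrounding code is illustrative, not part of the release):

    # Old (0.2.58)
    # from mcp_agent.llm.augmented_llm import AugmentedLLM
    # from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart

    # New (0.3.0), per the rename map above
    from fast_agent.llm.fastagent_llm import FastAgentLLM
    from fast_agent.types import PromptMessageExtended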
@@ -4,20 +4,32 @@ import re
  import sys
  from dataclasses import dataclass
  from enum import Enum, auto
- from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union, cast
+ from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union

- from mcp.types import ContentBlock, TextContent
- from rich.text import Text
+ from mcp import Tool
+ from mcp.types import (
+     CallToolRequest,
+     CallToolRequestParams,
+     ContentBlock,
+     TextContent,
+ )

- from mcp_agent.core.exceptions import ProviderKeyError
- from mcp_agent.core.request_params import RequestParams
- from mcp_agent.event_progress import ProgressAction
- from mcp_agent.llm.augmented_llm import AugmentedLLM
- from mcp_agent.llm.provider_types import Provider
- from mcp_agent.llm.usage_tracking import TurnUsage
- from mcp_agent.logging.logger import get_logger
- from mcp_agent.mcp.interfaces import ModelT
- from mcp_agent.mcp.prompt_message_multipart import PromptMessageMultipart
+ from fast_agent.core.exceptions import ProviderKeyError
+ from fast_agent.core.logging.logger import get_logger
+ from fast_agent.event_progress import ProgressAction
+ from fast_agent.interfaces import ModelT
+ from fast_agent.llm.fastagent_llm import FastAgentLLM
+ from fast_agent.llm.provider_types import Provider
+ from fast_agent.llm.usage_tracking import TurnUsage
+ from fast_agent.types import PromptMessageExtended, RequestParams
+ from fast_agent.types.llm_stop_reason import LlmStopReason
+
+ # Mapping from Bedrock's snake_case stop reasons to MCP's camelCase
+ BEDROCK_TO_MCP_STOP_REASON = {
+     "end_turn": LlmStopReason.END_TURN.value,
+     "stop_sequence": LlmStopReason.STOP_SEQUENCE.value,
+     "max_tokens": LlmStopReason.MAX_TOKENS.value,
+ }

  if TYPE_CHECKING:
      from mcp import ListToolsResult
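A minimal sketch of how the new BEDROCK_TO_MCP_STOP_REASON table could translate a Converse API stop reason; the shape of the response dict here is an assumption for illustration, only the mapping itself comes from the diff:

    stop_reason = response.get("stopReason", "end_turn")  # assumed response shape, e.g. "max_tokens"
    mcp_stop_reason = BEDROCK_TO_MCP_STOP_REASON.get(stop_reason, stop_reason)  # -> "maxTokens"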
@@ -32,11 +44,6 @@ except ImportError:
      NoCredentialsError = Exception


- from mcp.types import (
-     CallToolRequest,
-     CallToolRequestParams,
- )
-
  DEFAULT_BEDROCK_MODEL = "amazon.nova-lite-v1:0"


@@ -117,7 +124,7 @@ class ModelCapabilities:
      supports_tools: bool | None = None  # True=yes, False=no, None=unknown


- class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
+ class BedrockLLM(FastAgentLLM[BedrockMessageParam, BedrockMessage]):
      """
      AWS Bedrock implementation of AugmentedLLM using the Converse API.
      Supports all Bedrock models including Nova, Claude, Meta, etc.
@@ -166,7 +173,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          Uses the centralized discovery in bedrock_utils; no regex, no fallbacks.
          Gracefully handles environments without AWS access by returning False.
          """
-         from mcp_agent.llm.providers.bedrock_utils import all_bedrock_models
+         from fast_agent.llm.provider.bedrock.bedrock_utils import all_bedrock_models

          try:
              available = set(all_bedrock_models(prefix=""))
@@ -310,9 +317,6 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          """
          bedrock_tools = []

-         # Create mapping from cleaned names to original names for tool execution
-         self.tool_name_mapping = {}
-
          self.logger.debug(f"Converting {len(tools.tools)} MCP tools to Nova format")

          for tool in tools.tools:
@@ -355,20 +359,20 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
              ):
                  nova_schema["required"] = input_schema["required"]

-             # Apply tool name policy (e.g., Nova requires hyphen→underscore)
-             policy = getattr(self, "_tool_name_policy_for_conversion", "preserve")
-             if policy == "replace_hyphens_with_underscores":
-                 clean_name = tool.name.replace("-", "_")
-             else:
-                 clean_name = tool.name
+             # Use the tool name mapping that was already built in _bedrock_completion
+             # This ensures consistent transformation logic across the codebase
+             clean_name = None
+             for mapped_name, original_name in tool_name_mapping.items():
+                 if original_name == tool.name:
+                     clean_name = mapped_name
+                     break

-             # Store mapping from cleaned name back to original MCP name
-             # This is needed because:
-             # 1. Nova receives tools with cleaned names (utils_get_current_date_information)
-             # 2. Nova calls tools using cleaned names
-             # 3. But MCP server expects original names (utils-get_current_date_information)
-             # 4. So we map back: utils_get_current_date_information -> utils-get_current_date_information
-             self.tool_name_mapping[clean_name] = tool.name
+             if clean_name is None:
+                 # Fallback if mapping not found (shouldn't happen)
+                 clean_name = tool.name
+                 self.logger.warning(
+                     f"Tool name mapping not found for {tool.name}, using original name"
+                 )

              bedrock_tool = {
                  "toolSpec": {
@@ -469,7 +473,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          return bedrock_tools

      def _parse_system_prompt_tool_response(
-         self, processed_response: Dict[str, Any]
+         self, processed_response: Dict[str, Any], model: str
      ) -> List[Dict[str, Any]]:
          """Parse system prompt tool response format: function calls in text."""
          # Extract text content from the response
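The tool-conversion hunk above now looks cleaned names up in the prebuilt tool_name_mapping, which maps cleaned names back to the original MCP names. A standalone sketch of that reverse lookup, with illustrative names taken from the removed comments (not a definitive implementation):

    tool_name_mapping = {
        "utils_get_current_date_information": "utils-get_current_date_information",
    }
    original = "utils-get_current_date_information"
    clean_name = next(
        (mapped for mapped, orig in tool_name_mapping.items() if orig == original),
        original,  # fallback: keep the original name when no mapping entry exists
    )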
@@ -518,7 +522,53 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          if tool_calls:
              return tool_calls

-         # Second try: find the "Tool Call:" format
+         # Second try: find the "Action:" format (commonly used by Nova models)
+         action_pattern = r"Action:\s*([^(]+)\(([^)]*)\)"
+         action_matches = re.findall(action_pattern, text_content)
+         if action_matches:
+             for i, (func_name, args_str) in enumerate(action_matches):
+                 func_name = func_name.strip()
+                 args_str = args_str.strip()
+
+                 # Parse arguments - handle quoted strings and key=value pairs
+                 arguments = {}
+                 if args_str:
+                     try:
+                         # Handle key=value format like location="London"
+                         if "=" in args_str:
+                             # Split by comma, then by = for each part
+                             for arg_part in args_str.split(","):
+                                 if "=" in arg_part:
+                                     key, value = arg_part.split("=", 1)
+                                     key = key.strip()
+                                     value = value.strip().strip("\"'")  # Remove quotes
+                                     arguments[key] = value
+                         else:
+                             # Single value argument - try to map to appropriate parameter name
+                             value = args_str.strip("\"'") if args_str else ""
+                             # Handle common single-parameter functions
+                             if func_name == "check_weather":
+                                 arguments = {"location": value}
+                             else:
+                                 # Generic fallback
+                                 arguments = {"value": value}
+                     except Exception as e:
+                         self.logger.warning(f"Failed to parse Action arguments: {args_str} - {e}")
+                         arguments = {"value": args_str}
+
+                 tool_calls.append(
+                     {
+                         "type": "system_prompt_tool",
+                         "name": func_name,
+                         "arguments": arguments,
+                         "id": f"system_prompt_{func_name}_{i}",
+                     }
+                 )
+
+             if tool_calls:
+                 return tool_calls
+
+         # Third try: find the "Tool Call:" format
          tool_call_match = re.search(r"Tool Call:\s*(\[.*?\])", text_content, re.DOTALL)
          if tool_call_match:
              json_str = tool_call_match.group(1)
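A self-contained sketch of the "Action:" parsing added above, run against an illustrative model reply (the input string is made up; the regex and key=value handling mirror the hunk):

    import re

    text_content = 'Action: check_weather(location="London")'
    action_pattern = r"Action:\s*([^(]+)\(([^)]*)\)"
    for func_name, args_str in re.findall(action_pattern, text_content):
        arguments = {}
        for arg_part in args_str.split(","):
            if "=" in arg_part:
                key, value = arg_part.split("=", 1)
                arguments[key.strip()] = value.strip().strip("\"'")
        print(func_name.strip(), arguments)  # check_weather {'location': 'London'}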
@@ -606,6 +656,49 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                          f"Failed to parse fallback custom tag format: {function_args_json}"
                      )

+         # Third try: find direct function call format like "function_name(args)"
+         direct_call_pattern = r"^([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]*)\)$"
+         direct_call_match = re.search(direct_call_pattern, text_content.strip())
+         if direct_call_match:
+             func_name, args_str = direct_call_match.groups()
+             func_name = func_name.strip()
+             args_str = args_str.strip()
+
+             # Parse arguments
+             arguments = {}
+             if args_str:
+                 try:
+                     # Handle key=value format like location="London"
+                     if "=" in args_str:
+                         # Split by comma, then by = for each part
+                         for arg_part in args_str.split(","):
+                             if "=" in arg_part:
+                                 key, value = arg_part.split("=", 1)
+                                 key = key.strip()
+                                 value = value.strip().strip("\"'")  # Remove quotes
+                                 arguments[key] = value
+                     else:
+                         # Single value argument - try to map to appropriate parameter name
+                         value = args_str.strip("\"'") if args_str else ""
+                         # Handle common single-parameter functions
+                         if func_name == "check_weather":
+                             arguments = {"location": value}
+                         else:
+                             # Generic fallback
+                             arguments = {"value": value}
+                 except Exception as e:
+                     self.logger.warning(f"Failed to parse direct call arguments: {args_str} - {e}")
+                     arguments = {"value": args_str}
+
+             return [
+                 {
+                     "type": "system_prompt_tool",
+                     "name": func_name,
+                     "arguments": arguments,
+                     "id": f"system_prompt_{func_name}_0",
+                 }
+             ]
+
          return []

      def _parse_anthropic_tool_response(
@@ -638,7 +731,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

          # Choose parser strictly by cached schema
          if schema == ToolSchemaType.SYSTEM_PROMPT:
-             return self._parse_system_prompt_tool_response(processed_response)
+             return self._parse_system_prompt_tool_response(processed_response, model)
          if schema == ToolSchemaType.ANTHROPIC:
              return self._parse_anthropic_tool_response(processed_response)

@@ -697,8 +790,145 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
              except Exception:
                  pass

+         # Final fallback: try system prompt parsing regardless of cached schema
+         # This handles cases where native tool calling failed but model generated system prompt format
+         try:
+             return self._parse_system_prompt_tool_response(processed_response, model)
+         except Exception:
+             pass
+
          return []

+     def _build_tool_calls_dict(
+         self, parsed_tools: List[Dict[str, Any]]
+     ) -> Dict[str, CallToolRequest]:
+         """
+         Convert parsed tools to CallToolRequest dict for external execution.
+
+         Args:
+             parsed_tools: List of parsed tool dictionaries from _parse_tool_response()
+
+         Returns:
+             Dictionary mapping tool_use_id to CallToolRequest objects
+         """
+         tool_calls = {}
+         for parsed_tool in parsed_tools:
+             # Use tool name directly, but map back to original if a mapping is available
+             tool_name = parsed_tool["name"]
+             try:
+                 mapping = getattr(self, "tool_name_mapping", None)
+                 if isinstance(mapping, dict):
+                     tool_name = mapping.get(tool_name, tool_name)
+             except Exception:
+                 pass
+
+             # Create CallToolRequest
+             tool_call = CallToolRequest(
+                 method="tools/call",
+                 params=CallToolRequestParams(
+                     name=tool_name, arguments=parsed_tool.get("arguments", {})
+                 ),
+             )
+             tool_calls[parsed_tool["id"]] = tool_call
+         return tool_calls
+
+     def _map_bedrock_stop_reason(self, bedrock_stop_reason: str) -> LlmStopReason:
+         """
+         Map Bedrock stop reasons to LlmStopReason enum.
+
+         Args:
+             bedrock_stop_reason: Stop reason from Bedrock API
+
+         Returns:
+             Corresponding LlmStopReason enum value
+         """
+         if bedrock_stop_reason == "tool_use":
+             return LlmStopReason.TOOL_USE
+         elif bedrock_stop_reason == "end_turn":
+             return LlmStopReason.END_TURN
+         elif bedrock_stop_reason == "stop_sequence":
+             return LlmStopReason.STOP_SEQUENCE
+         elif bedrock_stop_reason == "max_tokens":
+             return LlmStopReason.MAX_TOKENS
+         else:
+             # Default to END_TURN for unknown stop reasons, but log for debugging
+             self.logger.warning(
+                 f"Unknown Bedrock stop reason: {bedrock_stop_reason}, defaulting to END_TURN"
+             )
+             return LlmStopReason.END_TURN
+
+     def _convert_multipart_to_bedrock_message(
+         self, msg: PromptMessageExtended
+     ) -> BedrockMessageParam:
+         """
+         Convert a PromptMessageExtended to Bedrock message parameter format.
+         Handles tool results and regular content.
+
+         Args:
+             msg: PromptMessageExtended message to convert
+
+         Returns:
+             Bedrock message parameter dictionary
+         """
+         bedrock_msg = {"role": msg.role, "content": []}
+
+         # Handle tool results first (if present)
+         if msg.tool_results:
+             # Get the cached schema type to determine result formatting
+             caps = self.capabilities.get(self.model) or ModelCapabilities()
+             # Check if any tool ID indicates system prompt format
+             has_system_prompt_tools = any(
+                 tool_id.startswith("system_prompt_") for tool_id in msg.tool_results.keys()
+             )
+             is_system_prompt_schema = (
+                 caps.schema == ToolSchemaType.SYSTEM_PROMPT or has_system_prompt_tools
+             )
+
+             if is_system_prompt_schema:
+                 # For system prompt models: format as human-readable text
+                 tool_result_parts = []
+                 for tool_id, tool_result in msg.tool_results.items():
+                     result_text = "".join(
+                         part.text for part in tool_result.content if isinstance(part, TextContent)
+                     )
+                     result_payload = {
+                         "tool_name": tool_id,  # Use tool_id as name for system prompt
+                         "status": "error" if tool_result.isError else "success",
+                         "result": result_text,
+                     }
+                     tool_result_parts.append(json.dumps(result_payload))
+
+                 if tool_result_parts:
+                     full_result_text = f"Tool Results:\n{', '.join(tool_result_parts)}"
+                     bedrock_msg["content"].append({"type": "text", "text": full_result_text})
+             else:
+                 # For Nova/Anthropic models: use structured tool_result format
+                 for tool_id, tool_result in msg.tool_results.items():
+                     result_content_blocks = []
+                     if tool_result.content:
+                         for part in tool_result.content:
+                             if isinstance(part, TextContent):
+                                 result_content_blocks.append({"text": part.text})
+
+                     if not result_content_blocks:
+                         result_content_blocks.append({"text": "[No content in tool result]"})
+
+                     bedrock_msg["content"].append(
+                         {
+                             "type": "tool_result",
+                             "tool_use_id": tool_id,
+                             "content": result_content_blocks,
+                             "status": "error" if tool_result.isError else "success",
+                         }
+                     )
+
+         # Handle regular content
+         for content_item in msg.content:
+             if isinstance(content_item, TextContent):
+                 bedrock_msg["content"].append({"type": "text", "text": content_item.text})
+
+         return bedrock_msg
+
      def _convert_messages_to_bedrock(
          self, messages: List[BedrockMessageParam]
      ) -> List[Dict[str, Any]]:
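A short sketch of the CallToolRequest construction performed by _build_tool_calls_dict above, using a hand-written parsed-tool entry (the values are illustrative; the construction mirrors the hunk):

    from mcp.types import CallToolRequest, CallToolRequestParams

    parsed_tool = {
        "id": "system_prompt_check_weather_0",
        "name": "check_weather",
        "arguments": {"location": "London"},
    }
    tool_call = CallToolRequest(
        method="tools/call",
        params=CallToolRequestParams(name=parsed_tool["name"], arguments=parsed_tool["arguments"]),
    )
    tool_calls = {parsed_tool["id"]: tool_call}  # keyed by tool_use_id for external execution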
@@ -846,8 +1076,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                          self.logger.warning(
                              f"Failed to parse accumulated input as JSON: {accumulated_input} - {e}"
                          )
-                         # If it's not valid JSON, treat it as a string value
-                         tool_use["toolUse"]["input"] = accumulated_input
+                         # If it's not valid JSON, wrap it as a dict to avoid downstream errors
+                         tool_use["toolUse"]["input"] = {"value": accumulated_input}
                      # Clean up the accumulator
                      del tool_use["toolUse"]["_input_accumulator"]
                      continue
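A minimal sketch of the new fallback for non-JSON accumulated tool input (the variable names are illustrative; the wrapping matches the hunk above and the one that follows):

    import json

    accumulated_input = "not valid json"
    try:
        tool_input = json.loads(accumulated_input)
    except json.JSONDecodeError:
        # Wrap raw text in a dict so downstream code can always treat the input as a mapping
        tool_input = {"value": accumulated_input}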
@@ -913,8 +1143,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
                      self.logger.warning(
                          f"Failed to parse final accumulated input as JSON: {accumulated_input} - {e}"
                      )
-                     # If it's not valid JSON, treat it as a string value
-                     tool_use["toolUse"]["input"] = accumulated_input
+                     # If it's not valid JSON, wrap it as a dict to avoid downstream errors
+                     tool_use["toolUse"]["input"] = {"value": accumulated_input}
                  # Clean up the accumulator
                  del tool_use["toolUse"]["_input_accumulator"]

@@ -961,9 +1191,11 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
          self,
          message_param: BedrockMessageParam,
          request_params: RequestParams | None = None,
-     ) -> List[ContentBlock | CallToolRequestParams]:
+         tools: List[Tool] | None = None,
+     ) -> PromptMessageExtended:
          """
          Process a query using Bedrock and available tools.
+         Returns PromptMessageExtended with tool calls for external execution.
          """
          client = self._get_bedrock_runtime_client()

@@ -1001,684 +1233,591 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
1001
1233
  self.logger.debug(f"Traceback: {traceback.format_exc()}")
1002
1234
  tool_list = None
1003
1235
 
1004
- responses: List[ContentBlock] = []
1005
- tool_result_responses: List[ContentBlock] = []
1006
- model = self.default_request_params.model
1007
- # Loop guard for repeated identical tool calls (system-prompt parsing path)
1008
- last_tool_signature: str | None = None
1009
- repeated_tool_calls_count: int = 0
1010
- max_repeated_tool_calls: int = 3
1236
+ # Use tools parameter if provided, otherwise get from aggregator
1237
+ if tools is None:
1238
+ tools = tool_list.tools if tool_list else []
1239
+ elif tool_list is None and tools:
1240
+ # Create a ListToolsResult from the provided tools for conversion
1241
+ from mcp.types import ListToolsResult
1242
+
1243
+ tool_list = ListToolsResult(tools=tools)
1011
1244
 
1012
- for i in range(params.max_iterations):
1013
- self._log_chat_progress(self.chat_turn(), model=model)
1245
+ response_content_blocks: List[ContentBlock] = []
1246
+ model = self.default_request_params.model
1014
1247
 
1015
- # Resolver-free: schema type inferred by runtime fallback below
1248
+ # Single API call - no tool execution loop
1249
+ self._log_chat_progress(self.chat_turn(), model=model)
1016
1250
 
1017
- # Convert messages to Bedrock format
1018
- bedrock_messages = self._convert_messages_to_bedrock(messages)
1251
+ # Convert messages to Bedrock format
1252
+ bedrock_messages = self._convert_messages_to_bedrock(messages)
1019
1253
 
1020
- # Base system text
1021
- base_system_text = self.instruction or params.systemPrompt
1254
+ # Base system text
1255
+ base_system_text = self.instruction or params.systemPrompt
1022
1256
 
1023
- # Determine tool schema fallback order and caches
1024
- caps = self.capabilities.get(model) or ModelCapabilities()
1025
- if caps.schema and caps.schema != ToolSchemaType.NONE:
1257
+ # Determine tool schema fallback order and caches
1258
+ caps = self.capabilities.get(model) or ModelCapabilities()
1259
+ if caps.schema and caps.schema != ToolSchemaType.NONE:
1260
+ # Special case: Force Mistral 7B to try SYSTEM_PROMPT instead of cached DEFAULT
1261
+ if (
1262
+ model == "mistral.mistral-7b-instruct-v0:2"
1263
+ and caps.schema == ToolSchemaType.DEFAULT
1264
+ ):
1265
+ print(
1266
+ f"🔧 FORCING SYSTEM_PROMPT for {model} (was cached as DEFAULT)",
1267
+ file=sys.stderr,
1268
+ flush=True,
1269
+ )
1270
+ schema_order = [ToolSchemaType.SYSTEM_PROMPT, ToolSchemaType.DEFAULT]
1271
+ else:
1026
1272
  schema_order = [caps.schema]
1273
+ else:
1274
+ # Restore original fallback order: Anthropic models try anthropic first, others skip it
1275
+ if model.startswith("anthropic."):
1276
+ schema_order = [
1277
+ ToolSchemaType.ANTHROPIC,
1278
+ ToolSchemaType.DEFAULT,
1279
+ ToolSchemaType.SYSTEM_PROMPT,
1280
+ ]
1281
+ elif model == "mistral.mistral-7b-instruct-v0:2":
1282
+ # Force Mistral 7B to try SYSTEM_PROMPT first (it doesn't work well with DEFAULT)
1283
+ schema_order = [
1284
+ ToolSchemaType.SYSTEM_PROMPT,
1285
+ ToolSchemaType.DEFAULT,
1286
+ ]
1027
1287
  else:
1028
- # Restore original fallback order: Anthropic models try anthropic first, others skip it
1029
- if model.startswith("anthropic."):
1030
- schema_order = [
1031
- ToolSchemaType.ANTHROPIC,
1032
- ToolSchemaType.DEFAULT,
1033
- ToolSchemaType.SYSTEM_PROMPT,
1034
- ]
1035
- else:
1036
- schema_order = [
1037
- ToolSchemaType.DEFAULT,
1038
- ToolSchemaType.SYSTEM_PROMPT,
1039
- ]
1040
-
1041
- # Track whether we changed system mode cache this turn
1042
- tried_system_fallback = False
1288
+ schema_order = [
1289
+ ToolSchemaType.DEFAULT,
1290
+ ToolSchemaType.SYSTEM_PROMPT,
1291
+ ]
1043
1292
 
1044
- processed_response = None # type: ignore[assignment]
1045
- last_error_msg = None
1293
+ # Track whether we changed system mode cache this turn
1294
+ tried_system_fallback = False
1046
1295
 
1047
- for schema_choice in schema_order:
1048
- # Fresh messages per attempt
1049
- converse_args = {"modelId": model, "messages": [dict(m) for m in bedrock_messages]}
1296
+ processed_response = None # type: ignore[assignment]
1297
+ last_error_msg = None
1050
1298
 
1051
- # Build tools representation for this schema
1052
- tools_payload: Union[List[Dict[str, Any]], str, None] = None
1053
- if tool_list and tool_list.tools:
1054
- # Build tool name mapping once per schema attempt
1055
- name_policy = (
1056
- self.capabilities.get(model) or ModelCapabilities()
1057
- ).tool_name_policy or ToolNamePolicy.PRESERVE
1058
- tool_name_mapping = self._build_tool_name_mapping(tool_list, name_policy)
1299
+ for schema_choice in schema_order:
1300
+ # Fresh messages per attempt
1301
+ converse_args = {"modelId": model, "messages": [dict(m) for m in bedrock_messages]}
1059
1302
 
1060
- # Store mapping for tool execution
1061
- self.tool_name_mapping = tool_name_mapping
1303
+ # Build tools representation for this schema
1304
+ tools_payload: Union[List[Dict[str, Any]], str, None] = None
1305
+ # Get tool name policy (needed even when no tools for cache logic)
1306
+ name_policy = (
1307
+ self.capabilities.get(model) or ModelCapabilities()
1308
+ ).tool_name_policy or ToolNamePolicy.PRESERVE
1062
1309
 
1063
- if schema_choice == ToolSchemaType.ANTHROPIC:
1064
- tools_payload = self._convert_tools_anthropic_format(
1065
- tool_list, tool_name_mapping
1066
- )
1067
- elif schema_choice == ToolSchemaType.DEFAULT:
1068
- # Set tool name policy for Nova conversion
1069
- self._tool_name_policy_for_conversion = (
1070
- "replace_hyphens_with_underscores"
1071
- if name_policy == ToolNamePolicy.UNDERSCORES
1072
- else "preserve"
1073
- )
1074
- tools_payload = self._convert_tools_nova_format(
1075
- tool_list, tool_name_mapping
1076
- )
1077
- elif schema_choice == ToolSchemaType.SYSTEM_PROMPT:
1078
- tools_payload = self._convert_tools_system_prompt_format(
1079
- tool_list, tool_name_mapping
1080
- )
1310
+ if tool_list and tool_list.tools:
1311
+ # Build tool name mapping once per schema attempt
1312
+ tool_name_mapping = self._build_tool_name_mapping(tool_list, name_policy)
1081
1313
 
1082
- # System prompt handling with cache
1083
- system_mode = (
1084
- self.capabilities.get(model) or ModelCapabilities()
1085
- ).system_mode or SystemMode.SYSTEM
1086
- system_text = base_system_text
1314
+ # Store mapping for tool execution
1315
+ self.tool_name_mapping = tool_name_mapping
1087
1316
 
1088
- if (
1089
- schema_choice == ToolSchemaType.SYSTEM_PROMPT
1090
- and isinstance(tools_payload, str)
1091
- and tools_payload
1092
- ):
1093
- system_text = (
1094
- f"{system_text}\n\n{tools_payload}" if system_text else tools_payload
1317
+ if schema_choice == ToolSchemaType.ANTHROPIC:
1318
+ tools_payload = self._convert_tools_anthropic_format(
1319
+ tool_list, tool_name_mapping
1320
+ )
1321
+ elif schema_choice == ToolSchemaType.DEFAULT:
1322
+ tools_payload = self._convert_tools_nova_format(tool_list, tool_name_mapping)
1323
+ elif schema_choice == ToolSchemaType.SYSTEM_PROMPT:
1324
+ tools_payload = self._convert_tools_system_prompt_format(
1325
+ tool_list, tool_name_mapping
1095
1326
  )
1096
1327
 
1097
- if system_text:
1098
- if system_mode == SystemMode.SYSTEM:
1099
- converse_args["system"] = [{"text": system_text}]
1100
- self.logger.debug(
1101
- f"Attempting with system param for {model} and schema={schema_choice}"
1102
- )
1103
- else:
1104
- # inject
1105
- if (
1106
- converse_args["messages"]
1107
- and converse_args["messages"][0].get("role") == "user"
1108
- ):
1109
- first_message = converse_args["messages"][0]
1110
- if first_message.get("content") and len(first_message["content"]) > 0:
1111
- original_text = first_message["content"][0].get("text", "")
1112
- first_message["content"][0]["text"] = (
1113
- f"System: {system_text}\n\nUser: {original_text}"
1114
- )
1115
- self.logger.debug(
1116
- "Injected system prompt into first user message (cached mode)"
1117
- )
1118
-
1119
- # Tools wiring
1120
- if (
1121
- schema_choice in (ToolSchemaType.ANTHROPIC, ToolSchemaType.DEFAULT)
1122
- and isinstance(tools_payload, list)
1123
- and tools_payload
1124
- ):
1125
- converse_args["toolConfig"] = {"tools": tools_payload}
1126
-
1127
- # Inference configuration and overrides
1128
- inference_config: Dict[str, Any] = {}
1129
- if params.maxTokens is not None:
1130
- inference_config["maxTokens"] = params.maxTokens
1131
- if params.stopSequences:
1132
- inference_config["stopSequences"] = params.stopSequences
1133
-
1134
- # Check if reasoning should be enabled
1135
- reasoning_budget = 0
1136
- if self._reasoning_effort and self._reasoning_effort != ReasoningEffort.MINIMAL:
1137
- # Convert string to enum if needed
1138
- if isinstance(self._reasoning_effort, str):
1139
- try:
1140
- effort_enum = ReasoningEffort(self._reasoning_effort)
1141
- except ValueError:
1142
- effort_enum = ReasoningEffort.MINIMAL
1143
- else:
1144
- effort_enum = self._reasoning_effort
1145
-
1146
- if effort_enum != ReasoningEffort.MINIMAL:
1147
- reasoning_budget = REASONING_EFFORT_BUDGETS.get(effort_enum, 0)
1148
-
1149
- # Handle temperature and reasoning configuration
1150
- # AWS docs: "Thinking isn't compatible with temperature, top_p, or top_k modifications"
1151
- reasoning_enabled = False
1152
- if reasoning_budget > 0:
1153
- # Check if this model supports reasoning (with caching)
1154
- cached_reasoning = (
1155
- self.capabilities.get(model) or ModelCapabilities()
1156
- ).reasoning_support
1157
- if cached_reasoning == "supported":
1158
- # We know this model supports reasoning
1159
- converse_args["performanceConfig"] = {
1160
- "reasoning": {"maxReasoningTokens": reasoning_budget}
1161
- }
1162
- reasoning_enabled = True
1163
- elif cached_reasoning != "unsupported":
1164
- # Unknown - we'll try reasoning and fallback if needed
1165
- converse_args["performanceConfig"] = {
1166
- "reasoning": {"maxReasoningTokens": reasoning_budget}
1167
- }
1168
- reasoning_enabled = True
1328
+ # System prompt handling with cache
1329
+ system_mode = (
1330
+ self.capabilities.get(model) or ModelCapabilities()
1331
+ ).system_mode or SystemMode.SYSTEM
1332
+ system_text = base_system_text
1169
1333
 
1170
- if not reasoning_enabled:
1171
- # No reasoning - apply temperature if provided
1172
- if params.temperature is not None:
1173
- inference_config["temperature"] = params.temperature
1334
+ if (
1335
+ schema_choice == ToolSchemaType.SYSTEM_PROMPT
1336
+ and isinstance(tools_payload, str)
1337
+ and tools_payload
1338
+ ):
1339
+ system_text = f"{system_text}\n\n{tools_payload}" if system_text else tools_payload
1174
1340
 
1175
- # Nova-specific recommendations (when not using reasoning)
1176
- if model and "nova" in (model or "").lower() and reasoning_budget == 0:
1177
- inference_config.setdefault("topP", 1.0)
1178
- # Merge/attach additionalModelRequestFields for topK
1179
- existing_amrf = converse_args.get("additionalModelRequestFields", {})
1180
- merged_amrf = {**existing_amrf, **{"inferenceConfig": {"topK": 1}}}
1181
- converse_args["additionalModelRequestFields"] = merged_amrf
1341
+ # Cohere-specific nudge: force exact echo of tool result text on final answer
1342
+ if (
1343
+ schema_choice == ToolSchemaType.SYSTEM_PROMPT
1344
+ and isinstance(model, str)
1345
+ and model.startswith("cohere.")
1346
+ ):
1347
+ cohere_nudge = (
1348
+ "FINAL ANSWER RULES (STRICT):\n"
1349
+ "- When a tool result is provided, your final answer MUST be exactly the raw tool result text.\n"
1350
+ "- Do not add any extra words, punctuation, qualifiers, or phrases (e.g., 'according to the tool').\n"
1351
+ "- Example: If tool result text is 'It"
1352
+ "s sunny in London', your final answer must be exactly: It"
1353
+ "s sunny in London\n"
1354
+ )
1355
+ system_text = f"{system_text}\n\n{cohere_nudge}" if system_text else cohere_nudge
1182
1356
 
1183
- # Note: resolver default inference overrides removed; keep minimal Nova heuristic above.
1357
+ # Llama3-specific nudge: prevent paraphrasing and extra tool calls
1358
+ if (
1359
+ schema_choice == ToolSchemaType.SYSTEM_PROMPT
1360
+ and isinstance(model, str)
1361
+ and model.startswith("meta.llama3")
1362
+ ):
1363
+ llama_nudge = (
1364
+ "TOOL RESPONSE RULES:\n"
1365
+ "- After receiving a tool result, immediately output ONLY the exact tool result text.\n"
1366
+ "- Do not call additional tools or add commentary.\n"
1367
+ "- Do not paraphrase or modify the tool result in any way."
1368
+ )
1369
+ system_text = f"{system_text}\n\n{llama_nudge}" if system_text else llama_nudge
1184
1370
 
1185
- if inference_config:
1186
- converse_args["inferenceConfig"] = inference_config
1371
+ # Mistral-specific nudge: prevent tool calling loops and accept tool results
1372
+ if (
1373
+ schema_choice == ToolSchemaType.SYSTEM_PROMPT
1374
+ and isinstance(model, str)
1375
+ and model.startswith("mistral.")
1376
+ ):
1377
+ mistral_nudge = (
1378
+ "TOOL EXECUTION RULES:\n"
1379
+ "- Call each tool only ONCE per conversation turn.\n"
1380
+ "- Accept and trust all tool results - do not question or retry them.\n"
1381
+ "- After receiving a tool result, provide a direct answer based on that result.\n"
1382
+ "- Do not call the same tool multiple times or call additional tools unless specifically requested.\n"
1383
+ "- Tool results are always valid - do not attempt to validate or correct them."
1384
+ )
1385
+ system_text = f"{system_text}\n\n{mistral_nudge}" if system_text else mistral_nudge
1187
1386
 
1188
- # Decide streaming vs non-streaming (resolver-free with runtime detection + cache)
1189
- has_tools: bool = False
1190
- try:
1191
- has_tools = bool(tools_payload) and bool(
1192
- (isinstance(tools_payload, list) and len(tools_payload) > 0)
1193
- or (isinstance(tools_payload, str) and tools_payload.strip())
1387
+ if system_text:
1388
+ if system_mode == SystemMode.SYSTEM:
1389
+ converse_args["system"] = [{"text": system_text}]
1390
+ self.logger.debug(
1391
+ f"Attempting with system param for {model} and schema={schema_choice}"
1194
1392
  )
1393
+ else:
1394
+ # inject
1395
+ if (
1396
+ converse_args["messages"]
1397
+ and converse_args["messages"][0].get("role") == "user"
1398
+ ):
1399
+ first_message = converse_args["messages"][0]
1400
+ if first_message.get("content") and len(first_message["content"]) > 0:
1401
+ original_text = first_message["content"][0].get("text", "")
1402
+ first_message["content"][0]["text"] = (
1403
+ f"System: {system_text}\n\nUser: {original_text}"
1404
+ )
1405
+ self.logger.debug(
1406
+ "Injected system prompt into first user message (cached mode)"
1407
+ )
1408
+
1409
+ # Tools wiring
1410
+ # Always include toolConfig if we have tools OR if there are tool results in the conversation
1411
+ has_tool_results = False
1412
+ for msg in bedrock_messages:
1413
+ if isinstance(msg, dict) and msg.get("content"):
1414
+ for content in msg["content"]:
1415
+ if isinstance(content, dict) and "toolResult" in content:
1416
+ has_tool_results = True
1417
+ break
1418
+ if has_tool_results:
1419
+ break
1195
1420
 
1196
- # Force non-streaming for structured-output flows (one-shot)
1197
- force_non_streaming = False
1198
- if self._force_non_streaming_once:
1199
- force_non_streaming = True
1200
- self._force_non_streaming_once = False
1421
+ if (
1422
+ schema_choice in (ToolSchemaType.ANTHROPIC, ToolSchemaType.DEFAULT)
1423
+ and isinstance(tools_payload, list)
1424
+ and tools_payload
1425
+ ):
1426
+ # Include tools only when we have actual tools to provide
1427
+ converse_args["toolConfig"] = {"tools": tools_payload}
1428
+
1429
+ # Inference configuration and overrides
1430
+ inference_config: Dict[str, Any] = {}
1431
+ if params.maxTokens is not None:
1432
+ inference_config["maxTokens"] = params.maxTokens
1433
+ if params.stopSequences:
1434
+ inference_config["stopSequences"] = params.stopSequences
1435
+
1436
+ # Check if reasoning should be enabled
1437
+ reasoning_budget = 0
1438
+ if self._reasoning_effort and self._reasoning_effort != ReasoningEffort.MINIMAL:
1439
+ # Convert string to enum if needed
1440
+ if isinstance(self._reasoning_effort, str):
1441
+ try:
1442
+ effort_enum = ReasoningEffort(self._reasoning_effort)
1443
+ except ValueError:
1444
+ effort_enum = ReasoningEffort.MINIMAL
1445
+ else:
1446
+ effort_enum = self._reasoning_effort
1447
+
1448
+ if effort_enum != ReasoningEffort.MINIMAL:
1449
+ reasoning_budget = REASONING_EFFORT_BUDGETS.get(effort_enum, 0)
1201
1450
 
1202
- # Evaluate cache for streaming-with-tools
1203
- cache_pref = (
1204
- self.capabilities.get(model) or ModelCapabilities()
1205
- ).stream_with_tools
1206
- use_streaming = True
1207
- attempted_streaming = False
1451
+ # Handle temperature and reasoning configuration
1452
+ # AWS docs: "Thinking isn't compatible with temperature, top_p, or top_k modifications"
1453
+ reasoning_enabled = False
1454
+ if reasoning_budget > 0:
1455
+ # Check if this model supports reasoning (with caching)
1456
+ cached_reasoning = (
1457
+ self.capabilities.get(model) or ModelCapabilities()
1458
+ ).reasoning_support
1459
+ if cached_reasoning == "supported":
1460
+ # We know this model supports reasoning
1461
+ converse_args["performanceConfig"] = {
1462
+ "reasoning": {"maxReasoningTokens": reasoning_budget}
1463
+ }
1464
+ reasoning_enabled = True
1465
+ elif cached_reasoning != "unsupported":
1466
+ # Unknown - we'll try reasoning and fallback if needed
1467
+ converse_args["performanceConfig"] = {
1468
+ "reasoning": {"maxReasoningTokens": reasoning_budget}
1469
+ }
1470
+ reasoning_enabled = True
1471
+
1472
+ if not reasoning_enabled:
1473
+ # No reasoning - apply temperature if provided
1474
+ if params.temperature is not None:
1475
+ inference_config["temperature"] = params.temperature
1476
+
1477
+ # Nova-specific recommendations (when not using reasoning)
1478
+ if model and "nova" in (model or "").lower() and reasoning_budget == 0:
1479
+ inference_config.setdefault("topP", 1.0)
1480
+ # Merge/attach additionalModelRequestFields for topK
1481
+ existing_amrf = converse_args.get("additionalModelRequestFields", {})
1482
+ merged_amrf = {**existing_amrf, **{"inferenceConfig": {"topK": 1}}}
1483
+ converse_args["additionalModelRequestFields"] = merged_amrf
1484
+
1485
+ if inference_config:
1486
+ converse_args["inferenceConfig"] = inference_config
1487
+
1488
+ # Decide streaming vs non-streaming (resolver-free with runtime detection + cache)
1489
+ has_tools: bool = False
1490
+ try:
1491
+ has_tools = bool(tools_payload) and bool(
1492
+ (isinstance(tools_payload, list) and len(tools_payload) > 0)
1493
+ or (isinstance(tools_payload, str) and tools_payload.strip())
1494
+ )
1208
1495
 
1209
- if force_non_streaming:
1496
+ # Force non-streaming for structured-output flows (one-shot)
1497
+ force_non_streaming = False
1498
+ if self._force_non_streaming_once:
1499
+ force_non_streaming = True
1500
+ self._force_non_streaming_once = False
1501
+
1502
+ # Evaluate cache for streaming-with-tools
1503
+ cache_pref = (self.capabilities.get(model) or ModelCapabilities()).stream_with_tools
1504
+ use_streaming = True
1505
+ attempted_streaming = False
1506
+
1507
+ if force_non_streaming:
1508
+ use_streaming = False
1509
+ elif has_tools:
1510
+ if cache_pref == StreamPreference.NON_STREAM:
1210
1511
  use_streaming = False
1211
- elif has_tools:
1212
- if cache_pref == StreamPreference.NON_STREAM:
1213
- use_streaming = False
1214
- elif cache_pref == StreamPreference.STREAM_OK:
1215
- use_streaming = True
1216
- else:
1217
- # Unknown: try streaming first, fallback on error
1218
- use_streaming = True
1512
+ elif cache_pref == StreamPreference.STREAM_OK:
1513
+ use_streaming = True
1219
1514
  else:
1515
+ # Unknown: try streaming first, fallback on error
  use_streaming = True

- # Try API call with reasoning fallback
- try:
+ # NEW: For Anthropic schema, when tool results are present in the conversation,
+ # force non-streaming on this second turn to avoid empty streamed replies.
+ if schema_choice == ToolSchemaType.ANTHROPIC and has_tool_results:
+ use_streaming = False
+ self.logger.debug(
+ "Forcing non-streaming for Anthropic second turn with tool results"
+ )
+
+ # Try API call with reasoning fallback
+ try:
+ if not use_streaming:
+ self.logger.debug(
+ f"Using non-streaming API for {model} (schema={schema_choice})"
+ )
+ response = client.converse(**converse_args)
+ processed_response = self._process_non_streaming_response(response, model)
+ else:
+ self.logger.debug(
+ f"Using streaming API for {model} (schema={schema_choice})"
+ )
+ attempted_streaming = True
+ response = client.converse_stream(**converse_args)
+ processed_response = await self._process_stream(response, model)
+ except (ClientError, BotoCoreError) as e:
+ # Check if this is a reasoning-related error
+ if reasoning_budget > 0 and (
+ "reasoning" in str(e).lower() or "performance" in str(e).lower()
+ ):
+ self.logger.debug(
+ f"Model {model} doesn't support reasoning, retrying without: {e}"
+ )
+ caps.reasoning_support = False
+ self.capabilities[model] = caps
+
+ # Remove reasoning and retry
+ if "performanceConfig" in converse_args:
+ del converse_args["performanceConfig"]
+
+ # Apply temperature now that reasoning is disabled
+ if params.temperature is not None:
+ if "inferenceConfig" not in converse_args:
+ converse_args["inferenceConfig"] = {}
+ converse_args["inferenceConfig"]["temperature"] = params.temperature
+
+ # Retry the API call
  if not use_streaming:
- self.logger.debug(
- f"Using non-streaming API for {model} (schema={schema_choice})"
- )
  response = client.converse(**converse_args)
  processed_response = self._process_non_streaming_response(
  response, model
  )
  else:
- self.logger.debug(
- f"Using streaming API for {model} (schema={schema_choice})"
- )
- attempted_streaming = True
  response = client.converse_stream(**converse_args)
  processed_response = await self._process_stream(response, model)
- except (ClientError, BotoCoreError) as e:
- # Check if this is a reasoning-related error
- if reasoning_budget > 0 and (
- "reasoning" in str(e).lower() or "performance" in str(e).lower()
- ):
- self.logger.debug(
- f"Model {model} doesn't support reasoning, retrying without: {e}"
- )
- caps.reasoning_support = False
- self.capabilities[model] = caps
-
- # Remove reasoning and retry
- if "performanceConfig" in converse_args:
- del converse_args["performanceConfig"]
-
- # Apply temperature now that reasoning is disabled
- if params.temperature is not None:
- if "inferenceConfig" not in converse_args:
- converse_args["inferenceConfig"] = {}
- converse_args["inferenceConfig"]["temperature"] = params.temperature
-
- # Retry the API call
- if not use_streaming:
- response = client.converse(**converse_args)
- processed_response = self._process_non_streaming_response(
- response, model
- )
- else:
- response = client.converse_stream(**converse_args)
- processed_response = await self._process_stream(response, model)
- else:
- # Not a reasoning error, re-raise
- raise
-
- # Success: cache the working schema choice if not already cached
- # Only cache schema when tools are present - no tools doesn't predict tool behavior
- if not caps.schema and has_tools:
- caps.schema = ToolSchemaType(schema_choice)
+ else:
+ # Not a reasoning error, re-raise
+ raise

- # Cache successful reasoning if we tried it
- if reasoning_budget > 0 and caps.reasoning_support is not True:
- caps.reasoning_support = True
+ # Success: cache the working schema choice if not already cached
+ # Only cache schema when tools are present - no tools doesn't predict tool behavior
+ if not caps.schema and has_tools:
+ caps.schema = ToolSchemaType(schema_choice)

- # If Nova/default worked and we used preserve but server complains, flip cache for next time
- if (
- schema_choice == ToolSchemaType.DEFAULT
- and getattr(self, "_tool_name_policy_for_conversion", "preserve")
- == "preserve"
- ):
- # Heuristic: if tool names include '-', prefer underscores next time
- try:
- if any("-" in t.name for t in (tool_list.tools if tool_list else [])):
- caps.tool_name_policy = ToolNamePolicy.UNDERSCORES
- except Exception:
- pass
- # Cache streaming-with-tools behavior on success
- if has_tools and attempted_streaming:
- caps.stream_with_tools = StreamPreference.STREAM_OK
- self.capabilities[model] = caps
- break
- except (ClientError, BotoCoreError) as e:
- error_msg = str(e)
- last_error_msg = error_msg
- self.logger.debug(f"Bedrock API error (schema={schema_choice}): {error_msg}")
+ # Cache successful reasoning if we tried it
+ if reasoning_budget > 0 and caps.reasoning_support is not True:
+ caps.reasoning_support = True

- # If streaming with tools failed and cache undecided, fallback to non-streaming and cache
- if has_tools and (caps.stream_with_tools is None):
- try:
- self.logger.debug(
- f"Falling back to non-streaming API for {model} after streaming error"
- )
- response = client.converse(**converse_args)
- processed_response = self._process_non_streaming_response(
- response, model
- )
- caps.stream_with_tools = StreamPreference.NON_STREAM
- if not caps.schema:
- caps.schema = ToolSchemaType(schema_choice)
- self.capabilities[model] = caps
- break
- except (ClientError, BotoCoreError) as e_fallback:
- last_error_msg = str(e_fallback)
- self.logger.debug(
- f"Bedrock API error after non-streaming fallback: {last_error_msg}"
- )
- # continue to other fallbacks (e.g., system inject or next schema)
+ # If Nova/default worked and we used preserve but server complains, flip cache for next time
+ if (
+ schema_choice == ToolSchemaType.DEFAULT
+ and name_policy == ToolNamePolicy.PRESERVE
+ ):
+ # Heuristic: if tool names include '-', prefer underscores next time
+ try:
+ if any("-" in t.name for t in (tool_list.tools if tool_list else [])):
+ caps.tool_name_policy = ToolNamePolicy.UNDERSCORES
+ except Exception:
+ pass
+ # Cache streaming-with-tools behavior on success
+ if has_tools and attempted_streaming:
+ caps.stream_with_tools = StreamPreference.STREAM_OK
+ self.capabilities[model] = caps
+ break
+ except (ClientError, BotoCoreError) as e:
+ error_msg = str(e)
+ last_error_msg = error_msg
+ self.logger.debug(f"Bedrock API error (schema={schema_choice}): {error_msg}")

- # System parameter fallback once per call if system message unsupported
- if (
- not tried_system_fallback
- and system_text
- and system_mode == SystemMode.SYSTEM
- and (
- "system message" in error_msg.lower()
- or "system messages" in error_msg.lower()
+ # If streaming with tools failed and cache undecided, fallback to non-streaming and cache
+ if has_tools and (caps.stream_with_tools is None):
+ try:
+ self.logger.debug(
+ f"Falling back to non-streaming API for {model} after streaming error"
  )
- ):
- tried_system_fallback = True
- caps.system_mode = SystemMode.INJECT
+ response = client.converse(**converse_args)
+ processed_response = self._process_non_streaming_response(response, model)
+ caps.stream_with_tools = StreamPreference.NON_STREAM
+ if not caps.schema:
+ caps.schema = ToolSchemaType(schema_choice)
  self.capabilities[model] = caps
- self.logger.info(
- f"Switching system mode to inject for {model} and retrying same schema"
+ break
+ except (ClientError, BotoCoreError) as e_fallback:
+ last_error_msg = str(e_fallback)
+ self.logger.debug(
+ f"Bedrock API error after non-streaming fallback: {last_error_msg}"
  )
- # Retry the same schema immediately in inject mode
- try:
- # Rebuild messages for inject
- converse_args = {
- "modelId": model,
- "messages": [dict(m) for m in bedrock_messages],
- }
- # inject system into first user
- if (
- converse_args["messages"]
- and converse_args["messages"][0].get("role") == "user"
- ):
- fm = converse_args["messages"][0]
- if fm.get("content") and len(fm["content"]) > 0:
- original_text = fm["content"][0].get("text", "")
- fm["content"][0]["text"] = (
- f"System: {system_text}\n\nUser: {original_text}"
- )
-
- # Re-add tools
- if (
- schema_choice
- in (ToolSchemaType.ANTHROPIC.value, ToolSchemaType.DEFAULT.value)
- and isinstance(tools_payload, list)
- and tools_payload
- ):
- converse_args["toolConfig"] = {"tools": tools_payload}
-
- # Same streaming decision using cache
- has_tools = bool(tools_payload) and bool(
- (isinstance(tools_payload, list) and len(tools_payload) > 0)
- or (isinstance(tools_payload, str) and tools_payload.strip())
- )
- cache_pref = (
- self.capabilities.get(model) or ModelCapabilities()
- ).stream_with_tools
- if cache_pref == StreamPreference.NON_STREAM or not has_tools:
- response = client.converse(**converse_args)
- processed_response = self._process_non_streaming_response(
- response, model
- )
- else:
- response = client.converse_stream(**converse_args)
- processed_response = await self._process_stream(response, model)
- if not caps.schema and has_tools:
- caps.schema = ToolSchemaType(schema_choice)
- self.capabilities[model] = caps
- break
- except (ClientError, BotoCoreError) as e2:
- last_error_msg = str(e2)
- self.logger.debug(
- f"Bedrock API error after system inject fallback: {last_error_msg}"
- )
- # Fall through to next schema
- continue
+ # continue to other fallbacks (e.g., system inject or next schema)

- # For any other error (including tool format errors), continue to next schema
- self.logger.debug(
- f"Continuing to next schema after error with {schema_choice}: {error_msg}"
+ # System parameter fallback once per call if system message unsupported
+ if (
+ not tried_system_fallback
+ and system_text
+ and system_mode == SystemMode.SYSTEM
+ and (
+ "system message" in error_msg.lower()
+ or "system messages" in error_msg.lower()
  )
- continue
-
- if processed_response is None:
- # All attempts failed; mark schema as none to avoid repeated retries this process
- caps.schema = ToolSchemaType.NONE
- self.capabilities[model] = caps
- processed_response = {
- "content": [
- {"text": f"Error during generation: {last_error_msg or 'Unknown error'}"}
- ],
- "stop_reason": "error",
- "usage": {"input_tokens": 0, "output_tokens": 0},
- "model": model,
- "role": "assistant",
- }
-
- # Track usage
- if processed_response.get("usage"):
- try:
- usage = processed_response["usage"]
- turn_usage = TurnUsage(
- provider=Provider.BEDROCK.value,
- model=model,
- input_tokens=usage.get("input_tokens", 0),
- output_tokens=usage.get("output_tokens", 0),
- total_tokens=usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
- raw_usage=usage,
+ ):
+ tried_system_fallback = True
+ caps.system_mode = SystemMode.INJECT
+ self.capabilities[model] = caps
+ self.logger.info(
+ f"Switching system mode to inject for {model} and retrying same schema"
  )
- self.usage_accumulator.add_turn(turn_usage)
- except Exception as e:
- self.logger.warning(f"Failed to track usage: {e}")
-
- self.logger.debug(f"{model} response:", data=processed_response)
-
- # Convert response to message param and add to messages
- response_message_param = self.convert_message_to_message_param(processed_response)
- messages.append(response_message_param)
-
- # Extract text content for responses
- if processed_response.get("content"):
- for content_item in processed_response["content"]:
- if content_item.get("text"):
- responses.append(TextContent(type="text", text=content_item["text"]))
-
- # Handle different stop reasons
- stop_reason = processed_response.get("stop_reason", "end_turn")
-
- # Determine if we should parse for system-prompt tool calls (unified capabilities)
- caps_tmp = self.capabilities.get(model) or ModelCapabilities()
- sys_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT
-
- if sys_prompt_schema and stop_reason == "end_turn":
- # Only parse for tools if text contains actual function call structure
- message_text = ""
- for content_item in processed_response.get("content", []):
- if isinstance(content_item, dict) and content_item.get("type") == "text":
- message_text += content_item.get("text", "")
-
- # Check if there's a tool call in the response
- parsed_tools = self._parse_tool_response(processed_response, model)
- if parsed_tools:
- # Loop guard: if the same single tool call repeats > N times in system-prompt mode, stop
- if len(parsed_tools) == 1:
- # Determine normalized tool name as we would use for execution
- candidate_name = parsed_tools[0]["name"]
- # Map to canonical name if available
- canonical = self.tool_name_mapping.get(candidate_name)
- if not canonical:
- lowered = candidate_name.lower().replace("_", "-")
- for key, original in self.tool_name_mapping.items():
- if lowered == key.lower().replace("_", "-"):
- canonical = original
- break
- normalized_name = canonical or candidate_name
- try:
- args_signature = json.dumps(
- parsed_tools[0].get("arguments", {}), sort_keys=True
- )
- except Exception:
- args_signature = str(parsed_tools[0].get("arguments", {}))
- current_signature = f"{normalized_name}|{args_signature}"
-
- # Identify system-prompt schema mode via unified capabilities
- caps_loop = self.capabilities.get(model) or ModelCapabilities()
- is_system_prompt_schema_loop = (
- caps_loop.schema == ToolSchemaType.SYSTEM_PROMPT
- )
-
- if is_system_prompt_schema_loop:
- if current_signature == last_tool_signature:
- repeated_tool_calls_count += 1
- else:
- repeated_tool_calls_count = 1
- last_tool_signature = current_signature
-
- if repeated_tool_calls_count > max_repeated_tool_calls:
- # Return the last tool result content to avoid infinite loops
- if tool_result_responses:
- return cast(
- "List[ContentBlock | CallToolRequestParams]",
- tool_result_responses,
- )
- # Fallback: return a minimal text indicating no content
- return cast(
- "List[ContentBlock | CallToolRequestParams]",
- [TextContent(text="[No content in tool result]")],
+ # Retry the same schema immediately in inject mode
+ try:
+ # Rebuild messages for inject
+ converse_args = {
+ "modelId": model,
+ "messages": [dict(m) for m in bedrock_messages],
+ }
+ # inject system into first user
+ if (
+ converse_args["messages"]
+ and converse_args["messages"][0].get("role") == "user"
+ ):
+ fm = converse_args["messages"][0]
+ if fm.get("content") and len(fm["content"]) > 0:
+ original_text = fm["content"][0].get("text", "")
+ fm["content"][0]["text"] = (
+ f"System: {system_text}\n\nUser: {original_text}"
  )
- # Override stop_reason to handle as tool_use
- stop_reason = "tool_use"
- self.logger.debug(
- "Detected system prompt tool call, overriding stop_reason to 'tool_use'"
- )
-
- if stop_reason == "end_turn":
- # Extract text for display
- message_text = ""
- for content_item in processed_response.get("content", []):
- if content_item.get("text"):
- message_text += content_item["text"]
-
- await self.show_assistant_message(message_text)
- self.logger.debug(f"Iteration {i}: Stopping because stop_reason is 'end_turn'")
- break
- elif stop_reason == "stop_sequence":
- self.logger.debug(f"Iteration {i}: Stopping because stop_reason is 'stop_sequence'")
- break
- elif stop_reason == "max_tokens":
- self.logger.debug(f"Iteration {i}: Stopping because stop_reason is 'max_tokens'")
- if params.maxTokens is not None:
- message_text = Text(
- f"the assistant has reached the maximum token limit ({params.maxTokens})",
- style="dim green italic",
- )
- else:
- message_text = Text(
- "the assistant has reached the maximum token limit",
- style="dim green italic",
- )
- await self.show_assistant_message(message_text)
- break
- elif stop_reason in ["tool_use", "tool_calls"]:
- # Handle tool use/calls - format depends on model type
- message_text = ""
- for content_item in processed_response.get("content", []):
- if content_item.get("text"):
- message_text += content_item["text"]
-
- # Parse tool calls using model-specific method
- self.logger.info(f"DEBUG: About to parse tool response: {processed_response}")
- parsed_tools = self._parse_tool_response(processed_response, model)
- self.logger.info(f"DEBUG: Parsed tools: {parsed_tools}")
-
- if parsed_tools:
- # Process tool calls and collect results
- tool_results_for_batch = []
- for tool_idx, parsed_tool in enumerate(parsed_tools):
- # The original name is needed to call the tool, which is in tool_name_mapping.
- tool_name_from_model = parsed_tool["name"]
- tool_name = self.tool_name_mapping.get(
- tool_name_from_model, tool_name_from_model
- )

- tool_args = parsed_tool["arguments"]
- tool_use_id = parsed_tool["id"]
+ # Re-add tools
+ if (
+ schema_choice
+ in (ToolSchemaType.ANTHROPIC.value, ToolSchemaType.DEFAULT.value)
+ and isinstance(tools_payload, list)
+ and tools_payload
+ ):
+ converse_args["toolConfig"] = {"tools": tools_payload}

- self.show_tool_call(
- tool_list.tools if tool_list else [], tool_name, tool_args
+ # Same streaming decision using cache
+ has_tools = bool(tools_payload) and bool(
+ (isinstance(tools_payload, list) and len(tools_payload) > 0)
+ or (isinstance(tools_payload, str) and tools_payload.strip())
  )
-
- tool_call_request = CallToolRequest(
- method="tools/call",
- params=CallToolRequestParams(name=tool_name, arguments=tool_args),
+ cache_pref = (
+ self.capabilities.get(model) or ModelCapabilities()
+ ).stream_with_tools
+ if cache_pref == StreamPreference.NON_STREAM or not has_tools:
+ response = client.converse(**converse_args)
+ processed_response = self._process_non_streaming_response(
+ response, model
+ )
+ else:
+ response = client.converse_stream(**converse_args)
+ processed_response = await self._process_stream(response, model)
+ if not caps.schema and has_tools:
+ caps.schema = ToolSchemaType(schema_choice)
+ self.capabilities[model] = caps
+ break
+ except (ClientError, BotoCoreError) as e2:
+ last_error_msg = str(e2)
+ self.logger.debug(
+ f"Bedrock API error after system inject fallback: {last_error_msg}"
  )
+ # Fall through to next schema
+ continue

- # Call the tool and get the result
- result = await self.call_tool(
- request=tool_call_request, tool_call_id=tool_use_id
- )
- # We will also comment out showing the raw tool result to reduce verbosity.
- # self.show_tool_result(result)
-
- # Add each result to our collection
- tool_results_for_batch.append((tool_use_id, result, tool_name))
- responses.extend(result.content)
-
- # Store tool results temporarily - we'll clear responses only if the model
- # generates a follow-up message. This ensures tool results are preserved
- # if the model doesn't generate any follow-up content (like Claude Haiku).
- tool_result_responses = responses.copy()
- responses.clear()
-
- # Decide result formatting based on unified capabilities
- caps_tmp = self.capabilities.get(model) or ModelCapabilities()
- is_system_prompt_schema = caps_tmp.schema == ToolSchemaType.SYSTEM_PROMPT
-
- if is_system_prompt_schema:
- # For system prompt models (like Llama), format results as a simple text message.
- # The model expects to see the results in a human-readable format to continue.
- tool_result_parts = []
- for _, tool_result, tool_name in tool_results_for_batch:
- result_text = "".join(
- [
- part.text
- for part in tool_result.content
- if isinstance(part, TextContent)
- ]
- )
+ # For any other error (including tool format errors), continue to next schema
+ self.logger.debug(
+ f"Continuing to next schema after error with {schema_choice}: {error_msg}"
+ )
+ continue
+
+ if processed_response is None:
+ # All attempts failed; mark schema as none to avoid repeated retries this process
+ caps.schema = ToolSchemaType.NONE
+ self.capabilities[model] = caps
+ processed_response = {
+ "content": [
+ {"text": f"Error during generation: {last_error_msg or 'Unknown error'}"}
+ ],
+ "stop_reason": "error",
+ "usage": {"input_tokens": 0, "output_tokens": 0},
+ "model": model,
+ "role": "assistant",
+ }

- # Create a representation of the tool's output.
- # Using a JSON-like string is a robust way to present this.
- result_payload = {
- "tool_name": tool_name,
- "status": "error" if tool_result.isError else "success",
- "result": result_text,
- }
- tool_result_parts.append(json.dumps(result_payload))
+ # Track usage
+ if processed_response.get("usage"):
+ try:
+ usage = processed_response["usage"]
+ turn_usage = TurnUsage(
+ provider=Provider.BEDROCK.value,
+ model=model,
+ input_tokens=usage.get("input_tokens", 0),
+ output_tokens=usage.get("output_tokens", 0),
+ total_tokens=usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
+ raw_usage=usage,
+ )
+ self.usage_accumulator.add_turn(turn_usage)
+ except Exception as e:
+ self.logger.warning(f"Failed to track usage: {e}")
+
+ self.logger.debug(f"{model} response:", data=processed_response)
+
+ # Convert response to message param and add to messages
+ response_message_param = self.convert_message_to_message_param(processed_response)
+ messages.append(response_message_param)
+
+ # Extract text content for responses
+ if processed_response.get("content"):
+ for content_item in processed_response["content"]:
+ if content_item.get("text"):
+ response_content_blocks.append(
+ TextContent(type="text", text=content_item["text"])
+ )

- if tool_result_parts:
- # Combine all tool results into a single text block.
- full_result_text = f"Tool Results:\n{', '.join(tool_result_parts)}"
- messages.append(
- {
- "role": "user",
- "content": [{"type": "text", "text": full_result_text}],
- }
- )
- else:
- # For native tool-using models (Anthropic, Nova), use the structured 'tool_result' format.
- tool_result_blocks = []
- for tool_id, tool_result, _ in tool_results_for_batch:
- # Convert tool result content into a list of content blocks
- # This mimics the native Anthropic provider's approach.
- result_content_blocks = []
- if tool_result.content:
- for part in tool_result.content:
- if isinstance(part, TextContent):
- result_content_blocks.append({"text": part.text})
- # Note: This can be extended to handle other content types like images
- # For now, we are focusing on making text-based tools work correctly.
-
- # If there's no content, provide a default message.
- if not result_content_blocks:
- result_content_blocks.append(
- {"text": "[No content in tool result]"}
+ # Fallback: if no content returned and the last input contained tool results,
+ # synthesize the assistant reply using the tool result text to preserve behavior.
+ if not response_content_blocks:
+ try:
+ # messages currently includes the appended assistant response; inspect the prior user message
+ last_index = len(messages) - 2 if len(messages) >= 2 else (len(messages) - 1)
+ last_input = messages[last_index] if last_index >= 0 else None
+ if isinstance(last_input, dict):
+ contents = last_input.get("content", []) or []
+ for c in contents:
+ # Handle parameter-level representation
+ if isinstance(c, dict) and c.get("type") == "tool_result":
+ tr_content = c.get("content", []) or []
+ fallback_text = " ".join(
+ part.get("text", "")
+ for part in tr_content
+ if isinstance(part, dict)
+ ).strip()
+ if fallback_text:
+ response_content_blocks.append(
+ TextContent(type="text", text=fallback_text)
+ )
+ break
+ # Handle bedrock-level representation
+ if isinstance(c, dict) and "toolResult" in c:
+ tr = c["toolResult"]
+ tr_content = tr.get("content", []) or []
+ fallback_text = " ".join(
+ part.get("text", "")
+ for part in tr_content
+ if isinstance(part, dict)
+ ).strip()
+ if fallback_text:
+ response_content_blocks.append(
+ TextContent(type="text", text=fallback_text)
  )
+ break
+ except Exception:
+ pass

- # This is the format Bedrock expects for tool results in the Converse API
- tool_result_blocks.append(
- {
- "type": "tool_result",
- "tool_use_id": tool_id,
- "content": result_content_blocks,
- "status": "error" if tool_result.isError else "success",
- }
- )
+ # Handle different stop reasons
+ stop_reason = processed_response.get("stop_reason", "end_turn")

- if tool_result_blocks:
- # Append a single user message with all the tool results for this turn
- messages.append(
- {
- "role": "user",
- "content": tool_result_blocks,
- }
- )
+ # Determine if we should parse for system-prompt tool calls (unified capabilities)
+ caps_tmp = self.capabilities.get(model) or ModelCapabilities()

- continue
- else:
- # No tool uses but stop_reason was tool_use/tool_calls, treat as end_turn
- await self.show_assistant_message(message_text)
- break
- else:
- # Unknown stop reason, continue or break based on content
- message_text = ""
- for content_item in processed_response.get("content", []):
- if content_item.get("text"):
- message_text += content_item["text"]
-
- if message_text:
- await self.show_assistant_message(message_text)
- break
+ # Try to parse system prompt tool calls if we have an end_turn with tools available
+ # This handles cases where native tool calling failed but model generates system prompt format
+ if stop_reason == "end_turn" and tools:
+ # Only parse for tools if text contains actual function call structure
+ message_text = ""
+ for content_item in processed_response.get("content", []):
+ if isinstance(content_item, dict) and "text" in content_item:
+ message_text += content_item.get("text", "")
+
+ # Check if there's a tool call in the response
+ parsed_tools = self._parse_tool_response(processed_response, model)
+ if parsed_tools:
+ # Override stop_reason to handle as tool_use
+ stop_reason = "tool_use"
+ # Update capabilities cache to reflect successful system prompt tool calling
+ if not caps_tmp.schema:
+ caps_tmp.schema = ToolSchemaType.SYSTEM_PROMPT
+ self.capabilities[model] = caps_tmp
+
+ # NEW: Handle tool calls without execution - return them for external handling
+ tool_calls: Dict[str, CallToolRequest] | None = None
+ if stop_reason in ["tool_use", "tool_calls"]:
+ parsed_tools = self._parse_tool_response(processed_response, model)
+ if parsed_tools:
+ tool_calls = self._build_tool_calls_dict(parsed_tools)
+
+ # Map stop reason to LlmStopReason
+ mapped_stop_reason = self._map_bedrock_stop_reason(stop_reason)

  # Update history
  if params.use_history:
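
For orientation between hunks: the loop above amounts to "try the Converse streaming API first, fall back to the blocking call on error, and remember the per-model outcome for the next turn". A minimal standalone sketch of that pattern using boto3 directly; the cache dict and helper name below are illustrative assumptions, not fast-agent's implementation.

# Illustrative sketch only; not the fast_agent implementation.
import boto3
from botocore.exceptions import BotoCoreError, ClientError

_prefers_non_streaming: dict[str, bool] = {}  # per-model cache, akin to StreamPreference

def converse_with_fallback(model_id: str, converse_args: dict) -> str:
    client = boto3.client("bedrock-runtime")
    if not _prefers_non_streaming.get(model_id):
        try:
            # Streaming first: concatenate text deltas from the event stream.
            stream = client.converse_stream(**converse_args)
            return "".join(
                event["contentBlockDelta"]["delta"].get("text", "")
                for event in stream["stream"]
                if "contentBlockDelta" in event
            )
        except (ClientError, BotoCoreError):
            _prefers_non_streaming[model_id] = True  # remember the failure for next time
    # Non-streaming fallback (also used once the cache says so).
    response = client.converse(**converse_args)
    return "".join(
        block.get("text", "") for block in response["output"]["message"]["content"]
    )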
@@ -1695,63 +1834,39 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

  self.history.set(new_messages)

- # If we have no responses but had tool results, restore the tool results
- # This handles cases like Claude Haiku where the model calls tools but doesn't generate follow-up text
- if not responses and tool_result_responses:
- responses = tool_result_responses
- self.logger.debug("Restored tool results as no follow-up content was generated")
-
- # Strip leading whitespace from the *last* non-empty text block of the final response
- # to ensure the output is clean.
- if responses:
- for item in reversed(responses):
- if isinstance(item, TextContent) and item.text:
- item.text = item.text.lstrip()
- break
+ self._log_chat_finished(model=model)

- return cast("List[ContentBlock | CallToolRequestParams]", responses)
+ # Return PromptMessageExtended with tool calls for external execution
+ from fast_agent.core.prompt import Prompt

- async def generate_messages(
- self,
- message_param: BedrockMessageParam,
- request_params: RequestParams | None = None,
- ) -> PromptMessageMultipart:
- """Generate messages using Bedrock."""
- responses = await self._bedrock_completion(message_param, request_params)
-
- # Convert responses to PromptMessageMultipart
- content_list = []
- for response in responses:
- if isinstance(response, TextContent):
- content_list.append(response)
-
- return PromptMessageMultipart(role="assistant", content=content_list)
+ return Prompt.assistant(
+ *response_content_blocks, stop_reason=mapped_stop_reason, tool_calls=tool_calls
+ )

  async def _apply_prompt_provider_specific(
  self,
- multipart_messages: List[PromptMessageMultipart],
+ multipart_messages: List[PromptMessageExtended],
  request_params: RequestParams | None = None,
+ tools: List[Tool] | None = None,
  is_template: bool = False,
- ) -> PromptMessageMultipart:
+ ) -> PromptMessageExtended:
  """Apply Bedrock-specific prompt formatting."""
  if not multipart_messages:
- return PromptMessageMultipart(role="user", content=[])
+ return PromptMessageExtended(role="user", content=[])

  # Check the last message role
  last_message = multipart_messages[-1]

  # Add all previous messages to history (or all messages if last is from assistant)
  # if the last message is a "user" inference is required
+ # if the last message is a "user" inference is required
  messages_to_add = (
  multipart_messages[:-1] if last_message.role == "user" else multipart_messages
  )
  converted = []
  for msg in messages_to_add:
  # Convert each message to Bedrock message parameter format
- bedrock_msg = {"role": msg.role, "content": []}
- for content_item in msg.content:
- if isinstance(content_item, TextContent):
- bedrock_msg["content"].append({"type": "text", "text": content_item.text})
+ bedrock_msg = self._convert_multipart_to_bedrock_message(msg)
  converted.append(bedrock_msg)

  # Add messages to history
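
The `_convert_multipart_to_bedrock_message` helper introduced in the hunk above replaces the earlier inline, text-only conversion. As a rough sketch of the shape such a conversion targets (the dataclass and function below are illustrative stand-ins, not the package's helper, which also covers tool results and non-text content):

# Illustrative sketch; the real helper covers more content types.
from dataclasses import dataclass

@dataclass
class TextPart:
    text: str

def to_bedrock_message(role: str, parts: list[TextPart]) -> dict:
    # The Converse API expects {"role": ..., "content": [{"text": ...}, ...]}.
    return {"role": role, "content": [{"text": p.text} for p in parts if p.text]}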
@@ -1761,15 +1876,16 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  # For assistant messages: Return the last message (no completion needed)
  return last_message

- # Convert the last user message to Bedrock message parameter format
- message_param = {"role": last_message.role, "content": []}
- for content_item in last_message.content:
- if isinstance(content_item, TextContent):
- message_param["content"].append({"type": "text", "text": content_item.text})
+ # For user messages with tool_results, we need to add the tool result message to the conversation
+ if last_message.tool_results:
+ # Convert the tool result message and use it as the final input
+ message_param = self._convert_multipart_to_bedrock_message(last_message)
+ else:
+ # Convert the last user message to Bedrock message parameter format
+ message_param = self._convert_multipart_to_bedrock_message(last_message)

- # Generate response (structured paths set a one-shot non-streaming hint)
- self._force_non_streaming_once = True
- return await self.generate_messages(message_param, request_params)
+ # Call the refactored completion method directly
+ return await self._bedrock_completion(message_param, request_params, tools)

  def _generate_simplified_schema(self, model: Type[ModelT]) -> str:
  """Generates a simplified, human-readable schema with inline enum constraints."""
@@ -1834,10 +1950,10 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):

  async def _apply_prompt_provider_specific_structured(
  self,
- multipart_messages: List[PromptMessageMultipart],
+ multipart_messages: List[PromptMessageExtended],
  model: Type[ModelT],
  request_params: RequestParams | None = None,
- ) -> Tuple[ModelT | None, PromptMessageMultipart]:
+ ) -> Tuple[ModelT | None, PromptMessageExtended]:
  """Apply structured output for Bedrock using prompt engineering with a simplified schema."""
  # Short-circuit: if the last message is already an assistant JSON payload,
  # parse it directly without invoking the model. This restores pre-regression behavior
@@ -1873,7 +1989,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  if strategy == StructuredStrategy.SIMPLIFIED_SCHEMA:
  schema_text = self._generate_simplified_schema(model)
  else:
- schema_text = AugmentedLLM.model_to_schema_str(model)
+ schema_text = FastAgentLLM.model_to_schema_str(model)

  # Build the new simplified prompt
  prompt_parts = [
@@ -1900,7 +2016,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  temp_last = multipart_messages[-1].model_copy(deep=True)
  except Exception:
  # Fallback: construct a minimal copy if model_copy is unavailable
- temp_last = PromptMessageMultipart(
+ temp_last = PromptMessageExtended(
  role=multipart_messages[-1].role, content=list(multipart_messages[-1].content)
  )

@@ -1911,7 +2027,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  )

  try:
- result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
+ result: PromptMessageExtended = await self._apply_prompt_provider_specific(
  [temp_last], request_params
  )
  try:
@@ -1933,17 +2049,17 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  try:
  simplified_schema_text = self._generate_simplified_schema(model)
  except Exception:
- simplified_schema_text = AugmentedLLM.model_to_schema_str(model)
+ simplified_schema_text = FastAgentLLM.model_to_schema_str(model)
  try:
  temp_last_retry = multipart_messages[-1].model_copy(deep=True)
  except Exception:
- temp_last_retry = PromptMessageMultipart(
+ temp_last_retry = PromptMessageExtended(
  role=multipart_messages[-1].role,
  content=list(multipart_messages[-1].content),
  )
  temp_last_retry.add_text("\n".join(strict_parts + [simplified_schema_text]))

- retry_result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
+ retry_result: PromptMessageExtended = await self._apply_prompt_provider_specific(
  [temp_last_retry], request_params
  )
  return self._structured_from_multipart(retry_result, model)
@@ -2007,8 +2123,8 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  return text

  def _structured_from_multipart(
- self, message: PromptMessageMultipart, model: Type[ModelT]
- ) -> Tuple[ModelT | None, PromptMessageMultipart]:
+ self, message: PromptMessageExtended, model: Type[ModelT]
+ ) -> Tuple[ModelT | None, PromptMessageExtended]:
  """Override to apply JSON cleaning before parsing."""
  # Get the text from the multipart message
  text = message.all_text()
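
The `_structured_from_multipart` override shown in these hunks strips wrapper noise from the model's text before attempting validation. A small sketch of that clean-then-parse idea (the fence-stripping regex and helper name are assumptions, not the package's exact cleaning rules):

# Illustrative sketch: remove Markdown fences, then validate with Pydantic.
import json
import re
from pydantic import BaseModel, ValidationError

def parse_structured(text: str, model: type[BaseModel]) -> BaseModel | None:
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", text.strip())
    try:
        return model.model_validate(json.loads(cleaned))
    except (json.JSONDecodeError, ValidationError):
        return None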
@@ -2020,7 +2136,7 @@ class BedrockAugmentedLLM(AugmentedLLM[BedrockMessageParam, BedrockMessage]):
  if cleaned_text != text:
  from mcp.types import TextContent

- cleaned_multipart = PromptMessageMultipart(
+ cleaned_multipart = PromptMessageExtended(
  role=message.role, content=[TextContent(type="text", text=cleaned_text)]
  )
  else: