fast-agent-mcp 0.2.44__py3-none-any.whl → 0.2.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of fast-agent-mcp might be problematic.
- {fast_agent_mcp-0.2.44.dist-info → fast_agent_mcp-0.2.46.dist-info}/METADATA +5 -5
- {fast_agent_mcp-0.2.44.dist-info → fast_agent_mcp-0.2.46.dist-info}/RECORD +28 -25
- mcp_agent/__init__.py +40 -0
- mcp_agent/agents/workflow/evaluator_optimizer.py +39 -63
- mcp_agent/agents/workflow/router_agent.py +46 -21
- mcp_agent/cli/commands/go.py +25 -4
- mcp_agent/context.py +4 -0
- mcp_agent/core/__init__.py +26 -0
- mcp_agent/core/direct_decorators.py +117 -20
- mcp_agent/core/enhanced_prompt.py +4 -5
- mcp_agent/human_input/__init__.py +50 -0
- mcp_agent/human_input/elicitation_form.py +16 -13
- mcp_agent/human_input/form_fields.py +252 -0
- mcp_agent/human_input/simple_form.py +111 -0
- mcp_agent/llm/augmented_llm.py +12 -4
- mcp_agent/llm/augmented_llm_passthrough.py +0 -11
- mcp_agent/llm/augmented_llm_playback.py +5 -3
- mcp_agent/llm/providers/augmented_llm_anthropic.py +254 -95
- mcp_agent/mcp/__init__.py +50 -0
- mcp_agent/mcp/helpers/__init__.py +23 -1
- mcp_agent/mcp/interfaces.py +13 -2
- mcp_agent/py.typed +0 -0
- mcp_agent/resources/examples/workflows/evaluator.py +2 -2
- mcp_agent/resources/examples/workflows/router.py +1 -1
- mcp_agent/ui/console_display.py +19 -9
- {fast_agent_mcp-0.2.44.dist-info → fast_agent_mcp-0.2.46.dist-info}/WHEEL +0 -0
- {fast_agent_mcp-0.2.44.dist-info → fast_agent_mcp-0.2.46.dist-info}/entry_points.txt +0 -0
- {fast_agent_mcp-0.2.44.dist-info → fast_agent_mcp-0.2.46.dist-info}/licenses/LICENSE +0 -0
mcp_agent/human_input/simple_form.py
ADDED

@@ -0,0 +1,111 @@
+"""Simple form API for elicitation schemas without MCP wrappers."""
+
+import asyncio
+from typing import Any, Dict, Optional, Union
+
+from mcp.types import ElicitRequestedSchema
+
+from mcp_agent.human_input.elicitation_form import show_simple_elicitation_form
+from mcp_agent.human_input.form_fields import FormSchema
+
+
+async def form(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please fill out the form",
+    title: str = "Form Input",
+) -> Optional[Dict[str, Any]]:
+    """
+    Simple form API that presents an elicitation form and returns results.
+
+    Args:
+        schema: FormSchema, ElicitRequestedSchema, or dict schema
+        message: Message to display to the user
+        title: Title for the form (used as agent_name)
+
+    Returns:
+        Dict with form data if accepted, None if cancelled/declined
+
+    Example:
+        from mcp_agent.human_input.form_fields import FormSchema, string, email, integer
+
+        schema = FormSchema(
+            name=string("Name", "Your full name", min_length=2),
+            email=email("Email", "Your email address"),
+            age=integer("Age", "Your age", minimum=0, maximum=120)
+        ).required("name", "email")
+
+        result = await form(schema, "Please enter your information")
+        if result:
+            print(f"Name: {result['name']}, Email: {result['email']}")
+    """
+    # Convert schema to ElicitRequestedSchema format
+    if isinstance(schema, FormSchema):
+        elicit_schema = schema.to_schema()
+    elif isinstance(schema, dict):
+        elicit_schema = schema
+    else:
+        elicit_schema = schema
+
+    # Show the form
+    action, result = await show_simple_elicitation_form(
+        schema=elicit_schema, message=message, agent_name=title, server_name="SimpleForm"
+    )
+
+    # Return results based on action
+    if action == "accept":
+        return result
+    else:
+        return None
+
+
+def form_sync(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please fill out the form",
+    title: str = "Form Input",
+) -> Optional[Dict[str, Any]]:
+    """
+    Synchronous wrapper for the form function.
+
+    Args:
+        schema: FormSchema, ElicitRequestedSchema, or dict schema
+        message: Message to display to the user
+        title: Title for the form (used as agent_name)
+
+    Returns:
+        Dict with form data if accepted, None if cancelled/declined
+    """
+    return asyncio.run(form(schema, message, title))
+
+
+# Convenience function with a shorter name
+async def ask(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please provide the requested information",
+) -> Optional[Dict[str, Any]]:
+    """
+    Short alias for form() function.
+
+    Example:
+        from mcp_agent.human_input.form_fields import FormSchema, string, email
+
+        schema = FormSchema(
+            name=string("Name", "Your name"),
+            email=email("Email", "Your email")
+        ).required("name")
+
+        result = await ask(schema, "What's your info?")
+    """
+    return await form(schema, message)
+
+
+def ask_sync(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please provide the requested information",
+) -> Optional[Dict[str, Any]]:
+    """
+    Synchronous version of ask().
+
+    Example:
+        result = ask_sync(schema, "What's your info?")
+    """
+    return form_sync(schema, message)
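The new module exposes `form`/`form_sync` and the shorter `ask`/`ask_sync` aliases. A minimal usage sketch based on the docstrings above (the `string`/`email` field helpers come from `mcp_agent.human_input.form_fields`; the values shown are illustrative):

```python
# Sketch based on the docstrings above; field values are illustrative.
from mcp_agent.human_input.form_fields import FormSchema, email, string
from mcp_agent.human_input.simple_form import ask_sync

schema = FormSchema(
    name=string("Name", "Your name"),
    email=email("Email", "Your email"),
).required("name")

result = ask_sync(schema, "What's your info?")  # None if cancelled/declined
if result:
    print(result["name"], result.get("email"))
```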
mcp_agent/llm/augmented_llm.py
CHANGED
@@ -9,6 +9,7 @@ from typing import (
     Tuple,
     Type,
     TypeVar,
+    Union,
     cast,
 )
 

@@ -203,7 +204,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
     async def generate(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         request_params: RequestParams | None = None,
     ) -> PromptMessageMultipart:
         """

@@ -212,8 +213,10 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # note - check changes here are mirrored in structured(). i've thought hard about
         # a strategy to reduce duplication etc, but aiming for simple but imperfect for the moment
 
-        #
-
+        # Convert PromptMessage to PromptMessageMultipart if needed
+        if multipart_messages and isinstance(multipart_messages[0], PromptMessage):
+            multipart_messages = PromptMessageMultipart.to_multipart(multipart_messages)
+
         # TODO -- create a "fast-agent" control role rather than magic strings
 
         if multipart_messages[-1].first_text().startswith("***SAVE_HISTORY"):

@@ -235,6 +238,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
         # add generic error and termination reason handling/rollback
         self._message_history.append(assistant_response)
+
         return assistant_response
 
     @abstractmethod

@@ -260,12 +264,16 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
     async def structured(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         model: Type[ModelT],
         request_params: RequestParams | None = None,
     ) -> Tuple[ModelT | None, PromptMessageMultipart]:
         """Return a structured response from the LLM using the provided messages."""
 
+        # Convert PromptMessage to PromptMessageMultipart if needed
+        if multipart_messages and isinstance(multipart_messages[0], PromptMessage):
+            multipart_messages = PromptMessageMultipart.to_multipart(multipart_messages)
+
         self._precall(multipart_messages)
         result, assistant_response = await self._apply_prompt_provider_specific_structured(
             multipart_messages, model, request_params
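`generate()` and `structured()` now also accept plain `mcp.types.PromptMessage` objects and convert them with `PromptMessageMultipart.to_multipart()`. A minimal sketch, assuming an already-initialized `AugmentedLLM` subclass instance `llm` and an async context:

```python
# Sketch only: `llm` is assumed to be an initialized AugmentedLLM subclass,
# and this call runs inside an async function.
from mcp.types import PromptMessage, TextContent

msg = PromptMessage(role="user", content=TextContent(type="text", text="Hello"))
response = await llm.generate([msg])  # converted via PromptMessageMultipart.to_multipart()
print(response.first_text())
```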
mcp_agent/llm/augmented_llm_passthrough.py
CHANGED

@@ -164,15 +164,10 @@ class PassthroughLLM(AugmentedLLM):
         request_params: RequestParams | None = None,
         is_template: bool = False,
     ) -> PromptMessageMultipart:
-        print(
-            f"DEBUG: PassthroughLLM _apply_prompt_provider_specific called with {len(multipart_messages)} messages, is_template={is_template}"
-        )
-
         # Add messages to history with proper is_prompt flag
         self.history.extend(multipart_messages, is_prompt=is_template)
 
         last_message = multipart_messages[-1]
-        print(f"DEBUG: Last message role: {last_message.role}, text: '{last_message.first_text()}'")
 
         if self.is_tool_call(last_message):
             result = Prompt.assistant(await self.generate_str(last_message.first_text()))

@@ -209,14 +204,8 @@ class PassthroughLLM(AugmentedLLM):
         else:
             # TODO -- improve when we support Audio/Multimodal gen models e.g. gemini . This should really just return the input as "assistant"...
             concatenated: str = "\n".join(message.all_text() for message in multipart_messages)
-            print(
-                f"DEBUG: PassthroughLLM generating response: '{concatenated}' (is_template={is_template})"
-            )
             await self.show_assistant_message(concatenated)
             result = Prompt.assistant(concatenated)
-            print(f"DEBUG: PassthroughLLM created result: {result}")
-            print(f"DEBUG: Result first_text(): {result.first_text()}")
-            print(f"DEBUG: Result content: {result.content}")
 
         # Track usage for this passthrough "turn"
         try:
mcp_agent/llm/augmented_llm_playback.py
CHANGED

@@ -1,4 +1,6 @@
-from typing import Any, List, Type
+from typing import Any, List, Type, Union
+
+from mcp.types import PromptMessage
 
 from mcp_agent.core.exceptions import ModelConfigError
 from mcp_agent.core.prompt import Prompt

@@ -51,7 +53,7 @@ class PlaybackLLM(PassthroughLLM):
 
     async def generate(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         request_params: RequestParams | None = None,
     ) -> PromptMessageMultipart:
         """

@@ -106,7 +108,7 @@ class PlaybackLLM(PassthroughLLM):
 
     async def structured(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         model: Type[ModelT],
         request_params: RequestParams | None = None,
     ) -> tuple[ModelT | None, PromptMessageMultipart]:
mcp_agent/llm/providers/augmented_llm_anthropic.py
CHANGED

@@ -1,4 +1,5 @@
-
+import json
+from typing import TYPE_CHECKING, Any, List, Tuple, Type
 
 from mcp.types import TextContent
 

@@ -33,6 +34,7 @@ from anthropic.types import (
 from mcp.types import (
     CallToolRequest,
     CallToolRequestParams,
+    CallToolResult,
     ContentBlock,
 )
 from rich.text import Text
@@ -99,6 +101,184 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         cache_mode = self.context.config.anthropic.cache_mode
         return cache_mode
 
+    async def _prepare_tools(self, structured_model: Type[ModelT] | None = None) -> List[ToolParam]:
+        """Prepare tools based on whether we're in structured output mode."""
+        if structured_model:
+            # JSON mode - create a single tool for structured output
+            return [
+                ToolParam(
+                    name="return_structured_output",
+                    description="Return the response in the required JSON format",
+                    input_schema=structured_model.model_json_schema(),
+                )
+            ]
+        else:
+            # Regular mode - use tools from aggregator
+            tool_list: ListToolsResult = await self.aggregator.list_tools()
+            return [
+                ToolParam(
+                    name=tool.name,
+                    description=tool.description or "",
+                    input_schema=tool.inputSchema,
+                )
+                for tool in tool_list.tools
+            ]
+
+    def _apply_system_cache(self, base_args: dict, cache_mode: str) -> None:
+        """Apply cache control to system prompt if cache mode allows it."""
+        if cache_mode != "off" and base_args["system"]:
+            if isinstance(base_args["system"], str):
+                base_args["system"] = [
+                    {
+                        "type": "text",
+                        "text": base_args["system"],
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ]
+                self.logger.debug(
+                    "Applied cache_control to system prompt (caches tools+system in one block)"
+                )
+            else:
+                self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+
+    async def _apply_conversation_cache(self, messages: List[MessageParam], cache_mode: str) -> int:
+        """Apply conversation caching if in auto mode. Returns number of cache blocks applied."""
+        applied_count = 0
+        if cache_mode == "auto" and self.history.should_apply_conversation_cache():
+            cache_updates = self.history.get_conversation_cache_updates()
+
+            # Remove cache control from old positions
+            if cache_updates["remove"]:
+                self.history.remove_cache_control_from_messages(messages, cache_updates["remove"])
+                self.logger.debug(
+                    f"Removed conversation cache_control from positions {cache_updates['remove']}"
+                )
+
+            # Add cache control to new positions
+            if cache_updates["add"]:
+                applied_count = self.history.add_cache_control_to_messages(
+                    messages, cache_updates["add"]
+                )
+                if applied_count > 0:
+                    self.history.apply_conversation_cache_updates(cache_updates)
+                    self.logger.debug(
+                        f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
+                    )
+                else:
+                    self.logger.debug(
+                        f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
+                    )
+
+        return applied_count
+
+    async def _process_structured_output(
+        self,
+        content_block: Any,
+    ) -> Tuple[str, CallToolResult, TextContent]:
+        """
+        Process a structured output tool call from Anthropic.
+
+        This handles the special case where Anthropic's model was forced to use
+        a 'return_structured_output' tool via tool_choice. The tool input contains
+        the JSON data we want, so we extract it and format it for display.
+
+        Even though we don't call an external tool, we must create a CallToolResult
+        to satisfy Anthropic's API requirement that every tool_use has a corresponding
+        tool_result in the next message.
+
+        Returns:
+            Tuple of (tool_use_id, tool_result, content_block) for the structured data
+        """
+        tool_args = content_block.input
+        tool_use_id = content_block.id
+
+        # Show the formatted JSON response to the user
+        json_response = json.dumps(tool_args, indent=2)
+        await self.show_assistant_message(json_response)
+
+        # Create the content for responses
+        structured_content = TextContent(type="text", text=json.dumps(tool_args))
+
+        # Create a CallToolResult to satisfy Anthropic's API requirements
+        # This represents the "result" of our structured output "tool"
+        tool_result = CallToolResult(isError=False, content=[structured_content])
+
+        return tool_use_id, tool_result, structured_content
+
+    async def _process_regular_tool_call(
+        self,
+        content_block: Any,
+        available_tools: List[ToolParam],
+        is_first_tool: bool,
+        message_text: str | Text,
+    ) -> Tuple[str, CallToolResult]:
+        """
+        Process a regular MCP tool call.
+
+        This handles actual tool execution via the MCP aggregator.
+        """
+        tool_name = content_block.name
+        tool_args = content_block.input
+        tool_use_id = content_block.id
+
+        if is_first_tool:
+            await self.show_assistant_message(message_text, tool_name)
+
+        self.show_tool_call(available_tools, tool_name, tool_args)
+        tool_call_request = CallToolRequest(
+            method="tools/call",
+            params=CallToolRequestParams(name=tool_name, arguments=tool_args),
+        )
+        result = await self.call_tool(request=tool_call_request, tool_call_id=tool_use_id)
+        self.show_tool_result(result)
+        return tool_use_id, result
+
+    async def _process_tool_calls(
+        self,
+        tool_uses: List[Any],
+        available_tools: List[ToolParam],
+        message_text: str | Text,
+        structured_model: Type[ModelT] | None = None,
+    ) -> Tuple[List[Tuple[str, CallToolResult]], List[ContentBlock]]:
+        """
+        Process tool calls, handling both structured output and regular MCP tools.
+
+        For structured output mode:
+        - Extracts JSON data from the forced 'return_structured_output' tool
+        - Does NOT create fake CallToolResults
+        - Returns the JSON content directly
+
+        For regular tools:
+        - Calls actual MCP tools via the aggregator
+        - Returns real CallToolResults
+
+        """
+        tool_results = []
+        responses = []
+
+        for tool_idx, content_block in enumerate(tool_uses):
+            tool_name = content_block.name
+            is_first_tool = tool_idx == 0
+
+            if tool_name == "return_structured_output" and structured_model:
+                # Structured output: extract JSON, don't call external tools
+                (
+                    tool_use_id,
+                    tool_result,
+                    structured_content,
+                ) = await self._process_structured_output(content_block)
+                responses.append(structured_content)
+                # Add to tool_results to satisfy Anthropic's API requirement for tool_result messages
+                tool_results.append((tool_use_id, tool_result))
+            else:
+                # Regular tool: call external MCP tool
+                tool_use_id, tool_result = await self._process_regular_tool_call(
+                    content_block, available_tools, is_first_tool, message_text
+                )
+                tool_results.append((tool_use_id, tool_result))
+                responses.extend(tool_result.content)
+
+        return tool_results, responses
+
     async def _process_stream(self, stream: AsyncMessageStream, model: str) -> Message:
         """Process the streaming response and display real-time token usage."""
         # Track estimated output tokens by counting text chunks
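These helpers back a tool-based structured-output path: `_prepare_tools()` publishes a single `return_structured_output` tool built from the Pydantic model's JSON schema, and `_process_structured_output()` turns the forced tool call back into JSON. From the caller's side the flow would look roughly like the sketch below (the `WeatherReport` model and the `llm` instance are illustrative, and `Prompt.user(...)` is assumed to exist alongside the `Prompt.assistant(...)` used above):

```python
# Caller-side sketch; WeatherReport and `llm` are illustrative, not part of the diff.
from pydantic import BaseModel

from mcp_agent.core.prompt import Prompt


class WeatherReport(BaseModel):
    city: str
    temperature_c: float


async def get_report(llm) -> WeatherReport | None:
    parsed, assistant_msg = await llm.structured(
        [Prompt.user("Weather in Paris, as JSON please")],
        WeatherReport,
    )
    return parsed  # None if the JSON could not be parsed into the model
```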
@@ -150,6 +330,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         self,
         message_param,
         request_params: RequestParams | None = None,
+        structured_model: Type[ModelT] | None = None,
     ) -> list[ContentBlock]:
         """
         Process a query using an LLM and available tools.
@@ -181,15 +362,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         cache_mode = self._get_cache_mode()
         self.logger.debug(f"Anthropic cache_mode: {cache_mode}")
 
-
-        available_tools: List[ToolParam] = [
-            ToolParam(
-                name=tool.name,
-                description=tool.description or "",
-                input_schema=tool.inputSchema,
-            )
-            for tool in tool_list.tools
-        ]
+        available_tools = await self._prepare_tools(structured_model)
 
         responses: List[ContentBlock] = []
 
@@ -209,59 +382,25 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
             "tools": available_tools,
         }
 
-        #
-
-
-        if isinstance(base_args["system"], str):
-            base_args["system"] = [
-                {
-                    "type": "text",
-                    "text": base_args["system"],
-                    "cache_control": {"type": "ephemeral"},
-                }
-            ]
-            self.logger.debug(
-                "Applied cache_control to system prompt (caches tools+system in one block)"
-            )
-        else:
-            self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+        # Add tool_choice for structured output mode
+        if structured_model:
+            base_args["tool_choice"] = {"type": "tool", "name": "return_structured_output"}
 
-        # Apply
-
-        cache_updates = self.history.get_conversation_cache_updates()
+        # Apply cache control to system prompt
+        self._apply_system_cache(base_args, cache_mode)
 
-
-
-        self.history.remove_cache_control_from_messages(
-            messages, cache_updates["remove"]
-        )
-        self.logger.debug(
-            f"Removed conversation cache_control from positions {cache_updates['remove']}"
-        )
+        # Apply conversation caching
+        applied_count = await self._apply_conversation_cache(messages, cache_mode)
 
-
-
-
-
+        # Verify we don't exceed Anthropic's 4 cache block limit
+        if applied_count > 0:
+            total_cache_blocks = applied_count
+            if cache_mode != "off" and base_args["system"]:
+                total_cache_blocks += 1  # tools+system cache block
+            if total_cache_blocks > 4:
+                self.logger.warning(
+                    f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
                 )
-        if applied_count > 0:
-            self.history.apply_conversation_cache_updates(cache_updates)
-            self.logger.debug(
-                f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
-            )
-
-        # Verify we don't exceed Anthropic's 4 cache block limit
-        total_cache_blocks = applied_count
-        if cache_mode != "off" and base_args["system"]:
-            total_cache_blocks += 1  # tools+system cache block
-        if total_cache_blocks > 4:
-            self.logger.warning(
-                f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
-            )
-        else:
-            self.logger.debug(
-                f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
-            )
 
         if params.maxTokens is not None:
             base_args["max_tokens"] = params.maxTokens
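In structured-output mode the request that ultimately reaches Anthropic pairs the single schema-derived tool with a forced `tool_choice`, and (when caching is on) a system block carrying `cache_control`. A rough sketch of that request shape using the `anthropic` SDK directly (model name, system text, and schema are illustrative):

```python
# Sketch of the request shape assembled above; values are illustrative.
import anthropic
from pydantic import BaseModel


class WeatherReport(BaseModel):
    city: str
    temperature_c: float


client = anthropic.Anthropic()
response = client.messages.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "You are a weather assistant.",
            "cache_control": {"type": "ephemeral"},  # one tools+system cache block
        }
    ],
    messages=[{"role": "user", "content": "Weather in Paris, as JSON"}],
    tools=[
        {
            "name": "return_structured_output",
            "description": "Return the response in the required JSON format",
            "input_schema": WeatherReport.model_json_schema(),
        }
    ],
    tool_choice={"type": "tool", "name": "return_structured_output"},
)
tool_use = next(block for block in response.content if block.type == "tool_use")
print(tool_use.input)  # dict already matching the schema
```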
@@ -387,34 +526,22 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                     style="dim green italic",
                 )
 
-                # Process all tool calls
-                tool_results = []
-
-
-
-                    tool_args = content_block.input
-                    tool_use_id = content_block.id
-
-                    if tool_idx == 0:  # Only show message for first tool use
-                        await self.show_assistant_message(message_text, tool_name)
-
-                    self.show_tool_call(available_tools, tool_name, tool_args)
-                    tool_call_request = CallToolRequest(
-                        method="tools/call",
-                        params=CallToolRequestParams(name=tool_name, arguments=tool_args),
-                    )
-                    # TODO -- support MCP isError etc.
-                    result = await self.call_tool(
-                        request=tool_call_request, tool_call_id=tool_use_id
-                    )
-                    self.show_tool_result(result)
-
-                    # Add each result to our collection
-                    tool_results.append((tool_use_id, result))
-                    responses.extend(result.content)
+                # Process all tool calls using the helper method
+                tool_results, tool_responses = await self._process_tool_calls(
+                    tool_uses, available_tools, message_text, structured_model
+                )
+                responses.extend(tool_responses)
 
+                # Always add tool_results_message first (required by Anthropic API)
                 messages.append(AnthropicConverter.create_tool_results_message(tool_results))
 
+                # For structured output, we have our result and should exit after sending tool_result
+                if structured_model and any(
+                    tool.name == "return_structured_output" for tool in tool_uses
+                ):
+                    self.logger.debug("Structured output received, breaking iteration loop")
+                    break
+
         # Only save the new conversation messages to history if use_history is true
         # Keep the prompt messages separate
         if params.use_history:
@@ -501,19 +628,51 @@
     ) -> Tuple[ModelT | None, PromptMessageMultipart]:  # noqa: F821
         request_params = self.get_request_params(request_params)
 
-        #
-        multipart_messages[-1]
-            """YOU MUST RESPOND IN THE FOLLOWING FORMAT:
-            {schema}
-            RESPOND ONLY WITH THE JSON, NO PREAMBLE, CODE FENCES OR 'properties' ARE PERMISSABLE """.format(
-                schema=model.model_json_schema()
-            )
-        )
+        # Check the last message role
+        last_message = multipart_messages[-1]
 
-
-
+        # Add all previous messages to history (or all messages if last is from assistant)
+        messages_to_add = (
+            multipart_messages[:-1] if last_message.role == "user" else multipart_messages
         )
-
+        converted = []
+
+        for msg in messages_to_add:
+            anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
+            converted.append(anthropic_msg)
+
+        self.history.extend(converted, is_prompt=False)
+
+        if last_message.role == "user":
+            self.logger.debug("Last message in prompt is from user, generating structured response")
+            message_param = AnthropicConverter.convert_to_anthropic(last_message)
+
+            # Call _anthropic_completion with the structured model
+            response_content = await self._anthropic_completion(
+                message_param, request_params, structured_model=model
+            )
+
+            # Extract the structured data from the response
+            for content in response_content:
+                if content.type == "text":
+                    try:
+                        # Parse the JSON response from the tool
+                        data = json.loads(content.text)
+                        parsed_model = model(**data)
+                        # Create assistant response
+                        assistant_response = Prompt.assistant(content)
+                        return parsed_model, assistant_response
+                    except (json.JSONDecodeError, ValueError) as e:
+                        self.logger.error(f"Failed to parse structured output: {e}")
+                        assistant_response = Prompt.assistant(content)
+                        return None, assistant_response
+
+            # If no valid response found
+            return None, Prompt.assistant()
+        else:
+            # For assistant messages: Return the last message content
+            self.logger.debug("Last message in prompt is from assistant, returning it directly")
+            return None, last_message
 
     def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
         # Print raw usage for debugging