fast-agent-mcp 0.2.44__py3-none-any.whl → 0.2.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of fast-agent-mcp might be problematic.

@@ -0,0 +1,111 @@
+"""Simple form API for elicitation schemas without MCP wrappers."""
+
+import asyncio
+from typing import Any, Dict, Optional, Union
+
+from mcp.types import ElicitRequestedSchema
+
+from mcp_agent.human_input.elicitation_form import show_simple_elicitation_form
+from mcp_agent.human_input.form_fields import FormSchema
+
+
+async def form(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please fill out the form",
+    title: str = "Form Input",
+) -> Optional[Dict[str, Any]]:
+    """
+    Simple form API that presents an elicitation form and returns results.
+
+    Args:
+        schema: FormSchema, ElicitRequestedSchema, or dict schema
+        message: Message to display to the user
+        title: Title for the form (used as agent_name)
+
+    Returns:
+        Dict with form data if accepted, None if cancelled/declined
+
+    Example:
+        from mcp_agent.human_input.form_fields import FormSchema, string, email, integer
+
+        schema = FormSchema(
+            name=string("Name", "Your full name", min_length=2),
+            email=email("Email", "Your email address"),
+            age=integer("Age", "Your age", minimum=0, maximum=120)
+        ).required("name", "email")
+
+        result = await form(schema, "Please enter your information")
+        if result:
+            print(f"Name: {result['name']}, Email: {result['email']}")
+    """
+    # Convert schema to ElicitRequestedSchema format
+    if isinstance(schema, FormSchema):
+        elicit_schema = schema.to_schema()
+    elif isinstance(schema, dict):
+        elicit_schema = schema
+    else:
+        elicit_schema = schema
+
+    # Show the form
+    action, result = await show_simple_elicitation_form(
+        schema=elicit_schema, message=message, agent_name=title, server_name="SimpleForm"
+    )
+
+    # Return results based on action
+    if action == "accept":
+        return result
+    else:
+        return None
+
+
+def form_sync(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please fill out the form",
+    title: str = "Form Input",
+) -> Optional[Dict[str, Any]]:
+    """
+    Synchronous wrapper for the form function.
+
+    Args:
+        schema: FormSchema, ElicitRequestedSchema, or dict schema
+        message: Message to display to the user
+        title: Title for the form (used as agent_name)
+
+    Returns:
+        Dict with form data if accepted, None if cancelled/declined
+    """
+    return asyncio.run(form(schema, message, title))
+
+
+# Convenience function with a shorter name
+async def ask(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please provide the requested information",
+) -> Optional[Dict[str, Any]]:
+    """
+    Short alias for form() function.
+
+    Example:
+        from mcp_agent.human_input.form_fields import FormSchema, string, email
+
+        schema = FormSchema(
+            name=string("Name", "Your name"),
+            email=email("Email", "Your email")
+        ).required("name")
+
+        result = await ask(schema, "What's your info?")
+    """
+    return await form(schema, message)
+
+
+def ask_sync(
+    schema: Union[FormSchema, ElicitRequestedSchema, Dict[str, Any]],
+    message: str = "Please provide the requested information",
+) -> Optional[Dict[str, Any]]:
+    """
+    Synchronous version of ask().
+
+    Example:
+        result = ask_sync(schema, "What's your info?")
+    """
+    return form_sync(schema, message)
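
For orientation (not part of the diff): a minimal usage sketch of the synchronous helpers added above. The module path `mcp_agent.human_input.forms` and the surrounding setup are assumptions; the `FormSchema`, `string`, and `email` builders are taken from the docstring examples in this hunk.

# Hypothetical usage of the new simple form API (module path assumed).
from mcp_agent.human_input.form_fields import FormSchema, string, email
from mcp_agent.human_input.forms import ask_sync  # path assumed from this diff

schema = FormSchema(
    name=string("Name", "Your full name"),
    email=email("Email", "Your email address"),
).required("name")

# Blocks until the user accepts, declines, or cancels the elicitation form;
# returns a dict on accept, None otherwise.
result = ask_sync(schema, "Please introduce yourself")
if result is not None:
    print(f"Hello {result['name']}!")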
@@ -9,6 +9,7 @@ from typing import (
     Tuple,
     Type,
     TypeVar,
+    Union,
     cast,
 )
 
@@ -203,7 +204,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
     async def generate(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         request_params: RequestParams | None = None,
     ) -> PromptMessageMultipart:
         """
@@ -212,8 +213,10 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
         # note - check changes here are mirrored in structured(). i've thought hard about
         # a strategy to reduce duplication etc, but aiming for simple but imperfect for the moment
 
-        # We never expect this for structured() calls - this is for interactive use - developers
-        # can do this programatically
+        # Convert PromptMessage to PromptMessageMultipart if needed
+        if multipart_messages and isinstance(multipart_messages[0], PromptMessage):
+            multipart_messages = PromptMessageMultipart.to_multipart(multipart_messages)
+
         # TODO -- create a "fast-agent" control role rather than magic strings
 
         if multipart_messages[-1].first_text().startswith("***SAVE_HISTORY"):
@@ -235,6 +238,7 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
         # add generic error and termination reason handling/rollback
         self._message_history.append(assistant_response)
+
         return assistant_response
 
     @abstractmethod
@@ -260,12 +264,16 @@ class AugmentedLLM(ContextDependent, AugmentedLLMProtocol, Generic[MessageParamT
 
     async def structured(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         model: Type[ModelT],
         request_params: RequestParams | None = None,
     ) -> Tuple[ModelT | None, PromptMessageMultipart]:
         """Return a structured response from the LLM using the provided messages."""
 
+        # Convert PromptMessage to PromptMessageMultipart if needed
+        if multipart_messages and isinstance(multipart_messages[0], PromptMessage):
+            multipart_messages = PromptMessageMultipart.to_multipart(multipart_messages)
+
         self._precall(multipart_messages)
         result, assistant_response = await self._apply_prompt_provider_specific_structured(
             multipart_messages, model, request_params
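
For orientation (not part of the diff): a minimal sketch of what the widened `generate()`/`structured()` signatures above permit, assuming `llm` is an `AugmentedLLM` implementation obtained elsewhere. Raw `mcp.types.PromptMessage` values no longer need manual wrapping.

# Sketch only: raw PromptMessage inputs are now converted internally
# via PromptMessageMultipart.to_multipart() before the provider call.
from mcp.types import PromptMessage, TextContent


async def summarise(llm) -> str:  # `llm` assumed to implement AugmentedLLM
    msg = PromptMessage(
        role="user",
        content=TextContent(type="text", text="Summarise the attached notes"),
    )
    # Before this release callers had to build PromptMessageMultipart themselves.
    response = await llm.generate([msg])
    return response.first_text()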
@@ -164,15 +164,10 @@ class PassthroughLLM(AugmentedLLM):
         request_params: RequestParams | None = None,
         is_template: bool = False,
     ) -> PromptMessageMultipart:
-        print(
-            f"DEBUG: PassthroughLLM _apply_prompt_provider_specific called with {len(multipart_messages)} messages, is_template={is_template}"
-        )
-
         # Add messages to history with proper is_prompt flag
         self.history.extend(multipart_messages, is_prompt=is_template)
 
         last_message = multipart_messages[-1]
-        print(f"DEBUG: Last message role: {last_message.role}, text: '{last_message.first_text()}'")
 
         if self.is_tool_call(last_message):
             result = Prompt.assistant(await self.generate_str(last_message.first_text()))
@@ -209,14 +204,8 @@ class PassthroughLLM(AugmentedLLM):
         else:
             # TODO -- improve when we support Audio/Multimodal gen models e.g. gemini . This should really just return the input as "assistant"...
             concatenated: str = "\n".join(message.all_text() for message in multipart_messages)
-            print(
-                f"DEBUG: PassthroughLLM generating response: '{concatenated}' (is_template={is_template})"
-            )
             await self.show_assistant_message(concatenated)
             result = Prompt.assistant(concatenated)
-            print(f"DEBUG: PassthroughLLM created result: {result}")
-            print(f"DEBUG: Result first_text(): {result.first_text()}")
-            print(f"DEBUG: Result content: {result.content}")
 
         # Track usage for this passthrough "turn"
         try:
@@ -1,4 +1,6 @@
-from typing import Any, List, Type
+from typing import Any, List, Type, Union
+
+from mcp.types import PromptMessage
 
 from mcp_agent.core.exceptions import ModelConfigError
 from mcp_agent.core.prompt import Prompt
@@ -51,7 +53,7 @@ class PlaybackLLM(PassthroughLLM):
 
     async def generate(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         request_params: RequestParams | None = None,
     ) -> PromptMessageMultipart:
         """
@@ -106,7 +108,7 @@
 
     async def structured(
         self,
-        multipart_messages: List[PromptMessageMultipart],
+        multipart_messages: List[Union[PromptMessageMultipart, PromptMessage]],
         model: Type[ModelT],
         request_params: RequestParams | None = None,
     ) -> tuple[ModelT | None, PromptMessageMultipart]:
@@ -1,4 +1,5 @@
-from typing import TYPE_CHECKING, List, Tuple, Type
+import json
+from typing import TYPE_CHECKING, Any, List, Tuple, Type
 
 from mcp.types import TextContent
 
@@ -33,6 +34,7 @@ from anthropic.types import (
 from mcp.types import (
     CallToolRequest,
     CallToolRequestParams,
+    CallToolResult,
     ContentBlock,
 )
 from rich.text import Text
@@ -99,6 +101,184 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         cache_mode = self.context.config.anthropic.cache_mode
         return cache_mode
 
+    async def _prepare_tools(self, structured_model: Type[ModelT] | None = None) -> List[ToolParam]:
+        """Prepare tools based on whether we're in structured output mode."""
+        if structured_model:
+            # JSON mode - create a single tool for structured output
+            return [
+                ToolParam(
+                    name="return_structured_output",
+                    description="Return the response in the required JSON format",
+                    input_schema=structured_model.model_json_schema(),
+                )
+            ]
+        else:
+            # Regular mode - use tools from aggregator
+            tool_list: ListToolsResult = await self.aggregator.list_tools()
+            return [
+                ToolParam(
+                    name=tool.name,
+                    description=tool.description or "",
+                    input_schema=tool.inputSchema,
+                )
+                for tool in tool_list.tools
+            ]
+
+    def _apply_system_cache(self, base_args: dict, cache_mode: str) -> None:
+        """Apply cache control to system prompt if cache mode allows it."""
+        if cache_mode != "off" and base_args["system"]:
+            if isinstance(base_args["system"], str):
+                base_args["system"] = [
+                    {
+                        "type": "text",
+                        "text": base_args["system"],
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ]
+                self.logger.debug(
+                    "Applied cache_control to system prompt (caches tools+system in one block)"
+                )
+            else:
+                self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+
+    async def _apply_conversation_cache(self, messages: List[MessageParam], cache_mode: str) -> int:
+        """Apply conversation caching if in auto mode. Returns number of cache blocks applied."""
+        applied_count = 0
+        if cache_mode == "auto" and self.history.should_apply_conversation_cache():
+            cache_updates = self.history.get_conversation_cache_updates()
+
+            # Remove cache control from old positions
+            if cache_updates["remove"]:
+                self.history.remove_cache_control_from_messages(messages, cache_updates["remove"])
+                self.logger.debug(
+                    f"Removed conversation cache_control from positions {cache_updates['remove']}"
+                )
+
+            # Add cache control to new positions
+            if cache_updates["add"]:
+                applied_count = self.history.add_cache_control_to_messages(
+                    messages, cache_updates["add"]
+                )
+                if applied_count > 0:
+                    self.history.apply_conversation_cache_updates(cache_updates)
+                    self.logger.debug(
+                        f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
+                    )
+                else:
+                    self.logger.debug(
+                        f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
+                    )
+
+        return applied_count
+
+    async def _process_structured_output(
+        self,
+        content_block: Any,
+    ) -> Tuple[str, CallToolResult, TextContent]:
+        """
+        Process a structured output tool call from Anthropic.
+
+        This handles the special case where Anthropic's model was forced to use
+        a 'return_structured_output' tool via tool_choice. The tool input contains
+        the JSON data we want, so we extract it and format it for display.
+
+        Even though we don't call an external tool, we must create a CallToolResult
+        to satisfy Anthropic's API requirement that every tool_use has a corresponding
+        tool_result in the next message.
+
+        Returns:
+            Tuple of (tool_use_id, tool_result, content_block) for the structured data
+        """
+        tool_args = content_block.input
+        tool_use_id = content_block.id
+
+        # Show the formatted JSON response to the user
+        json_response = json.dumps(tool_args, indent=2)
+        await self.show_assistant_message(json_response)
+
+        # Create the content for responses
+        structured_content = TextContent(type="text", text=json.dumps(tool_args))
+
+        # Create a CallToolResult to satisfy Anthropic's API requirements
+        # This represents the "result" of our structured output "tool"
+        tool_result = CallToolResult(isError=False, content=[structured_content])
+
+        return tool_use_id, tool_result, structured_content
+
+    async def _process_regular_tool_call(
+        self,
+        content_block: Any,
+        available_tools: List[ToolParam],
+        is_first_tool: bool,
+        message_text: str | Text,
+    ) -> Tuple[str, CallToolResult]:
+        """
+        Process a regular MCP tool call.
+
+        This handles actual tool execution via the MCP aggregator.
+        """
+        tool_name = content_block.name
+        tool_args = content_block.input
+        tool_use_id = content_block.id
+
+        if is_first_tool:
+            await self.show_assistant_message(message_text, tool_name)
+
+        self.show_tool_call(available_tools, tool_name, tool_args)
+        tool_call_request = CallToolRequest(
+            method="tools/call",
+            params=CallToolRequestParams(name=tool_name, arguments=tool_args),
+        )
+        result = await self.call_tool(request=tool_call_request, tool_call_id=tool_use_id)
+        self.show_tool_result(result)
+        return tool_use_id, result
+
+    async def _process_tool_calls(
+        self,
+        tool_uses: List[Any],
+        available_tools: List[ToolParam],
+        message_text: str | Text,
+        structured_model: Type[ModelT] | None = None,
+    ) -> Tuple[List[Tuple[str, CallToolResult]], List[ContentBlock]]:
+        """
+        Process tool calls, handling both structured output and regular MCP tools.
+
+        For structured output mode:
+        - Extracts JSON data from the forced 'return_structured_output' tool
+        - Does NOT create fake CallToolResults
+        - Returns the JSON content directly
+
+        For regular tools:
+        - Calls actual MCP tools via the aggregator
+        - Returns real CallToolResults
+        """
+        tool_results = []
+        responses = []
+
+        for tool_idx, content_block in enumerate(tool_uses):
+            tool_name = content_block.name
+            is_first_tool = tool_idx == 0
+
+            if tool_name == "return_structured_output" and structured_model:
+                # Structured output: extract JSON, don't call external tools
+                (
+                    tool_use_id,
+                    tool_result,
+                    structured_content,
+                ) = await self._process_structured_output(content_block)
+                responses.append(structured_content)
+                # Add to tool_results to satisfy Anthropic's API requirement for tool_result messages
+                tool_results.append((tool_use_id, tool_result))
+            else:
+                # Regular tool: call external MCP tool
+                tool_use_id, tool_result = await self._process_regular_tool_call(
+                    content_block, available_tools, is_first_tool, message_text
+                )
+                tool_results.append((tool_use_id, tool_result))
+                responses.extend(tool_result.content)
+
+        return tool_results, responses
+
     async def _process_stream(self, stream: AsyncMessageStream, model: str) -> Message:
         """Process the streaming response and display real-time token usage."""
         # Track estimated output tokens by counting text chunks
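
To make the structured-output plumbing above concrete (not part of the diff): a sketch of the single synthetic tool that `_prepare_tools` builds when a `structured_model` is supplied. The `WeatherReport` model is purely illustrative.

# Illustrative only: the forced tool definition for a hypothetical Pydantic model.
from pydantic import BaseModel


class WeatherReport(BaseModel):  # hypothetical structured_model
    city: str
    temperature_c: float


# _prepare_tools(structured_model=WeatherReport) yields a single ToolParam like:
structured_tool = {
    "name": "return_structured_output",
    "description": "Return the response in the required JSON format",
    "input_schema": WeatherReport.model_json_schema(),
}
# The request later pins tool_choice to this tool (see the tool_choice hunk below),
# so the model's tool_use input is constrained to the schema.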
@@ -150,6 +330,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         self,
         message_param,
         request_params: RequestParams | None = None,
+        structured_model: Type[ModelT] | None = None,
     ) -> list[ContentBlock]:
         """
         Process a query using an LLM and available tools.
@@ -181,15 +362,7 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
         cache_mode = self._get_cache_mode()
         self.logger.debug(f"Anthropic cache_mode: {cache_mode}")
 
-        tool_list: ListToolsResult = await self.aggregator.list_tools()
-        available_tools: List[ToolParam] = [
-            ToolParam(
-                name=tool.name,
-                description=tool.description or "",
-                input_schema=tool.inputSchema,
-            )
-            for tool in tool_list.tools
-        ]
+        available_tools = await self._prepare_tools(structured_model)
 
         responses: List[ContentBlock] = []
 
@@ -209,59 +382,25 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                 "tools": available_tools,
             }
 
-            # Apply cache_control to system prompt if cache_mode is not "off"
-            # This caches both tools and system prompt together in one cache block
-            if cache_mode != "off" and base_args["system"]:
-                if isinstance(base_args["system"], str):
-                    base_args["system"] = [
-                        {
-                            "type": "text",
-                            "text": base_args["system"],
-                            "cache_control": {"type": "ephemeral"},
-                        }
-                    ]
-                    self.logger.debug(
-                        "Applied cache_control to system prompt (caches tools+system in one block)"
-                    )
-                else:
-                    self.logger.debug(f"System prompt is not a string: {type(base_args['system'])}")
+            # Add tool_choice for structured output mode
+            if structured_model:
+                base_args["tool_choice"] = {"type": "tool", "name": "return_structured_output"}
 
-            # Apply conversation caching using walking algorithm if in auto mode
-            if cache_mode == "auto" and self.history.should_apply_conversation_cache():
-                cache_updates = self.history.get_conversation_cache_updates()
+            # Apply cache control to system prompt
+            self._apply_system_cache(base_args, cache_mode)
 
-                # Remove cache control from old positions
-                if cache_updates["remove"]:
-                    self.history.remove_cache_control_from_messages(
-                        messages, cache_updates["remove"]
-                    )
-                    self.logger.debug(
-                        f"Removed conversation cache_control from positions {cache_updates['remove']}"
-                    )
+            # Apply conversation caching
+            applied_count = await self._apply_conversation_cache(messages, cache_mode)
 
-                # Add cache control to new positions
-                if cache_updates["add"]:
-                    applied_count = self.history.add_cache_control_to_messages(
-                        messages, cache_updates["add"]
+            # Verify we don't exceed Anthropic's 4 cache block limit
+            if applied_count > 0:
+                total_cache_blocks = applied_count
+                if cache_mode != "off" and base_args["system"]:
+                    total_cache_blocks += 1 # tools+system cache block
+                if total_cache_blocks > 4:
+                    self.logger.warning(
+                        f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
                     )
-                    if applied_count > 0:
-                        self.history.apply_conversation_cache_updates(cache_updates)
-                        self.logger.debug(
-                            f"Applied conversation cache_control to positions {cache_updates['add']} ({applied_count} blocks)"
-                        )
-
-                        # Verify we don't exceed Anthropic's 4 cache block limit
-                        total_cache_blocks = applied_count
-                        if cache_mode != "off" and base_args["system"]:
-                            total_cache_blocks += 1 # tools+system cache block
-                        if total_cache_blocks > 4:
-                            self.logger.warning(
-                                f"Total cache blocks ({total_cache_blocks}) exceeds Anthropic limit of 4"
-                            )
-                    else:
-                        self.logger.debug(
-                            f"Failed to apply conversation cache_control to positions {cache_updates['add']}"
-                        )
 
             if params.maxTokens is not None:
                 base_args["max_tokens"] = params.maxTokens
@@ -387,34 +526,22 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
                     style="dim green italic",
                 )
 
-                # Process all tool calls and collect results
-                tool_results = []
-                # Use a different loop variable for tool enumeration if 'i' is outer loop counter
-                for tool_idx, content_block in enumerate(tool_uses):
-                    tool_name = content_block.name
-                    tool_args = content_block.input
-                    tool_use_id = content_block.id
-
-                    if tool_idx == 0:  # Only show message for first tool use
-                        await self.show_assistant_message(message_text, tool_name)
-
-                    self.show_tool_call(available_tools, tool_name, tool_args)
-                    tool_call_request = CallToolRequest(
-                        method="tools/call",
-                        params=CallToolRequestParams(name=tool_name, arguments=tool_args),
-                    )
-                    # TODO -- support MCP isError etc.
-                    result = await self.call_tool(
-                        request=tool_call_request, tool_call_id=tool_use_id
-                    )
-                    self.show_tool_result(result)
-
-                    # Add each result to our collection
-                    tool_results.append((tool_use_id, result))
-                    responses.extend(result.content)
+                # Process all tool calls using the helper method
+                tool_results, tool_responses = await self._process_tool_calls(
+                    tool_uses, available_tools, message_text, structured_model
+                )
+                responses.extend(tool_responses)
 
+                # Always add tool_results_message first (required by Anthropic API)
                 messages.append(AnthropicConverter.create_tool_results_message(tool_results))
 
+                # For structured output, we have our result and should exit after sending tool_result
+                if structured_model and any(
+                    tool.name == "return_structured_output" for tool in tool_uses
+                ):
+                    self.logger.debug("Structured output received, breaking iteration loop")
+                    break
+
         # Only save the new conversation messages to history if use_history is true
         # Keep the prompt messages separate
         if params.use_history:
@@ -501,19 +628,51 @@ class AnthropicAugmentedLLM(AugmentedLLM[MessageParam, Message]):
    ) -> Tuple[ModelT | None, PromptMessageMultipart]:  # noqa: F821
        request_params = self.get_request_params(request_params)
 
-        # TODO - convert this to use Tool Calling convention for Anthropic Structured outputs
-        multipart_messages[-1].add_text(
-            """YOU MUST RESPOND IN THE FOLLOWING FORMAT:
-            {schema}
-            RESPOND ONLY WITH THE JSON, NO PREAMBLE, CODE FENCES OR 'properties' ARE PERMISSABLE """.format(
-                schema=model.model_json_schema()
-            )
-        )
+        # Check the last message role
+        last_message = multipart_messages[-1]
 
-        result: PromptMessageMultipart = await self._apply_prompt_provider_specific(
-            multipart_messages, request_params
+        # Add all previous messages to history (or all messages if last is from assistant)
+        messages_to_add = (
+            multipart_messages[:-1] if last_message.role == "user" else multipart_messages
         )
-        return self._structured_from_multipart(result, model)
+        converted = []
+
+        for msg in messages_to_add:
+            anthropic_msg = AnthropicConverter.convert_to_anthropic(msg)
+            converted.append(anthropic_msg)
+
+        self.history.extend(converted, is_prompt=False)
+
+        if last_message.role == "user":
+            self.logger.debug("Last message in prompt is from user, generating structured response")
+            message_param = AnthropicConverter.convert_to_anthropic(last_message)
+
+            # Call _anthropic_completion with the structured model
+            response_content = await self._anthropic_completion(
+                message_param, request_params, structured_model=model
+            )
+
+            # Extract the structured data from the response
+            for content in response_content:
+                if content.type == "text":
+                    try:
+                        # Parse the JSON response from the tool
+                        data = json.loads(content.text)
+                        parsed_model = model(**data)
+                        # Create assistant response
+                        assistant_response = Prompt.assistant(content)
+                        return parsed_model, assistant_response
+                    except (json.JSONDecodeError, ValueError) as e:
+                        self.logger.error(f"Failed to parse structured output: {e}")
+                        assistant_response = Prompt.assistant(content)
+                        return None, assistant_response
+
+            # If no valid response found
+            return None, Prompt.assistant()
+        else:
+            # For assistant messages: Return the last message content
+            self.logger.debug("Last message in prompt is from assistant, returning it directly")
+            return None, last_message
 
     def _show_usage(self, raw_usage: Usage, turn_usage: TurnUsage) -> None:
         # Print raw usage for debugging
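
Taken together, these hunks replace the old prompt-injected JSON instructions with forced tool calling for Anthropic structured outputs. A hedged end-to-end sketch, reusing the illustrative `WeatherReport` model from earlier and assuming `llm` is an `AnthropicAugmentedLLM` instance obtained from a fast-agent setup:

# Sketch only: structured() now drives the forced return_structured_output tool.
from mcp.types import PromptMessage, TextContent


async def get_weather_report(llm):  # `llm` assumed to be AnthropicAugmentedLLM
    prompt = PromptMessage(
        role="user",
        content=TextContent(type="text", text="What's the weather in Oslo?"),
    )
    parsed, assistant_msg = await llm.structured([prompt], WeatherReport)
    if parsed is not None:
        return parsed  # validated WeatherReport instance
    # Parsing failed; the raw assistant reply is still available.
    return assistant_msg.first_text()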