aury-agent 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,680 @@
+ """LLM step execution helpers for ReactAgent."""
+
+ from __future__ import annotations
+
+ import json
+ import re
+ from typing import TYPE_CHECKING, Any
+
+ from ..core.base import ToolInjectionMode
+ from ..core.event_bus import Events
+ from ..core.logging import react_logger as logger
+ from ..core.types import (
+     ToolInvocation,
+     ToolInvocationState,
+     generate_id,
+ )
+ from ..core.types.block import BlockEvent, BlockKind, BlockOp
+ from ..llm import LLMMessage, ToolDefinition
+ from ..middleware import HookAction
+
+ if TYPE_CHECKING:
+     from .agent import ReactAgent
+
+
+ def get_effective_tool_mode(agent: "ReactAgent") -> ToolInjectionMode:
+     """Get effective tool mode (auto-detected from model capabilities).
+
+     Returns:
+         FUNCTION_CALL if the model supports tools, else PROMPT
+     """
+     # If explicitly set to PROMPT, use PROMPT
+     if agent.config.tool_mode == ToolInjectionMode.PROMPT:
+         return ToolInjectionMode.PROMPT
+
+     # Auto-detect: if the model doesn't support tools, fall back to PROMPT
+     caps = agent.llm.capabilities
+     if not caps.supports_tools:
+         logger.info(
+             f"Model {agent.llm.model} does not support function calling, "
+             "auto-switching to PROMPT mode for tools"
+         )
+         return ToolInjectionMode.PROMPT
+
+     return ToolInjectionMode.FUNCTION_CALL
+
+
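# Illustration of the fallback above: even with config.tool_mode set to
# ToolInjectionMode.FUNCTION_CALL, a model whose capabilities.supports_tools is
# False gets its tools injected via PROMPT mode instead.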
+ def build_tool_prompt(tools: list) -> str:
+     """Build tool descriptions for PROMPT-mode injection.
+
+     Args:
+         tools: List of BaseTool objects
+
+     Returns:
+         Tool prompt string to inject into the system message
+     """
+     if not tools:
+         return ""
+
+     tool_descriptions = []
+     for tool in tools:
+         info = tool.get_info()
+         # Build parameter description
+         params_desc = ""
+         if info.parameters and "properties" in info.parameters:
+             params = []
+             properties = info.parameters.get("properties", {})
+             required = info.parameters.get("required", [])
+             for name, schema in properties.items():
+                 param_type = schema.get("type", "any")
+                 param_desc = schema.get("description", "")
+                 is_required = "required" if name in required else "optional"
+                 params.append(f" - {name} ({param_type}, {is_required}): {param_desc}")
+             params_desc = "\n" + "\n".join(params) if params else ""
+
+         tool_descriptions.append(
+             f"### {info.name}\n"
+             f"{info.description}{params_desc}"
+         )
+
+     return f"""## Available Tools
+
+ You have access to the following tools. To use a tool, output a JSON block in this exact format:
+
+ ```tool_call
+ {{
+   "tool": "tool_name",
+   "arguments": {{
+     "param1": "value1",
+     "param2": "value2"
+   }}
+ }}
+ ```
+
+ IMPORTANT:
+ - Use the exact format above with ```tool_call code block
+ - You can make multiple tool calls in one response
+ - Wait for tool results before continuing
+
+ {chr(10).join(tool_descriptions)}
+ """
+
+
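# A minimal usage sketch for the function above, using hypothetical stand-ins for
# BaseTool/ToolInfo (only the get_info() shape that build_tool_prompt reads is assumed):
#
#     class _Info:
#         name = "read_file"
#         description = "Read a text file from disk."
#         parameters = {
#             "properties": {"path": {"type": "string", "description": "File path"}},
#             "required": ["path"],
#         }
#
#     class _FakeTool:
#         def get_info(self):
#             return _Info()
#
#     print(build_tool_prompt([_FakeTool()]))
#     # -> "## Available Tools ... ### read_file ... - path (string, required): File path"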
+ def parse_tool_calls_from_text(text: str) -> list[dict]:
+     """Parse tool calls from LLM text output (for PROMPT mode).
+
+     Looks for ```tool_call blocks in the format:
+         ```tool_call
+         {"tool": "name", "arguments": {...}}
+         ```
+
+     Args:
+         text: LLM output text
+
+     Returns:
+         List of parsed tool calls: [{"name": str, "arguments": dict}, ...]
+     """
+     tool_calls = []
+
+     # Match ```tool_call ... ``` blocks
+     pattern = r"```tool_call\s*\n?(.+?)\n?```"
+     matches = re.findall(pattern, text, re.DOTALL)
+
+     for match in matches:
+         try:
+             data = json.loads(match.strip())
+             if "tool" in data:
+                 tool_calls.append({
+                     "name": data["tool"],
+                     "arguments": data.get("arguments", {}),
+                 })
+         except json.JSONDecodeError as e:
+             logger.warning(f"Failed to parse tool call JSON: {e}")
+             continue
+
+     return tool_calls
+
+
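# A quick round-trip sketch for the parser above (expected shape, assuming the
# model followed the ```tool_call format produced by build_tool_prompt):
#
#     _sample = 'Reading it now.\n```tool_call\n{"tool": "read_file", "arguments": {"path": "a.txt"}}\n```\n'
#     assert parse_tool_calls_from_text(_sample) == [
#         {"name": "read_file", "arguments": {"path": "a.txt"}}
#     ]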
+ async def execute_step(agent: "ReactAgent") -> str | None:
+     """Execute a single LLM step with middleware hooks.
+
+     This function directly modifies the agent's internal state:
+     - agent._text_buffer
+     - agent._thinking_buffer
+     - agent._tool_invocations
+     - agent._current_text_block_id
+     - agent._current_thinking_block_id
+     - agent._call_id_to_tool
+     - agent._tool_call_blocks
+     - agent._message_history
+
+     Args:
+         agent: ReactAgent instance
+
+     Returns:
+         finish_reason from the LLM, or None if the request was cancelled by middleware
+     """
+     from ..core.context import emit as global_emit
+
+     # Get tools from AgentContext (populated by providers)
+     all_tools = agent._agent_context.tools if agent._agent_context else []
+
+     # Determine effective tool mode (auto-detect based on capabilities)
+     effective_tool_mode = get_effective_tool_mode(agent)
+
+     # Build tool definitions (FUNCTION_CALL mode only)
+     tool_defs = None
+     if effective_tool_mode == ToolInjectionMode.FUNCTION_CALL and all_tools:
+         tool_defs = [
+             ToolDefinition(
+                 name=t.name,
+                 description=t.description,
+                 input_schema=t.parameters,
+             )
+             for t in all_tools
+         ]
+
+     # For PROMPT mode, inject tool descriptions into the system message
+     if effective_tool_mode == ToolInjectionMode.PROMPT and all_tools:
+         tool_prompt = build_tool_prompt(all_tools)
+         # Inject into the first system message
+         if agent._message_history and agent._message_history[0].role == "system":
+             original_content = agent._message_history[0].content
+             agent._message_history[0] = LLMMessage(
+                 role="system",
+                 content=f"{original_content}\n\n{tool_prompt}",
+             )
+
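# (The injection above assumes the history starts with a system message; when it
# does not, the tool prompt is silently skipped.)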
+     # Reset buffers
+     agent._text_buffer = ""
+     agent._thinking_buffer = ""  # Buffer for non-streaming thinking
+     agent._tool_invocations = []
+
+     # Reset block IDs for this step (each step gets fresh block IDs)
+     agent._current_text_block_id = None
+     agent._current_thinking_block_id = None
+
+     # Reset tool call tracking
+     agent._call_id_to_tool = {}
+     agent._tool_call_blocks = {}
+
+     # Track accumulated arguments for streaming tool calls (for middleware context)
+     tool_call_accumulated_args: dict[str, dict[str, Any]] = {}
+
+     # Build middleware context for this step
+     mw_context = {
+         "session_id": agent.session.id,
+         "invocation_id": agent._current_invocation.id if agent._current_invocation else "",
+         "step": agent._current_step,
+         "agent_id": agent.name,
+         "emit": global_emit,  # For middleware to emit BlockEvent/ActionEvent
+         "backends": agent.ctx.backends,
+         "tool_mode": effective_tool_mode.value,  # Add tool mode to context
+     }
+
+     # Build LLM call kwargs
+     # Note: temperature, max_tokens, timeout, and retries are configured on the LLMProvider
+     llm_kwargs: dict[str, Any] = {
+         "messages": agent._message_history,
+         "tools": tool_defs,  # None in PROMPT mode
+     }
+
+     # Get model capabilities
+     caps = agent.llm.capabilities
+
+     # Add thinking configuration (use runtime override if set),
+     # but only if the model supports thinking
+     enable_thinking = agent._get_enable_thinking()
+     reasoning_effort = agent._get_reasoning_effort()
+     if enable_thinking:
+         if caps.supports_thinking:
+             llm_kwargs["enable_thinking"] = True
+             if reasoning_effort:
+                 llm_kwargs["reasoning_effort"] = reasoning_effort
+         else:
+             logger.debug(
+                 f"Model {agent.llm.model} does not support thinking, "
+                 "enable_thinking will be ignored"
+             )
+
+     # === Middleware: on_request ===
+     if agent.middleware:
+         logger.info(
+             "Calling middleware: on_request",
+             extra={"invocation_id": agent._current_invocation.id},
+         )
+         llm_kwargs = await agent.middleware.process_request(llm_kwargs, mw_context)
+         if llm_kwargs is None:
+             logger.warning(
+                 "LLM request cancelled by middleware",
+                 extra={"invocation_id": agent._current_invocation.id},
+             )
+             return None
+
+     # Log message history before the LLM call
+     logger.info(
+         f"LLM call - Step {agent._current_step}, messages: {len(agent._message_history)}, "
+         f"tools: {len(tool_defs) if tool_defs else 0}, "
+         f"thinking: {enable_thinking}, mode: {effective_tool_mode.value}",
+         extra={"invocation_id": agent._current_invocation.id},
+     )
+     # Detailed message log (for debugging model issues such as repeated calls)
+     for i, msg in enumerate(agent._message_history):
+         content_preview = str(msg.content)[:300] if msg.content else "<empty>"
+         tool_call_id = getattr(msg, "tool_call_id", None)
+         logger.debug(
+             f" msg[{i}] role={msg.role}"
+             f"{f', tool_call_id={tool_call_id}' if tool_call_id else ''}"
+             f", content={content_preview}"
+         )
+
+     # Call the LLM
+     await agent.bus.publish(
+         Events.LLM_START,
+         {
+             "provider": agent.llm.provider,
+             "model": agent.llm.model,
+             "step": agent._current_step,
+             "enable_thinking": enable_thinking,
+         },
+     )
+
+     finish_reason = None
+     llm_response_data: dict[str, Any] = {}  # Collect response for middleware
+
+     # Reset middleware stream state
+     if agent.middleware:
+         logger.debug(
+             "Resetting middleware stream state",
+             extra={"invocation_id": agent._current_invocation.id},
+         )
+         agent.middleware.reset_stream_state()
+
+     logger.info(
+         "Starting LLM stream",
+         extra={"invocation_id": agent._current_invocation.id, "model": agent.llm.model},
+     )
+
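# (The loop below assumes agent.llm.complete(...) yields streaming events whose
# .type is one of: "content", "thinking", "tool_call_start", "tool_call_delta",
# "tool_call_progress", "tool_call", "completed", "usage", or "error"; these are
# exactly the branches handled here.)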
+     async for event in agent.llm.complete(**llm_kwargs):
+         if await agent._check_abort():
+             break
+
+         if event.type == "content":
+             # Text content
+             if event.delta:
+                 # === Middleware: on_model_stream ===
+                 stream_chunk = {"delta": event.delta, "type": "content"}
+                 if agent.middleware:
+                     stream_chunk = await agent.middleware.process_stream_chunk(
+                         stream_chunk, mw_context
+                     )
+                     if stream_chunk is None:
+                         continue  # Skip this chunk
+
+                 delta = stream_chunk.get("delta", event.delta)
+                 agent._text_buffer += delta
+
+                 # Reuse or create a block_id for text streaming
+                 if agent._current_text_block_id is None:
+                     agent._current_text_block_id = generate_id("blk")
+
+                 await agent.ctx.emit(BlockEvent(
+                     block_id=agent._current_text_block_id,
+                     kind=BlockKind.TEXT,
+                     op=BlockOp.DELTA,
+                     data={"content": delta},
+                 ))
+
+                 await agent.bus.publish(
+                     Events.LLM_STREAM,
+                     {
+                         "delta": delta,
+                         "step": agent._current_step,
+                     },
+                 )
+
+         elif event.type == "thinking":
+             # Thinking content: only emit if thinking is enabled
+             stream_thinking = agent._get_stream_thinking()
+             if event.delta and enable_thinking:
+                 # === Middleware: on_model_stream (type=thinking) ===
+                 stream_chunk = {"delta": event.delta, "type": "thinking"}
+                 if agent.middleware:
+                     stream_chunk = await agent.middleware.process_stream_chunk(
+                         stream_chunk, mw_context
+                     )
+                     if stream_chunk is None:
+                         continue  # Skip this chunk
+
+                 delta = stream_chunk.get("delta", event.delta)
+
+                 # Always accumulate to the buffer (for middleware on_response)
+                 agent._thinking_buffer += delta
+
+                 if stream_thinking:
+                     # Reuse or create a block_id for thinking streaming
+                     if agent._current_thinking_block_id is None:
+                         agent._current_thinking_block_id = generate_id("blk")
+
+                     # Stream thinking in real time
+                     await agent.ctx.emit(BlockEvent(
+                         block_id=agent._current_thinking_block_id,
+                         kind=BlockKind.THINKING,
+                         op=BlockOp.DELTA,
+                         data={"content": delta},
+                     ))
+
+         elif event.type == "tool_call_start":
+             # Tool call started (name known, arguments pending)
+             if event.tool_call:
+                 tc = event.tool_call
+                 logger.debug(
+                     f"Tool call start: {tc.name}",
+                     extra={
+                         "invocation_id": agent._current_invocation.id,
+                         "call_id": tc.id,
+                     },
+                 )
+                 agent._call_id_to_tool[tc.id] = tc.name
+
+                 # Always emit a start notification (privacy-safe, no arguments)
+                 block_id = generate_id("blk")
+                 agent._tool_call_blocks[tc.id] = block_id
+
+                 await agent.ctx.emit(BlockEvent(
+                     block_id=block_id,
+                     kind=BlockKind.TOOL_USE,
+                     op=BlockOp.APPLY,
+                     data={
+                         "name": tc.name,
+                         "call_id": tc.id,
+                         "status": "streaming",  # Arguments are still streaming
+                     },
+                 ))
+
+         elif event.type == "tool_call_delta":
+             # Tool arguments delta (streaming)
+             if event.tool_call_delta:
+                 call_id = event.tool_call_delta.get("call_id")
+                 arguments_delta = event.tool_call_delta.get("arguments_delta")
+
+                 logger.debug(
+                     f"Tool call delta received: call_id={call_id}, "
+                     f"delta_type={type(arguments_delta).__name__}, delta={arguments_delta}",
+                     extra={"invocation_id": agent._current_invocation.id},
+                 )
+
+                 if call_id and arguments_delta:
+                     tool_name = agent._call_id_to_tool.get(call_id)
+                     if tool_name:
+                         tool = agent._get_tool(tool_name)
+
+                         # Check whether the tool allows streaming arguments
+                         if tool and tool.config.stream_arguments:
+                             # Update accumulated args for middleware context
+                             if call_id not in tool_call_accumulated_args:
+                                 tool_call_accumulated_args[call_id] = {}
+
+                             # Handle different delta formats:
+                             # some providers send a dict, others a JSON string fragment
+                             if isinstance(arguments_delta, str):
+                                 # JSON string fragment: accumulate under a single "_raw" key
+                                 if "_raw" not in tool_call_accumulated_args[call_id]:
+                                     tool_call_accumulated_args[call_id]["_raw"] = ""
+                                 tool_call_accumulated_args[call_id]["_raw"] += arguments_delta
+
+                                 # Convert the string delta to dict format for middleware
+                                 arguments_delta = {"_raw": arguments_delta}
+
+                             elif isinstance(arguments_delta, dict):
+                                 # Merge the delta into the accumulated arguments
+                                 for key, value in arguments_delta.items():
+                                     if key in tool_call_accumulated_args[call_id]:
+                                         # Concatenate strings; replace other types
+                                         if isinstance(value, str) and isinstance(
+                                             tool_call_accumulated_args[call_id][key], str
+                                         ):
+                                             tool_call_accumulated_args[call_id][key] += value
+                                         else:
+                                             tool_call_accumulated_args[call_id][key] = value
+                                     else:
+                                         tool_call_accumulated_args[call_id][key] = value
+
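# (Accumulation sketch, grounded in the branches above: two dict deltas
# {"path": "/tm"} then {"path": "p/a.txt"} accumulate to {"path": "/tmp/a.txt"};
# string fragments '{"pa' and 'th": "a.txt"}' accumulate to
# {"_raw": '{"path": "a.txt"}'} and are forwarded as {"_raw": <fragment>}.)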
+                             # === Middleware: on_tool_call_delta ===
+                             processed_delta = arguments_delta
+                             if agent.middleware:
+                                 delta_context = {
+                                     **mw_context,
+                                     "accumulated_args": tool_call_accumulated_args.get(call_id, {}),
+                                 }
+                                 processed_delta = await agent.middleware.process_tool_call_delta(
+                                     call_id, tool_name, arguments_delta, delta_context
+                                 )
+                                 if processed_delta is None:
+                                     continue  # Skip this delta
+
+                             block_id = agent._tool_call_blocks.get(call_id)
+                             if block_id:
+                                 await agent.ctx.emit(BlockEvent(
+                                     block_id=block_id,
+                                     kind=BlockKind.TOOL_USE,
+                                     op=BlockOp.DELTA,
+                                     data={
+                                         "call_id": call_id,
+                                         "arguments_delta": processed_delta,
+                                     },
+                                 ))
+
+         elif event.type == "tool_call_progress":
+             # Tool arguments progress (bytes received)
+             if event.tool_call_progress:
+                 call_id = event.tool_call_progress.get("call_id")
+                 bytes_received = event.tool_call_progress.get("bytes_received")
+
+                 if call_id and bytes_received is not None:
+                     block_id = agent._tool_call_blocks.get(call_id)
+                     if block_id:
+                         # Always emit progress (privacy-safe, no content)
+                         await agent.ctx.emit(BlockEvent(
+                             block_id=block_id,
+                             kind=BlockKind.TOOL_USE,
+                             op=BlockOp.PATCH,
+                             data={
+                                 "call_id": call_id,
+                                 "bytes_received": bytes_received,
+                                 "status": "receiving",
+                             },
+                         ))
+
+         elif event.type == "tool_call":
+             # Tool call complete (arguments fully received)
+             if event.tool_call:
+                 tc = event.tool_call
+                 invocation = ToolInvocation(
+                     tool_call_id=tc.id,
+                     tool_name=tc.name,
+                     args_raw=tc.arguments,
+                     state=ToolInvocationState.CALL,
+                 )
+
+                 # Parse arguments
+                 try:
+                     invocation.args = json.loads(tc.arguments)
+                 except json.JSONDecodeError:
+                     invocation.args = {}
+
+                 agent._tool_invocations.append(invocation)
+
+                 # Strict mode: tool_call_start must have been received first
+                 block_id = agent._tool_call_blocks[tc.id]  # Raises KeyError if not found
+                 await agent.ctx.emit(BlockEvent(
+                     block_id=block_id,
+                     kind=BlockKind.TOOL_USE,
+                     op=BlockOp.PATCH,
+                     data={
+                         "call_id": tc.id,
+                         "arguments": invocation.args,
+                         "status": "ready",
+                     },
+                 ))
+
+                 await agent.bus.publish(
+                     Events.TOOL_START,
+                     {
+                         "call_id": tc.id,
+                         "tool": tc.name,
+                         "arguments": invocation.args,
+                     },
+                 )
+
+         elif event.type == "completed":
+             finish_reason = event.finish_reason
+
+         elif event.type == "usage":
+             if event.usage:
+                 await agent.bus.publish(
+                     Events.USAGE_RECORDED,
+                     {
+                         "provider": agent.llm.provider,
+                         "model": agent.llm.model,
+                         "input_tokens": event.usage.input_tokens,
+                         "output_tokens": event.usage.output_tokens,
+                         "cache_read_tokens": event.usage.cache_read_tokens,
+                         "cache_write_tokens": event.usage.cache_write_tokens,
+                         "reasoning_tokens": event.usage.reasoning_tokens,
+                     },
+                 )
+
+         elif event.type == "error":
+             await agent.ctx.emit(BlockEvent(
+                 kind=BlockKind.ERROR,
+                 op=BlockOp.APPLY,
+                 data={"message": event.error or "Unknown LLM error"},
+             ))
+
+     # If thinking was buffered (not streamed), emit it now
+     if agent._thinking_buffer and not agent._get_stream_thinking():
+         await agent.ctx.emit(BlockEvent(
+             kind=BlockKind.THINKING,
+             op=BlockOp.APPLY,
+             data={"content": agent._thinking_buffer},
+         ))
+
+     # PROMPT mode: parse tool calls from the text output
+     if effective_tool_mode == ToolInjectionMode.PROMPT and agent._text_buffer:
+         parsed_calls = parse_tool_calls_from_text(agent._text_buffer)
+         for call in parsed_calls:
+             call_id = generate_id("call")
+             invocation = ToolInvocation(
+                 tool_call_id=call_id,
+                 tool_name=call["name"],
+                 args_raw=json.dumps(call["arguments"]),
+                 args=call["arguments"],
+                 state=ToolInvocationState.CALL,
+             )
+             agent._tool_invocations.append(invocation)
+
+             # Create a block for the tool call (no streaming events in PROMPT mode)
+             block_id = generate_id("blk")
+             agent._tool_call_blocks[call_id] = block_id
+             agent._call_id_to_tool[call_id] = call["name"]
+
+             await agent.ctx.emit(BlockEvent(
+                 block_id=block_id,
+                 kind=BlockKind.TOOL_USE,
+                 op=BlockOp.APPLY,
+                 data={
+                     "name": call["name"],
+                     "call_id": call_id,
+                     "arguments": call["arguments"],
+                     "status": "ready",
+                     "source": "prompt",  # Parsed from text
+                 },
+             ))
+
+             await agent.bus.publish(
+                 Events.TOOL_START,
+                 {
+                     "call_id": call_id,
+                     "tool": call["name"],
+                     "arguments": call["arguments"],
+                     "source": "prompt",
+                 },
+             )
+
+         if parsed_calls:
+             logger.info(
+                 f"PROMPT mode: parsed {len(parsed_calls)} tool calls from text",
+                 extra={
+                     "invocation_id": agent._current_invocation.id,
+                     "tool_calls": [call["name"] for call in parsed_calls],
+                 },
+             )
+
+     # === Middleware: on_response ===
+     llm_response_data = {
+         "text": agent._text_buffer,
+         "thinking": agent._thinking_buffer,
+         "tool_calls": len(agent._tool_invocations),
+         "finish_reason": finish_reason,
+     }
+     if agent.middleware:
+         logger.debug(
+             "Calling middleware: on_response",
+             extra={
+                 "invocation_id": agent._current_invocation.id,
+                 "text_length": len(agent._text_buffer),
+                 "tool_calls": len(agent._tool_invocations),
+             },
+         )
+         llm_response_data = await agent.middleware.process_response(
+             llm_response_data, mw_context
+         )
+
+     await agent.bus.publish(
+         Events.LLM_END,
+         {
+             "step": agent._current_step,
+             "finish_reason": finish_reason,
+             "text_length": len(agent._text_buffer),
+             "thinking_length": len(agent._thinking_buffer),
+             "tool_calls": len(agent._tool_invocations),
+         },
+     )
+
+     # Add the assistant message to history.
+     # Thinking is saved for verification; the adapter filters it out before sending to the model.
+     if agent._text_buffer or agent._tool_invocations or agent._thinking_buffer:
+         content_parts = []
+
+         # Save thinking for verification (won't be sent to the model)
+         if agent._thinking_buffer:
+             content_parts.append({"type": "thinking", "thinking": agent._thinking_buffer})
+
+         # Add text content
+         if agent._text_buffer:
+             content_parts.append({"type": "text", "text": agent._text_buffer})
+
+         # Add tool_use parts
+         for inv in agent._tool_invocations:
+             content_parts.append({
+                 "type": "tool_use",
+                 "id": inv.tool_call_id,
+                 "name": inv.tool_name,
+                 "input": inv.args,
+             })
+
+         # content_parts is guaranteed non-empty by the outer condition
+         agent._message_history.append(
+             LLMMessage(role="assistant", content=content_parts)
+         )
+
+     return finish_reason
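# The appended assistant message, assembled from the branches above, has this
# shape (values illustrative):
#
#     LLMMessage(role="assistant", content=[
#         {"type": "thinking", "thinking": "<thinking buffer>"},
#         {"type": "text", "text": "<text buffer>"},
#         {"type": "tool_use", "id": "<tool_call_id>", "name": "<tool_name>", "input": {...}},
#     ])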