codesm 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codesm/__init__.py +3 -0
- codesm/agent/__init__.py +4 -0
- codesm/agent/agent.py +202 -0
- codesm/agent/loop.py +194 -0
- codesm/agent/optimizer.py +452 -0
- codesm/agent/orchestrator.py +434 -0
- codesm/agent/prompt.py +164 -0
- codesm/agent/router.py +317 -0
- codesm/agent/subagent.py +432 -0
- codesm/atomic_edit.py +396 -0
- codesm/audit/__init__.py +19 -0
- codesm/audit/audit.py +334 -0
- codesm/auth/__init__.py +6 -0
- codesm/auth/claude_oauth.py +180 -0
- codesm/auth/credentials.py +98 -0
- codesm/cli.py +540 -0
- codesm/config/__init__.py +3 -0
- codesm/config/config.py +51 -0
- codesm/config/schema.py +96 -0
- codesm/diff_preview.py +213 -0
- codesm/eval/__init__.py +27 -0
- codesm/eval/compare.py +158 -0
- codesm/eval/metrics.py +117 -0
- codesm/eval/runner.py +232 -0
- codesm/eval/task.py +91 -0
- codesm/file_watcher.py +320 -0
- codesm/formatter.py +305 -0
- codesm/index/__init__.py +12 -0
- codesm/index/chunking.py +112 -0
- codesm/index/cli.py +119 -0
- codesm/index/index_store.py +68 -0
- codesm/index/indexer.py +206 -0
- codesm/index/watcher.py +41 -0
- codesm/lsp/__init__.py +415 -0
- codesm/lsp/client.py +761 -0
- codesm/lsp/servers.py +250 -0
- codesm/mcp/__init__.py +41 -0
- codesm/mcp/client.py +395 -0
- codesm/mcp/codegen.py +242 -0
- codesm/mcp/config.py +134 -0
- codesm/mcp/manager.py +195 -0
- codesm/mcp/sandbox.py +314 -0
- codesm/mcp/tool.py +75 -0
- codesm/memory/__init__.py +16 -0
- codesm/memory/cli.py +108 -0
- codesm/memory/extractor.py +110 -0
- codesm/memory/inject.py +15 -0
- codesm/memory/models.py +53 -0
- codesm/memory/retrieval.py +65 -0
- codesm/memory/store.py +69 -0
- codesm/permission/__init__.py +49 -0
- codesm/permission/permission.py +364 -0
- codesm/provider/__init__.py +3 -0
- codesm/provider/anthropic.py +246 -0
- codesm/provider/base.py +68 -0
- codesm/provider/ollama.py +158 -0
- codesm/provider/openai.py +158 -0
- codesm/provider/openrouter.py +203 -0
- codesm/provider/router.py +102 -0
- codesm/review/__init__.py +22 -0
- codesm/review/refactor.py +479 -0
- codesm/review/reviewer.py +342 -0
- codesm/rules/__init__.py +13 -0
- codesm/rules/discovery.py +159 -0
- codesm/rules/init.py +513 -0
- codesm/search/__init__.py +3 -0
- codesm/search/embeddings.py +50 -0
- codesm/server/__init__.py +3 -0
- codesm/server/server.py +41 -0
- codesm/session/__init__.py +17 -0
- codesm/session/context.py +333 -0
- codesm/session/message.py +72 -0
- codesm/session/revert.py +81 -0
- codesm/session/search.py +414 -0
- codesm/session/session.py +352 -0
- codesm/session/summarize.py +280 -0
- codesm/session/title.py +202 -0
- codesm/session/todo.py +188 -0
- codesm/session/topics.py +348 -0
- codesm/skills/__init__.py +17 -0
- codesm/skills/install.py +206 -0
- codesm/skills/loader.py +170 -0
- codesm/skills/manager.py +411 -0
- codesm/snapshot/__init__.py +5 -0
- codesm/snapshot/snapshot.py +234 -0
- codesm/storage/__init__.py +3 -0
- codesm/storage/storage.py +52 -0
- codesm/tool/__init__.py +7 -0
- codesm/tool/base.py +35 -0
- codesm/tool/bash.py +137 -0
- codesm/tool/bash.txt +129 -0
- codesm/tool/batch.py +108 -0
- codesm/tool/batch.txt +31 -0
- codesm/tool/bug_localize.py +241 -0
- codesm/tool/bug_localize.txt +43 -0
- codesm/tool/code_review.py +186 -0
- codesm/tool/code_review.txt +33 -0
- codesm/tool/codesearch.py +379 -0
- codesm/tool/codesearch.txt +61 -0
- codesm/tool/diagnostics.py +68 -0
- codesm/tool/diagnostics.txt +105 -0
- codesm/tool/edit.py +431 -0
- codesm/tool/edit.txt +77 -0
- codesm/tool/find_thread.py +84 -0
- codesm/tool/find_thread.txt +53 -0
- codesm/tool/finder.py +236 -0
- codesm/tool/finder.txt +29 -0
- codesm/tool/git.py +299 -0
- codesm/tool/git.txt +63 -0
- codesm/tool/glob.py +64 -0
- codesm/tool/glob.txt +95 -0
- codesm/tool/grep.py +88 -0
- codesm/tool/grep.txt +95 -0
- codesm/tool/handoff.py +244 -0
- codesm/tool/handoff.txt +27 -0
- codesm/tool/lookat.py +292 -0
- codesm/tool/ls.py +111 -0
- codesm/tool/ls.txt +31 -0
- codesm/tool/lsp.py +279 -0
- codesm/tool/lsp.txt +134 -0
- codesm/tool/mcp_execute.py +287 -0
- codesm/tool/mermaid.py +444 -0
- codesm/tool/multiedit.py +251 -0
- codesm/tool/multiedit.txt +52 -0
- codesm/tool/multifile_edit.py +331 -0
- codesm/tool/multifile_edit.txt +40 -0
- codesm/tool/oracle.py +146 -0
- codesm/tool/oracle.txt +66 -0
- codesm/tool/orchestrate.py +274 -0
- codesm/tool/parallel_tasks.txt +72 -0
- codesm/tool/patch.py +458 -0
- codesm/tool/patch.txt +51 -0
- codesm/tool/read.py +84 -0
- codesm/tool/read.txt +42 -0
- codesm/tool/read_thread.py +164 -0
- codesm/tool/read_thread.txt +42 -0
- codesm/tool/redo.py +78 -0
- codesm/tool/refactor.py +351 -0
- codesm/tool/refactor.txt +29 -0
- codesm/tool/registry.py +216 -0
- codesm/tool/skill.py +454 -0
- codesm/tool/task.py +348 -0
- codesm/tool/task.txt +52 -0
- codesm/tool/testgen.py +248 -0
- codesm/tool/testgen.txt +35 -0
- codesm/tool/todo.py +115 -0
- codesm/tool/todo.txt +35 -0
- codesm/tool/undo.py +124 -0
- codesm/tool/web.py +65 -0
- codesm/tool/webfetch.py +120 -0
- codesm/tool/webfetch.txt +77 -0
- codesm/tool/websearch.py +85 -0
- codesm/tool/websearch.txt +109 -0
- codesm/tool/write.py +262 -0
- codesm/tool/write.txt +50 -0
- codesm/tui/__init__.py +19 -0
- codesm/tui/app.py +1915 -0
- codesm/tui/autocomplete.py +379 -0
- codesm/tui/chat.py +696 -0
- codesm/tui/clipboard.py +116 -0
- codesm/tui/command_palette.py +173 -0
- codesm/tui/modals.py +1575 -0
- codesm/tui/session_modal.py +402 -0
- codesm/tui/sidebar.py +42 -0
- codesm/tui/styles.tcss +132 -0
- codesm/tui/themes.py +480 -0
- codesm/tui/tools.py +1963 -0
- codesm/undo_history.py +379 -0
- codesm/util/__init__.py +21 -0
- codesm/util/citations.py +228 -0
- codesm/util/project_id.py +14 -0
- codesm-0.1.0.dist-info/METADATA +854 -0
- codesm-0.1.0.dist-info/RECORD +176 -0
- codesm-0.1.0.dist-info/WHEEL +4 -0
- codesm-0.1.0.dist-info/entry_points.txt +2 -0
- codesm-0.1.0.dist-info/licenses/LICENSE +21 -0
codesm/__init__.py
ADDED
codesm/agent/__init__.py
ADDED
codesm/agent/agent.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Main agent - orchestrates LLM calls and tool execution"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import AsyncIterator
|
|
6
|
+
|
|
7
|
+
from codesm.provider.base import get_provider, StreamChunk
|
|
8
|
+
from codesm.tool.registry import ToolRegistry
|
|
9
|
+
from codesm.session.session import Session
|
|
10
|
+
from codesm.agent.prompt import SYSTEM_PROMPT, build_system_prompt, format_available_skills
|
|
11
|
+
from codesm.agent.loop import ReActLoop
|
|
12
|
+
from codesm.mcp import MCPManager, load_mcp_config
|
|
13
|
+
from codesm.skills import SkillManager
|
|
14
|
+
from codesm.rules import RulesDiscovery
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Agent:
    """AI coding agent that can read, write, and execute code.

    Wires together the LLM provider, tool registry, skill system, rules
    discovery, and (lazily-initialized) MCP servers, and drives a ReAct
    loop for each chat turn. Conversation state is persisted through the
    attached ``Session``.
    """

    def __init__(
        self,
        directory: Path,
        model: str,
        session: Session | None = None,
        max_iterations: int = 0,  # 0 = unlimited
        mcp_config_path: Path | str | None = None,
    ):
        """Create an agent rooted at *directory* using *model*.

        Args:
            directory: Workspace root; resolved to an absolute path.
            model: Model identifier handed to ``get_provider``.
            session: Existing session to resume; a fresh one is created
                when ``None``.
            max_iterations: Cap on ReAct loop turns; 0 means unlimited.
            mcp_config_path: Explicit MCP config file. When ``None``,
                common locations under the workspace are searched on the
                first chat.
        """
        self.directory = Path(directory).resolve()
        self._model = model
        self.session = session or Session.create(self.directory)
        self.max_iterations = max_iterations
        self.tools = ToolRegistry()
        self.provider = get_provider(self._model)
        self.react_loop = ReActLoop(max_iterations=self.max_iterations)

        # MCP support - will be initialized on first chat
        self._mcp_manager: MCPManager | None = None
        self._mcp_config_path = mcp_config_path
        self._mcp_initialized = False

        # Strong reference to the background title-generation task.
        # asyncio only keeps weak references to tasks, so without this the
        # fire-and-forget task could be garbage-collected before it runs.
        self._title_task = None

        # Skill system
        self.skills = SkillManager(
            workspace_dir=self.directory,
            auto_triggers_enabled=True,
        )

        # Rules discovery (AGENTS.md, CLAUDE.md, etc.)
        self.rules = RulesDiscovery(workspace=self.directory)

        # Optional eval instrumentation hooks. When the eval runner sets
        # these, chat() propagates them into the ReAct loop context so the
        # loop can record compaction, tool errors, and token usage.
        self._eval_events: list[dict] | None = None
        self._eval_usage: dict | None = None

    @property
    def model(self) -> str:
        """Get current model"""
        return self._model

    @model.setter
    def model(self, value: str):
        """Set model and recreate provider"""
        self._model = value
        self.provider = get_provider(value)

    async def _init_mcp(self):
        """Initialize MCP servers if configured.

        Idempotent: runs at most once per Agent instance, even if no
        config is found or connections fail.
        """
        if self._mcp_initialized:
            return

        self._mcp_initialized = True

        # Load MCP config - search in working directory first
        config_path = self._mcp_config_path
        if not config_path:
            # Try common locations relative to working directory
            for candidate in [
                self.directory / "mcp-servers.json",
                self.directory / ".mcp" / "servers.json",
                self.directory / "codesm.json",
            ]:
                if candidate.exists():
                    config_path = candidate
                    break

        servers = load_mcp_config(config_path)
        if not servers:
            logger.debug(f"No MCP servers configured (searched {config_path or 'default locations'})")
            return

        # Create manager and connect. Only the config values are needed
        # here; the mapping keys were previously bound to an unused name.
        self._mcp_manager = MCPManager()
        for config in servers.values():
            self._mcp_manager.add_server(config)

        logger.info(f"Connecting to {len(servers)} MCP servers...")
        results = await self._mcp_manager.connect_all()

        connected = sum(1 for v in results.values() if v)
        if connected > 0:
            # Register MCP manager with tool registry (includes code execution tools)
            self.tools.set_mcp_manager(self._mcp_manager, workspace_dir=self.directory)
            logger.info(f"Connected to {connected} MCP servers, {len(self._mcp_manager.get_tools())} MCP tools + code execution available")

    async def chat(self, message: str) -> AsyncIterator[StreamChunk]:
        """Send a message and stream the response.

        Yields ``StreamChunk`` objects (``text``, ``tool_call``,
        ``tool_result``, and pass-through types such as ``handoff``).
        The previous ``AsyncIterator[str]`` annotation was inaccurate:
        every yield here is a chunk object, never a plain string.
        """
        # Initialize MCP on first chat
        await self._init_mcp()

        # Add user message to session (saved immediately)
        self.session.add_message(role="user", content=message)

        # Generate title asynchronously using Claude Haiku via OpenRouter.
        # This runs in background and doesn't block the chat. Keep a strong
        # reference so the task cannot be garbage-collected before it runs.
        import asyncio
        self._title_task = asyncio.create_task(
            self.session.generate_title_from_message(message)
        )

        # Get conversation history
        messages = self.session.get_messages()

        # Auto-load skills based on message triggers
        auto_loaded = self.skills.auto_load_for_message(message)
        if auto_loaded:
            logger.info(f"Auto-loaded skills: {auto_loaded}")

        # Build system prompt with skills and rules
        skills_block = self.skills.render_active_for_prompt()
        available_skills_summary = format_available_skills(self.skills.list())
        custom_rules = self.rules.get_combined_rules()
        system_prompt = build_system_prompt(
            cwd=str(self.directory),
            skills_block=skills_block,
            available_skills_summary=available_skills_summary,
            custom_rules=custom_rules,
        )

        # Build context for tools
        context = {
            "session": self.session,
            "session_id": self.session.id,
            "cwd": self.directory,
            "workspace_dir": str(self.directory),
            "tools": self.tools,
            "model": self._model,
            "skills": self.skills,  # Add skills manager to context
        }

        # Propagate eval instrumentation hooks if the runner attached any.
        if self._eval_events is not None:
            context["eval_events"] = self._eval_events
        if self._eval_usage is not None:
            context["eval_usage"] = self._eval_usage

        # Run ReAct loop
        full_response = ""
        async for chunk in self.react_loop.execute(
            provider=self.provider,
            system_prompt=system_prompt,
            messages=messages,
            tools=self.tools,
            context=context,
        ):
            if chunk.type == "text":
                full_response += chunk.content
                yield chunk
            elif chunk.type == "tool_call":
                yield chunk
            elif chunk.type == "tool_result":
                # Save tool results immediately for session recovery
                if chunk.name in ["edit", "write", "bash", "grep", "glob", "todo"]:
                    self.session.add_message(
                        role="tool_display",
                        content=chunk.content,
                        tool_name=chunk.name,
                        tool_call_id=chunk.id,
                    )
                yield chunk
            else:
                # Forward any other chunk types (e.g. "handoff" emitted by
                # the loop) instead of silently dropping them, so callers
                # can react to session switches.
                yield chunk

        # Save final assistant response
        if full_response:
            self.session.add_message(role="assistant", content=full_response)

    def new_session(self):
        """Start a new session"""
        self.session = Session.create(self.directory)
        self.skills.clear()  # Clear loaded skills for new session

    async def cleanup(self):
        """Cleanup resources (disconnect MCP servers, etc.)"""
        if self._mcp_manager:
            await self._mcp_manager.disconnect_all()
            self._mcp_manager = None
            self._mcp_initialized = False

    def get_mcp_tools(self) -> list[dict]:
        """Get list of available MCP tools"""
        if self._mcp_manager:
            return self._mcp_manager.list_all_tools()
        return []
codesm/agent/loop.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""ReAct loop implementation for agent execution"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import AsyncIterator
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
from codesm.provider.base import StreamChunk
|
|
9
|
+
from codesm.tool.registry import ToolRegistry
|
|
10
|
+
from codesm.session.context import ContextManager
|
|
11
|
+
from codesm.session.summarize import summarize_messages
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ReActLoop:
    """Implements the ReAct (Reasoning + Acting) loop.

    Each turn: optionally compact the conversation, stream one LLM
    response, execute any requested tool calls in parallel, append the
    results to the working message list, and repeat until the model stops
    calling tools, a handoff occurs, or the iteration cap is reached.
    """

    # Maximum number of LLM turns per execute() call; 0 = unlimited.
    max_iterations: int = 0  # 0 = unlimited

    async def execute(
        self,
        provider,
        system_prompt: str,
        messages: list[dict],
        tools: ToolRegistry,
        context: dict,
    ) -> AsyncIterator[StreamChunk]:
        """Execute the ReAct loop with tool calling.

        Args:
            provider: LLM provider exposing an async ``stream(system=...,
                messages=..., tools=...)`` generator of StreamChunk-like
                objects.
            system_prompt: System prompt passed to the provider each turn.
            messages: Conversation history; copied, never mutated.
            tools: Registry used for tool schemas and parallel execution.
            context: Shared mutable state for tools. Recognized keys:
                "session", "eval_events" (instrumentation sink),
                "context_manager" (compaction), and the "_handoff_*" flags
                a handoff tool may set.

        Yields:
            StreamChunk objects: "text", "tool_call", "tool_result", and
            "handoff" (which terminates the loop).
        """

        iteration = 0
        current_messages = list(messages)  # Copy to avoid mutating original
        session = context.get("session")

        # Optional eval instrumentation sink. When the eval runner attached a
        # list under this key, we append structured events to it. This is
        # backwards compatible: if the key is absent, nothing happens.
        eval_events = context.get("eval_events")

        # Get or create ContextManager for compaction
        context_manager = context.get("context_manager")
        if context_manager is None:
            context_manager = ContextManager()

        # Loop condition enforces the cap; the explicit check at the bottom
        # only exists to emit the warning chunk/event on the final turn.
        while self.max_iterations == 0 or iteration < self.max_iterations:
            iteration += 1

            if isinstance(eval_events, list):
                eval_events.append({"type": "iteration_start", "n": iteration})

            # Compact context if needed
            if context_manager.should_compact(current_messages):
                tokens_before = context_manager.estimate_tokens(current_messages)

                # Thin async adapter so compact_messages_async can await it.
                async def summarizer(msgs):
                    return await summarize_messages(msgs)

                current_messages = await context_manager.compact_messages_async(
                    current_messages,
                    summarizer=summarizer,
                )
                tokens_after = context_manager.estimate_tokens(current_messages)
                logger.info(f"Compacted context from {tokens_before} to {tokens_after} tokens")

                if isinstance(eval_events, list):
                    eval_events.append({
                        "type": "compaction",
                        "iteration": iteration,
                        "tokens_before": tokens_before,
                        "tokens_after": tokens_after,
                    })

            # Get response from LLM
            response_text = ""
            tool_calls = []
            pending_tool_call = None  # accumulator for streamed tool-call deltas

            async for chunk in provider.stream(
                system=system_prompt,
                messages=current_messages,
                tools=tools.get_schemas(),
            ):
                if chunk.type == "text":
                    response_text += chunk.content
                    yield chunk
                elif chunk.type == "tool_call":
                    tool_calls.append(chunk)
                    yield chunk
                elif chunk.type == "tool_call_delta":
                    # Handle streaming tool call arguments
                    if pending_tool_call is None:
                        pending_tool_call = chunk
                    else:
                        # Accumulate args
                        # NOTE(review): .update() assumes args deltas are
                        # dicts; string fragments would break here — confirm
                        # against the providers' delta format.
                        if chunk.args:
                            pending_tool_call.args.update(chunk.args)

            # Add any pending tool call
            if pending_tool_call and pending_tool_call not in tool_calls:
                tool_calls.append(pending_tool_call)

            # If no tool calls, we're done
            if not tool_calls:
                break

            # Add assistant message with tool calls to history
            # (OpenAI-style "tool_calls" entries with JSON-encoded arguments).
            assistant_msg = {"role": "assistant", "content": response_text or ""}
            if tool_calls:
                assistant_msg["tool_calls"] = [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.name,
                            "arguments": json.dumps(tc.args) if isinstance(tc.args, dict) else tc.args,
                        }
                    }
                    for tc in tool_calls
                ]
            current_messages.append(assistant_msg)

            # Execute tool calls in parallel (limit to avoid API errors)
            MAX_PARALLEL_CALLS = 64  # API limit is 128, stay well under

            # Normalize each call to (id, name, dict-args); malformed JSON
            # argument strings degrade to an empty dict rather than raising.
            parsed_calls = []
            for tool_call in tool_calls[:MAX_PARALLEL_CALLS]:  # Cap the number
                args = tool_call.args
                if isinstance(args, str):
                    try:
                        args = json.loads(args)
                    except json.JSONDecodeError:
                        args = {}
                parsed_calls.append((tool_call.id, tool_call.name, args))

            if len(tool_calls) > MAX_PARALLEL_CALLS:
                # Log that we're dropping some calls
                # (surfaced to the model as a synthetic "_system" result).
                dropped = len(tool_calls) - MAX_PARALLEL_CALLS
                parsed_calls.append((
                    f"dropped_{dropped}",
                    "_system",
                    {"message": f"Warning: {dropped} tool calls dropped (max {MAX_PARALLEL_CALLS} per turn)"}
                ))

            # Execute all tools in parallel
            results = await tools.execute_parallel(parsed_calls, context)

            # Emit tool_error events for results that look like errors
            # (heuristic: string result beginning with "error", any case).
            if isinstance(eval_events, list):
                for call_id, name, result in results:
                    if isinstance(result, str) and result.strip().lower().startswith("error"):
                        eval_events.append({
                            "type": "tool_error",
                            "iteration": iteration,
                            "tool": name,
                            "message": result[:500],
                        })

            # Process results in order
            for call_id, name, result in results:
                # Add tool result to messages (for this turn only, not persisted)
                tool_result_msg = {
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": result,
                }
                current_messages.append(tool_result_msg)

                # Yield tool result as a chunk
                yield StreamChunk(
                    type="tool_result",
                    content=result,
                    id=call_id,
                    name=name,
                )

            # Check if handoff was triggered
            # (a handoff tool sets these context flags; popping them makes
            # the handoff one-shot).
            if context.get("_handoff_follow") and context.get("_handoff_session_id"):
                handoff_session_id = context.pop("_handoff_session_id")
                context.pop("_handoff_follow", None)
                yield StreamChunk(
                    type="handoff",
                    content=f"Switching to session {handoff_session_id}",
                    new_session_id=handoff_session_id,
                )
                return  # Stop the loop after handoff

            # Final-turn warning; the while condition performs the actual stop.
            if self.max_iterations > 0 and iteration >= self.max_iterations:
                if isinstance(eval_events, list):
                    eval_events.append({"type": "max_iterations", "n": iteration})
                yield StreamChunk(
                    type="text",
                    content="\n\n[Maximum iterations reached - stopping]",
                )