krnl-code 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. krnl_agent/__init__.py +9 -0
  2. krnl_agent/__main__.py +7 -0
  3. krnl_agent/agent_registry.py +95 -0
  4. krnl_agent/agent_selector.py +69 -0
  5. krnl_agent/audit_log.py +155 -0
  6. krnl_agent/background.py +94 -0
  7. krnl_agent/checkpoints.py +67 -0
  8. krnl_agent/ci.py +73 -0
  9. krnl_agent/cli.py +1458 -0
  10. krnl_agent/commands.py +42 -0
  11. krnl_agent/config.py +425 -0
  12. krnl_agent/context.py +352 -0
  13. krnl_agent/depaudit.py +63 -0
  14. krnl_agent/deploy.py +245 -0
  15. krnl_agent/doctor.py +106 -0
  16. krnl_agent/events.py +141 -0
  17. krnl_agent/gitignore.py +47 -0
  18. krnl_agent/graph.py +928 -0
  19. krnl_agent/guardrails.py +70 -0
  20. krnl_agent/headless.py +60 -0
  21. krnl_agent/history.py +49 -0
  22. krnl_agent/hooks.py +72 -0
  23. krnl_agent/ingest.py +129 -0
  24. krnl_agent/llm.py +456 -0
  25. krnl_agent/loop.py +779 -0
  26. krnl_agent/mcp_client.py +128 -0
  27. krnl_agent/memory.py +61 -0
  28. krnl_agent/modelrouter.py +151 -0
  29. krnl_agent/monitor.py +112 -0
  30. krnl_agent/notify.py +119 -0
  31. krnl_agent/parallel_executor.py +139 -0
  32. krnl_agent/permissions.py +128 -0
  33. krnl_agent/plugins.py +105 -0
  34. krnl_agent/pricing.py +85 -0
  35. krnl_agent/prompts.py +60 -0
  36. krnl_agent/repomap.py +133 -0
  37. krnl_agent/sandbox.py +69 -0
  38. krnl_agent/scaffold.py +167 -0
  39. krnl_agent/schedules.py +137 -0
  40. krnl_agent/secrets.py +100 -0
  41. krnl_agent/selfheal.py +87 -0
  42. krnl_agent/server.py +302 -0
  43. krnl_agent/sessions.py +258 -0
  44. krnl_agent/settings.py +59 -0
  45. krnl_agent/skills.py +73 -0
  46. krnl_agent/teams.py +38 -0
  47. krnl_agent/tool_schemas.py +431 -0
  48. krnl_agent/tools.py +694 -0
  49. krnl_agent/webtools.py +139 -0
  50. krnl_code-1.0.4.dist-info/METADATA +214 -0
  51. krnl_code-1.0.4.dist-info/RECORD +56 -0
  52. krnl_code-1.0.4.dist-info/WHEEL +5 -0
  53. krnl_code-1.0.4.dist-info/entry_points.txt +2 -0
  54. krnl_code-1.0.4.dist-info/licenses/LICENSE +147 -0
  55. krnl_code-1.0.4.dist-info/licenses/NOTICE +4 -0
  56. krnl_code-1.0.4.dist-info/top_level.txt +1 -0
krnl_agent/loop.py ADDED
@@ -0,0 +1,779 @@
1
+ """The agentic loop.
2
+
3
+ `AgentSession` holds one conversation. `run(task)` processes a user turn: it
4
+ drives the LLM, executes tool calls (gating mutations behind approval), and
5
+ streams everything through AgentIO. Features: cancellation, @file mentions,
6
+ context compaction, per-turn checkpoints (undo), streamed command output, token
7
+ usage, persistence, **plan mode**, **TODO checklist**, **sub-agents**, and
8
+ **MCP** tool routing.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import json
14
+ import re
15
+ from dataclasses import replace
16
+ from datetime import datetime, timezone
17
+ from pathlib import Path
18
+
19
+ from collections import Counter
20
+
21
+ from . import events as ev
22
+ from . import audit_log, gitignore, ingest, memory, pricing, sandbox, sessions
23
+ from . import skills as skills_mod
24
+ from .checkpoints import Checkpointer
25
+ from .config import Config
26
+ from .context import compact_history, expand_mentions
27
+ from .events import AgentIO, ApprovalDecision
28
+ from .hooks import HookRunner
29
+ from .llm import LLMError, build_client
30
+ from .mcp_client import MCPManager, is_mcp_tool
31
+ from .modelrouter import ModelRouter
32
+ from .permissions import Permissions
33
+ from .prompts import system_prompt
34
+ from .tool_schemas import LOOP_TOOLS, schemas_for
35
+ from .tools import ToolContext, execute, list_files, preview_for, run_command
36
+ from .graph import GraphManager
37
+ from .agent_selector import AgentSelector
38
+ from .sessions import SessionMemory
39
+
40
# Tool names that modify files on disk; the loop checkpoints the target path
# before running them and invalidates the code graph for it afterwards.
_FILE_MUTATORS = {
    "write_file",
    "edit_file",
    "multi_edit",
    "create_file",
    "delete_file",
}
# A session at this depth (or deeper) refuses to spawn further sub-agents.
_MAX_SUBAGENT_DEPTH = 2
# Memory/context: bound how much of any single tool result is kept in history.
_MAX_TOOL_RESULT_CHARS = 16_000
44
+
45
+
46
class _SubAgentIO(AgentIO):
    """AgentIO adapter used by nested sessions.

    Tool activity is relayed to the parent's IO, approval requests are delegated
    to it, and the text of the most recent assistant message is remembered in
    `last_assistant` so the parent can use it as the sub-agent's result.
    """

    def __init__(self, parent: AgentIO):
        self.parent = parent
        self.last_assistant = ""

    def emit_sync(self, event: dict) -> None:
        """Relay streamed command output to the parent; drop every other event."""
        if event.get("type") != "command_output":
            return
        self.parent.emit_sync(event)

    async def emit(self, event: dict) -> None:
        """Capture assistant messages; forward tool/diff/error events upward."""
        kind = event.get("type")
        if kind == "assistant_message":
            # Not forwarded: only the latest text is kept as the result.
            self.last_assistant = event["text"]
        elif kind in ("tool_start", "tool_result", "diff", "error"):
            await self.parent.emit(event)

    async def request_approval(self, request: dict) -> ApprovalDecision:
        """Delegate the approval decision to the parent IO."""
        decision = await self.parent.request_approval(request)
        return decision
67
+
68
+
69
class AgentSession:
    """One agent conversation plus the loop that drives it.

    `run(task)` processes a single user turn: it asks the model for the next
    step, executes the requested tool calls (sandbox check, permission
    decision, optional approval, hooks), appends the results to `messages`
    and repeats until the model answers without tool calls, the turn is
    cancelled, or `config.agent.max_steps` is reached.
    """

    def __init__(
        self,
        workspace: str,
        config: Config,
        io: AgentIO,
        *,
        persist: bool = False,
        plan_mode: bool = False,
        depth: int = 0,
        session_id: str | None = None,
        team: str | None = None,
        dangerous: bool = False,
        audit: "audit_log.AuditLog | None" = None,
        budget: dict | None = None,
    ):
        """Build a session rooted at `workspace`.

        Args:
            workspace: Directory the tools operate in.
            config: Full agent configuration.
            io: Sink for events and source of approval decisions.
            persist: Load/save the conversation through `sessions`.
            plan_mode: Start in plan mode (see `run` / `update_plan`).
            depth: Nesting level; 0 is the top-level session.
            session_id: Session to resume when persisting.
            team: Team name; forces persistence under the team's session id.
            dangerous: Skip the permission decision and allow every tool.
            audit: Audit log shared with a parent session, if any.
            budget: Sub-agent budget dict (`calls`, `tokens`) shared with a
                parent session, if any.
        """
        self.workspace = workspace
        self.config = config
        self.io = io
        self.team = team
        self.dangerous = dangerous  # YOLO: never ask, run everything
        # Multi-model router: one model per phase (plan/execute/subagent/verify),
        # cost-aware with escalation. self.client is the active/execute-phase client
        # (built here so it stays the patch/injection point); the router supplies a
        # distinct client only for phases that diverge to another model.
        self.client = build_client(config.provider, config.agent)
        self.router = ModelRouter(config)
        # Tamper-evident audit trail (shared across nested sub-agents). Compared
        # with `is not None` so a shared log is reused even if it tests falsy.
        self.audit = (
            audit if audit is not None
            else audit_log.AuditLog(workspace, enabled=config.agent.audit_log)
        )
        # Shared agent-of-agent budget (calls + tokens) across the whole tree.
        self._budget = budget if budget is not None else {"calls": 0, "tokens": 0}
        # Tokens spent by this session's sub-agents; they are already in the
        # shared budget, so a parent must not add them a second time.
        self._delegated_tokens = 0
        self._turn_mutated: set[str] = set()
        self._heal_attempts: dict[str, int] = {}
        self._escalated = False
        self._fail_streak = 0
        self._idle_streak = 0  # steps where only todo_write was called (no real work done)
        self._step_total = 0
        self._step_fail = 0
        if team:
            from . import teams

            persist = True
            session_id = teams.team_id(team)

        ignore = list(config.ignore)
        if config.agent.use_gitignore:
            ignore += gitignore.load_gitignore_patterns(Path(workspace))
        self.ctx = ToolContext(workspace, config.agent, ignore, web=config.web,
                               deploy_cfg=config.deploy)

        self.checkpointer = Checkpointer()
        self.session_tokens = 0
        self.session_cost = 0.0
        self.tool_calls: Counter = Counter()
        self.subagent_calls = 0
        self.skills = skills_mod.load_skills(workspace)
        self.persist = persist
        self.plan_mode = plan_mode
        self.depth = depth
        self.todos: list = []
        if depth == 0:
            from . import plugins as _plugins

            mcp_servers = {**_plugins.plugin_mcp_servers(), **config.mcp_servers}
        else:
            # Nested sessions get no MCP servers of their own.
            mcp_servers = {}
        self.mcp = MCPManager(mcp_servers)
        self.permissions = Permissions.from_config(
            config.permissions,
            auto_writes=config.agent.auto_approve_writes,
            auto_commands=config.agent.auto_approve_commands,
        )
        self.hooks = HookRunner(config.hooks, workspace)
        self._cancelled = False
        self.session_id = None
        self.messages: list[dict] = []
        self.session_memory = None
        if persist:
            self.session_id = session_id or sessions.latest_id(workspace) or sessions.new_id()
            self.messages, self.session_memory = sessions.load(self.session_id)
            if self.session_memory is None:
                self.session_memory = SessionMemory()

        self.graph_manager = None
        if self.config.graph.enabled:
            db_path = Path(workspace) / self.config.graph.db_path
            need_build = not db_path.exists()
            self.graph_manager = GraphManager(
                workspace=workspace,
                db_path=str(db_path),
                languages=self.config.graph.languages,
            )
            if need_build:
                self.graph_manager.build_initial()
            else:
                self.graph_manager.load()

        self.agent_selector = None
        if self.config.agents.specialization:
            self.agent_selector = AgentSelector()

    # ------------------------------------------------------------------ #
    def cancel(self) -> None:
        """Ask the running turn to stop; checked before each step and tool call."""
        self._cancelled = True

    def undo(self) -> list[str]:
        """Undo the last turn via the checkpointer and return its result."""
        return self.checkpointer.undo_last_turn()

    @staticmethod
    def _now() -> str:
        """Current UTC time as an ISO-8601 string with second precision."""
        return datetime.now(timezone.utc).isoformat(timespec="seconds")

    def _note(self, ok: bool) -> None:
        """Record a per-step tool outcome (drives cost-aware escalation)."""
        self._step_total += 1
        if not ok:
            self._step_fail += 1

    def _record_audit(self, tool: str, args: dict, decision: str,
                      ok: bool | None, summary: str = "") -> None:
        """Append an audit entry; deliberately best-effort so that an audit
        failure never aborts the tool call being recorded."""
        try:
            self.audit.record(ts=self._now(), tool=tool, args=args, decision=decision,
                              ok=ok, summary=summary, depth=self.depth)
        except Exception:
            pass

    def _mentions_from_history(self) -> tuple[list[str], set[str]]:
        """Scan every user message for file-like tokens and identifier-like words.

        Returns:
            `(context_files, mentioned_words)`: path-looking matches in scan
            order (newest message first) and the set of plain or dotted words.
        """
        context_files: list[str] = []
        mentioned_words: set[str] = set()
        for msg in reversed(self.messages):
            if msg.get("role") != "user":
                continue
            content = msg.get("content") or ""
            if isinstance(content, list):
                # Multi-part content: only the text parts are scanned.
                content = " ".join(
                    b.get("text", "") for b in content
                    if isinstance(b, dict) and b.get("type") == "text"
                )
            context_files.extend(re.findall(r"\b[\w\-\.\/]+\.[a-zA-Z0-9]+\b", content))
            mentioned_words.update(re.findall(r"\b\w+(?:\.\w+)?\b", content))
        return context_files, mentioned_words

    def _graph_context_lines(self, mentioned_words: set[str]) -> list[str]:
        """Describe up to ten graph nodes matching `mentioned_words`, plus up to
        five neighbours each. Returns only the header line when nothing could
        be rendered, and an empty list when nothing matched."""
        matched_nodes = []
        # Sorted so the nodes that make the cut are the same on every run.
        for word in sorted(mentioned_words):
            matched_nodes.extend(self.graph_manager.graph.find_nodes_by_name(word))
            node = self.graph_manager.get_node_by_qualified_name(word)
            if node:
                matched_nodes.append(node)
            if len(matched_nodes) >= 10:
                break  # only the first ten are ever used
        if not matched_nodes:
            return []

        lines = ["# Code Knowledge Graph Context"]
        seen_nodes = set()
        for node in matched_nodes[:10]:
            if node.id in seen_nodes:
                continue
            seen_nodes.add(node.id)
            try:
                rel_path = self.ctx.rel(Path(node.file_path))
                lines.append(
                    f"- {node.type} `{node.qualified_name}` in `{rel_path}` "
                    f"(lines {node.line_start}-{node.line_end})"
                )
                neighbors = self.graph_manager.query_neighbors(
                    node.id, hop_limit=self.config.context.graph_hop_limit
                )
                for neighbor in neighbors[:5]:
                    if neighbor.id not in seen_nodes:
                        seen_nodes.add(neighbor.id)
                        n_rel_path = self.ctx.rel(Path(neighbor.file_path))
                        lines.append(
                            f"  * Relates to {neighbor.type} "
                            f"`{neighbor.qualified_name}` in `{n_rel_path}`"
                        )
            except Exception:
                # Best-effort enrichment: a node that cannot be rendered is skipped.
                pass
        return lines

    def _system_content(self) -> str:
        """Assemble the system prompt: base prompt with the file tree, project
        memory, skills, team coordinator text, session memory and code-graph
        context derived from what the user messages mention."""
        tree = list_files(self.ctx).output
        base = system_prompt(self.workspace, tree)
        mem = memory.load_memory(self.workspace)
        if mem:
            base += "\n\n# Memory / project instructions\n" + mem
        skill_text = skills_mod.skills_summary(self.skills)
        if skill_text:
            base += "\n\n# Skills\n" + skill_text
        if self.team:
            from . import teams

            base += teams.coordinator_prompt(self.team)

        # Extract potential context files and mentioned words
        context_files, mentioned_words = self._mentions_from_history()

        # Load Phase 3: Per-Session Memory
        if self.config.memory.per_session and self.session_memory is not None:
            gm = self.graph_manager if self.config.memory.staleness_check_against_graph else None
            retrieved = self.session_memory.retrieve(
                context_files=context_files or None, graph_manager=gm
            )
            if retrieved:
                mem_lines = ["# Session Memory"]
                for e in retrieved:
                    files_str = f" (files: {', '.join(e.related_files)})" if e.related_files else ""
                    mem_lines.append(f"- [{e.type}]{files_str} {e.content}")
                base += "\n\n" + "\n".join(mem_lines)

        # Graph-aware context enrichment
        if self.graph_manager and self.config.context.graph_aware:
            graph_context = self._graph_context_lines(mentioned_words)
            if len(graph_context) > 1:
                base += "\n\n" + "\n".join(graph_context)

        return base

    def _ensure_system(self) -> None:
        """Make sure `messages` starts with a system message, building it if missing."""
        if not self.messages:
            self.messages.append({"role": "system", "content": self._system_content()})
        elif self.messages[0].get("role") != "system":
            self.messages.insert(0, {"role": "system", "content": self._system_content()})

    def _tools_for_turn(self) -> list:
        """Tool schemas offered to the model; MCP tools are withheld in plan mode."""
        return schemas_for(self.plan_mode) + (self.mcp.schemas() if not self.plan_mode else [])

    def _backfill_tool_results(
        self, note: str = "Tool call did not complete: the turn ended early."
    ) -> None:
        """Give every unanswered tool call of the latest assistant message a result.

        If a step is interrupted by an exception, the assistant message that
        requested the tools is already in `messages` but some calls have no
        matching `tool` message. Chat APIs that pair the two reject such a
        history on the next request, so placeholders are inserted right after
        the results that did arrive. A no-op when nothing is missing.
        """
        for idx in range(len(self.messages) - 1, -1, -1):
            msg = self.messages[idx]
            if msg.get("role") != "assistant":
                continue
            answered = {
                m.get("tool_call_id")
                for m in self.messages[idx + 1:]
                if m.get("role") == "tool"
            }
            at = idx + 1
            while at < len(self.messages) and self.messages[at].get("role") == "tool":
                at += 1
            for call in msg.get("tool_calls") or []:
                if call.get("id") not in answered:
                    self.messages.insert(
                        at, {"role": "tool", "tool_call_id": call.get("id"), "content": note}
                    )
                    at += 1
            return  # only the most recent assistant message can be incomplete

    # ------------------------------------------------------------------ #
    async def run(self, task: str, images: list | None = None) -> None:
        """Process one user turn.

        Args:
            task: The user's request; @file mentions and dropped files are expanded.
            images: Optional image URLs to attach to the user message.

        Errors raised while the loop runs are reported through `io` rather
        than propagated; the Stop hook, persistence and notification always run.
        """
        self._cancelled = False
        if not self.mcp.started:
            try:
                await self.mcp.start()
                if self.mcp.errors:
                    await self.io.emit(ev.info("MCP: " + "; ".join(self.mcp.errors)))
            except Exception as e:  # noqa: BLE001
                await self.io.emit(ev.info(f"MCP unavailable: {e}"))
        self.checkpointer.begin_turn()
        self._turn_mutated = set()
        self._heal_attempts = {}
        self._escalated = False
        self._fail_streak = 0
        self._idle_streak = 0  # per-turn, like the failure streak

        if self.hooks.active:
            await asyncio.to_thread(self.hooks.run, "UserPromptSubmit", {"prompt": task})

        if self.plan_mode:
            task = "[PLAN MODE — research and propose a plan with update_plan; do NOT edit yet]\n" + task
        expanded = expand_mentions(task, self.ctx)
        # Ingest any dropped/referenced files, folders, or images.
        expanded, dropped_images = ingest.gather(expanded, self.ctx)
        all_images = list(images or []) + dropped_images
        if all_images:
            content = [{"type": "text", "text": expanded}] + [
                {"type": "image_url", "image_url": {"url": u}} for u in all_images
            ]
        else:
            content = expanded
        self.messages.append({"role": "user", "content": content})
        # Built only after the user message is in history: the system prompt
        # derives its memory/graph context from what the user messages mention,
        # which on a fresh session is this very message.
        self._ensure_system()

        status = "done"
        try:
            await self._agent_loop()
            await self._maybe_verify_edits()
        except LLMError as e:
            status = "error"
            await self.io.emit(ev.error(f"LLM error: {e}"))
            await self.io.emit(ev.done("failed"))
        except Exception as e:  # noqa: BLE001
            status = "error"
            await self.io.emit(ev.error(f"Agent error: {type(e).__name__}: {e}"))
            await self.io.emit(ev.done("failed"))
        finally:
            # Keep the saved history well-formed even if a step died half-way.
            self._backfill_tool_results()
            if self.hooks.active:
                await asyncio.to_thread(self.hooks.run, "Stop", {})
            if self.persist and self.session_id:
                sessions.save(self.session_id, self.workspace, self.messages,
                              memory=self.session_memory)
            await self._maybe_notify(status)

    async def _maybe_notify(self, status: str) -> None:
        """Send the configured notification for `status` (top-level sessions only)."""
        notif = self.config.notifications
        if not notif or self.depth != 0:
            return
        if status not in notif.get("on", ["done"]):
            return
        summary = next(
            (m["content"] for m in reversed(self.messages)
             if m.get("role") == "assistant" and m.get("content")),
            "(no summary)",
        )
        from . import notify as _notify

        text = f"Krnl Agent [{status}] in {self.workspace}\n{str(summary)[:1500]}"
        await asyncio.to_thread(_notify.dispatch, notif, text)

    async def _agent_loop(self) -> None:
        """Alternate model calls and tool execution until the model stops
        requesting tools, the turn is cancelled, or the step limit is hit."""
        for step in range(self.config.agent.max_steps):
            if self._cancelled:
                await self.io.emit(ev.cancelled())
                await self.io.emit(ev.done("cancelled"))
                return

            if self.config.agent.compact_history:
                compacted, changed = compact_history(
                    self.messages, self.config.agent.max_context_tokens
                )
                if changed:
                    self.messages = compacted
                    await self.io.emit(ev.info("compacted older context to fit the window"))

            # Pick the model for this step: planner in plan mode, executor
            # otherwise — or the escalation model once the cheaper one has
            # struggled (cost-aware, but never at the expense of getting it done).
            phase = "plan" if self.plan_mode else "execute"
            if self._escalated and self.router.strategy != "fixed":
                phase = "escalate"
            # Reuse the injected/active client for phases that resolve to the active
            # model; only build a separate client when a distinct model is configured.
            client = (self.client if self.router.is_default_phase(phase)
                      else self.router.client_for_phase(phase))
            model_used = getattr(getattr(client, "provider", None), "model",
                                 self.config.provider.model)
            await self.io.emit(ev.status(f"thinking (step {step + 1}) · {model_used}"))

            resp = await asyncio.to_thread(
                client.chat,
                self.messages,
                self._tools_for_turn(),
                self.io.on_token,
                self.config.agent.stream,
            )

            self.session_tokens += resp.prompt_tokens + resp.completion_tokens
            self.session_cost += pricing.cost_for(
                model_used, resp.prompt_tokens, resp.completion_tokens,
                self.config.pricing,
            )
            await self.io.emit(
                ev.usage(
                    resp.prompt_tokens, resp.completion_tokens,
                    self.session_tokens, self.session_cost,
                )
            )

            # The model may return no text at all; never slice None below.
            reply_text = resp.content or ""
            if reply_text:
                await self.io.emit(ev.assistant_message(reply_text))

            if not resp.tool_calls:
                self.messages.append({"role": "assistant", "content": reply_text})
                await self.io.emit(ev.done(reply_text[:280]))
                return

            self.messages.append(
                {
                    "role": "assistant",
                    "content": reply_text or None,
                    "tool_calls": [
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {"name": tc.name, "arguments": json.dumps(tc.arguments)},
                        }
                        for tc in resp.tool_calls
                    ],
                }
            )

            self._step_total = 0
            self._step_fail = 0

            # Phase 5: Parallel execution — run independent spawn_agent calls
            # concurrently; all other tools run sequentially (order matters for
            # file mutations, approvals, etc.).
            spawn_calls = [tc for tc in resp.tool_calls if tc.name == "spawn_agent"]
            other_calls = [tc for tc in resp.tool_calls if tc.name != "spawn_agent"]

            for tc in other_calls:
                await self._handle_tool_call(tc)

            if len(spawn_calls) > 1:
                # Wait for every sub-agent before surfacing a failure, so none
                # keeps running and mutating history after this step is over.
                outcomes = await asyncio.gather(
                    *(self._handle_tool_call(tc) for tc in spawn_calls),
                    return_exceptions=True,
                )
                for outcome in outcomes:
                    if isinstance(outcome, BaseException):
                        raise outcome
            elif spawn_calls:
                await self._handle_tool_call(spawn_calls[0])

            # Detect planning-only steps: the agent called only todo_write and no
            # real tools. After 2 consecutive idle steps inject a hard nudge so the
            # model stops looping and starts executing. Other loop-handled tools
            # (spawn_agent, update_plan, ...) are not checklist churn and reset
            # the streak.
            if all(tc.name == "todo_write" for tc in resp.tool_calls):
                self._idle_streak += 1
                if self._idle_streak >= 2:
                    nudge = (
                        "[SYSTEM NUDGE] You have updated the checklist multiple times without "
                        "calling any real tools. Stop planning and START EXECUTING now. "
                        "Call write_file, edit_file, run_command, or another action tool immediately. "
                        "If the file content is too large for one call, write it in parts using "
                        "multiple write_file calls (first call creates the file, subsequent calls "
                        "use edit_file to append sections). Do NOT call todo_write again until "
                        "you have made real progress."
                    )
                    await self.io.emit(ev.info(f"[loop guard] Detected {self._idle_streak} consecutive planning-only steps — nudging agent to execute."))
                    # Injected as a user message so the model sees it in context.
                    self.messages.append({"role": "user", "content": nudge})
            else:
                self._idle_streak = 0

            await self._maybe_escalate()

        await self.io.emit(ev.error("Reached max steps without finishing."))
        await self.io.emit(ev.done("max_steps"))

    async def _maybe_escalate(self) -> None:
        """Cost-aware quality guard: if every tool in a step failed for several
        steps running, OR if the agent is stuck in a planning loop (idle streak),
        jump to the stronger 'escalate' model for the rest of the turn."""
        if self._escalated or self.router.strategy == "fixed":
            return
        if self._step_total and self._step_fail == self._step_total:
            self._fail_streak += 1
        else:
            self._fail_streak = 0
        # Escalate on repeated tool failures OR on a sustained idle/planning loop
        failing = self._fail_streak >= self.router.escalate_after
        if (failing or self._idle_streak >= 3) and self.router.has_distinct_escalation():
            self._escalated = True
            reason = "repeated failures" if failing else "planning loop detected"
            self._fail_streak = 0
            self._idle_streak = 0
            await self.io.emit(ev.info(
                f"escalating to a stronger model ({self.router.model_for_phase('escalate')}) "
                f"after {reason}"
            ))

    # ------------------------------------------------------------------ #
    async def _refuse_tool_call(self, tc, msg: str, decision: str | None = None,
                                summary: str = "") -> None:
        """Report a tool call that produced no successful result: emit the failed
        result and append it to history. When `decision` is given, the refusal
        is also audited and counted as a failed step outcome."""
        await self.io.emit(ev.tool_result(tc.id, tc.name, False, msg))
        self._append_tool_result(tc.id, msg)
        if decision is not None:
            self._record_audit(tc.name, tc.arguments, decision, False, summary)
            self._note(False)

    async def _handle_tool_call(self, tc) -> None:
        """Run one tool call through the gate chain and record its result.

        Order: cancellation, loop-handled meta tools, sandbox policy (shell
        commands), permission decision with optional user approval, PreToolUse
        hook, then execution (MCP or built-in) followed by PostToolUse.
        """
        self.tool_calls[tc.name] += 1
        if self._cancelled:
            await self._refuse_tool_call(tc, "Cancelled by user before execution.")
            return

        # Loop-handled meta tools.
        if tc.name in LOOP_TOOLS:
            await self._handle_loop_tool(tc)
            return

        await self.io.emit(ev.tool_start(tc.id, tc.name, tc.arguments))

        # Sandbox / egress policy for shell commands — deny-by-default, pre-approval,
        # enforced even in dangerous mode so autonomous runs stay contained.
        if tc.name in ("run_command", "bash_background"):
            allowed, reason = sandbox.check_command(
                self.config.sandbox, tc.arguments.get("command", "")
            )
            if not allowed:
                await self._refuse_tool_call(
                    tc, f"Command blocked by sandbox policy: {reason}", "sandbox-deny", reason
                )
                return

        # Permission decision: allow / ask / deny. Dangerous mode allows everything.
        decision = "allow" if self.dangerous else self.permissions.decide(tc.name, tc.arguments)
        if decision == "deny":
            await self._refuse_tool_call(
                tc, f"Denied by permission policy: {tc.name} is not allowed.", "deny"
            )
            return
        if decision == "ask":
            preview = (
                preview_for(self.ctx, tc.name, tc.arguments)
                if not is_mcp_tool(tc.name)
                else f"{tc.name}({json.dumps(tc.arguments)[:400]})"
            )
            ok = await self.io.request_approval(
                ev.approval_request(tc.id, tc.name, tc.arguments, preview)
            )
            if not ok.approved:
                note = ok.feedback or "no reason given"
                await self._refuse_tool_call(
                    tc,
                    f"User REJECTED this action. Reason: {note}. Do not retry it as-is.",
                    "reject",
                    note,
                )
                return
            if ok.always:
                self.permissions.add_always_allow(tc.name, tc.arguments)
                await self.io.emit(ev.info(f"Always allowing {tc.name} for similar actions."))

        # PreToolUse hook — non-zero exit blocks the tool.
        if self.hooks.active:
            blocked, hookmsg = await asyncio.to_thread(
                self.hooks.run, "PreToolUse", {"tool": tc.name, "args": tc.arguments}
            )
            if blocked:
                # Audited and counted like every other refusal.
                await self._refuse_tool_call(
                    tc, f"Blocked by PreToolUse hook: {hookmsg}", "hook-deny",
                    str(hookmsg or ""),
                )
                return

        # MCP routing (async).
        if is_mcp_tool(tc.name):
            try:
                out = await self.mcp.call(tc.name, tc.arguments)
            except Exception as exc:  # noqa: BLE001
                # A broken MCP server fails this one call, not the whole turn.
                await self._refuse_tool_call(
                    tc, f"MCP tool failed: {type(exc).__name__}: {exc}", "allow",
                    str(exc)[:200],
                )
                return
            await self.io.emit(ev.tool_result(tc.id, tc.name, True, out))
            self._append_tool_result(tc.id, out)
            self._record_audit(tc.name, tc.arguments, "allow", True, str(out or "")[:200])
            self._note(True)
            return

        if tc.name in _FILE_MUTATORS and tc.arguments.get("path"):
            try:
                # Snapshot before the tool runs so the turn can be undone.
                self.checkpointer.record(self.ctx.resolve(tc.arguments["path"]))
                self._turn_mutated.add(tc.arguments["path"])
            except Exception:
                pass

        if tc.name == "run_command":
            def on_out(text: str, _id=tc.id):
                # Called from the worker thread, hence the synchronous emit.
                self.io.emit_sync(ev.command_output(_id, text))

            outcome = await asyncio.to_thread(
                run_command, self.ctx, tc.arguments.get("command", ""), on_out
            )
        else:
            outcome = await asyncio.to_thread(execute, self.ctx, tc.name, tc.arguments)

        if outcome.diff:
            await self.io.emit(ev.diff(tc.arguments.get("path", ""), outcome.diff, outcome.is_new))
        await self.io.emit(ev.tool_result(tc.id, tc.name, outcome.ok, outcome.output))
        self._record_audit(tc.name, tc.arguments, "allow", outcome.ok,
                           (outcome.output or "")[:200])
        self._note(outcome.ok)

        result_text = outcome.output or "(no output)"
        heal = self._self_heal_note(tc, outcome)
        if heal:
            result_text += "\n\n" + heal
        self._append_tool_result(tc.id, result_text)

        # Phase 1: Invalidate graph for mutated files so next turn has fresh context.
        if outcome.ok and tc.name in _FILE_MUTATORS and self.graph_manager:
            raw_path = tc.arguments.get("path", "")
            if raw_path:
                try:
                    abs_path = str(self.ctx.resolve(raw_path))
                    await asyncio.to_thread(self.graph_manager.invalidate_file, abs_path)
                except Exception:
                    pass

        if self.hooks.active:
            await asyncio.to_thread(
                self.hooks.run, "PostToolUse",
                {"tool": tc.name, "args": tc.arguments, "ok": outcome.ok},
            )

    async def _handle_loop_tool(self, tc) -> None:
        """Serve the meta tools implemented by the loop itself: todo_write,
        update_plan, spawn_agent, use_skill and memory_write."""
        if tc.name == "todo_write":
            # Tolerate a missing/None list and malformed entries from the model.
            self.todos = tc.arguments.get("todos") or []
            await self.io.emit(ev.todos(self.todos))
            done = sum(
                1 for t in self.todos
                if isinstance(t, dict) and t.get("status") == "completed"
            )
            self._append_tool_result(tc.id, f"Checklist updated ({done}/{len(self.todos)} done).")
            return

        if tc.name == "update_plan":
            plan_text = tc.arguments.get("plan", "")
            await self.io.emit(ev.plan(plan_text))
            decision = await self.io.request_approval(
                ev.approval_request(tc.id, "update_plan", {}, plan_text)
            )
            if decision.approved:
                self.plan_mode = False
                self._append_tool_result(
                    tc.id, "Plan APPROVED. Plan mode is now off — implement the plan."
                )
            else:
                note = decision.feedback or "revise it"
                self._append_tool_result(tc.id, f"Plan rejected: {note}. Revise the plan.")
            return

        if tc.name == "spawn_agent":
            await self._spawn_agent(tc)
            return

        if tc.name == "use_skill":
            name = tc.arguments.get("name", "")
            skill = self.skills.get(name)
            await self.io.emit(ev.tool_start(tc.id, "use_skill", {"name": name}))
            if skill:
                out = f"Skill '{name}':\n{skill['body']}"
            else:
                out = f"No such skill: {name}. Available: {', '.join(self.skills) or '(none)'}"
            await self.io.emit(ev.tool_result(tc.id, "use_skill", bool(skill), out[:300]))
            self._append_tool_result(tc.id, out)
            return

        if tc.name == "memory_write":
            await self.io.emit(ev.tool_start(tc.id, "memory_write", tc.arguments))
            mem_type = tc.arguments.get("type", "fact")
            content = tc.arguments.get("content", "")
            related_files = tc.arguments.get("related_files", [])
            # `is not None`: a still-empty memory store must accept its first entry.
            if self.session_memory is not None and content:
                try:
                    self.session_memory.add(mem_type, content, related_files or None)
                    result = f"Stored [{mem_type}] memory: {content[:120]}"
                    await self.io.emit(ev.tool_result(tc.id, "memory_write", True, result))
                    self._append_tool_result(tc.id, result)
                except ValueError as exc:
                    err = f"memory_write failed: {exc}"
                    await self.io.emit(ev.tool_result(tc.id, "memory_write", False, err))
                    self._append_tool_result(tc.id, err)
            else:
                msg = "memory_write: no content provided or session memory unavailable."
                await self.io.emit(ev.tool_result(tc.id, "memory_write", False, msg))
                self._append_tool_result(tc.id, msg)
            return

        self._append_tool_result(tc.id, f"Unknown meta-tool: {tc.name}")

    async def _run_subagent(self, prompt: str, phase: str) -> str:
        """Run `prompt` in a nested session on the model assigned to `phase`.

        The child's tokens and cost are folded into this session's totals and
        its own tokens into the shared budget. Returns the child's last
        assistant message ("" if it produced none).
        """
        # NOTE(review): the child does not inherit `dangerous`, so its approval
        # requests are still forwarded to the parent IO — confirm this is intended.
        sub_io = _SubAgentIO(self.io)
        sub = AgentSession(self.workspace, self._cfg_for_phase(phase), sub_io,
                           depth=self.depth + 1, audit=self.audit, budget=self._budget)
        await sub.run(prompt)
        self.session_tokens += sub.session_tokens
        self.session_cost += sub.session_cost
        # The child already charged its own sub-agents to the shared budget;
        # add only what the child itself consumed.
        self._budget["tokens"] += sub.session_tokens - sub._delegated_tokens
        self._delegated_tokens += sub.session_tokens
        return sub_io.last_assistant

    async def _spawn_agent(self, tc) -> None:
        """Serve a spawn_agent call: enforce depth and budget limits, optionally
        prefix specialist guidance, run the sub-agent and return its summary."""
        if self.depth >= _MAX_SUBAGENT_DEPTH:
            self._append_tool_result(tc.id, "Sub-agents cannot spawn more sub-agents.")
            return
        # Agent-of-agent budget guard (shared across the whole sub-agent tree).
        if self._budget["calls"] >= self.config.agent.subagent_max_calls:
            self._append_tool_result(
                tc.id, f"Sub-agent budget exhausted "
                f"({self.config.agent.subagent_max_calls} calls). Do the work yourself.")
            return
        if self._budget["tokens"] >= self.config.agent.subagent_token_budget:
            self._append_tool_result(
                tc.id, "Sub-agent token budget exhausted. Finish without more sub-agents.")
            return
        self._budget["calls"] += 1
        self.subagent_calls += 1
        desc = tc.arguments.get("description", "sub-task")
        prompt = tc.arguments.get("prompt", "")

        # Phase 4: Dynamic specialization — detect the best specialist agent and
        # prepend its system guidance to the sub-agent's prompt.
        if self.agent_selector:
            # Extract file paths mentioned in the prompt (basic heuristic)
            mentioned_files = re.findall(r"[\w\-\/]+\.\w+", prompt)
            specialist = self.agent_selector.select_agent(prompt, mentioned_files or None)
            if specialist:
                # Keywords are matched against the first 200 characters only.
                opening = prompt.lower()[:200]
                matched = ", ".join(k for k in specialist.keywords if k.lower() in opening)
                specialist_header = (
                    f"[Specialist Mode: {specialist.name}]\n"
                    f"{specialist.description}\n"
                    f"Focus your work on this domain. Keywords that matched: "
                    f"{matched}\n\n"
                )
                prompt = specialist_header + prompt

        await self.io.emit(ev.subagent_start(tc.id, desc))

        summary = await self._run_subagent(prompt, "subagent")
        summary = summary or "(sub-agent produced no summary)"
        await self.io.emit(ev.subagent_end(tc.id, summary[:280]))
        self._append_tool_result(tc.id, f"Sub-agent '{desc}' result:\n{summary}")

    def _cfg_for_phase(self, phase: str):
        """A config whose ACTIVE provider is the model assigned to `phase` — so a
        spawned sub-agent runs on (say) the cheap model while still inheriting the
        full models/routing config for its own nested decisions."""
        prov = self.router.provider_for_phase(phase)
        if prov is self.config.provider:
            return self.config
        return replace(self.config, provider=prov)

    def _self_heal_note(self, tc, outcome) -> str:
        """When a test/build command fails and self_heal is on, nudge the agent to
        diagnose and fix it, bounded by `agent.self_heal` attempts per command.

        Returns "" when no note applies.
        """
        limit = self.config.agent.self_heal
        if limit <= 0 or outcome.ok or tc.name != "run_command":
            return ""
        cmd = tc.arguments.get("command", "")
        if not re.search(r"(?i)\b(test|pytest|build|lint|tsc|mypy|cargo|gradle|jest|vitest)\b", cmd):
            return ""
        # Attempts are counted per exact command string within the turn.
        n = self._heal_attempts.get(cmd, 0) + 1
        self._heal_attempts[cmd] = n
        if n > limit:
            return (f"[self-heal] Gave up after {limit} attempt(s) on `{cmd}`. "
                    "Summarize the remaining failure for the user.")
        return (f"[self-heal {n}/{limit}] This command FAILED. Diagnose the root cause, "
                f"apply a fix, then re-run `{cmd}` to confirm it passes.")

    async def _maybe_verify_edits(self) -> None:
        """After a turn that changed files, optionally spawn a critic sub-agent that
        adversarially reviews the diff before the user trusts it."""
        if (not self.config.agent.verify_edits or self.depth != 0
                or not self._turn_mutated):
            return
        if self._budget["calls"] >= self.config.agent.subagent_max_calls:
            return
        files = ", ".join(sorted(self._turn_mutated)[:20])
        await self.io.emit(ev.info(f"verifying edits to: {files}"))
        prompt = (
            "You are a strict code reviewer. Review the changes just made to these "
            f"files: {files}. Use git_diff (and read_file) to inspect them. Check: "
            "does it compile/parse, does it match the apparent intent, did it break "
            "anything, are there bugs, security issues, or missing tests? Reply with "
            "a short verdict (APPROVE or NEEDS-WORK) and a bullet list of concrete "
            "issues. Do NOT edit files."
        )
        self._budget["calls"] += 1
        verdict = await self._run_subagent(prompt, "verify")
        verdict = verdict or "(no verdict)"
        await self.io.emit(ev.assistant_message("🔎 Verifier review:\n" + verdict))

    def _append_tool_result(self, call_id: str, content: str) -> None:
        """Append a `tool` message answering `call_id`, trimming oversized output."""
        # Bound very large tool outputs kept in context (keep head + tail).
        if content and len(content) > _MAX_TOOL_RESULT_CHARS:
            head = _MAX_TOOL_RESULT_CHARS * 2 // 3
            tail = _MAX_TOOL_RESULT_CHARS - head
            content = (content[:head] + "\n…[output trimmed to save context]…\n" + content[-tail:])
        self.messages.append({"role": "tool", "tool_call_id": call_id, "content": content})