PyPI - ata-coder - Versions diffs - 2.4.2__py3-none-any.whl - Mend

ata-coder 2.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

ata_coder/__init__.py +1 -0
ata_coder/agent.py +874 -0
ata_coder/agent_compact.py +190 -0
ata_coder/agent_controller.py +218 -0
ata_coder/agent_extension.py +69 -0
ata_coder/agent_routing.py +105 -0
ata_coder/agent_subsystems.py +72 -0
ata_coder/agent_tools.py +318 -0
ata_coder/agent_undo.py +63 -0
ata_coder/anthropic_client.py +465 -0
ata_coder/change_tracker.py +368 -0
ata_coder/clawd_integration.py +574 -0
ata_coder/commands/__init__.py +128 -0
ata_coder/commands/_core.py +184 -0
ata_coder/commands/_safety.py +95 -0
ata_coder/commands/_settings.py +241 -0
ata_coder/commands/_workflow.py +451 -0
ata_coder/commands.py +974 -0
ata_coder/config.py +257 -0
ata_coder/core/__init__.py +35 -0
ata_coder/core/events.py +73 -0
ata_coder/core/queue.py +85 -0
ata_coder/core/state.py +17 -0
ata_coder/event_queue.py +5 -0
ata_coder/extension.py +654 -0
ata_coder/extensions/__init__.py +1 -0
ata_coder/extensions/hello_skill.py +47 -0
ata_coder/fool_proof.py +295 -0
ata_coder/git_workflow.py +371 -0
ata_coder/gui.py +511 -0
ata_coder/llm_client.py +543 -0
ata_coder/main.py +814 -0
ata_coder/mcp_client.py +1095 -0
ata_coder/memory.py +539 -0
ata_coder/model_registry.py +134 -0
ata_coder/model_router.py +105 -0
ata_coder/permissions.py +274 -0
ata_coder/privilege.py +464 -0
ata_coder/project.py +273 -0
ata_coder/prompt_template.py +423 -0
ata_coder/prompts/auto-mode.md +7 -0
ata_coder/prompts/coding-rules.md +40 -0
ata_coder/prompts/execution-guardrails.md +14 -0
ata_coder/prompts/memory-system.md +24 -0
ata_coder/prompts/output-style.md +23 -0
ata_coder/prompts/safety.md +17 -0
ata_coder/prompts/slash-commands.md +24 -0
ata_coder/prompts/sub-agents.md +38 -0
ata_coder/prompts/system-reminders.md +17 -0
ata_coder/prompts/system.md +105 -0
ata_coder/prompts/tool-policy.md +46 -0
ata_coder/repl_theme.py +99 -0
ata_coder/repl_tracker.py +89 -0
ata_coder/repl_ui.py +1214 -0
ata_coder/safety_guard.py +434 -0
ata_coder/self_correct.py +346 -0
ata_coder/server.py +882 -0
ata_coder/server_session.py +159 -0
ata_coder/server_shell.py +129 -0
ata_coder/session.py +431 -0
ata_coder/settings.py +439 -0
ata_coder/setup_wizard.py +136 -0
ata_coder/skill_extension.py +92 -0
ata_coder/skills/architect/SKILL.md +42 -0
ata_coder/skills/code-reviewer/SKILL.md +37 -0
ata_coder/skills/codecraft/SKILL.md +452 -0
ata_coder/skills/debugger/SKILL.md +45 -0
ata_coder/skills/doc-writer/SKILL.md +36 -0
ata_coder/skills/general-coder/SKILL.md +76 -0
ata_coder/skills/math-calculator/README.md +40 -0
ata_coder/skills/math-calculator/SKILL.md +59 -0
ata_coder/skills/math-calculator/handler.py +103 -0
ata_coder/skills/math-calculator/prompts/system.md +8 -0
ata_coder/skills/math-calculator/requirements.txt +2 -0
ata_coder/skills/math-calculator/resources/constants.json +8 -0
ata_coder/skills/math-calculator/tests/test_handler.py +53 -0
ata_coder/skills/security-auditor/SKILL.md +40 -0
ata_coder/skills/test-writer/SKILL.md +36 -0
ata_coder/skills/weather-skill/README.md +45 -0
ata_coder/skills/weather-skill/handler.py +76 -0
ata_coder/skills/weather-skill/manifest.json +48 -0
ata_coder/skills/weather-skill/prompts/system_prompt.txt +9 -0
ata_coder/skills/weather-skill/prompts/user_prompt_template.txt +3 -0
ata_coder/skills/weather-skill/requirements.txt +1 -0
ata_coder/skills/weather-skill/resources/city_list.json +17 -0
ata_coder/skills/weather-skill/resources/error_messages.json +7 -0
ata_coder/skills/weather-skill/tests/test_handler.py +28 -0
ata_coder/skills/weather-skill/weather_utils.py +50 -0
ata_coder/skills.py +1014 -0
ata_coder/sub_agent.py +273 -0
ata_coder/sub_agent_manager.py +203 -0
ata_coder/system_prompt_builder.py +146 -0
ata_coder/task_planner.py +391 -0
ata_coder/terminal.py +318 -0
ata_coder/test_runner.py +219 -0
ata_coder/thread_supervisor.py +195 -0
ata_coder/tool_defs.py +335 -0
ata_coder/tools/__init__.py +11 -0
ata_coder/tools/definitions.py +335 -0
ata_coder/tools/executor.py +1036 -0
ata_coder/tools/result.py +26 -0
ata_coder/tools/subagent.py +332 -0
ata_coder/tools/web.py +361 -0
ata_coder/tools.py +1576 -0
ata_coder/types.py +92 -0
ata_coder/utils.py +113 -0
ata_coder/web/css/style.css +180 -0
ata_coder/web/index.html +84 -0
ata_coder/web/js/app.js +489 -0
ata_coder/web/package-lock.json +25 -0
ata_coder/web/package.json +10 -0
ata_coder/web/tsconfig.json +13 -0
ata_coder-2.4.2.dist-info/METADATA +799 -0
ata_coder-2.4.2.dist-info/RECORD +118 -0
ata_coder-2.4.2.dist-info/WHEEL +5 -0
ata_coder-2.4.2.dist-info/entry_points.txt +2 -0
ata_coder-2.4.2.dist-info/licenses/LICENSE +21 -0
ata_coder-2.4.2.dist-info/top_level.txt +1 -0

ata_coder/agent.py ADDED Viewed

@@ -0,0 +1,874 @@
+# -*- coding: utf-8 -*-
+"""
+Core Agent loop for ATA Coder.
+Integrates:
+- Skills system (configurable personas)
+- Memory system (persistent context across sessions)
+- MCP client (cross-system tool interoperability)
+- Prompt templates (dynamic context injection)
+- Permission system (interactive allow/deny)
+- Project detection (language, framework, build system)
+- Session persistence (save/resume/export)
+The agent runs a conversation loop:
+1. Build system prompt from skill + memory + templates + project context
+2. Send conversation to the LLM
+3. Execute tool calls (built-in + MCP) with permission checks
+4. Feed results back and continue
+5. Complete when the task is done
+"""
+import asyncio
+import json
+import logging
+import os
+import time
+from typing import Any, Callable
+from .config import AppConfig
+from .llm_client import LLMClient, SYSTEM_PROMPT
+from .anthropic_client import AnthropicClient
+from .tools import ToolExecutor, TOOL_DEFINITIONS, ToolResult
+from .types import Message
+from .agent_subsystems import AgentSubsystems
+from .system_prompt_builder import SystemPromptBuilder
+from .fool_proof import FoolProofEngine
+from .change_tracker import ChangeTracker
+from .privilege import PrivilegeManager
+from .self_correct import SelfCorrectionEngine
+from .git_workflow import GitWorkflow
+from .extension import get_extension_manager
+from .clawd_integration import get_clawd
+from .agent_compact import CompactionMixin
+from .agent_tools import ToolExecutionMixin
+from .agent_routing import ModelRoutingMixin
+from .agent_extension import ExtensionMixin
+# ── Event types & Agent state ──────────────────────────────────────────
+from .core import (  # noqa: F401 — re-exported for external use
+    AgentEvent, CompleteEvent, ErrorEvent, ReasoningEvent,
+    SkillChangedEvent, TextDeltaEvent, ThinkingEvent,
+    ToolCallEvent, ToolResultEvent, ToolStreamEvent,
+)
+from .core.state import AgentState
+logger = logging.getLogger(__name__)
+class _SessionLogger(logging.LoggerAdapter):
+    """Injects ``session_id`` into log records for structured tracing."""
+    def process(self, msg, kwargs):
+        sid = self.extra.get("session_id", "") if self.extra else ""
+        if sid:
+            return f"[{sid[:8]}] {msg}", kwargs
+        return msg, kwargs
+# ── The Agent ────────────────────────────────────────────────────────────────
+class CoderAgent(CompactionMixin, ToolExecutionMixin,
+                 ModelRoutingMixin, ExtensionMixin):
+    """
+    The main ATA Coder agent with skills, memory, MCP, templates,
+    permissions, project detection, and session persistence.
+    """
+    def __init__(
+        self,
+        config: AppConfig | None = None,
+        tool_executor: ToolExecutor | None = None,
+        subsystems: AgentSubsystems | None = None,
+    ) -> None:
+        """Wire up the LLM client, tools, subsystems, extensions and safety helpers.
+        Args:
+            config: Application configuration; ``AppConfig.load()`` is used
+                when omitted.
+            tool_executor: Executor for built-in tools; a new
+                ``ToolExecutor(self.config.agent)`` is created when omitted.
+            subsystems: Bundle providing skills, memory, MCP, templates,
+                permissions, project info, sessions and extensions; a fresh
+                ``AgentSubsystems()`` is created when omitted.
+        """
+        self.config = config or AppConfig.load()
+        # Choose client: Anthropic or OpenAI format
+        if self.config.llm.use_anthropic:
+            self.llm = AnthropicClient(self.config.llm)
+            self._use_anthropic = True
+        else:
+            self.llm = LLMClient(self.config.llm)
+            self._use_anthropic = False
+        self.tools = tool_executor or ToolExecutor(self.config.agent)
+        # ── Subsystems ────────────────────────────────────────────────────
+        # Each subsystem is also exposed as a direct attribute for convenience.
+        self.subsys = subsystems or AgentSubsystems()
+        self.skills = self.subsys.skills
+        self.memory = self.subsys.memory
+        self.mcp = self.subsys.mcp
+        if self.mcp:
+            # Hand the MCP client to the tool executor as well.
+            self.tools.set_mcp_client(self.mcp)
+        self.templates = self.subsys.templates
+        self.permissions = self.subsys.permissions
+        self.project_info = self.subsys.project_info
+        self.sessions = self.subsys.sessions
+        # ── Extension Manager ─────────────────────────────────────────────
+        # Reuse the manager supplied with the subsystems; otherwise fetch one
+        # via get_extension_manager() and store it back on the bundle.
+        if self.subsys.extensions is not None:
+            self.ext_mgr = self.subsys.extensions
+        else:
+            self.ext_mgr = get_extension_manager()
+            self.subsys.extensions = self.ext_mgr
+        # Register skills as extensions
+        self._register_skills_as_extensions()
+        # Discover extensions from extension directories
+        self._discover_extensions()
+        # Register extension points for agent lifecycle hooks
+        self._register_extension_points()
+        # Activate all skill-tagged extensions (multi-skill)
+        # The names are collected into a list first, then activated one by one.
+        for ext_name in [e.meta.name for e in self.ext_mgr.get_by_tag("skill")]:
+            self.ext_mgr.activate(ext_name)
+        # ── System prompt builder ─────────────────────────────────────────
+        self._prompt_builder = SystemPromptBuilder(
+            subsystems=self.subsys,
+            workspace_dir=self.tools.workspace,
+            model=self.config.llm.model,
+            default_prompt=SYSTEM_PROMPT,
+        )
+        # ── Tool & safety infrastructure ──────────────────────────────────
+        self.change_tracker = ChangeTracker()
+        self.fool_proof = FoolProofEngine(
+            workspace=self.tools.workspace,
+            permission_store=self.permissions,
+            change_tracker=self.change_tracker,
+        )
+        self.privilege_mgr = PrivilegeManager(self.tools.workspace)
+        self.self_correct = SelfCorrectionEngine(max_retries=1)
+        self.git = GitWorkflow(self.tools.workspace)
+        # ── Per-run state and caches ──────────────────────────────────────
+        self._state = AgentState()
+        self._on_event: Callable[[AgentEvent], None] | None = None
+        self._current_session_id: str = ""
+        self._pending_memory_suggestions: list[str] = []
+        self._cached_system_prompt: str | None = None  # invalidated on new build / compact
+        self._cached_allowed_tools: set[str] | None = None  # invalidated on skill change
+        # Build the combined tool list
+        # Order: built-in definitions, then MCP tools, then extension tools.
+        self._all_tools = list(TOOL_DEFINITIONS)
+        if self.mcp:
+            mcp_tools = self.mcp.get_tools()
+            self._all_tools.extend(mcp_tools)
+            logger.debug(
+                "MCP tools added: %d", len(mcp_tools),
+            )
+        # Extension tools
+        ext_tools = self.ext_mgr.aggregate_tools()
+        if ext_tools:
+            self._all_tools.extend(ext_tools)
+            logger.debug("Extension tools added: %d", len(ext_tools))
+        # NOTE(review): get_tools() is called a second time here only to log a
+        # count, and the call runs even when debug logging is disabled —
+        # consider reusing the length obtained above.
+        logger.debug(
+            "Total tools: %d builtin + %s MCP + %s extensions = %d",
+            len(TOOL_DEFINITIONS),
+            len(self.mcp.get_tools()) if self.mcp else 0,
+            len(ext_tools),
+            len(self._all_tools),
+        )
+        # The LLM client receives the full, unfiltered tool list.
+        self.llm.register_tools(self._all_tools)
+        # ── Sub-agent manager (set later by AgentController if used) ──────
+        self._sub_agent_mgr = None
+        # ── Parallel tool execution uses asyncio.gather ───────────────────
+    # ── Model routing → agent_routing.py (ModelRoutingMixin)
+    # ── Extension management → agent_extension.py (ExtensionMixin)
+    # ── Event system ──────────────────────────────────────────────────────
+    def on_event(self, callback: Callable[[AgentEvent], None]) -> None:
+        self._on_event = callback
+    def _emit(self, event: AgentEvent) -> None:
+        """Emit event to both callback and EventQueue (if available).
+        Uses put_nowait() for non-blocking FIFO — safe to call from
+        both asyncio tasks and asyncio.to_thread() contexts.
+        """
+        event_queue = getattr(self, "_event_queue", None)
+        if event_queue is not None:
+            try:
+                event_queue.put_nowait(event)
+            except asyncio.QueueFull:
+                logger.warning("Event queue full (%d pending), dropping event: %s",
+                              event_queue.count(), type(event).__name__)
+            except Exception:
+                logger.debug("Event queue closed — dropping event: %s", type(event).__name__)
+        # Backward-compatible callback
+        if self._on_event:
+            self._on_event(event)
+    # ── Main entry point ──────────────────────────────────────────────────
+    async def run(self, task: str, stream: bool = True, skill_name: str | None = None,
+            explicit_model: str = "", reset_context: bool = True) -> str:
+        """
+        Run the agent on a given task.
+        Either continues the existing conversation (``reset_context=False`` and
+        history present) or starts a fresh run: new ``AgentState``, model
+        routing, extension-point triggers, session id, skill selection and a
+        new system prompt. The actual LLM/tool loop is delegated to
+        ``_run_loop``.
+        Args:
+            task: User task description
+            stream: Enable streaming output
+            skill_name: Force a specific skill (or None for auto-detect).
+                Only consulted when a fresh run is started.
+            explicit_model: Explicit model override (bypasses auto-routing).
+                Only consulted when a fresh run is started.
+            reset_context: If False, preserve existing conversation history
+                           (for persistent sessions like the HTTP API).
+        Returns:
+            Final response text, or ``"Error: <exception>"`` when the loop
+            raises an unexpected exception.
+        Raises:
+            KeyboardInterrupt, SystemExit, asyncio.CancelledError: re-raised
+                unchanged so interruption and cancellation still propagate.
+        """
+        # ── Persistent session: preserve existing conversation ─────────────
+        if not reset_context and self._state.messages:
+            # Append new user message to existing conversation; keep system
+            # prompt and all prior messages intact.
+            self._state.messages.append({"role": "user", "content": task})
+            # Rebuild system prompt for updated memory context but don't
+            # replace the original system message (memory/git context may
+            # have changed, but conversation integrity is paramount).
+            system_prompt = self._build_system_prompt(task)
+            self._cached_system_prompt = system_prompt
+            self._cached_allowed_tools = None
+            self._state.tool_call_count = 0  # reset per-run counter
+            # NOTE(review): start_time is not reset in this branch, so the
+            # elapsed time that _run_loop reports is measured from the start of
+            # the original run — confirm this is intended.
+            # NOTE(review): skill_name and explicit_model are not used in this
+            # branch; no routing or skill activation happens here.
+            logger.info("Agent run (session): skill=%s, model=%s, session=%s, "
+                         "history=%d msgs, task=%.100s",
+                         self.skills.active_skill.name if self.skills and self.skills.active_skill else "default",
+                         self.current_model,
+                         self._current_session_id,
+                         len(self._state.messages),
+                         task)
+        else:
+            # Fresh run: discard any previous state and stamp the start time.
+            self._state = AgentState(start_time=time.time())
+            # ── Model routing ──────────────────────────────────────────────
+            self._route_for_task(task, explicit_model)
+            # Trigger extension point: on_model_route
+            self._ep_on_model_route.trigger(
+                task=task, complexity=self._routed_complexity, model=self.current_model
+            )
+            # Trigger extension point: on_run_start
+            self._ep_on_run_start.trigger(task=task, skill_name=skill_name)
+            # Reset change tracker for new run
+            self.change_tracker.reset()
+            self.change_tracker.dry_run = False
+            # Generate session ID
+            # Imported inside the function rather than at module top —
+            # presumably to avoid an import cycle; TODO confirm.
+            from .session import generate_session_id
+            self._current_session_id = generate_session_id(
+                task,
+                skill_name or (self.skills.active_skill.name if self.skills and self.skills.active_skill else ""),
+            )
+            # Per-session structured logger — injects session_id prefix
+            self._slog = _SessionLogger(logger, {"session_id": self._current_session_id})
+            # Skill selection (multi-skill support)
+            if skill_name and self.skills:
+                # Explicit skill: merged with whatever is already active.
+                skill = self.skills.activate(skill_name, merge=True)
+                if skill:
+                    self._emit(SkillChangedEvent(skill.name))
+            elif self.skills:
+                # Keyword-based skill detection — zero extra LLM calls.
+                # Single-skill activation only: multi-skill merging causes
+                # confusion with weaker models (prompt dilution).
+                detected = self.skills.detect_skill(task)
+                # "general-coder" is treated as the default and never activated here.
+                if detected and detected.name != "general-coder":
+                    self.skills.activate(detected.name, merge=False)
+                    self._emit(SkillChangedEvent(detected.name))
+                    logger.info(
+                        "Skill route: %s for: %.80s", detected.name, task
+                    )
+            # Build system prompt — pass the user task for targeted memory recall
+            system_prompt = self._build_system_prompt(task)
+            self._cached_system_prompt = system_prompt  # pre-seed cache
+            self._cached_allowed_tools = None  # invalidate on new run
+            # The conversation starts with exactly two messages: system + user.
+            self._state.messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": task},
+            ]
+            logger.info("Agent run: skill=%s, model=%s, session=%s, task=%.100s",
+                         self.skills.active_skill.name if self.skills and self.skills.active_skill else "default",
+                         self.current_model,
+                         self._current_session_id,
+                         task)
+        # ── Main agent loop with error boundary ──────────────────────────────
+        try:
+            return await self._run_loop(task, stream)
+        except (KeyboardInterrupt, SystemExit, asyncio.CancelledError):
+            # Never swallow interruption or cancellation.
+            raise
+        except Exception as e:
+            # Any other failure is logged, surfaced as an ErrorEvent and
+            # returned as text rather than raised to the caller.
+            logger.critical("Agent fatal error: %s", e, exc_info=True)
+            self._emit(ErrorEvent(f"Fatal error: {e}"))
+            return f"Error: {e}"
+        finally:
+            # Auto-save session after every task (best-effort, never crashes)
+            self._auto_save_session()
+            # Always deactivate skill after task — prevents state leak
+            if self.skills:
+                self.skills.deactivate()
+    async def _run_loop(self, task: str, stream: bool = True) -> str:
+        """Main agent loop — extracted for error boundary isolation."""
+        SAFETY_LIMIT = 999  # circuit breaker — not a tool-call "limit"
+        _consecutive_failures = 0  # break loop when model is stuck failing
+        _MAX_CONSECUTIVE_FAILURES = 5
+        last_text = ""  # guard against UnboundLocalError when LLM returns empty content
+        while True:
+            # Circuit breaker: prevent infinite loop when the model keeps
+            # emitting tool calls (hallucination / API bug).  This is NOT a
+            # user-facing tool limit — just a last-resort safety net.
+            if self._state.tool_call_count >= SAFETY_LIMIT:
+                logger.critical(
+                    "SAFETY_LIMIT reached: %d tool calls. Breaking loop.",
+                    self._state.tool_call_count,
+                )
+                self._emit(ErrorEvent(
+                    f"Safety limit reached ({SAFETY_LIMIT} tool calls). "
+                    "The model may be stuck in a tool-call loop."
+                ))
+                # Clawd: error state — prevent stuck thinking animation
+                get_clawd().error(
+                    f"Safety limit reached ({SAFETY_LIMIT} tool calls)"
+                )
+                break
+            self._emit(ThinkingEvent())
+            # Clawd: model is generating, show thinking animation
+            get_clawd().thinking()
+            # Auto-compact when approaching the effective context limit.
+            # effective_context_tokens (default 200k) reflects the range where
+            # the model actually pays attention, not the theoretical 1M window.
+            # We compact at 80% of effective limit, which is well below the
+            # theoretical max_context_tokens.
+            est_tokens = self.get_token_estimate()
+            max_tokens = self.config.agent.max_context_tokens
+            effective = self.config.agent.effective_context_tokens
+            if est_tokens > effective:
+                logger.warning("Token budget: %d/%d effective (%.0f%% of %d max), auto-compacting",
+                             est_tokens, effective, est_tokens / max(max_tokens, 1) * 100, max_tokens)
+                await self.compact()
+                # Re-estimate AFTER compaction — the message list has changed
+                est_tokens = self.get_token_estimate()
+            # Hard ceiling: if compaction didn't help enough, force-truncate
+            if est_tokens > max_tokens * 0.95:
+                logger.critical("Hard token ceiling: %d > 95%% of %d max. Force-truncating.",
+                               est_tokens, max_tokens)
+                self._force_truncate()
+            # Get allowed tools from multi-skill intersection
+            allowed_tool_names = self._compute_allowed_tools()
+            filtered_tools = self._all_tools
+            if allowed_tool_names is not None and len(allowed_tool_names) > 0:
+                filtered_tools = [
+                    t for t in self._all_tools
+                    if t["function"]["name"] in allowed_tool_names
+                    or t["function"]["name"].startswith("mcp__")
+                ]
+            if stream:
+                response = await self._streaming_chat(filtered_tools)
+            else:
+                response = await self.llm.chat(
+                    self._state.messages,
+                    tools=filtered_tools,
+                    system_prompt=self._extract_system_prompt(),
+                )
+            tool_calls = response.get("tool_calls", [])
+            text = response.get("content", "")
+            if text:
+                last_text = text
+            if not tool_calls:
+                final_response = text or last_text
+                # Clawd: Stop — model finished its turn
+                get_clawd().stop(assistant_output=final_response)
+                break
+            # Pre-parse args for both parallelization check and execution
+            pre_parsed: dict[int, dict] = {}
+            for i, tc in enumerate(tool_calls):
+                try:
+                    pre_parsed[i] = json.loads(tc["function"]["arguments"])
+                except json.JSONDecodeError:
+                    pre_parsed[i] = {}
+            batch_results: list[ToolResult] = []
+            # Execute tool calls (parallel if independent, serial if dependent)
+            if len(tool_calls) > 1 and self._can_parallelize(tool_calls, pre_parsed):
+                # Clawd: one PreToolUse for the batch (not per-tool)
+                get_clawd().tool_use(
+                    tool_name=", ".join(tc["function"]["name"] for tc in tool_calls[:3]),
+                    tool_input={"batch_size": len(tool_calls)},
+                )
+                results = await self._execute_parallel(tool_calls, text)
+                batch_results = results
+                self._state.tool_call_count += len(tool_calls)
+                # Clawd: one PostToolUse for the batch
+                all_ok = all(r.success for r in results)
+                get_clawd().tool_result(tool_name="batch", success=all_ok)
+                # One assistant message with ALL tool_calls (OpenAI standard)
+                assistant_msg: dict[str, Any] = {
+                    "role": "assistant", "content": text or None, "tool_calls": tool_calls,
+                }
+                if response.get("reasoning_content"):
+                    assistant_msg["reasoning_content"] = response["reasoning_content"]
+                self._state.messages.append(assistant_msg)
+                for tc, result in zip(tool_calls, results, strict=True):
+                    self._warn_if_large_result(result, tc["function"]["name"])
+                    self._store_tool_result(result, tc["id"])
+            else:
+                # Clawd: one PreToolUse for the batch (not per-tool)
+                get_clawd().tool_use(
+                    tool_name=", ".join(tc["function"]["name"] for tc in tool_calls[:3]),
+                    tool_input={"batch_size": len(tool_calls)},
+                )
+                for i, tc in enumerate(tool_calls):
+                    self._state.tool_call_count += 1
+                    tool_name = tc["function"]["name"]
+                    arguments = pre_parsed.get(i, {})
+                    result = await self._execute_tool(tool_name, arguments)
+                    batch_results.append(result)
+                    self._warn_if_large_result(result, tool_name)
+                    assistant_msg: dict[str, Any] = {
+                        "role": "assistant", "content": text or None, "tool_calls": [tc],
+                    }
+                    if response.get("reasoning_content"):
+                        assistant_msg["reasoning_content"] = response["reasoning_content"]
+                    self._state.messages.append(assistant_msg)
+                    self._store_tool_result(result, tc["id"])
+                # Clawd: one PostToolUse for the serial batch
+                all_ok = all(r.success for r in batch_results)
+                get_clawd().tool_result(tool_name="batch", success=all_ok)
+            # ── Consecutive failure detection ──────────────────────────
+            # When every tool call in a batch fails, increment counter.
+            # Break the loop after N consecutive all-fail batches to
+            # prevent infinite token burn when the model is stuck.
+            if batch_results and not any(r.success for r in batch_results):
+                _consecutive_failures += 1
+                logger.warning("All %d tool(s) failed this turn (streak=%d/%d)",
+                              len(batch_results), _consecutive_failures, _MAX_CONSECUTIVE_FAILURES)
+                if _consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
+                    self._emit(ErrorEvent(
+                        f"Too many consecutive tool failures "
+                        f"({_consecutive_failures} batches). "
+                        "The model may be stuck in a failure loop."
+                    ))
+                    # Clawd: error state — prevent stuck thinking animation
+                    get_clawd().error(
+                        "Too many consecutive tool failures"
+                    )
+                    break
+            else:
+                _consecutive_failures = 0  # any success resets the streak
+        elapsed = time.time() - self._state.start_time
+        self._emit(CompleteEvent(
+            self._state.tool_call_count, elapsed,
+            estimated_tokens=self.get_token_estimate(),
+        ))
+        # ── Auto-suggest memories ────────────────────────────────────────
+        if self.memory:
+            try:
+                user_msgs = [m.get("content", "") for m in self._state.messages
+                            if m.get("role") == "user"]
+                # Collect tool error messages so the memory system can learn
+                # from failed patterns (e.g. "cd is blocked" → "use python subprocess")
+                tool_errors = [m.get("content", "") for m in self._state.messages
+                              if m.get("role") == "tool"
+                              and m.get("content", "").startswith("Error:")]
+                suggestions = self.memory.suggest_from_conversation(
+                    user_msgs, tool_errors=tool_errors,
+                )
+                if suggestions:
+                    logger.info("Memory suggestions: %d", len(suggestions))
+                    # Store suggestions on the instance so the UI can display them
+                    self._pending_memory_suggestions = suggestions
+            except Exception:
+                self._pending_memory_suggestions = []
+        # Trigger extension point: on_run_complete
+        self._ep_on_run_complete.trigger(
+            task=task,
+            result=final_response or "Task completed.",
+            tool_call_count=self._state.tool_call_count,
+        )
+        return final_response or "Task completed."
+    # ── Tool execution → agent_tools.py (ToolExecutionMixin)
+    async def _streaming_chat(self, filtered_tools: list[dict] | None = None) -> Message:
+        """Stream chat with tool collection."""
+        collected_text = ""
+        tool_calls: list[dict] = []
+        reasoning_content = ""
+        _thinking_sent = False  # throttle Clawd thinking updates
+        async for delta_type, content in self.llm.chat_stream(
+            self._state.messages,
+            tools=filtered_tools or None,
+            system_prompt=self._extract_system_prompt(),
+        ):
+            if delta_type == "text":
+                collected_text += content
+                self._emit(TextDeltaEvent(content))
+                if not _thinking_sent:
+                    get_clawd().thinking()
+                    _thinking_sent = True
+            elif delta_type == "tool_call":
+                tool_calls.append(content)
+            elif delta_type == "finish":
+                pass
+            elif delta_type == "reasoning":
+                reasoning_content += content
+                self._emit(ReasoningEvent(content))
+                if not _thinking_sent:
+                    get_clawd().thinking()
+                    _thinking_sent = True
+        result: Message = {
+            "role": "assistant",
+            "content": collected_text,
+            "tool_calls": tool_calls,
+        }
+        if reasoning_content:
+            result["reasoning_content"] = reasoning_content
+        return result
+    async def chat(self, message: str, stream: bool = True) -> str:
+        """Continue conversation with follow-up.
+        Mirrors the main run() loop: skill tool filtering, token compaction,
+        consecutive-failure detection, and circuit breaker.
+        """
+        self._state.messages.append({"role": "user", "content": message})
+        SAFETY_LIMIT = 999  # circuit breaker
+        _consecutive_failures = 0
+        _MAX_CONSECUTIVE_FAILURES = 5
+        while self._state.tool_call_count < SAFETY_LIMIT:
+            # ── Token budget: auto-compact when approaching the limit ────
+            est_tokens = self.get_token_estimate()
+            max_tokens = self.config.agent.max_context_tokens
+            effective = self.config.agent.effective_context_tokens
+            if est_tokens > effective:
+                logger.warning("chat(): token budget %d/%d effective, auto-compacting",
+                             est_tokens, effective)
+                await self.compact()
+                est_tokens = self.get_token_estimate()
+            if est_tokens > max_tokens * 0.95:
+                logger.critical("chat(): hard ceiling %d > 95%% of %d, force-truncating",
+                               est_tokens, max_tokens)
+                self._force_truncate()
+            # ── Skill tool filtering ────────────────────────────────────
+            allowed_tool_names = self._compute_allowed_tools()
+            filtered_tools = self._all_tools
+            if allowed_tool_names is not None and len(allowed_tool_names) > 0:
+                filtered_tools = [
+                    t for t in self._all_tools
+                    if t["function"]["name"] in allowed_tool_names
+                    or t["function"]["name"].startswith("mcp__")
+                ]
+            if stream:
+                response = await self._streaming_chat(filtered_tools)
+            else:
+                response = await self.llm.chat(
+                    self._state.messages,
+                    tools=filtered_tools,
+                    system_prompt=self._extract_system_prompt(),
+                )
+            tool_calls = response.get("tool_calls", [])
+            text = response.get("content", "")
+            if not tool_calls:
+                return text or "Done."
+            # Execute tool calls (serial for safety in follow-up context)
+            batch_results: list[ToolResult] = []
+            for tc in tool_calls:
+                self._state.tool_call_count += 1
+                tool_name = tc["function"]["name"]
+                try:
+                    arguments = json.loads(tc["function"]["arguments"])
+                except json.JSONDecodeError:
+                    arguments = {}
+                result = await self._execute_tool(tool_name, arguments)
+                batch_results.append(result)
+                self._warn_if_large_result(result, tool_name)
+                self._state.messages.append({
+                    "role": "assistant",
+                    "content": text or None,
+                    "tool_calls": [tc],
+                })
+                self._store_tool_result(result, tc["id"])
+            # ── Consecutive failure detection ───────────────────────────
+            if batch_results and not any(r.success for r in batch_results):
+                _consecutive_failures += 1
+                logger.warning("chat(): all %d tool(s) failed (streak=%d/%d)",
+                             len(batch_results), _consecutive_failures, _MAX_CONSECUTIVE_FAILURES)
+                if _consecutive_failures >= _MAX_CONSECUTIVE_FAILURES:
+                    self._emit(ErrorEvent(
+                        f"Too many consecutive tool failures "
+                        f"({_consecutive_failures} batches)."
+                    ))
+                    break
+            else:
+                _consecutive_failures = 0
+        return text or "Done."
+    # ── Tool filtering → agent_tools.py (ToolExecutionMixin)
+    # ── System prompt builder ─────────────────────────────────────────────
+    def _build_system_prompt(self, user_input: str = "") -> str:
+        """Build a context-rich system prompt from all subsystems.
+        Delegates to the extracted SystemPromptBuilder so each section
+        (environment, project, tools, MCP, memory, formatting) lives in
+        its own method and can be tested individually.
+        When *user_input* is provided, memory recall is targeted to the
+        current task rather than returning a generic summary.
+        """
+        # Refresh model name on each build (may have changed via /model)
+        self._prompt_builder.model = self.config.llm.model
+        return self._prompt_builder.build(TOOL_DEFINITIONS, user_input=user_input)
+    # ── Memory commands ───────────────────────────────────────────────────
+    def remember(self, name: str, description: str, content: str,
+                 memory_type: str = "reference") -> str:
+        """Store a memory. Called by /remember command."""
+        if not self.memory:
+            return "Memory system not initialized."
+        from .memory import Memory
+        m = Memory(
+            name=name,
+            description=description,
+            content=content,
+            metadata={"type": memory_type},
+        )
+        self.memory.add(m)
+        return f"Memory saved: {name}"
+    def recall(self, query: str) -> str:
+        """Search memories. Called by /recall command."""
+        if not self.memory:
+            return "Memory system not initialized."
+        results = self.memory.search(query)
+        if not results:
+            return f"No memories found for: {query}"
+        lines = [f"Found {len(results)} memories:"]
+        for m in results[:10]:
+            lines.append(f"\n### {m.description}")
+            lines.append(f"Type: {m.memory_type} | Updated: {m.updated}")
+            lines.append(m.content[:300])
+        return "\n".join(lines)
+    # ── Helpers → agent_tools.py (ToolExecutionMixin)
+    # ── Parallel execution → agent_tools.py (ToolExecutionMixin)
+    # ── Undo / Redo / Dry-run ────────────────────────────────────────────
+    def undo(self, count: int = 1) -> str:
+        """Undo the last N changes."""
+        if not self.change_tracker:
+            return "Change tracker not available."
+        reverted = self.change_tracker.undo(count)
+        if not reverted:
+            return "Nothing to undo."
+        lines = [f"Undid {len(reverted)} change(s):"]
+        for c in reverted:
+            lines.append(f"  {c.summary}")
+        return "\n".join(lines)
+    def undo_all(self) -> str:
+        """Undo all changes in this session."""
+        if not self.change_tracker:
+            return "Change tracker not available."
+        reverted = self.change_tracker.undo_all()
+        if not reverted:
+            return "Nothing to undo."
+        return f"Undid all {len(reverted)} changes."
+    def restore_change(self, change_id: int) -> str:
+        """Re-apply a reverted change."""
+        if not self.change_tracker:
+            return "Change tracker not available."
+        restored = self.change_tracker.restore(change_id)
+        if restored:
+            return f"Restored: {restored.summary}"
+        return f"Change #{change_id} not found or not reverted."
+    def list_changes(self) -> str:
+        """List all changes in this session."""
+        if not self.change_tracker:
+            return "Change tracker not available."
+        return self.change_tracker.summary()
+    def show_change_diff(self, last_n: int = 3) -> str:
+        """Show diffs for recent changes."""
+        if not self.change_tracker:
+            return "Change tracker not available."
+        return self.change_tracker.diff_summary(last_n)
+    def toggle_dry_run(self, enabled: bool | None = None) -> str:
+        """Enable or disable dry-run mode."""
+        if not self.change_tracker:
+            return "Change tracker not available."
+        if enabled is None:
+            enabled = not self.change_tracker.dry_run
+        self.change_tracker.dry_run = enabled
+        if enabled:
+            return "DRY-RUN MODE ON — changes will be PREVIEWED only, not applied."
+        else:
+            return "DRY-RUN MODE OFF — changes will be applied normally."
+    def _extract_system_prompt(self) -> str:
+        """Return the system prompt from the current conversation state.
+        Cached — only re-scans when messages[0] is replaced (e.g. after compaction).
+        """
+        if self._cached_system_prompt is not None:
+            return self._cached_system_prompt
+        for m in self._state.messages:
+            if m.get("role") == "system":
+                self._cached_system_prompt = m.get("content", "")
+                return self._cached_system_prompt
+        return ""
+    # ── Utility ───────────────────────────────────────────────────────────
+    # ── Session persistence ─────────────────────────────────────────────
+    def save_session(self, session_id: str = "") -> str:
+        """Save current conversation to session storage (manual /save)."""
+        if not self.sessions:
+            return "Session storage not available."
+        sid = session_id or self._current_session_id
+        if not sid:
+            from .session import generate_session_id
+            sid = generate_session_id("manual-save", workspace=str(self.tools.workspace))
+        return self._do_save(sid)
+    def _auto_save_session(self) -> None:
+        """Auto-save after every task completion (fire-and-forget, best-effort)."""
+        if not self.sessions:
+            return
+        # Generate session ID on first auto-save
+        if not self._current_session_id:
+            from .session import generate_session_id
+            # Find first user message for the task hash
+            task_hint = ""
+            for msg in self._state.messages:
+                if msg.get("role") == "user":
+                    task_hint = msg.get("content", "")[:100]
+                    break
+            self._current_session_id = generate_session_id(
+                task_hint or "conversation",
+                skill=self.skills.active_skill.name if self.skills and self.skills.active_skill else "",
+                workspace=str(self.tools.workspace),
+            )
+        try:
+            self._do_save(self._current_session_id)
+        except Exception:
+            logger.warning("Auto-save failed for session %s", self._current_session_id, exc_info=True)
+    def _do_save(self, sid: str) -> str:
+        """Internal: persist messages + update index."""
+        from .utils import sanitize_surrogates
+        first_user_msg = ""
+        for msg in self._state.messages:
+            if msg.get("role") == "user":
+                first_user_msg = sanitize_surrogates(msg.get("content", "")[:200])
+                break
+        self.sessions.save(
+            session_id=sid,
+            messages=self._state.messages,
+            summary=first_user_msg,
+            skill=self.skills.active_skill.name if self.skills and self.skills.active_skill else "",
+            model=self.config.llm.model,
+            workspace=str(self.tools.workspace),
+            tool_call_count=self._state.tool_call_count,
+        )
+        self._current_session_id = sid
+        return sid
+    # Compaction → agent_compact.py (CompactionMixin)
+    @property
+    def session_id(self) -> str:
+        return self._current_session_id
+    # ── Change tracking helper → agent_tools.py (ToolExecutionMixin._read_old_content)
+    def get_token_estimate(self) -> int:
+        """Estimate total tokens in the conversation."""
+        return self.llm.count_tokens_approx(self._state.messages)
+    def get_conversation_summary(self) -> str:
+        msgs = self._state.messages
+        total = len(msgs)
+        tool_calls = sum(1 for m in msgs if m.get("tool_calls"))
+        user_msgs = sum(1 for m in msgs if m.get("role") == "user")
+        tokens = self.get_token_estimate()
+        return (
+            f"Session: {self._current_session_id or 'unsaved'}\n"
+            f"Messages: {total} ({user_msgs} user turns, {tool_calls} tool calls)\n"
+            f"Tokens: ~{tokens:,} / {self.config.agent.max_context_tokens:,}\n"
+            f"Skill: {self.skills.active_skill.name if self.skills and self.skills.active_skill else 'default'}\n"
+            f"Model: {self.config.llm.model}"
+        )
+    def reset(self) -> None:
+        self._state = AgentState(start_time=time.time())
+        self._current_session_id = ""
+        if self.skills:
+            self.skills.deactivate()
+        logger.info("Agent state reset")
+    async def shutdown(self) -> None:
+        """Clean up resources."""
+        # Clawd: final SessionEnd
+        get_clawd().shutdown()
+        await self.llm.close()
+        if self.mcp:
+            await self.mcp.stop_all()