PyPI - axion-code - Versions diffs - 1.0.0__py3-none-any.whl - Mend

axion-code 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

axion/__init__.py +3 -0
axion/api/__init__.py +0 -0
axion/api/anthropic.py +460 -0
axion/api/client.py +259 -0
axion/api/error.py +161 -0
axion/api/ollama.py +597 -0
axion/api/openai_compat.py +805 -0
axion/api/openai_responses.py +627 -0
axion/api/prompt_cache.py +31 -0
axion/api/sse.py +98 -0
axion/api/types.py +451 -0
axion/cli/__init__.py +0 -0
axion/cli/init_cmd.py +50 -0
axion/cli/input.py +290 -0
axion/cli/main.py +2953 -0
axion/cli/render.py +489 -0
axion/cli/tui.py +766 -0
axion/commands/__init__.py +0 -0
axion/commands/handlers/__init__.py +0 -0
axion/commands/handlers/agents.py +51 -0
axion/commands/handlers/builtin_commands.py +367 -0
axion/commands/handlers/mcp.py +59 -0
axion/commands/handlers/models.py +75 -0
axion/commands/handlers/plugins.py +55 -0
axion/commands/handlers/skills.py +61 -0
axion/commands/parsing.py +317 -0
axion/commands/registry.py +166 -0
axion/compat_harness/__init__.py +0 -0
axion/compat_harness/extractor.py +145 -0
axion/plugins/__init__.py +0 -0
axion/plugins/hooks.py +22 -0
axion/plugins/manager.py +391 -0
axion/plugins/manifest.py +270 -0
axion/runtime/__init__.py +0 -0
axion/runtime/bash.py +388 -0
axion/runtime/bootstrap.py +39 -0
axion/runtime/claude_subscription.py +300 -0
axion/runtime/compact.py +233 -0
axion/runtime/config.py +397 -0
axion/runtime/conversation.py +1073 -0
axion/runtime/file_ops.py +613 -0
axion/runtime/git.py +213 -0
axion/runtime/hooks.py +235 -0
axion/runtime/image.py +212 -0
axion/runtime/lanes.py +282 -0
axion/runtime/lsp.py +425 -0
axion/runtime/mcp/__init__.py +0 -0
axion/runtime/mcp/client.py +76 -0
axion/runtime/mcp/lifecycle.py +96 -0
axion/runtime/mcp/stdio.py +318 -0
axion/runtime/mcp/tool_bridge.py +79 -0
axion/runtime/memory.py +196 -0
axion/runtime/oauth.py +329 -0
axion/runtime/openai_subscription.py +346 -0
axion/runtime/permissions.py +247 -0
axion/runtime/plan_mode.py +96 -0
axion/runtime/policy_engine.py +259 -0
axion/runtime/prompt.py +586 -0
axion/runtime/recovery.py +261 -0
axion/runtime/remote.py +28 -0
axion/runtime/sandbox.py +68 -0
axion/runtime/scheduler.py +231 -0
axion/runtime/session.py +365 -0
axion/runtime/sharing.py +159 -0
axion/runtime/skills.py +124 -0
axion/runtime/tasks.py +258 -0
axion/runtime/usage.py +241 -0
axion/runtime/workers.py +186 -0
axion/telemetry/__init__.py +0 -0
axion/telemetry/events.py +67 -0
axion/telemetry/profile.py +49 -0
axion/telemetry/sink.py +60 -0
axion/telemetry/tracer.py +95 -0
axion/tools/__init__.py +0 -0
axion/tools/lane_completion.py +33 -0
axion/tools/registry.py +853 -0
axion/tools/tool_search.py +226 -0
axion_code-1.0.0.dist-info/METADATA +709 -0
axion_code-1.0.0.dist-info/RECORD +82 -0
axion_code-1.0.0.dist-info/WHEEL +4 -0
axion_code-1.0.0.dist-info/entry_points.txt +2 -0
axion_code-1.0.0.dist-info/licenses/LICENSE +21 -0

axion/runtime/conversation.py ADDED Viewed

@@ -0,0 +1,1073 @@
+"""Core conversation loop - coordinates model, tools, hooks, and session.
+Maps to: rust/crates/runtime/src/conversation.rs
+The ConversationRuntime orchestrates the full model turn loop including:
+- Streaming model responses and assembling tool-use blocks
+- Pre/post tool-use hook integration with permission override support
+- Auto-compaction when cumulative input tokens exceed a threshold
+- Session tracing for observability (turn lifecycle, tool execution)
+- Prompt cache event collection from stream metadata
+- Builder pattern for ergonomic construction
+- Session forking for parallel exploration branches
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+import time
+from dataclasses import dataclass, field
+from typing import Any, Callable, Protocol, runtime_checkable
+from axion.api.client import (
+    ProviderClient,
+    max_tokens_for_model,
+    resolve_model_alias,
+)
+from axion.api.types import (
+    ContentBlockDeltaEvent,
+    ContentBlockStartEvent,
+    InputJsonDelta,
+    InputMessage,
+    MessageDeltaEvent,
+    MessageRequest,
+    MessageStartEvent,
+    MessageStopEvent,
+    TextDelta,
+    ThinkingDelta,
+    ToolChoice,
+    ToolDefinition,
+    ToolUseOutputBlock,
+)
+from axion.runtime.compact import (
+    CompactionConfig,
+    CompactionResult,
+    compact_session,
+    estimate_session_tokens,
+)
+from axion.runtime.hooks import HookRunner
+from axion.runtime.permissions import (
+    TOOL_PERMISSION_REQUIREMENTS,
+    PermissionAllow,
+    PermissionDeny,
+    PermissionMode,
+    PermissionOutcome,
+    PermissionOverride,
+    PermissionPolicy,
+    PermissionPromptDecision,
+    PermissionPrompter,
+    PermissionRequest,
+)
+from axion.runtime.session import (
+    ContentBlock,
+    ConversationMessage,
+    ImageBlock,
+    MessageRole,
+    Session,
+    SessionFork,
+    TextBlock,
+    ToolResultBlock,
+    ToolUseBlock,
+)
+from axion.runtime.usage import TokenUsage, UsageTracker
+from axion.telemetry.tracer import SessionTracer
+logger = logging.getLogger(__name__)
+# Fallback auto-compaction threshold.
+# NOTE(review): presumably consumed by _resolve_compaction_threshold (not visible here) - confirm.
+DEFAULT_AUTO_COMPACTION_THRESHOLD = 100_000
+# Name of an environment variable.
+# NOTE(review): presumably lets users override the threshold above - confirm against its reader.
+_ENV_COMPACTION_KEY = "CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS"
+# Context window sizes per model family (in tokens).
+# Keys are matched as prefixes of the resolved model name in
+# ConversationRuntime._preflight_check; models that match no prefix fall back
+# to a 200_000-token window there.
+_CONTEXT_WINDOWS: dict[str, int] = {
+    "claude-opus": 200_000,
+    "claude-sonnet": 200_000,
+    "claude-haiku": 200_000,
+}
+# ---------------------------------------------------------------------------
+# Protocols (traits)
+# ---------------------------------------------------------------------------
+@runtime_checkable
+class ToolExecutor(Protocol):
+    """Trait for tool dispatchers that execute model-requested tools.
+    Any object exposing a matching async ``execute`` method satisfies this
+    protocol; ``@runtime_checkable`` additionally permits ``isinstance`` checks.
+    """
+    # Both the tool input and the tool result are plain strings.
+    # NOTE(review): tool_input is presumably the tool's JSON-encoded arguments - confirm against callers.
+    async def execute(self, tool_name: str, tool_input: str) -> str: ...
+# ---------------------------------------------------------------------------
+# Events emitted during a turn
+# ---------------------------------------------------------------------------
+@dataclass(frozen=True)
+class AssistantTextDelta:
+    """Incremental text chunk from the model.
+    Emitted once per non-empty text delta while a response is streamed.
+    """
+    # The text fragment exactly as received in the delta.
+    text: str
+@dataclass(frozen=True)
+class AssistantToolUse:
+    """Model requested a tool invocation.
+    Emitted after the stream has finished, once per assembled tool-use block.
+    """
+    # Identifier of the tool-use block as reported by the provider.
+    id: str
+    # Name of the requested tool.
+    name: str
+    # Concatenation of the streamed partial-JSON fragments (may be empty).
+    input: str
+@dataclass(frozen=True)
+class AssistantUsage:
+    """Token usage snapshot for a single iteration.
+    Emitted once at the end of each streamed model response.
+    """
+    # Usage counters collected from that response's stream events.
+    usage: TokenUsage
+@dataclass(frozen=True)
+class AssistantPromptCache:
+    """Prompt cache hit/miss information from streaming metadata.
+    Only emitted when at least one of the two counters is greater than zero.
+    """
+    # Input tokens the provider reported as cache creation.
+    cache_creation_input_tokens: int
+    # Input tokens the provider reported as cache reads.
+    cache_read_input_tokens: int
+@dataclass(frozen=True)
+class AssistantMessageStop:
+    """End of model message, includes stop reason."""
+    # Stop reason taken from the last message-delta event; None if none was seen.
+    stop_reason: str | None
+# Union of every event type a ConversationRuntime.on_event callback can receive.
+AssistantEvent = (
+    AssistantTextDelta
+    | AssistantToolUse
+    | AssistantUsage
+    | AssistantPromptCache
+    | AssistantMessageStop
+)
+# ---------------------------------------------------------------------------
+# Prompt cache event tracking
+# ---------------------------------------------------------------------------
+@dataclass
+class PromptCacheEvent:
+    """Collected prompt cache stats from a single streaming response."""
+    # Cache-creation input tokens reported in the message-start usage.
+    cache_creation_input_tokens: int = 0
+    # Cache-read input tokens reported in the message-start usage.
+    cache_read_input_tokens: int = 0
+    # Wall-clock time in milliseconds since the epoch when the event was recorded.
+    timestamp_ms: int = 0
+# ---------------------------------------------------------------------------
+# Turn summary
+# ---------------------------------------------------------------------------
+@dataclass
+class TurnSummary:
+    """Summary of one completed runtime turn."""
+    # Assistant messages stored in the session during the turn, in order.
+    assistant_messages: list[ConversationMessage] = field(default_factory=list)
+    # Tool-result messages stored in the session during the turn, in order.
+    tool_results: list[ConversationMessage] = field(default_factory=list)
+    # Number of loop iterations that were started.
+    iterations: int = 0
+    # Token usage summed over all streamed responses of the turn.
+    usage: TokenUsage = field(default_factory=TokenUsage)
+    # Concatenated assistant text, plus any "[Budget reached ...]" notice.
+    text_output: str = ""
+    # One entry per response that reported prompt-cache activity.
+    prompt_cache_events: list[PromptCacheEvent] = field(default_factory=list)
+    # Result of the most recent auto-compaction in this turn, if any.
+    compaction_result: CompactionResult | None = None
+    # True once any auto-compaction happened during the turn.
+    was_auto_compacted: bool = False
+# ---------------------------------------------------------------------------
+# Errors
+# ---------------------------------------------------------------------------
+class ConversationError(Exception):
+    """Error during conversation turn.
+    Args:
+        message: Human-readable description passed to ``Exception``.
+        cause: Optional underlying exception, kept on ``self.cause``.
+    """
+    def __init__(self, message: str, *, cause: Exception | None = None) -> None:
+        super().__init__(message)
+        self.cause = cause
+class ToolError(Exception):
+    """Error from tool execution.
+    Note that this derives from ``Exception`` directly, not from
+    ``ConversationError``.
+    Args:
+        message: Human-readable description passed to ``Exception``.
+        tool_name: Name of the tool that failed (empty string if unknown).
+        tool_use_id: Id of the tool-use block that failed (empty string if unknown).
+        cause: Optional underlying exception, kept on ``self.cause``.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        tool_name: str = "",
+        tool_use_id: str = "",
+        cause: Exception | None = None,
+    ) -> None:
+        super().__init__(message)
+        self.tool_name = tool_name
+        self.tool_use_id = tool_use_id
+        self.cause = cause
+class MaxIterationsError(ConversationError):
+    """Raised when the tool loop exceeds max_iterations.
+    NOTE(review): ConversationRuntime.run_turn currently only logs a warning
+    when the limit is reached; confirm whether anything raises this.
+    """
+class PermissionDeniedError(ConversationError):
+    """Raised when a tool is denied by permission policy or hooks.
+    NOTE(review): the tool-execution code visible in this module reports a
+    denial as an error tool result instead of raising; confirm where this
+    exception is actually used.
+    """
+class ContextWindowExceededError(ConversationError):
+    """Raised when estimated tokens exceed the model's context window.
+    Raised by ConversationRuntime._preflight_check when the estimated input
+    tokens plus the model's maximum output tokens do not fit in the window.
+    """
+# ---------------------------------------------------------------------------
+# Conversation runtime
+# ---------------------------------------------------------------------------
+@dataclass
+class ConversationRuntime:
+    """Coordinates the model loop, tool execution, and session updates.
+    Maps to: rust/crates/runtime/src/conversation.rs::ConversationRuntime
+    The runtime implements the full agentic loop:
+      1. Send user message + history to model
+      2. Stream and assemble the response (text + tool_use blocks)
+      3. For each tool_use: run pre-hooks, check permissions, execute, run post-hooks
+      4. Append results, check auto-compaction, and loop
+      5. Return when model produces final text (end_turn) or max iterations reached
+    """
+    # Conversation state that every turn reads from and appends to.
+    session: Session
+    # Client used to stream model responses.
+    provider: ProviderClient
+    # When None, no tool definitions are sent to the model.
+    tool_executor: ToolExecutor | None = None
+    permission_policy: PermissionPolicy = field(default_factory=PermissionPolicy)
+    permission_prompter: PermissionPrompter | None = None
+    # Optional pre/post tool-use hooks.
+    hook_runner: HookRunner | None = None
+    # Optional tracer for turn and tool lifecycle events.
+    session_tracer: SessionTracer | None = None
+    # An empty prompt is sent to the provider as None.
+    system_prompt: str = ""
+    # Model name or alias; resolved with resolve_model_alias before each request.
+    model: str = "claude-sonnet-4-6"
+    # Upper bound on model round-trips within one run_turn call.
+    max_iterations: int = 50
+    # The lambda defers the name lookup to instantiation time.
+    # NOTE(review): _resolve_compaction_threshold is presumably defined later in this module - confirm.
+    auto_compaction_threshold: int = field(default_factory=lambda: _resolve_compaction_threshold())
+    usage_tracker: UsageTracker = field(default_factory=UsageTracker)
+    # Optional observer callbacks; each one is skipped when left as None.
+    on_event: Callable[[AssistantEvent], None] | None = None
+    on_text_delta: Callable[[str], None] | None = None
+    on_tool_use: Callable[[str, str], None] | None = None  # (tool_name, tool_input)
+    on_tool_result: Callable[[str, str, bool], None] | None = None  # (tool_name, output, is_error)
+    on_thinking: Callable[[str], None] | None = None  # (thinking_text)
+    cost_budget_usd: float | None = None  # Max spend per session (None = unlimited)
+    plan_mode_active: bool = False  # When True, only read-only tools allowed
+    # -- Builder helpers -----------------------------------------------------
+    def with_max_iterations(self, n: int) -> ConversationRuntime:
+        """Set maximum tool-loop iterations per turn.
+        Mutates this runtime in place and returns it so calls can be chained.
+        """
+        self.max_iterations = n
+        return self
+    def with_auto_compaction_threshold(self, tokens: int) -> ConversationRuntime:
+        """Set the input-token threshold that triggers auto-compaction.
+        Mutates this runtime in place and returns it so calls can be chained.
+        """
+        self.auto_compaction_threshold = tokens
+        return self
+    def with_hook_runner(self, runner: HookRunner) -> ConversationRuntime:
+        """Attach a hook runner for pre/post tool-use hooks.
+        Mutates this runtime in place and returns it so calls can be chained.
+        """
+        self.hook_runner = runner
+        return self
+    def with_session_tracer(self, tracer: SessionTracer) -> ConversationRuntime:
+        """Attach a session tracer for observability.
+        Mutates this runtime in place and returns it so calls can be chained.
+        """
+        self.session_tracer = tracer
+        return self
+    def with_permission_prompter(self, prompter: PermissionPrompter) -> ConversationRuntime:
+        """Attach an interactive permission prompter for PROMPT mode.
+        Mutates this runtime in place and returns it so calls can be chained.
+        """
+        self.permission_prompter = prompter
+        return self
+    def with_tool_executor(self, executor: ToolExecutor) -> ConversationRuntime:
+        """Set the tool executor.
+        Mutates this runtime in place and returns it so calls can be chained.
+        """
+        self.tool_executor = executor
+        return self
+    def with_system_prompt(self, prompt: str) -> ConversationRuntime:
+        """Set the system prompt.
+        Mutates this runtime in place and returns it so calls can be chained.
+        """
+        self.system_prompt = prompt
+        return self
+    # -- Session operations --------------------------------------------------
+    def fork_session(self, branch_name: str | None = None) -> ConversationRuntime:
+        """Create a forked copy of this runtime with a new session.
+        The forked session receives deep copies of the current messages and
+        compaction state, so later changes on either side do not leak into
+        the other. The fork metadata references the parent session id.
+        Provider, tool executor, permission settings, hooks, tracer and all
+        callbacks are shared with the parent. The cost budget and the
+        plan-mode flag are carried over as well, so a fork can neither leave
+        plan mode nor drop its spend limit implicitly. Usage tracking starts
+        from zero, which means the budget is measured against the fork's own
+        spend.
+        Args:
+            branch_name: Optional label recorded in the fork metadata.
+        Returns:
+            A new ConversationRuntime bound to the forked session.
+        """
+        import copy
+        forked_session = Session(
+            messages=copy.deepcopy(self.session.messages),
+            compaction=copy.deepcopy(self.session.compaction),
+            fork=SessionFork(
+                parent_session_id=self.session.session_id,
+                branch_name=branch_name,
+            ),
+        )
+        return ConversationRuntime(
+            session=forked_session,
+            provider=self.provider,
+            tool_executor=self.tool_executor,
+            permission_policy=self.permission_policy,
+            permission_prompter=self.permission_prompter,
+            hook_runner=self.hook_runner,
+            session_tracer=self.session_tracer,
+            system_prompt=self.system_prompt,
+            model=self.model,
+            max_iterations=self.max_iterations,
+            auto_compaction_threshold=self.auto_compaction_threshold,
+            usage_tracker=UsageTracker(),
+            on_event=self.on_event,
+            on_text_delta=self.on_text_delta,
+            # These five were previously dropped, silently resetting the fork
+            # to "no callbacks, unlimited budget, plan mode off".
+            on_tool_use=self.on_tool_use,
+            on_tool_result=self.on_tool_result,
+            on_thinking=self.on_thinking,
+            cost_budget_usd=self.cost_budget_usd,
+            plan_mode_active=self.plan_mode_active,
+        )
+    def estimated_tokens(self) -> int:
+        """Estimate the current token count of the session.
+        Delegates to ``estimate_session_tokens``; only the session is passed,
+        so the system prompt is not part of this figure.
+        """
+        return estimate_session_tokens(self.session)
+    # -- Preflight check -----------------------------------------------------
+    def _preflight_check(self) -> None:
+        """Reject a request that cannot fit into the model's context window.
+        The input size is approximated as the system prompt length divided by
+        four plus ``estimate_session_tokens`` for the session. The model's
+        maximum output tokens are then added on top of that estimate.
+        Raises:
+            ContextWindowExceededError: If the sum exceeds the context window.
+        """
+        prompt_estimate = len(self.system_prompt) // 4 if self.system_prompt else 0
+        input_estimate = prompt_estimate + estimate_session_tokens(self.session)
+        model_name = resolve_model_alias(self.model)
+        # First matching family prefix wins; unknown models fall back to 200k.
+        window_size = next(
+            (
+                size
+                for family, size in _CONTEXT_WINDOWS.items()
+                if model_name.startswith(family)
+            ),
+            200_000,
+        )
+        reserved_output = max_tokens_for_model(model_name)
+        required = input_estimate + reserved_output
+        if required <= window_size:
+            return
+        raise ContextWindowExceededError(
+            f"Estimated {input_estimate} input tokens + {reserved_output} max output tokens "
+            f"= {required} exceeds context window of {window_size} "
+            f"for model {model_name}. Consider compacting the session."
+        )
+    # -- Main turn loop ------------------------------------------------------
+    async def run_turn(
+        self,
+        user_input: str,
+        images: list[tuple[str, str]] | None = None,
+    ) -> TurnSummary:
+        """Execute a full model turn with tool loop.
+        Args:
+            user_input: The user's text input.
+            images: Optional list of (media_type, base64_data) tuples for
+                    image inputs (screenshots, pasted images).
+        Returns:
+            A TurnSummary with the messages, usage and text produced.
+        Raises:
+            ContextWindowExceededError: From the preflight check, which runs
+                outside the try block and is therefore not wrapped.
+            ConversationError: Wraps any exception raised inside the loop.
+        1. Send user message + history to model
+        2. If model requests tools, execute them (with hooks) and loop
+        3. Auto-compact if token threshold is exceeded
+        4. Return when model produces final text (end_turn)
+        """
+        self._trace("turn_started", {"user_input_length": len(user_input)})
+        if images:
+            # Push a combined text+image user message
+            blocks: list[ContentBlock] = []
+            for media_type, b64_data in images:
+                blocks.append(ImageBlock(media_type=media_type, data=b64_data))
+            if user_input:
+                blocks.append(TextBlock(text=user_input))
+            self.session.push_message(
+                ConversationMessage(role=MessageRole.USER, blocks=blocks)
+            )
+        else:
+            self.session.push_user_text(user_input)
+        summary = TurnSummary()
+        iteration = 0
+        cumulative_input_tokens = 0
+        # Preflight: ensure we won't exceed the context window
+        # NOTE(review): the user message has already been pushed at this point,
+        # so it stays in the session when the check raises - confirm intended.
+        self._preflight_check()
+        try:
+            api_messages = self._build_api_messages()
+            while iteration < self.max_iterations:
+                iteration += 1
+                summary.iterations = iteration
+                self._trace("assistant_iteration_started", {"iteration": iteration})
+                # Pre-check budget before making an API call
+                # (skipped on the first iteration, so a turn always makes at
+                # least one request).
+                if self.cost_budget_usd is not None and iteration > 1:
+                    current_cost = self.usage_tracker.total.estimate_cost_usd()
+                    remaining = self.cost_budget_usd - current_cost.total_cost_usd()
+                    if remaining <= 0:
+                        summary.text_output += (
+                            f"\n\n[Budget reached: ${current_cost.total_cost_usd():.4f} "
+                            f"of ${self.cost_budget_usd:.4f}. Stopping before next API call.]"
+                        )
+                        break
+                # Stream one model response
+                stream_result = await self._stream_model_response(api_messages)
+                # Accumulate usage
+                summary.usage += stream_result.usage
+                self.usage_tracker.record_turn(stream_result.usage)
+                cumulative_input_tokens += stream_result.usage.input_tokens
+                # Check cost budget — soft stop (don't crash, just stop looping)
+                # NOTE(review): this break happens before the assistant message
+                # below is stored, so the response that was just streamed is
+                # added neither to the session nor to summary.text_output -
+                # confirm this is intended.
+                if self.cost_budget_usd is not None:
+                    total_cost = self.usage_tracker.total.estimate_cost_usd()
+                    if total_cost.total_cost_usd() >= self.cost_budget_usd:
+                        logger.info(
+                            "Cost budget reached: $%.4f >= $%.4f",
+                            total_cost.total_cost_usd(), self.cost_budget_usd,
+                        )
+                        summary.text_output += (
+                            f"\n\n[Budget reached: ${total_cost.total_cost_usd():.4f} "
+                            f"of ${self.cost_budget_usd:.4f}. "
+                            f"Use /cost for details or restart with a higher --budget.]"
+                        )
+                        break  # Stop the tool loop gracefully
+                # Collect prompt cache events
+                if stream_result.prompt_cache_event:
+                    summary.prompt_cache_events.append(stream_result.prompt_cache_event)
+                # Store assistant message in session
+                assistant_msg = self._build_assistant_message(
+                    stream_result.text_parts, stream_result.tool_uses, stream_result.usage
+                )
+                if assistant_msg:
+                    self.session.push_message(assistant_msg)
+                    summary.assistant_messages.append(assistant_msg)
+                summary.text_output += stream_result.full_text
+                self._trace("assistant_iteration_completed", {
+                    "iteration": iteration,
+                    "tool_use_count": len(stream_result.tool_uses),
+                    "stop_reason": stream_result.stop_reason or "unknown",
+                })
+                # If no tool uses or stop_reason is end_turn, we're done
+                if not stream_result.tool_uses or stream_result.stop_reason == "end_turn":
+                    break
+                # Execute tools (with full hook integration)
+                tool_result_messages = await self._execute_tools_with_hooks(
+                    stream_result.tool_uses
+                )
+                for trm in tool_result_messages:
+                    self.session.push_message(trm)
+                    summary.tool_results.append(trm)
+                # Auto-compaction check
+                # The value passed is the sum of input tokens over all
+                # iterations of this turn so far.
+                compaction = self._maybe_auto_compact(cumulative_input_tokens)
+                if compaction is not None:
+                    summary.compaction_result = compaction
+                    summary.was_auto_compacted = True
+                    logger.info(
+                        "Auto-compacted session: %d -> %d estimated tokens",
+                        compaction.estimated_tokens_before,
+                        compaction.estimated_tokens_after,
+                    )
+                # Rebuild API messages for next iteration
+                api_messages = self._build_api_messages()
+            else:
+                # Loop ended without break -- max iterations exceeded
+                # Only a warning is logged; the summary is still returned.
+                logger.warning(
+                    "Turn reached max iterations (%d)", self.max_iterations
+                )
+        except Exception as exc:
+            # Every failure inside the loop is traced and re-raised as a plain
+            # ConversationError with the original exception chained as cause.
+            self._trace("turn_failed", {"error": str(exc)})
+            raise ConversationError(
+                f"Turn failed at iteration {iteration}: {exc}", cause=exc
+            ) from exc
+        self._trace("turn_completed", {
+            "iterations": summary.iterations,
+            "total_input_tokens": summary.usage.input_tokens,
+            "total_output_tokens": summary.usage.output_tokens,
+            "was_compacted": summary.was_auto_compacted,
+        })
+        return summary
+    # -- Streaming -----------------------------------------------------------
+    @dataclass
+    class _StreamResult:
+        """Internal: assembled result from one streaming model response."""
+        # Text deltas in arrival order.
+        text_parts: list[str] = field(default_factory=list)
+        # Thinking deltas in arrival order.
+        thinking_parts: list[str] = field(default_factory=list)
+        # One dict per tool-use block with the keys "id", "name" and "input".
+        tool_uses: list[dict[str, Any]] = field(default_factory=list)
+        usage: TokenUsage = field(default_factory=TokenUsage)
+        # Stop reason from the last message-delta event, if any.
+        stop_reason: str | None = None
+        # Set only when the response reported prompt-cache activity.
+        prompt_cache_event: PromptCacheEvent | None = None
+        @property
+        def full_text(self) -> str:
+            """Return all text deltas joined into a single string."""
+            return "".join(self.text_parts)
+    def _build_tool_definitions(self) -> list[ToolDefinition] | None:
+        """Translate the registered tools into API tool definitions.
+        Returns:
+            One ToolDefinition per tool in the registry, or None when no tool
+            executor is configured or the registry is empty (the model then
+            sees no tools at all).
+        """
+        if self.tool_executor is None:
+            return None
+        from axion.tools.registry import get_tool_registry
+        definitions = [
+            ToolDefinition(
+                name=entry.spec.name,
+                description=entry.spec.description,
+                input_schema=entry.spec.input_schema,
+            )
+            for entry in get_tool_registry().all_tools()
+        ]
+        # An empty list is reported as None, just like a missing executor.
+        return definitions or None
+    async def _stream_model_response(
+        self, api_messages: list[InputMessage]
+    ) -> _StreamResult:
+        """Stream a single model request and assemble the response.
+        Text, thinking and tool-input deltas are collected as they arrive and
+        forwarded to the configured callbacks. Tool-use blocks are assembled
+        once the stream has ended, followed by the usage event and, if the
+        response reported cache activity, the prompt-cache event.
+        Args:
+            api_messages: Conversation history in API format.
+        Returns:
+            The assembled text parts, thinking parts, tool uses, usage, stop
+            reason and optional prompt-cache event.
+        """
+        resolved_model = resolve_model_alias(self.model)
+        # Build tool definitions so the model knows what tools are available
+        tool_defs = self._build_tool_definitions()
+        request = MessageRequest(
+            model=resolved_model,
+            max_tokens=max_tokens_for_model(resolved_model),
+            messages=api_messages,
+            system=self.system_prompt or None,
+            tools=tool_defs,
+            tool_choice=ToolChoice.auto() if tool_defs else None,
+            stream=True,
+        )
+        result = ConversationRuntime._StreamResult()
+        # Both dicts are keyed by the content-block index of the tool-use block.
+        current_tool_inputs: dict[int, list[str]] = {}
+        current_tool_blocks: dict[int, dict[str, Any]] = {}
+        async for event in self.provider.stream_message(request):
+            match event:
+                case MessageStartEvent(message=msg) if msg is not None:
+                    result.usage.input_tokens = msg.usage.input_tokens
+                    result.usage.cache_creation_input_tokens = (
+                        msg.usage.cache_creation_input_tokens
+                    )
+                    result.usage.cache_read_input_tokens = (
+                        msg.usage.cache_read_input_tokens
+                    )
+                    # Collect prompt cache event
+                    if (
+                        msg.usage.cache_creation_input_tokens > 0
+                        or msg.usage.cache_read_input_tokens > 0
+                    ):
+                        result.prompt_cache_event = PromptCacheEvent(
+                            cache_creation_input_tokens=msg.usage.cache_creation_input_tokens,
+                            cache_read_input_tokens=msg.usage.cache_read_input_tokens,
+                            timestamp_ms=int(time.time() * 1000),
+                        )
+                case ContentBlockStartEvent(index=idx, content_block=block):
+                    if isinstance(block, ToolUseOutputBlock):
+                        current_tool_blocks[idx] = {
+                            "id": block.id,
+                            "name": block.name,
+                        }
+                        current_tool_inputs[idx] = []
+                case ContentBlockDeltaEvent(index=idx, delta=delta):
+                    if isinstance(delta, TextDelta) and delta.text:
+                        result.text_parts.append(delta.text)
+                        self._emit_event(AssistantTextDelta(text=delta.text))
+                        if self.on_text_delta:
+                            self.on_text_delta(delta.text)
+                    elif isinstance(delta, InputJsonDelta):
+                        # Fragments for blocks that never started are ignored.
+                        if idx in current_tool_inputs:
+                            current_tool_inputs[idx].append(delta.partial_json)
+                    elif isinstance(delta, ThinkingDelta) and delta.thinking:
+                        result.thinking_parts.append(delta.thinking)
+                        if self.on_thinking:
+                            try:
+                                self.on_thinking(delta.thinking)
+                            except Exception:
+                                # Thinking output is best-effort: a failing
+                                # callback must not abort the stream, but the
+                                # failure should stay visible when debugging.
+                                logger.debug(
+                                    "on_thinking callback failed", exc_info=True
+                                )
+                case MessageDeltaEvent(delta=d, usage=u):
+                    result.usage.output_tokens = u.output_tokens
+                    result.stop_reason = d.stop_reason
+                case MessageStopEvent():
+                    self._emit_event(
+                        AssistantMessageStop(stop_reason=result.stop_reason)
+                    )
+        # Assemble completed tool uses
+        # NOTE(review): a tool-use block that received no JSON fragments ends
+        # up with an empty-string input; confirm consumers accept that.
+        for idx, block_info in current_tool_blocks.items():
+            input_json = "".join(current_tool_inputs.get(idx, []))
+            tool_use = {
+                "id": block_info["id"],
+                "name": block_info["name"],
+                "input": input_json,
+            }
+            result.tool_uses.append(tool_use)
+            self._emit_event(
+                AssistantToolUse(
+                    id=tool_use["id"],
+                    name=tool_use["name"],
+                    input=input_json,
+                )
+            )
+        # Emit usage event
+        self._emit_event(AssistantUsage(usage=result.usage))
+        # Emit prompt cache event if present
+        if result.prompt_cache_event:
+            self._emit_event(AssistantPromptCache(
+                cache_creation_input_tokens=result.prompt_cache_event.cache_creation_input_tokens,
+                cache_read_input_tokens=result.prompt_cache_event.cache_read_input_tokens,
+            ))
+        return result
+    # -- Tool execution with hooks -------------------------------------------
+    async def _execute_tools_with_hooks(
+        self, tool_uses: list[dict[str, Any]]
+    ) -> list[ConversationMessage]:
+        """Run every requested tool call and collect the result messages.
+        Calls named ``Agent`` are deferred and run together through
+        ``asyncio.gather``. Every other call is awaited one at a time, in
+        request order, before the Agent batch starts, to avoid race
+        conditions on shared state (filesystem, session, etc.). The returned
+        list therefore holds the non-Agent results first, followed by the
+        Agent results.
+        An exception raised by one call of a multi-call Agent batch is turned
+        into an error tool result for that call instead of propagating.
+        """
+        import asyncio
+        agent_tool_names = {"Agent"}
+        ordered_calls = [tu for tu in tool_uses if tu["name"] not in agent_tool_names]
+        concurrent_calls = [tu for tu in tool_uses if tu["name"] in agent_tool_names]
+        collected: list[ConversationMessage] = []
+        # Non-Agent tools (file ops, bash, etc.) strictly one after another.
+        for call in ordered_calls:
+            collected.append(await self._execute_single_tool(call))
+        if not concurrent_calls:
+            return collected
+        # A lone Agent call needs no gather and propagates its exceptions.
+        if len(concurrent_calls) == 1:
+            collected.append(await self._execute_single_tool(concurrent_calls[0]))
+            return collected
+        logger.info(
+            "Executing %d Agent calls in parallel", len(concurrent_calls)
+        )
+        outcomes = await asyncio.gather(
+            *(self._execute_single_tool(call) for call in concurrent_calls),
+            return_exceptions=True,
+        )
+        # gather keeps input order, so each outcome lines up with its call.
+        for call, outcome in zip(concurrent_calls, outcomes):
+            if isinstance(outcome, BaseException):
+                collected.append(
+                    self._make_tool_result(
+                        call["id"], call["name"],
+                        f"Agent execution failed: {outcome}",
+                        is_error=True,
+                    )
+                )
+            elif isinstance(outcome, ConversationMessage):
+                collected.append(outcome)
+        return collected
+    async def _execute_single_tool(
+        self, tu: dict[str, Any]
+    ) -> ConversationMessage:
+        """Execute a single tool call with full hook integration."""
+        tool_name = tu["name"]
+        tool_input = tu["input"]
+        tool_id = tu["id"]
+        self._trace("tool_execution_started", {
+            "tool_name": tool_name,
+            "tool_use_id": tool_id,
+        })
+        # ---- Phase 1: Pre-tool-use hooks ----
+        effective_input = tool_input
+        permission_override: PermissionOverride | None = None
+        if self.hook_runner:
+            pre_result = await self.hook_runner.run_pre_tool_use(
+                tool_name, tool_input
+            )
+            # Hook denied execution outright
+            if pre_result.denied:
+                deny_reason = "; ".join(pre_result.messages) or "Denied by pre-tool-use hook"
+                result_msg = self._make_tool_result(
+                    tool_id, tool_name, f"Hook denied: {deny_reason}", is_error=True
+                )
+                self._trace("tool_execution_finished", {
+                    "tool_name": tool_name,
+                    "tool_use_id": tool_id,
+                    "outcome": "hook_denied",
+                })
+                return result_msg
+            # Hook may have updated the input
+            if pre_result.updated_input is not None:
+                effective_input = pre_result.updated_input
+                logger.debug(
+                    "Pre-hook updated input for tool '%s'", tool_name
+                )
+            # Hook may have set a permission override
+            if pre_result.permission_override is not None:
+                try:
+                    permission_override = PermissionOverride(
+                        pre_result.permission_override
+                    )
+                except ValueError:
+                    logger.warning(
+                        "Invalid permission_override from hook: %s",
+                        pre_result.permission_override,
+                    )
+        # ---- Phase 2: Permission check ----
+        permission_outcome = await self._resolve_permission(
+            tool_name, effective_input, permission_override
+        )
+        if isinstance(permission_outcome, PermissionDeny):
+            result_msg = self._make_tool_result(
+                tool_id,
+                tool_name,
+                f"Permission denied: {permission_outcome.reason}",
+                is_error=True,
+            )
+            self._trace("tool_execution_finished", {
+                "tool_name": tool_name,
+                "tool_use_id": tool_id,
+                "outcome": "permission_denied",
+            })
+            return result_msg
+        # ---- Plan mode check: block write tools ----
+        if self.plan_mode_active:
+            from axion.runtime.plan_mode import get_plan_mode_denial_message, is_tool_allowed_in_plan_mode
+            if not is_tool_allowed_in_plan_mode(tool_name):
+                return self._make_tool_result(
+                    tool_id, tool_name,
+                    get_plan_mode_denial_message(tool_name),
+                    is_error=True,
+                )
+        # ---- Phase 3: Execute tool ----
+        # Notify caller that tool is about to execute
+        if self.on_tool_use is not None:
+            try:
+                self.on_tool_use(tool_name, effective_input)
+            except Exception:
+                pass
+        if self.tool_executor is None:
+            output = f"No tool executor configured for '{tool_name}'"
+            is_error = True
+        else:
+            try:
+                output = await self.tool_executor.execute(
+                    tool_name, effective_input
+                )
+                is_error = False
+            except Exception as exc:
+                output = f"Tool error: {exc}"
+                is_error = True
+                logger.warning("Tool '%s' failed: %s", tool_name, exc)
+                # ---- Phase 3b: Post-tool-use-failure hooks ----
+                if self.hook_runner:
+                    fail_result = await self.hook_runner.run_post_tool_use_failure(
+                        tool_name, effective_input, str(exc)
+                    )
+                    if fail_result.messages:
+                        output = self._merge_hook_feedback(
+                            output, fail_result.messages
+                        )
+        # ---- Phase 4: Post-tool-use hooks (on success) ----
+        if not is_error and self.hook_runner:
+            post_result = await self.hook_runner.run_post_tool_use(
+                tool_name, effective_input, output, is_error=False
+            )
+            # Post-hook can retroactively mark as error
+            if post_result.denied:
+                is_error = True
+                deny_reason = (
+                    "; ".join(post_result.messages)
+                    or "Retroactively denied by post-tool-use hook"
+                )
+                output = f"Post-hook error: {deny_reason}\nOriginal output: {output}"
+            elif post_result.messages:
+                output = self._merge_hook_feedback(output, post_result.messages)
+        # Notify caller of tool result
+        if self.on_tool_result is not None:
+            try:
+                self.on_tool_result(tool_name, output, is_error)
+            except Exception:
+                pass
+        result_msg = self._make_tool_result(
+            tool_id, tool_name, output, is_error=is_error
+        )
+        self._trace("tool_execution_finished", {
+            "tool_name": tool_name,
+            "tool_use_id": tool_id,
+            "outcome": "error" if is_error else "success",
+        })
+        return result_msg
+    # -- Permission resolution -----------------------------------------------
+    async def _resolve_permission(
+        self,
+        tool_name: str,
+        tool_input: str,
+        hook_override: PermissionOverride | None,
+    ) -> PermissionOutcome:
+        """Resolve permission for a tool call, respecting hook overrides.
+        Priority:
+          1. Hook override (allow/deny/ask)
+          2. Policy-based authorization
+          3. Interactive prompter (if policy returned __NEEDS_PROMPT__)
+          4. Cache and persist the decision
+        """
+        if hook_override is not None:
+            if hook_override == PermissionOverride.ALLOW:
+                return PermissionAllow()
+            if hook_override == PermissionOverride.DENY:
+                return PermissionDeny(reason="Denied by hook permission override")
+            # ASK falls through to normal policy + prompter flow
+        outcome = self.permission_policy.authorize(tool_name, tool_input)
+        # Check if the policy needs interactive approval
+        if (
+            isinstance(outcome, PermissionDeny)
+            and outcome.reason.startswith("__NEEDS_PROMPT__")
+            and self.permission_prompter is not None
+        ):
+            request = PermissionRequest(
+                tool_name=tool_name,
+                input_json=tool_input,
+                current_mode=self.permission_policy.mode,
+                required_mode=TOOL_PERMISSION_REQUIREMENTS.get(
+                    tool_name, PermissionMode.WORKSPACE_WRITE
+                ),
+                reason=f"Tool '{tool_name}' requires approval",
+            )
+            decision = await self.permission_prompter.decide(request)
+            if decision == PermissionPromptDecision.ALLOW:
+                # Cache the decision so we don't ask again for this tool
+                result = PermissionAllow()
+                self.permission_policy.remember_decision(tool_name, result)
+                return result
+            return PermissionDeny(reason=f"User denied '{tool_name}'")
+        return outcome
+    # -- Auto-compaction -----------------------------------------------------
+    def _maybe_auto_compact(self, cumulative_input_tokens: int) -> CompactionResult | None:
+        """Check if auto-compaction should trigger and perform it."""
+        if cumulative_input_tokens < self.auto_compaction_threshold:
+            return None
+        config = CompactionConfig(max_tokens=self.auto_compaction_threshold)
+        result = compact_session(self.session, config)
+        if result is not None:
+            self._trace("session_auto_compacted", {
+                "tokens_before": result.estimated_tokens_before,
+                "tokens_after": result.estimated_tokens_after,
+                "removed_count": result.removed_count,
+            })
+        return result
+    # -- Message building helpers --------------------------------------------
+    @staticmethod
+    def _build_assistant_message(
+        text_parts: list[str],
+        tool_uses: list[dict[str, Any]],
+        usage: TokenUsage,
+    ) -> ConversationMessage | None:
+        """Assemble an assistant ConversationMessage from streaming output."""
+        full_text = "".join(text_parts)
+        blocks: list[ContentBlock] = []
+        if full_text:
+            blocks.append(TextBlock(text=full_text))
+        for tu in tool_uses:
+            blocks.append(
+                ToolUseBlock(id=tu["id"], name=tu["name"], input=tu["input"])
+            )
+        if not blocks:
+            return None
+        return ConversationMessage(
+            role=MessageRole.ASSISTANT,
+            blocks=blocks,
+            usage=usage,
+        )
+    @staticmethod
+    def _make_tool_result(
+        tool_use_id: str,
+        tool_name: str,
+        output: str,
+        *,
+        is_error: bool = False,
+    ) -> ConversationMessage:
+        """Create a tool-result ConversationMessage."""
+        return ConversationMessage(
+            role=MessageRole.USER,
+            blocks=[
+                ToolResultBlock(
+                    tool_use_id=tool_use_id,
+                    tool_name=tool_name,
+                    output=output,
+                    is_error=is_error,
+                )
+            ],
+        )
+    @staticmethod
+    def _merge_hook_feedback(output: str, hook_messages: list[str]) -> str:
+        """Merge hook feedback messages into tool output."""
+        feedback = "\n".join(f"[hook] {m}" for m in hook_messages if m)
+        if not feedback:
+            return output
+        return f"{output}\n\n{feedback}"
+    # -- API message conversion ----------------------------------------------
+    def _build_api_messages(self) -> list[InputMessage]:
+        """Convert session messages to API input format."""
+        from axion.api.types import (
+            ImageInputBlock,
+            TextInputBlock,
+            ToolResultTextContent,
+            ToolUseInputBlock,
+        )
+        from axion.api.types import (
+            ToolResultBlock as ApiToolResultBlock,
+        )
+        api_messages: list[InputMessage] = []
+        for msg in self.session.messages:
+            blocks = []
+            for block in msg.blocks:
+                match block:
+                    case TextBlock(text=text):
+                        blocks.append(TextInputBlock(text=text))
+                    case ImageBlock(media_type=mt, data=data):
+                        blocks.append(ImageInputBlock(media_type=mt, data=data))
+                    case ToolUseBlock(id=tid, name=name, input=inp):
+                        try:
+                            parsed = json.loads(inp) if inp else {}
+                        except json.JSONDecodeError:
+                            parsed = {"raw": inp}
+                        blocks.append(
+                            ToolUseInputBlock(id=tid, name=name, input=parsed)
+                        )
+                    case ToolResultBlock() as tr:
+                        blocks.append(
+                            ApiToolResultBlock(
+                                tool_use_id=tr.tool_use_id,
+                                content=[ToolResultTextContent(text=tr.output)],
+                                is_error=tr.is_error,
+                            )
+                        )
+            if blocks:
+                role = "assistant" if msg.role == MessageRole.ASSISTANT else "user"
+                api_messages.append(InputMessage(role=role, content=blocks))
+        return api_messages
+    # -- Tracing / events ----------------------------------------------------
+    def _trace(self, name: str, attributes: dict[str, Any] | None = None) -> None:
+        """Record a trace event if a session tracer is attached."""
+        if self.session_tracer is not None:
+            self.session_tracer.record(name, attributes)
+    def _emit_event(self, event: AssistantEvent) -> None:
+        """Emit an assistant event to the on_event callback."""
+        if self.on_event is not None:
+            try:
+                self.on_event(event)
+            except Exception:
+                logger.debug("on_event callback raised", exc_info=True)
+# ---------------------------------------------------------------------------
+# Module-level helpers
+# ---------------------------------------------------------------------------
+def _resolve_compaction_threshold() -> int:
+    """Resolve auto-compaction threshold from environment or default."""
+    raw = os.environ.get(_ENV_COMPACTION_KEY)
+    if raw is not None:
+        try:
+            return int(raw)
+        except ValueError:
+            logger.warning(
+                "Invalid %s value '%s', using default %d",
+                _ENV_COMPACTION_KEY,
+                raw,
+                DEFAULT_AUTO_COMPACTION_THRESHOLD,
+            )
+    return DEFAULT_AUTO_COMPACTION_THRESHOLD