aleph-rlm 0.6.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aleph/__init__.py +49 -0
- aleph/cache/__init__.py +6 -0
- aleph/cache/base.py +20 -0
- aleph/cache/memory.py +27 -0
- aleph/cli.py +1044 -0
- aleph/config.py +154 -0
- aleph/core.py +874 -0
- aleph/mcp/__init__.py +30 -0
- aleph/mcp/local_server.py +3527 -0
- aleph/mcp/server.py +20 -0
- aleph/prompts/__init__.py +5 -0
- aleph/prompts/system.py +45 -0
- aleph/providers/__init__.py +14 -0
- aleph/providers/anthropic.py +253 -0
- aleph/providers/base.py +59 -0
- aleph/providers/openai.py +224 -0
- aleph/providers/registry.py +22 -0
- aleph/repl/__init__.py +5 -0
- aleph/repl/helpers.py +1068 -0
- aleph/repl/sandbox.py +777 -0
- aleph/sub_query/__init__.py +166 -0
- aleph/sub_query/api_backend.py +166 -0
- aleph/sub_query/cli_backend.py +327 -0
- aleph/types.py +216 -0
- aleph/utils/__init__.py +6 -0
- aleph/utils/logging.py +79 -0
- aleph/utils/tokens.py +43 -0
- aleph_rlm-0.6.0.dist-info/METADATA +358 -0
- aleph_rlm-0.6.0.dist-info/RECORD +32 -0
- aleph_rlm-0.6.0.dist-info/WHEEL +4 -0
- aleph_rlm-0.6.0.dist-info/entry_points.txt +3 -0
- aleph_rlm-0.6.0.dist-info/licenses/LICENSE +21 -0
aleph/sub_query/__init__.py
@@ -0,0 +1,166 @@
"""Sub-query module for RLM-style recursive reasoning.

This module enables Aleph to spawn sub-agents that can reason over context slices,
following the Recursive Language Model (RLM) paradigm.

Backend priority (configurable via ALEPH_SUB_QUERY_BACKEND):
1. API (if credentials available) - OpenAI-compatible APIs only
2. CLI backends (codex, gemini) - uses existing subscriptions
   Note: claude CLI is deprioritized as it hangs in MCP/sandbox contexts

Configuration via environment:
- ALEPH_SUB_QUERY_API_KEY (or OPENAI_API_KEY fallback)
- ALEPH_SUB_QUERY_URL (or OPENAI_BASE_URL fallback, default: https://api.openai.com/v1)
- ALEPH_SUB_QUERY_MODEL (required)
- ALEPH_SUB_QUERY_SHARE_SESSION (share live MCP session with CLI sub-agents)
- ALEPH_SUB_QUERY_HTTP_HOST / ALEPH_SUB_QUERY_HTTP_PORT / ALEPH_SUB_QUERY_HTTP_PATH
- ALEPH_SUB_QUERY_MCP_SERVER_NAME (server name exposed to sub-agents)
"""

from __future__ import annotations

import os
import shutil
from dataclasses import dataclass, field
from typing import Literal

__all__ = [
    "SubQueryConfig",
    "detect_backend",
    "DEFAULT_CONFIG",
    "has_api_credentials",
]


BackendType = Literal["claude", "codex", "gemini", "api", "auto"]

DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1"
DEFAULT_API_KEY_ENV = "ALEPH_SUB_QUERY_API_KEY"
DEFAULT_API_BASE_URL_ENV = "ALEPH_SUB_QUERY_URL"
DEFAULT_API_MODEL_ENV = "ALEPH_SUB_QUERY_MODEL"


@dataclass
class SubQueryConfig:
    """Configuration for sub-query backend.

    The backend priority can be configured via environment variables:

    - ALEPH_SUB_QUERY_BACKEND: Force a specific backend ("api", "claude", "codex", "gemini")
    - ALEPH_SUB_QUERY_API_KEY: API key for OpenAI-compatible providers (fallback: OPENAI_API_KEY)
    - ALEPH_SUB_QUERY_URL: Base URL for OpenAI-compatible APIs (fallback: OPENAI_BASE_URL)
    - ALEPH_SUB_QUERY_MODEL: Model name (required)
    - ALEPH_SUB_QUERY_SHARE_SESSION: Share live MCP session with CLI sub-agents
    - ALEPH_SUB_QUERY_HTTP_HOST / ALEPH_SUB_QUERY_HTTP_PORT / ALEPH_SUB_QUERY_HTTP_PATH
    - ALEPH_SUB_QUERY_MCP_SERVER_NAME: Server name exposed to sub-agents

    When backend="auto" (default), the priority is:
    1. API - if API credentials are available
    2. codex CLI - if installed
    3. gemini CLI - if installed
    4. claude CLI - if installed (deprioritized: hangs in MCP/sandbox contexts)

    Attributes:
        backend: Which backend to use. "auto" prioritizes API, then CLI.
        cli_timeout_seconds: Timeout for CLI subprocess calls.
        cli_max_output_chars: Maximum output characters from CLI.
        api_timeout_seconds: Timeout for API calls.
        api_key_env: Environment variable name for API key.
        api_base_url_env: Environment variable name for API base URL.
        api_model_env: Environment variable name for API model.
        api_model: Explicit model override (if provided programmatically).
        max_context_chars: Truncate context slices longer than this.
        include_system_prompt: Whether to include a system prompt for sub-queries.
    """

    backend: BackendType = "auto"

    # CLI options
    cli_timeout_seconds: float = 120.0
    cli_max_output_chars: int = 50_000

    # API options
    api_timeout_seconds: float = 60.0
    api_key_env: str = DEFAULT_API_KEY_ENV
    api_base_url_env: str = DEFAULT_API_BASE_URL_ENV
    api_model_env: str = DEFAULT_API_MODEL_ENV
    api_model: str | None = None

    # Behavior
    max_context_chars: int = 100_000
    include_system_prompt: bool = True

    # System prompt for sub-queries
    system_prompt: str = field(
        default="""You are a focused sub-agent processing a single task. This is a one-shot operation.

INSTRUCTIONS:
1. Answer the question based ONLY on the provided context
2. Be concise - provide direct answers without preamble
3. If context is insufficient, say "INSUFFICIENT_CONTEXT: [what's missing]"
4. Structure your response for easy parsing:
   - For summaries: bullet points or numbered lists
   - For extractions: key: value format
   - For analysis: clear sections with headers
5. Do not make up information not present in the context

OUTPUT FORMAT:
- Start directly with your answer (no "Based on the context..." preamble)
- End with a confidence indicator if uncertain: [CONFIDENCE: high/medium/low]"""
    )


def _get_api_key(api_key_env: str) -> str | None:
    """Return API key from explicit env var or OPENAI_API_KEY fallback."""
    return os.environ.get(api_key_env) or os.environ.get("OPENAI_API_KEY")


def has_api_credentials(config: SubQueryConfig | None = None) -> bool:
    """Check if API credentials are available for the sub-query backend."""
    cfg = config or DEFAULT_CONFIG
    return _get_api_key(cfg.api_key_env) is not None


def detect_backend(config: SubQueryConfig | None = None) -> BackendType:
    """Auto-detect the best available backend.

    Priority (API-first for reliability and configurability):
    1. Check ALEPH_SUB_QUERY_BACKEND env var for explicit override
    2. api - if API credentials are available
    3. codex CLI - if installed
    4. gemini CLI - if installed
    5. claude CLI - if installed (deprioritized: hangs in MCP/sandbox contexts)
    6. api (fallback) - will error if no credentials, but gives helpful message

    Returns:
        The detected backend type.
    """
    cfg = config or DEFAULT_CONFIG

    # Check for explicit backend override
    explicit_backend = os.environ.get("ALEPH_SUB_QUERY_BACKEND", "").lower().strip()
    if explicit_backend in ("api", "claude", "codex", "gemini"):
        return explicit_backend  # type: ignore

    # Prefer API if explicit model is set and credentials exist
    if (cfg.api_model or os.environ.get(cfg.api_model_env)) and has_api_credentials(cfg):
        return "api"

    # Priority 1: API if credentials are available
    if has_api_credentials(cfg):
        return "api"

    # Priority 2-4: CLI backends (codex/gemini preferred over claude)
    # Note: claude CLI hangs in MCP/sandbox contexts, so it's deprioritized
    if shutil.which("codex"):
        return "codex"
    if shutil.which("gemini"):
        return "gemini"
    if shutil.which("claude"):
        return "claude"

    # Fallback to API (will error with helpful message if no credentials)
    return "api"


DEFAULT_CONFIG = SubQueryConfig()
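For orientation, here is a minimal usage sketch, not taken from the package itself; it assumes the wheel above is installed as `aleph`, and the API key and model values are placeholders:

```python
import os

from aleph.sub_query import SubQueryConfig, detect_backend, has_api_credentials

# Illustrative values only; any OpenAI-compatible endpoint/model would do.
os.environ["ALEPH_SUB_QUERY_API_KEY"] = "sk-placeholder"
os.environ["ALEPH_SUB_QUERY_MODEL"] = "gpt-4o-mini"  # placeholder model name

print(has_api_credentials())  # True once a key is visible in the environment
print(detect_backend())       # "api": API is preferred whenever credentials exist

# Programmatic override instead of environment variables:
cfg = SubQueryConfig(api_model="gpt-4o-mini", cli_timeout_seconds=60.0)
print(detect_backend(cfg))    # still "api", since credentials are set above
```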

aleph/sub_query/api_backend.py
@@ -0,0 +1,166 @@
"""API backend for sub-queries.

Supports OpenAI-compatible chat completions endpoints.

Configuration via environment variables:
- ALEPH_SUB_QUERY_API_KEY: API key (fallback: OPENAI_API_KEY)
- ALEPH_SUB_QUERY_URL: Base URL (fallback: OPENAI_BASE_URL, default: https://api.openai.com/v1)
- ALEPH_SUB_QUERY_MODEL: Model name (required)
"""

from __future__ import annotations

import os
from typing import Any

from . import (
    DEFAULT_API_BASE_URL_ENV,
    DEFAULT_API_KEY_ENV,
    DEFAULT_API_MODEL_ENV,
    DEFAULT_OPENAI_BASE_URL,
)

__all__ = ["run_api_sub_query"]


def _get_api_key(api_key_env: str) -> str | None:
    return os.environ.get(api_key_env) or os.environ.get("OPENAI_API_KEY")


def _get_base_url(api_base_url_env: str) -> str:
    return (
        os.environ.get(api_base_url_env)
        or os.environ.get("OPENAI_BASE_URL")
        or DEFAULT_OPENAI_BASE_URL
    )


def _get_model(api_model_env: str) -> str | None:
    return os.environ.get(api_model_env)


async def _call_openai_compatible(
    messages: list[dict[str, Any]],
    model: str,
    api_key: str,
    base_url: str,
    timeout: float,
    max_tokens: int,
) -> tuple[bool, str]:
    """Call OpenAI-compatible chat completions API.

    Works with: OpenAI, Groq, Together, Mistral, DeepSeek, local LLMs, etc.
    """
    try:
        import httpx
    except ImportError:
        return False, "httpx not installed. Run: pip install httpx"

    url = f"{base_url.rstrip('/')}/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
    }

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(
                url,
                json=payload,
                headers=headers,
                timeout=timeout,
            )

            if resp.status_code != 200:
                try:
                    err_data = resp.json()
                    err_msg = err_data.get("error", {}).get("message", resp.text)
                except Exception:
                    err_msg = resp.text[:500]
                return False, f"API error {resp.status_code}: {err_msg}"

            data = resp.json()
            text = data["choices"][0]["message"]["content"]
            return True, text

        except httpx.TimeoutException:
            return False, f"API timeout after {timeout}s"
        except httpx.ConnectError as e:
            return False, f"API connection error: {e}. Check ALEPH_SUB_QUERY_URL."
        except (KeyError, IndexError) as e:
            return False, f"Failed to parse API response: {e}"
        except Exception as e:
            return False, f"API request failed: {e}"


async def run_api_sub_query(
    prompt: str,
    context_slice: str | None = None,
    model: str | None = None,
    api_key_env: str = DEFAULT_API_KEY_ENV,
    api_base_url_env: str = DEFAULT_API_BASE_URL_ENV,
    api_model_env: str = DEFAULT_API_MODEL_ENV,
    timeout: float = 60.0,
    system_prompt: str | None = None,
    max_tokens: int = 8192,
) -> tuple[bool, str]:
    """Run sub-query via OpenAI-compatible API.

    Configuration via environment:
    - ALEPH_SUB_QUERY_API_KEY: API key (fallback: OPENAI_API_KEY)
    - ALEPH_SUB_QUERY_URL: Custom endpoint (fallback: OPENAI_BASE_URL)
    - ALEPH_SUB_QUERY_MODEL: Required model name

    Args:
        prompt: The question/task for the sub-agent.
        context_slice: Optional context to include.
        model: Model name (required if ALEPH_SUB_QUERY_MODEL is not set).
        api_key_env: Env var name for API key.
        api_base_url_env: Env var name for API base URL.
        api_model_env: Env var name for API model.
        timeout: Request timeout in seconds.
        system_prompt: Optional system prompt.
        max_tokens: Maximum tokens in response.

    Returns:
        Tuple of (success, output).
    """
    api_key = _get_api_key(api_key_env)
    if not api_key:
        return False, (
            "No API key found. Set ALEPH_SUB_QUERY_API_KEY (preferred) or OPENAI_API_KEY."
        )

    if model is None:
        model = _get_model(api_model_env)
    if not model:
        return False, (
            "No model configured. Set ALEPH_SUB_QUERY_MODEL or pass model=..."
        )

    base_url = _get_base_url(api_base_url_env)

    # Build the full prompt
    full_prompt = prompt
    if context_slice:
        full_prompt = f"{prompt}\n\n---\nContext:\n{context_slice}"

    # Build messages
    messages: list[dict[str, Any]] = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": full_prompt})

    return await _call_openai_compatible(
        messages=messages,
        model=model,
        api_key=api_key,
        base_url=base_url,
        timeout=timeout,
        max_tokens=max_tokens,
    )
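A hedged sketch of calling this API backend directly follows; it assumes the environment is configured as the module docstring describes (otherwise the call simply returns a `(False, message)` tuple), and the prompt and context strings are invented:

```python
import asyncio

from aleph.sub_query.api_backend import run_api_sub_query


async def main() -> None:
    # The (success, output) contract keeps failures as data, not exceptions.
    ok, answer = await run_api_sub_query(
        prompt="In two bullet points, what does this code do?",
        context_slice="def add(a, b):\n    return a + b\n",  # made-up context slice
        timeout=60.0,
        max_tokens=512,
    )
    print("OK" if ok else "FAILED", "->", answer[:200])


asyncio.run(main())
```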

aleph/sub_query/cli_backend.py
@@ -0,0 +1,327 @@
"""CLI backend for sub-queries.

Spawns CLI tools (claude, codex) as sub-agents.
This allows RLM-style recursive reasoning without API keys.
"""

from __future__ import annotations

import asyncio
import json
import os
import sys
import tempfile
from pathlib import Path
from typing import Literal

__all__ = ["run_cli_sub_query", "CLI_BACKENDS"]


CLI_BACKENDS = ("claude", "codex", "gemini")

_KEEP_MCP_CONFIG_ENV = "ALEPH_SUB_QUERY_KEEP_MCP_CONFIG"


def _env_bool(name: str, default: bool = False) -> bool:
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")


def _track_cleanup(path: Path, cleanup_paths: list[Path]) -> None:
    if _env_bool(_KEEP_MCP_CONFIG_ENV, False):
        print(f"[aleph] Keeping MCP config: {path}", file=sys.stderr)
    else:
        cleanup_paths.append(path)


async def run_cli_sub_query(
    prompt: str,
    context_slice: str | None = None,
    backend: Literal["claude", "codex", "gemini"] = "claude",
    timeout: float = 120.0,
    cwd: Path | None = None,
    max_output_chars: int = 50_000,
    mcp_server_url: str | None = None,
    mcp_server_name: str = "aleph_shared",
    trust_mcp_server: bool = True,
) -> tuple[bool, str]:
    """Spawn a CLI sub-agent and return its response.

    Args:
        prompt: The question/task for the sub-agent.
        context_slice: Optional context to include.
        backend: Which CLI tool to use.
        timeout: Timeout in seconds.
        cwd: Working directory for the subprocess.
        max_output_chars: Maximum output characters.

    Returns:
        Tuple of (success, output).
    """
    # Build the full prompt
    full_prompt = prompt
    if context_slice:
        full_prompt = f"{prompt}\n\n---\nContext:\n{context_slice}"

    # For very long prompts, write to a temp file and pass via stdin/file
    use_tempfile = len(full_prompt) > 10_000

    try:
        if use_tempfile:
            return await _run_with_tempfile(
                full_prompt,
                backend,
                timeout,
                cwd,
                max_output_chars,
                mcp_server_url=mcp_server_url,
                mcp_server_name=mcp_server_name,
                trust_mcp_server=trust_mcp_server,
            )
        else:
            return await _run_with_arg(
                full_prompt,
                backend,
                timeout,
                cwd,
                max_output_chars,
                mcp_server_url=mcp_server_url,
                mcp_server_name=mcp_server_name,
                trust_mcp_server=trust_mcp_server,
            )
    except FileNotFoundError:
        return False, f"CLI backend '{backend}' not found. Install it or use API fallback."
    except Exception as e:
        return False, f"CLI error: {e}"


def _codex_mcp_overrides(
    mcp_server_url: str,
    mcp_server_name: str,
    trust_mcp_server: bool,
) -> list[str]:
    overrides = [
        "-c",
        f"mcp_servers.{mcp_server_name}.transport={json.dumps('streamable_http')}",
        "-c",
        f"mcp_servers.{mcp_server_name}.url={json.dumps(mcp_server_url)}",
    ]
    if trust_mcp_server:
        overrides.extend(
            [
                "-c",
                f"mcp_servers.{mcp_server_name}.trust=true",
            ]
        )
    return overrides


def _gemini_env_for_mcp(
    mcp_server_url: str,
    mcp_server_name: str,
    trust_mcp_server: bool,
) -> tuple[dict[str, str], Path]:
    env = os.environ.copy()
    payload = {
        "mcpServers": {
            mcp_server_name: {
                "type": "http",
                "url": mcp_server_url,
                "trust": trust_mcp_server,
            }
        }
    }
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        json.dump(payload, f, ensure_ascii=True, indent=2)
        settings_path = Path(f.name)
    env["GEMINI_CLI_SYSTEM_SETTINGS_PATH"] = str(settings_path)
    return env, settings_path


def _claude_mcp_config(
    mcp_server_url: str,
    mcp_server_name: str,
) -> Path:
    """Create a temp JSON file with MCP config for Claude CLI.

    Claude CLI uses --mcp-config flag to load MCP servers from JSON files.
    The format is: {"mcpServers": {"name": {"type": "http", "url": "..."}}}
    """
    payload = {
        "mcpServers": {
            mcp_server_name: {
                "type": "http",
                "url": mcp_server_url,
            }
        }
    }
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        json.dump(payload, f, ensure_ascii=True, indent=2)
        return Path(f.name)


async def _run_with_arg(
    prompt: str,
    backend: str,
    timeout: float,
    cwd: Path | None,
    max_output_chars: int,
    mcp_server_url: str | None,
    mcp_server_name: str,
    trust_mcp_server: bool,
) -> tuple[bool, str]:
    """Run CLI with prompt as argument."""
    env: dict[str, str] | None = None
    cleanup_paths: list[Path] = []

    if backend == "claude":
        # Claude Code CLI: -p for print mode (non-interactive), --dangerously-skip-permissions to bypass
        mcp_args: list[str] = []
        if mcp_server_url:
            config_path = _claude_mcp_config(mcp_server_url, mcp_server_name)
            _track_cleanup(config_path, cleanup_paths)
            mcp_args = ["--mcp-config", str(config_path), "--strict-mcp-config"]
        cmd = ["claude", "-p", *mcp_args, prompt, "--dangerously-skip-permissions"]
    elif backend == "codex":
        # OpenAI Codex CLI (non-interactive)
        overrides: list[str] = []
        if mcp_server_url:
            overrides = _codex_mcp_overrides(mcp_server_url, mcp_server_name, trust_mcp_server)
        cmd = ["codex", *overrides, "exec", "--full-auto", prompt]
    elif backend == "gemini":
        # Google Gemini CLI: -y for yolo mode (auto-approve all actions)
        if mcp_server_url:
            env, settings_path = _gemini_env_for_mcp(
                mcp_server_url, mcp_server_name, trust_mcp_server
            )
            _track_cleanup(settings_path, cleanup_paths)
        cmd = ["gemini", "-y", prompt]
    else:
        return False, f"Unknown CLI backend: {backend}"

    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.DEVNULL,  # Prevent subprocess from reading MCP stdio.
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=str(cwd) if cwd else None,
            env=env,
        )
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        output = stdout.decode("utf-8", errors="replace")

        if len(output) > max_output_chars:
            output = output[:max_output_chars] + "\n...[truncated]"

        if proc.returncode != 0:
            err = stderr.decode("utf-8", errors="replace")
            # Some CLIs write to stderr even on success, check if we got output
            if output.strip():
                return True, output
            return False, f"CLI error (exit {proc.returncode}): {err[:1000]}"

        return True, output
    except asyncio.TimeoutError:
        proc.kill()
        await proc.wait()
        return False, f"CLI timeout after {timeout}s"
    finally:
        for path in cleanup_paths:
            try:
                path.unlink()
            except Exception:
                pass


async def _run_with_tempfile(
    prompt: str,
    backend: str,
    timeout: float,
    cwd: Path | None,
    max_output_chars: int,
    mcp_server_url: str | None,
    mcp_server_name: str,
    trust_mcp_server: bool,
) -> tuple[bool, str]:
    """Run CLI with prompt from temp file (for long prompts)."""
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
        f.write(prompt)
        temp_path = f.name

    try:
        env: dict[str, str] | None = None
        cleanup_paths: list[Path] = []

        if backend == "claude":
            # Claude reads from stdin with -p flag
            mcp_args: list[str] = []
            if mcp_server_url:
                config_path = _claude_mcp_config(mcp_server_url, mcp_server_name)
                _track_cleanup(config_path, cleanup_paths)
                mcp_args = ["--mcp-config", str(config_path), "--strict-mcp-config"]
            cmd = ["claude", "-p", *mcp_args, "--dangerously-skip-permissions"]
            stdin_data = prompt.encode("utf-8")
        elif backend == "codex":
            # Codex reads prompt from stdin when "-" is passed
            overrides: list[str] = []
            if mcp_server_url:
                overrides = _codex_mcp_overrides(mcp_server_url, mcp_server_name, trust_mcp_server)
            cmd = ["codex", *overrides, "exec", "--full-auto", "-"]
            stdin_data = prompt.encode("utf-8")
        elif backend == "gemini":
            # Gemini: -y for yolo mode, pass prompt via stdin
            if mcp_server_url:
                env, settings_path = _gemini_env_for_mcp(
                    mcp_server_url, mcp_server_name, trust_mcp_server
                )
                _track_cleanup(settings_path, cleanup_paths)
            cmd = ["gemini", "-y"]
            stdin_data = prompt.encode("utf-8")
        else:
            return False, f"Unknown CLI backend: {backend}"

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE if stdin_data else None,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=str(cwd) if cwd else None,
            env=env,
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                proc.communicate(input=stdin_data),
                timeout=timeout
            )
            output = stdout.decode("utf-8", errors="replace")

            if len(output) > max_output_chars:
                output = output[:max_output_chars] + "\n...[truncated]"

            if proc.returncode != 0:
                err = stderr.decode("utf-8", errors="replace")
                if output.strip():
                    return True, output
                return False, f"CLI error (exit {proc.returncode}): {err[:1000]}"

            return True, output
        except asyncio.TimeoutError:
            proc.kill()
            await proc.wait()
            return False, f"CLI timeout after {timeout}s"
        finally:
            for path in cleanup_paths:
                try:
                    path.unlink()
                except Exception:
                    pass
    finally:
        # Clean up temp file
        try:
            Path(temp_path).unlink()
        except Exception:
            pass