PyPI - agentkernel-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agentkernel-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

agentkernel/__init__.py +7 -0
agentkernel/__main__.py +5 -0
agentkernel/agent.py +311 -0
agentkernel/approval/__init__.py +23 -0
agentkernel/approval/base.py +34 -0
agentkernel/approval/cli.py +129 -0
agentkernel/approval/policy.py +58 -0
agentkernel/approval/risk.py +91 -0
agentkernel/approval/sandbox.py +201 -0
agentkernel/budget.py +64 -0
agentkernel/checkpoint.py +50 -0
agentkernel/cli.py +1482 -0
agentkernel/config.py +224 -0
agentkernel/context/__init__.py +17 -0
agentkernel/context/manager.py +216 -0
agentkernel/context/truncate.py +35 -0
agentkernel/cron.py +146 -0
agentkernel/curation.py +183 -0
agentkernel/doctor.py +141 -0
agentkernel/embeddings.py +132 -0
agentkernel/evaluation.py +186 -0
agentkernel/improvement.py +133 -0
agentkernel/insights.py +141 -0
agentkernel/kanban.py +114 -0
agentkernel/knowledge.py +383 -0
agentkernel/loops.py +145 -0
agentkernel/mcp/__init__.py +23 -0
agentkernel/mcp/client.py +181 -0
agentkernel/mcp/config.py +59 -0
agentkernel/mcp/tools.py +96 -0
agentkernel/memory.py +1208 -0
agentkernel/paths.py +73 -0
agentkernel/plugins.py +76 -0
agentkernel/profiles.py +70 -0
agentkernel/progress.py +89 -0
agentkernel/providers/__init__.py +35 -0
agentkernel/providers/_http.py +157 -0
agentkernel/providers/anthropic.py +282 -0
agentkernel/providers/base.py +38 -0
agentkernel/providers/credentials.py +65 -0
agentkernel/providers/local.py +34 -0
agentkernel/providers/openai.py +260 -0
agentkernel/redaction.py +77 -0
agentkernel/semantic_index.py +139 -0
agentkernel/semantic_memory.py +253 -0
agentkernel/skills.py +268 -0
agentkernel/subagent.py +161 -0
agentkernel/telemetry.py +199 -0
agentkernel/templates/README.md +35 -0
agentkernel/templates/SKILL.md +28 -0
agentkernel/templates/eval-suite.toml +22 -0
agentkernel/templates/loop.toml +29 -0
agentkernel/templates/mcp-servers.toml +22 -0
agentkernel/templates/profile.toml +29 -0
agentkernel/templates/tool_module.py +64 -0
agentkernel/tools/__init__.py +5 -0
agentkernel/tools/base.py +100 -0
agentkernel/tools/builtin/__init__.py +37 -0
agentkernel/tools/builtin/checkpoint_tool.py +33 -0
agentkernel/tools/builtin/clarify.py +60 -0
agentkernel/tools/builtin/files.py +221 -0
agentkernel/tools/builtin/kanban_tool.py +100 -0
agentkernel/tools/builtin/search.py +225 -0
agentkernel/tools/builtin/shell.py +67 -0
agentkernel/tools/builtin/todo.py +106 -0
agentkernel/tui/__init__.py +50 -0
agentkernel/tui/app.py +594 -0
agentkernel/types.py +127 -0
agentkernel/worktree.py +64 -0
agentkernel_cli-0.1.0.dist-info/METADATA +426 -0
agentkernel_cli-0.1.0.dist-info/RECORD +74 -0
agentkernel_cli-0.1.0.dist-info/WHEEL +4 -0
agentkernel_cli-0.1.0.dist-info/entry_points.txt +2 -0
agentkernel_cli-0.1.0.dist-info/licenses/LICENSE +201 -0

agentkernel/templates/eval-suite.toml ADDED Viewed

@@ -0,0 +1,22 @@
+# Eval suite template — a set of scored tasks for the agent.
+# Run with:  agentkernel eval --suite <this-file>.toml
+#            agentkernel eval --suite <this-file>.toml -o report.json   # JSON report
+#            agentkernel eval --suite <this-file>.toml --case "*foo*"   # subset
+#
+# Each case runs through the agent; a judge model then scores the answer against
+# the rubric (0-1, pass/fail). The suite reports pass-rate and exits non-zero
+# unless every case passes, so it doubles as a CI gate.
+# Default rubric applied to any case that doesn't override it.
+rubric = "The answer is correct, specific, and grounded in the actual files — not guessed."
+[[cases]]
+name = "example-case"
+prompt = "<the task to give the agent>"
+# Optional per-case rubric; overrides the suite default above.
+rubric = "<what a correct answer must contain or do>"
+[[cases]]
+name = "another-case"
+prompt = "<another task>"
+# (no rubric here -> uses the suite default)

agentkernel/templates/loop.toml ADDED Viewed

@@ -0,0 +1,29 @@
+# Loop template — a workflow the agent repeats until a stopping condition.
+# Run with:  agentkernel loop --file loops/<name>.toml
+#
+# Pattern: action -> check -> iterate -> stop. Each iteration runs `prompt`
+# through a fresh agent; if `success_check` exits 0 it counts as a success, and
+# the loop stops once it sees `success_streak` successes in a row (or hits
+# max_iterations). Omit success_check to let the workflow itself decide when it is done.
+name = "{{name}}"
+description = "<one line describing what this loop accomplishes>"
+prompt = """
+<The instructions handed to the agent each iteration. Tell it what to do, how to
+make the smallest correct change, and to stop after acting. Write it so that
+repeating it converges — e.g. "fix the next failure", not "fix everything".>
+"""
+max_iterations = 5
+# Shell command run in the sandbox after each iteration; exit 0 == success.
+# Remove this line for a loop with no programmatic check.
+success_check = "<command that exits 0 when the work is done, e.g. 'uv run pytest -q'>"
+# How many consecutive successes are required to stop (2+ guards against a fix in
+# one round masking a regression in the next).
+success_streak = 1
+# Working directory for the success_check, and its timeout in seconds.
+cwd = "."
+check_timeout = 120

agentkernel/templates/mcp-servers.toml ADDED Viewed

@@ -0,0 +1,22 @@
+# MCP servers template — paste these blocks into your agentkernel.toml.
+#
+# Each [[mcp_servers]] entry launches an MCP server over stdio; its tools are
+# auto-discovered and registered into the same registry as the builtins, so the
+# model uses them like any other tool. Read-only tools (advertising readOnlyHint)
+# skip the approval gate; everything else is gated by default. Each server's
+# stderr goes to mcp_log_dir/<name>.log.
+[[mcp_servers]]
+name = "filesystem"
+command = "npx"
+args = ["-y", "@modelcontextprotocol/server-filesystem", "."]
+timeout = 30                       # per-request timeout in seconds
+[[mcp_servers]]
+name = "git"
+command = "uvx"
+args = ["mcp-server-git"]
+timeout = 30
+# On Windows, point `command` at the actual executable (e.g. "npx.cmd") since the
+# client launches the process directly without a shell.

agentkernel/templates/profile.toml ADDED Viewed

@@ -0,0 +1,29 @@
+# Profile template — a named parameterization of one run.
+# Save as profiles/<name>.toml; the file name is the profile name.
+# Load with:  agentkernel --profile <name> run "..."
+#
+# A profile sets any of: system_prompt, tool_filter, model_override, rubric, reasoning.
+# The kernel honors system_prompt and tool_filter every run; model_override and
+# rubric are used by the CLI and the eval harness. Every field is optional.
+# Prepended to the system prompt for this run. Use it to set role and constraints.
+system_prompt = """
+<Describe the role and any hard constraints, e.g. "read-only — do not modify
+files" or "produce a plan, do not act".>
+"""
+# Restrict which tools are available this run, by tool name. OMIT this line to
+# allow every registered tool. An empty list [] removes all tools (plan-only).
+# Read-only builtins: read_file, list_dir, find_files, search_text, file_info.
+# Mutating builtins:  write_file, edit_file, bash.
+tool_filter = ["read_file", "list_dir", "find_files", "search_text", "file_info"]
+# Optional: override the model just for runs using this profile.
+# model_override = "claude-haiku-4-5-20251001"
+# Optional: default rubric used by `agentkernel eval` when this profile is active.
+# rubric = "The answer is correct and complete."
+# Optional: reasoning effort for providers that support it (OpenAI reasoning
+# models, Anthropic extended thinking). Ignored by providers/models that don't.
+# reasoning = "high"   # low | medium | high

agentkernel/templates/tool_module.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""Template for a custom tool module (agentkernel conventions).
+A tool is a ToolSpec: a name, a model-facing description (write it like a prompt),
+a JSON-Schema parameter object, and a handler. Build tools with a factory function
+so any dependencies (a working dir, a client, a config value) are bound once and
+the handler stays a pure function of its arguments — never reaching for globals.
+Conventions to follow:
+- Return a ToolResult; NEVER raise. Turn failures into ToolResult(is_error=True).
+  (The registry also catches stray exceptions, but handling them yourself gives a
+  clearer message to the model.)
+- Set additionalProperties: False and list required fields in the schema.
+- Flag mutations: pass requires_approval/mutates/runs_code so the loop gates them.
+Register these with your runtime, e.g. in build_runtime or a plugin loader:
+    from your_module import my_tools
+    for spec in my_tools(working_dir="."):
+        registry.register(spec)
+"""
+from __future__ import annotations
+from agentkernel.tools.base import ToolSpec
+from agentkernel.types import ToolResult
+def my_tools(working_dir: str = ".") -> list[ToolSpec]:
+    """Build the toolset, binding any dependencies here."""
+    def greet(args: dict) -> ToolResult:
+        # args has already been validated against the schema below.
+        name = args["name"]
+        excited = bool(args.get("excited", False))
+        if not name.strip():
+            return ToolResult("", "name must not be empty", is_error=True)
+        greeting = f"Hello, {name}{'!' if excited else '.'}"
+        return ToolResult("", greeting)
+    return [
+        ToolSpec(
+            name="greet",
+            description=(
+                "Return a friendly greeting for the given name. Use when the user "
+                "asks to greet or welcome someone."
+            ),
+            parameters={
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string", "description": "Who to greet."},
+                    "excited": {
+                        "type": "boolean",
+                        "description": "Use an exclamation mark.",
+                    },
+                },
+                "required": ["name"],
+                "additionalProperties": False,
+            },
+            handler=greet,
+            category="custom",
+            # For a tool that writes files / runs commands, gate it:
+            #   mutates=True, requires_approval=True   (or runs_code=True)
+        ),
+    ]

agentkernel/tools/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Tool system: definitions, registry, validation, and dispatch (design §6)."""
+from agentkernel.tools.base import ToolRegistry, ToolSpec
+__all__ = ["ToolRegistry", "ToolSpec"]

agentkernel/tools/base.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""Tool definitions and the registry (design §6).
+The registry is agnostic about a tool's origin: a native builtin and (later) an
+MCP-backed tool register identically. This is the Phase-2 seam — nothing here is
+special-cased per origin.
+"""
+from __future__ import annotations
+import traceback
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Any
+import jsonschema
+from agentkernel.types import ToolCall, ToolResult
+@dataclass
+class ToolSpec:
+    """A registered tool. ``parameters`` is a JSON Schema (draft 2020-12) object.
+    Flags drive the approval/sandbox gate (design §10): any of ``requires_approval``,
+    ``mutates``, or ``runs_code`` causes the loop to consult the ``Approver`` before
+    executing; ``runs_code`` additionally routes execution through the ``Sandbox``.
+    """
+    name: str
+    description: str  # model-facing; write it like a prompt
+    parameters: dict[str, Any]  # JSON Schema (draft 2020-12) object
+    handler: Callable[[dict], ToolResult]
+    requires_approval: bool = False
+    mutates: bool = False
+    runs_code: bool = False
+    category: str = "general"
+    @property
+    def gated(self) -> bool:
+        """True if this tool must pass the approver before executing."""
+        return self.requires_approval or self.mutates or self.runs_code
+class ToolRegistry:
+    """Holds tool specs and dispatches calls. See design §6.2.
+    Spec ordering is insertion order and is never re-sorted — the spec list is
+    part of the cacheable prefix (design §9.3), so reordering it between turns
+    would destroy prompt-cache hit-rate.
+    """
+    def __init__(self) -> None:
+        self._specs: dict[str, ToolSpec] = {}
+    def register(self, spec: ToolSpec) -> None:
+        if spec.name in self._specs:
+            raise ValueError(f"tool already registered: {spec.name!r}")
+        self._specs[spec.name] = spec
+    def spec(self, name: str) -> ToolSpec | None:
+        return self._specs.get(name)
+    def specs(self) -> list[ToolSpec]:
+        """All specs in stable registration order (for the provider prefix)."""
+        return list(self._specs.values())
+    def validate(self, call: ToolCall) -> str | None:
+        """Validate ``call.arguments`` against the tool's schema.
+        Returns an error string on failure (unknown tool or schema violation) or
+        ``None`` if the call is valid. The loop turns a non-None result into a
+        ``ToolResult(is_error=True)`` *instead of* executing, so the model can
+        correct itself.
+        """
+        spec = self._specs.get(call.name)
+        if spec is None:
+            return f"Unknown tool: {call.name!r}"
+        try:
+            jsonschema.validate(call.arguments, spec.parameters)
+        except jsonschema.ValidationError as exc:
+            return f"Invalid arguments for {call.name!r}: {exc.message}"
+        return None
+    def execute(self, call: ToolCall) -> ToolResult:
+        """Dispatch to the handler. A handler exception becomes an error result.
+        Handlers receive only ``call.arguments`` and so cannot know the call id;
+        the registry stamps ``call_id`` onto the returned result here, which keeps
+        the §8 pairing contract the registry's responsibility, not the handler's.
+        """
+        spec = self._specs.get(call.name)
+        if spec is None:  # pragma: no cover - validate() runs first in the loop
+            return ToolResult(call.id, f"Unknown tool: {call.name!r}", is_error=True)
+        try:
+            result = spec.handler(call.arguments)
+        except Exception as exc:  # noqa: BLE001 - errors become results, not raises
+            summary = f"{type(exc).__name__}: {exc}\n{traceback.format_exc(limit=3)}"
+            return ToolResult(call.id, summary, is_error=True)
+        result.call_id = call.id
+        return result

agentkernel/tools/builtin/__init__.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Builtin tools the kernel ships (design §6.3)."""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from agentkernel.tools.builtin.files import file_tools
+from agentkernel.tools.builtin.search import search_tools
+from agentkernel.tools.builtin.shell import bash_tool
+if TYPE_CHECKING:
+    from agentkernel.approval import Sandbox
+    from agentkernel.checkpoint import Checkpointer
+    from agentkernel.tools.base import ToolSpec
+__all__ = ["file_tools", "search_tools", "bash_tool", "default_tools"]
+def default_tools(
+    sandbox: Sandbox,
+    working_dir: str = ".",
+    *,
+    max_result_tokens: int = 4096,
+    bash_timeout: int = 60,
+    checkpointer: Checkpointer | None = None,
+) -> list[ToolSpec]:
+    """The full builtin toolset: file + search tools + bash, bound to one working dir.
+    When ``checkpointer`` is set, the file tools record pre-edit state for
+    rollback; the ``rollback`` tool itself is registered by the runtime builder.
+    """
+    tools = file_tools(
+        working_dir, max_result_tokens=max_result_tokens, checkpointer=checkpointer
+    )
+    tools += search_tools(working_dir, max_result_tokens=max_result_tokens)
+    tools.append(bash_tool(sandbox, working_dir, timeout=bash_timeout))
+    return tools

agentkernel/tools/builtin/checkpoint_tool.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""The `rollback` tool — restore files to their pre-edit checkpoint (§18.1)."""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from agentkernel.tools.base import ToolSpec
+from agentkernel.types import ToolResult
+if TYPE_CHECKING:
+    from agentkernel.checkpoint import Checkpointer
+def rollback_tool(checkpointer: Checkpointer) -> ToolSpec:
+    """Build a `rollback` tool bound to ``checkpointer``."""
+    def rollback(_args: dict) -> ToolResult:
+        if checkpointer.pending() == 0:
+            return ToolResult("", "Nothing to roll back — no files have been modified.")
+        n = checkpointer.rollback()
+        return ToolResult("", f"Rolled back {n} file(s) to their pre-edit state.")
+    return ToolSpec(
+        name="rollback",
+        description=(
+            "Undo all file changes made this session, restoring every file the "
+            "file tools modified (and deleting any they created) to its state at "
+            "the start. Use this to recover after a wrong edit."
+        ),
+        parameters={"type": "object", "properties": {}, "additionalProperties": False},
+        handler=rollback,
+        category="files",
+    )

agentkernel/tools/builtin/clarify.py ADDED Viewed

@@ -0,0 +1,60 @@
+"""The `clarify` tool (design §18.4).
+Lets the model ask the user a single focused question mid-run instead of guessing,
+routed through the same terminal input channel the approver uses. In a
+non-interactive run (no stdin), it degrades gracefully: the model is told no one
+is available and to proceed with its best judgment, rather than blocking.
+"""
+from __future__ import annotations
+from collections.abc import Callable
+from agentkernel.tools.base import ToolSpec
+from agentkernel.types import ToolResult
+_NO_ANSWER = "No user is available to answer; proceed with your best judgment."
+def clarify_tool(
+    *,
+    input_fn: Callable[[str], str] = input,
+    output_fn: Callable[[str], None] = print,
+) -> ToolSpec:
+    """Build the `clarify` tool over a terminal input/output channel."""
+    def clarify(args: dict) -> ToolResult:
+        question = (args.get("question") or "").strip()
+        if not question:
+            return ToolResult("", "clarify requires a `question`.", is_error=True)
+        output_fn(f"\n[clarify] {question}")
+        try:
+            answer = input_fn("> your answer: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            return ToolResult("", _NO_ANSWER)
+        if not answer:
+            return ToolResult("", "(no answer given) Proceed with your best judgment.")
+        return ToolResult("", f"User answered: {answer}")
+    return ToolSpec(
+        name="clarify",
+        description=(
+            "Ask the user one focused question when a requirement is genuinely "
+            "ambiguous and guessing would risk doing the wrong thing. Use "
+            "sparingly — prefer reasonable defaults. Returns the user's answer, or "
+            "tells you to proceed if no one is available."
+        ),
+        parameters={
+            "type": "object",
+            "properties": {
+                "question": {
+                    "type": "string",
+                    "description": "The single, specific question to ask the user.",
+                }
+            },
+            "required": ["question"],
+            "additionalProperties": False,
+        },
+        handler=clarify,
+        category="interaction",
+    )

agentkernel/tools/builtin/files.py ADDED Viewed

@@ -0,0 +1,221 @@
+"""Filesystem tools: read_file, write_file, list_dir, edit_file (design §6.3).
+All paths are confined to the configured working directory: ``..`` escapes and
+absolute paths outside the root are rejected with an error result (never a
+raise). ``read_file`` truncates large files via the shared §8.4/§9 mechanism.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import TYPE_CHECKING
+from agentkernel.context.truncate import truncate_text
+from agentkernel.tools.base import ToolSpec
+from agentkernel.types import ToolResult
+if TYPE_CHECKING:
+    from agentkernel.checkpoint import Checkpointer
+def resolve_within(root: Path, path: str) -> Path:
+    """Resolve ``path`` under ``root``, or raise ValueError if it escapes.
+    Shared by every working-dir-confined tool so the containment rule lives in
+    one place (design §6.3, §10.3).
+    """
+    candidate = (root / path).resolve()
+    if candidate != root and root not in candidate.parents:
+        raise ValueError(f"path escapes working directory: {path!r}")
+    return candidate
+def file_tools(
+    working_dir: str = ".",
+    *,
+    max_result_tokens: int = 4096,
+    checkpointer: Checkpointer | None = None,
+) -> list[ToolSpec]:
+    """Build the file toolset bound to ``working_dir``.
+    Binding the root (and result cap) here keeps handlers pure functions of
+    their arguments — they never reach for global config (AGENT.md, design §7).
+    When a ``checkpointer`` is supplied, write_file/edit_file record a file's
+    pre-modification state so a ``rollback`` can undo the change (design §18.1).
+    """
+    root = Path(working_dir).resolve()
+    def _resolve(path: str) -> Path:
+        return resolve_within(root, path)
+    def read_file(args: dict) -> ToolResult:
+        path = args["path"]
+        try:
+            target = _resolve(path)
+        except ValueError as exc:
+            return ToolResult("", str(exc), is_error=True)
+        if not target.is_file():
+            return ToolResult("", f"Not a file: {path!r}", is_error=True)
+        text = target.read_text(encoding="utf-8", errors="replace")
+        return ToolResult("", truncate_text(text, max_result_tokens))
+    def write_file(args: dict) -> ToolResult:
+        path = args["path"]
+        content = args["content"]
+        try:
+            target = _resolve(path)
+        except ValueError as exc:
+            return ToolResult("", str(exc), is_error=True)
+        if checkpointer is not None:
+            checkpointer.record(target)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(content, encoding="utf-8")
+        return ToolResult("", f"Wrote {len(content)} bytes to {path}")
+    def list_dir(args: dict) -> ToolResult:
+        path = args.get("path", ".")
+        try:
+            target = _resolve(path)
+        except ValueError as exc:
+            return ToolResult("", str(exc), is_error=True)
+        if not target.is_dir():
+            return ToolResult("", f"Not a directory: {path!r}", is_error=True)
+        entries = sorted(
+            f"{p.name}/" if p.is_dir() else p.name for p in target.iterdir()
+        )
+        listing = "\n".join(entries) if entries else "(empty)"
+        return ToolResult("", truncate_text(listing, max_result_tokens))
+    def edit_file(args: dict) -> ToolResult:
+        path = args["path"]
+        old = args["old"]
+        new = args["new"]
+        replace_all = bool(args.get("replace_all", False))
+        try:
+            target = _resolve(path)
+        except ValueError as exc:
+            return ToolResult("", str(exc), is_error=True)
+        if not target.is_file():
+            return ToolResult("", f"Not a file: {path!r}", is_error=True)
+        if old == new:
+            return ToolResult("", "`old` and `new` are identical; nothing to do.", is_error=True)
+        text = target.read_text(encoding="utf-8", errors="replace")
+        count = text.count(old)
+        if count == 0:
+            return ToolResult("", f"`old` text not found in {path!r}.", is_error=True)
+        if count > 1 and not replace_all:
+            return ToolResult(
+                "",
+                f"`old` text is not unique in {path!r} ({count} occurrences); "
+                "pass replace_all=true or include more surrounding context.",
+                is_error=True,
+            )
+        if checkpointer is not None:
+            checkpointer.record(target)
+        updated = text.replace(old, new) if replace_all else text.replace(old, new, 1)
+        target.write_text(updated, encoding="utf-8")
+        replaced = count if replace_all else 1
+        return ToolResult("", f"Replaced {replaced} occurrence(s) in {path}.")
+    return [
+        ToolSpec(
+            name="read_file",
+            description=(
+                "Read a UTF-8 text file within the working directory. Returns the "
+                "file contents; large files are truncated with a marker."
+            ),
+            parameters={
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Path relative to the working directory.",
+                    }
+                },
+                "required": ["path"],
+                "additionalProperties": False,
+            },
+            handler=read_file,
+            category="files",
+        ),
+        ToolSpec(
+            name="write_file",
+            description=(
+                "Write (creating or overwriting) a UTF-8 text file within the "
+                "working directory. Parent directories are created as needed. To "
+                "change part of an existing file, prefer edit_file."
+            ),
+            parameters={
+                "type": "object",
+                "properties": {
+                    "path": {"type": "string"},
+                    "content": {"type": "string"},
+                },
+                "required": ["path", "content"],
+                "additionalProperties": False,
+            },
+            handler=write_file,
+            mutates=True,
+            requires_approval=True,
+            category="files",
+        ),
+        ToolSpec(
+            name="list_dir",
+            description="List the entries of a directory within the working directory.",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Directory path relative to the working directory.",
+                    }
+                },
+                "required": [],
+                "additionalProperties": False,
+            },
+            handler=list_dir,
+            category="files",
+        ),
+        ToolSpec(
+            name="edit_file",
+            description=(
+                "Replace an exact substring in a text file within the working "
+                "directory — the surgical alternative to rewriting the whole file "
+                "with write_file. `old` must match the file byte-for-byte and be "
+                "unique unless replace_all is true. Fails (without writing) if "
+                "`old` is missing or ambiguous, so include enough surrounding "
+                "context to pin down the one spot you mean."
+            ),
+            parameters={
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Path of the file to edit, relative to the working dir.",
+                    },
+                    "old": {
+                        "type": "string",
+                        "description": (
+                            "Exact text to find; add surrounding lines to make it unique."
+                        ),
+                    },
+                    "new": {
+                        "type": "string",
+                        "description": "Replacement text.",
+                    },
+                    "replace_all": {
+                        "type": "boolean",
+                        "description": (
+                            "Replace every occurrence instead of needing a unique match."
+                        ),
+                    },
+                },
+                "required": ["path", "old", "new"],
+                "additionalProperties": False,
+            },
+            handler=edit_file,
+            mutates=True,
+            requires_approval=True,
+            category="files",
+        ),
+    ]