PyPI - agentforge-py - Versions diffs - 0.2.1__py3-none-any.whl - Mend

agentforge-py 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157) hide show

agentforge/__init__.py +114 -0
agentforge/_testing/__init__.py +19 -0
agentforge/_testing/fake_llm.py +126 -0
agentforge/_testing/fake_tool.py +122 -0
agentforge/_tools/__init__.py +14 -0
agentforge/_tools/calculator.py +102 -0
agentforge/_tools/decorator.py +300 -0
agentforge/_tools/file_read.py +112 -0
agentforge/_tools/shell.py +134 -0
agentforge/_tools/web_search.py +207 -0
agentforge/agent.py +817 -0
agentforge/auth.py +42 -0
agentforge/cli/__init__.py +18 -0
agentforge/cli/_build.py +323 -0
agentforge/cli/_scaffold_state.py +250 -0
agentforge/cli/_shared_scaffold.py +174 -0
agentforge/cli/config_cmd.py +174 -0
agentforge/cli/db_cmd.py +262 -0
agentforge/cli/debug_cmd.py +168 -0
agentforge/cli/docs_cmd.py +217 -0
agentforge/cli/eval_cmd.py +181 -0
agentforge/cli/health_cmd.py +139 -0
agentforge/cli/list_modules.py +85 -0
agentforge/cli/main.py +81 -0
agentforge/cli/manifest_apply.py +368 -0
agentforge/cli/module_cmd.py +247 -0
agentforge/cli/new_cmd.py +171 -0
agentforge/cli/run_cmd.py +234 -0
agentforge/cli/upgrade_cmd.py +230 -0
agentforge/config/__init__.py +45 -0
agentforge/eval/__init__.py +18 -0
agentforge/eval/consistency.py +107 -0
agentforge/eval/coverage.py +100 -0
agentforge/eval/format_compliance.py +107 -0
agentforge/eval/regression.py +143 -0
agentforge/findings.py +166 -0
agentforge/guardrails/__init__.py +32 -0
agentforge/guardrails/allowlist.py +49 -0
agentforge/guardrails/capability_check.py +58 -0
agentforge/guardrails/engine.py +289 -0
agentforge/guardrails/pii_redact_basic.py +61 -0
agentforge/guardrails/prompt_injection_basic.py +90 -0
agentforge/memory/__init__.py +16 -0
agentforge/memory/in_memory.py +130 -0
agentforge/memory/in_memory_graph.py +262 -0
agentforge/memory/in_memory_vector.py +167 -0
agentforge/pipeline/__init__.py +26 -0
agentforge/pipeline/engine.py +189 -0
agentforge/pipeline/errors.py +19 -0
agentforge/pipeline/tool.py +93 -0
agentforge/py.typed +0 -0
agentforge/recording.py +189 -0
agentforge/renderers/__init__.py +28 -0
agentforge/renderers/_defaults.py +32 -0
agentforge/renderers/markdown.py +44 -0
agentforge/renderers/patch_applier.py +46 -0
agentforge/renderers/registry.py +108 -0
agentforge/renderers/scorecard.py +59 -0
agentforge/renderers/span_table.py +71 -0
agentforge/replay.py +260 -0
agentforge/resolver_register.py +41 -0
agentforge/retrieval.py +410 -0
agentforge/runtime.py +63 -0
agentforge/strategies/__init__.py +27 -0
agentforge/strategies/_base.py +280 -0
agentforge/strategies/_plan.py +93 -0
agentforge/strategies/multi_agent.py +541 -0
agentforge/strategies/plan_execute.py +506 -0
agentforge/strategies/react.py +237 -0
agentforge/strategies/tot.py +472 -0
agentforge/templates/_shared/.cursorrules +12 -0
agentforge/templates/_shared/.github/copilot-instructions.md +13 -0
agentforge/templates/_shared/.gitkeep +0 -0
agentforge/templates/_shared/AGENTS.md.tmpl +123 -0
agentforge/templates/_shared/CLAUDE.md +13 -0
agentforge/templates/_shared/docs/runbooks/01-set-up-new-agent.md.tmpl +67 -0
agentforge/templates/_shared/docs/runbooks/02-add-a-tool.md +67 -0
agentforge/templates/_shared/docs/runbooks/03-add-a-pipeline-task.md +69 -0
agentforge/templates/_shared/docs/runbooks/04-pick-reasoning-strategy.md +67 -0
agentforge/templates/_shared/docs/runbooks/05-write-prompts.md +75 -0
agentforge/templates/_shared/docs/runbooks/06-test-your-agent.md +75 -0
agentforge/templates/_shared/docs/runbooks/07-debug-a-run.md +70 -0
agentforge/templates/_shared/docs/runbooks/08-add-memory.md +75 -0
agentforge/templates/_shared/docs/runbooks/09-add-mcp.md +78 -0
agentforge/templates/_shared/docs/runbooks/10-add-evaluators.md +76 -0
agentforge/templates/_shared/docs/runbooks/11-add-safety-guardrails.md +83 -0
agentforge/templates/_shared/docs/runbooks/12-add-observability.md +77 -0
agentforge/templates/_shared/docs/runbooks/13-configure-multi-provider.md +91 -0
agentforge/templates/_shared/docs/runbooks/14-deploy-your-agent.md +70 -0
agentforge/templates/_shared/docs/runbooks/15-upgrade-your-agent.md +67 -0
agentforge/templates/_shared/docs/runbooks/16-configuration-reference.md +81 -0
agentforge/templates/_shared/docs/runbooks/17-add-reranker.md +78 -0
agentforge/templates/_shared/docs/runbooks/18-add-hybrid-search.md +78 -0
agentforge/templates/_shared/docs/runbooks/19-add-graphrag.md +83 -0
agentforge/templates/_shared/docs/runbooks/20-apply-schema-migrations.md +92 -0
agentforge/templates/_shared/docs/runbooks/21-use-streaming-guardrails.md +82 -0
agentforge/templates/_shared/docs/runbooks/README.md.tmpl +68 -0
agentforge/templates/code-reviewer/.env.example +8 -0
agentforge/templates/code-reviewer/.gitignore +7 -0
agentforge/templates/code-reviewer/README.md +12 -0
agentforge/templates/code-reviewer/agentforge.yaml +23 -0
agentforge/templates/code-reviewer/copier.yml +34 -0
agentforge/templates/code-reviewer/pyproject.toml +18 -0
agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
agentforge/templates/docs-qa/.env.example +8 -0
agentforge/templates/docs-qa/.gitignore +7 -0
agentforge/templates/docs-qa/README.md +14 -0
agentforge/templates/docs-qa/agentforge.yaml +19 -0
agentforge/templates/docs-qa/copier.yml +31 -0
agentforge/templates/docs-qa/pyproject.toml +18 -0
agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
agentforge/templates/minimal/.env.example +11 -0
agentforge/templates/minimal/.gitignore +10 -0
agentforge/templates/minimal/README.md +28 -0
agentforge/templates/minimal/agentforge.yaml +10 -0
agentforge/templates/minimal/copier.yml +52 -0
agentforge/templates/minimal/pyproject.toml +18 -0
agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/main.py +34 -0
agentforge/templates/patch-bot/.env.example +8 -0
agentforge/templates/patch-bot/.gitignore +7 -0
agentforge/templates/patch-bot/README.md +13 -0
agentforge/templates/patch-bot/agentforge.yaml +15 -0
agentforge/templates/patch-bot/copier.yml +31 -0
agentforge/templates/patch-bot/pyproject.toml +18 -0
agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
agentforge/templates/research/.env.example +8 -0
agentforge/templates/research/.gitignore +7 -0
agentforge/templates/research/README.md +14 -0
agentforge/templates/research/agentforge.yaml +17 -0
agentforge/templates/research/copier.yml +31 -0
agentforge/templates/research/pyproject.toml +18 -0
agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/main.py +31 -0
agentforge/templates/triage/.env.example +8 -0
agentforge/templates/triage/.gitignore +7 -0
agentforge/templates/triage/README.md +14 -0
agentforge/templates/triage/agentforge.yaml +25 -0
agentforge/templates/triage/copier.yml +31 -0
agentforge/templates/triage/pyproject.toml +18 -0
agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/main.py +30 -0
agentforge/testing/__init__.py +69 -0
agentforge/testing/conformance.py +40 -0
agentforge/testing/factory.py +89 -0
agentforge/testing/fixtures.py +42 -0
agentforge/testing/llm.py +235 -0
agentforge/testing/recording.py +177 -0
agentforge/tools/__init__.py +41 -0
agentforge_py-0.2.1.dist-info/METADATA +158 -0
agentforge_py-0.2.1.dist-info/RECORD +157 -0
agentforge_py-0.2.1.dist-info/WHEEL +4 -0
agentforge_py-0.2.1.dist-info/entry_points.txt +2 -0
agentforge_py-0.2.1.dist-info/licenses/LICENSE +202 -0

agentforge/_tools/decorator.py ADDED Viewed

@@ -0,0 +1,300 @@
+"""`@tool` — typed-function-to-`Tool` decorator (feat-004).
+Wraps a typed function as a concrete `Tool` subclass:
+    from agentforge import tool
+    @tool
+    def lookup_user(user_id: str, include_email: bool = False) -> dict:
+        '''Fetch a user record.
+        Args:
+            user_id: The internal user id (ULID).
+            include_email: When True, include the email field.
+        Returns:
+            A dict with name and signup_date.
+        '''
+        return db.get_user(user_id, with_email=include_email)
+The decorator inspects the wrapped function and constructs:
+  - `name`              from the function's `__name__` (or the
+                        `name=` override argument).
+  - `description`       from the docstring's summary line + Args
+                        section, parsed Google-style. The first
+                        non-blank non-arg line is the summary;
+                        per-arg descriptions feed Pydantic field
+                        descriptions.
+  - `input_schema`      a Pydantic v2 model built from the
+                        function's typed parameters. Required
+                        parameters have no default; optional ones
+                        carry the function's default.
+  - `run(**kwargs)`     dispatches to the wrapped function (sync or
+                        async). Returns whatever the function
+                        returns; the dispatch path in strategies
+                        validates kwargs before calling `run`.
+Errors at decoration time:
+  - Missing type hint on a parameter      → `ValueError`
+  - Variadic args (`*args`, `**kwargs`)   → `ValueError`
+  - Positional-only parameters            → `ValueError` (LLM
+                                            tool calls are
+                                            keyword-only over the
+                                            wire)
+  - `self` / class-method usage           → not supported here;
+                                            subclass `Tool`
+                                            directly instead
+Capabilities default to empty. Pass `capabilities={"network",
+"filesystem"}` to declare them up front (used by the future safety
+guardrails in feat-018).
+"""
+from __future__ import annotations
+import asyncio
+import inspect
+import re
+from collections.abc import Callable, Iterable
+from typing import Any, get_type_hints
+from agentforge_core.contracts.tool import Tool
+from pydantic import BaseModel, Field, create_model
+# Sentinel for "no default" — distinguished from `None` (which is a
+# legitimate default for `Optional[X] = None` parameters).
+_NO_DEFAULT = inspect.Parameter.empty
+def tool(
+    fn: Callable[..., Any] | None = None,
+    *,
+    name: str | None = None,
+    description: str | None = None,
+    capabilities: Iterable[str] = (),
+) -> Any:
+    """Decorate a typed function as a `Tool`.
+    Usage:
+        @tool
+        def my_func(x: int) -> str: ...
+        # or with explicit options:
+        @tool(name="custom_name", capabilities={"network"})
+        def my_func(x: int) -> str: ...
+        # or as a plain call; keyword options are honoured here too:
+        my_tool = tool(my_func, name="custom_name")
+    Args:
+        fn: The function to wrap. Omitted in the parameterised form.
+        name: Overrides the tool name (defaults to the function's
+            `__name__`).
+        description: Overrides the description parsed from the docstring.
+        capabilities: Capability labels to declare on the tool.
+    Returns:
+        A `Tool` *instance* (not a class) when `fn` is given; pass it to
+        `Agent(tools=[...])` directly. Without `fn`, a one-argument
+        decorator that builds that instance.
+    Raises:
+        TypeError: If the decorated object is a class or is not callable.
+    """
+    def _decorate(real_fn: Callable[..., Any]) -> Tool:
+        # Classes are callable too, but this decorator only understands
+        # plain functions; fail loudly instead of returning something that
+        # is neither the class nor a usable tool.
+        if isinstance(real_fn, type) or not callable(real_fn):
+            msg = (
+                f"@tool: expected a function, got {real_fn!r}. "
+                "Subclass `Tool` directly for class-based tools."
+            )
+            raise TypeError(msg)
+        return _build_tool(
+            real_fn,
+            name=name,
+            description=description,
+            capabilities=capabilities,
+        )
+    # Parameterised form: `@tool(name=..., ...)` — fn is None here, so hand
+    # back the closure; Python applies it to the function on the next call.
+    if fn is None:
+        return _decorate
+    # Bare form (`@tool`) or direct call (`tool(fn, name=...)`): build now,
+    # forwarding the keyword options rather than silently discarding them.
+    return _decorate(fn)
+def _build_tool(
+    fn: Callable[..., Any],
+    *,
+    name: str | None,
+    description: str | None,
+    capabilities: Iterable[str],
+) -> Tool:
+    """Synthesize a concrete `Tool` subclass for `fn` and instantiate it.
+    Args:
+        fn: The typed function to expose as a tool.
+        name: Tool name override; falls back to `fn.__name__`.
+        description: Description override; falls back to the docstring
+            summary, then to `fn.__name__`.
+        capabilities: Capability labels; a bare string counts as a single
+            label.
+    Returns:
+        An instance of a dynamically created `Tool` subclass whose `run`
+        dispatches to `fn`.
+    Raises:
+        ValueError: Propagated from `_build_pydantic_fields` when a
+            parameter is untyped, variadic, or positional-only.
+    """
+    sig = inspect.signature(fn)
+    # `include_extras=True` keeps `Annotated[...]` metadata (for example
+    # Pydantic `Field` constraints) instead of reducing it to the bare type,
+    # so constraints declared on the function reach the input schema.
+    type_hints = get_type_hints(fn, include_extras=True)
+    fields = _build_pydantic_fields(fn, sig, type_hints)
+    parsed_doc = _parse_google_docstring(fn.__doc__ or "")
+    # Apply per-arg descriptions from the docstring's Args block by
+    # wrapping each field's default in `Field(default=..., description=...)`.
+    for field_name, arg_doc in parsed_doc.arg_descriptions.items():
+        if field_name not in fields or not arg_doc:
+            continue
+        annotation, default = fields[field_name]
+        if default is ...:
+            fields[field_name] = (annotation, Field(..., description=arg_doc))
+        else:
+            fields[field_name] = (annotation, Field(default=default, description=arg_doc))
+    final_name = name or fn.__name__
+    schema_cls_name = _pascal_case(final_name) + "Input"
+    # mypy can't verify keyword unpacking against `create_model`'s
+    # overloads; the runtime contract is exactly `create_model(name,
+    # **{field: (annotation, default), ...})`.
+    schema_cls: type[BaseModel] = create_model(schema_cls_name, **fields)  # type: ignore[call-overload]
+    final_description = description or parsed_doc.summary or fn.__name__
+    # A bare string is itself an iterable of characters; treat it as one
+    # capability rather than exploding it into single-letter labels.
+    if isinstance(capabilities, str):
+        capabilities = (capabilities,)
+    final_capabilities = frozenset(capabilities)
+    # Synthesize the Tool subclass dynamically. We use `type()`
+    # instead of a `class ...:` block so the closure-captured
+    # `schema_cls` is bound cleanly into the class namespace
+    # (Python's class-body scope rules don't see enclosing locals
+    # via plain `name = name` assignment shapes).
+    async def _run(self: Any, **kwargs: Any) -> Any:  # noqa: ARG001 — bound method needs `self`
+        result = fn(**kwargs)
+        # Awaiting whatever comes back covers `async def` functions and also
+        # plain `def` wrappers that return a coroutine, which a static
+        # coroutine-function check on `fn` would miss.
+        if inspect.isawaitable(result):
+            return await result
+        return result
+    cls_namespace: dict[str, Any] = {
+        "name": final_name,
+        "description": final_description,
+        "input_schema": schema_cls,
+        "capabilities": final_capabilities,
+        "run": _run,
+    }
+    decorated_cls = type(
+        _pascal_case(final_name) + "Tool",
+        (Tool,),
+        cls_namespace,
+    )
+    instance: Tool = decorated_cls()
+    return instance
+def _build_pydantic_fields(
+    fn: Callable[..., Any],
+    sig: inspect.Signature,
+    type_hints: dict[str, Any],
+) -> dict[str, tuple[Any, Any]]:
+    """Turn the parameters of `fn` into `create_model` field specs.
+    Each parameter maps to `(annotation, default)`; a parameter without a
+    default gets `...` in the default slot. Only names present in
+    `sig.parameters` are visited, so a `return` entry in `type_hints`
+    never becomes a field. There is no dedicated `self` / `cls` check: an
+    unannotated `self` simply trips the missing-type-hint error.
+    Raises:
+        ValueError: If a parameter is positional-only, variadic
+            (`*args` / `**kwargs`), or has no entry in `type_hints`.
+    """
+    variadic_kinds = (
+        inspect.Parameter.VAR_POSITIONAL,
+        inspect.Parameter.VAR_KEYWORD,
+    )
+    specs: dict[str, tuple[Any, Any]] = {}
+    for param_name, param in sig.parameters.items():
+        # Checks run in a fixed order (positional-only, variadic, missing
+        # hint); the first one that applies decides the error message.
+        problem: str | None = None
+        if param.kind == inspect.Parameter.POSITIONAL_ONLY:
+            problem = (
+                f"@tool: parameter {param_name!r} on {fn.__qualname__!r} is "
+                "positional-only. LLM tool calls bind by keyword; declare "
+                "the parameter as positional-or-keyword instead."
+            )
+        elif param.kind in variadic_kinds:
+            problem = (
+                f"@tool: variadic parameter {param_name!r} on "
+                f"{fn.__qualname__!r} is not supported. Tools must declare "
+                "every input explicitly so the schema is complete."
+            )
+        elif param_name not in type_hints:
+            problem = (
+                f"@tool: parameter {param_name!r} on {fn.__qualname__!r} "
+                "is missing a type hint. Every parameter must be typed."
+            )
+        if problem is not None:
+            raise ValueError(problem)
+        required = param.default is _NO_DEFAULT
+        specs[param_name] = (type_hints[param_name], ... if required else param.default)
+    return specs
+# ----------------------------------------------------------------------
+# Google-style docstring parser
+# ----------------------------------------------------------------------
+# Result of `_parse_google_docstring`: the pieces of a docstring that the
+# decorator turns into tool metadata.
+class _ParsedDoc(BaseModel):
+    # Summary text joined into a single line ("" when there is none).
+    summary: str
+    # Maps argument name -> description text taken from the `Args:` block.
+    arg_descriptions: dict[str, str]
+# Matches a line that holds nothing but `Args:` (surrounding whitespace and
+# spaces before the colon are tolerated).
+_ARGS_HEADER_RE = re.compile(r"^\s*Args\s*:\s*$", re.MULTILINE)
+# Matches `name: text` or `name (anything): text`; group 1 is the argument
+# name, group 2 is the rest of the line after the colon.
+_ARG_LINE_RE = re.compile(r"^\s*([A-Za-z_]\w*)\s*(?:\(.*?\))?\s*:\s*(.*)$")
+# Line prefixes the docstring parser treats as the start of a non-Args section.
+_SECTION_HEADERS = ("Returns:", "Raises:", "Yields:", "Example:", "Examples:", "Note:", "Notes:")
+def _parse_google_docstring(doc: str) -> _ParsedDoc:
+    """Parse a Google-style docstring.
+    Extracts:
+      - `summary`: the non-blank lines that precede the first section
+        header (`Args:` or any entry of `_SECTION_HEADERS`), joined with
+        single spaces. Text that follows a section header is never part
+        of the summary.
+      - `arg_descriptions`: per-arg descriptions from the `Args:` block,
+        wherever that block sits relative to the other sections.
+        Multi-line arg descriptions concatenate into one string.
+    Returns:
+        A `_ParsedDoc`; both fields are empty when `doc` is empty.
+    """
+    if not doc:
+        return _ParsedDoc(summary="", arg_descriptions={})
+    summary_lines: list[str] = []
+    arg_block: list[str] = []
+    # Which part of the docstring the current line belongs to:
+    # "summary" (before any header), "args", or "other" (any other section).
+    section = "summary"
+    for line in inspect.cleandoc(doc).splitlines():
+        stripped = line.strip()
+        if section != "args" and _ARGS_HEADER_RE.match(line):
+            section = "args"
+            continue
+        if stripped.startswith(_SECTION_HEADERS):
+            # Any other header closes the summary and the Args block. Keep
+            # scanning instead of stopping: an `Args:` block may still follow.
+            section = "other"
+            continue
+        if section == "args":
+            # Keep the raw line; the arg-block parser strips it itself.
+            arg_block.append(line)
+        elif section == "summary" and stripped:
+            summary_lines.append(stripped)
+    summary = " ".join(summary_lines).strip()
+    args = _parse_arg_block(arg_block)
+    return _ParsedDoc(summary=summary, arg_descriptions=args)
+def _parse_arg_block(lines: list[str]) -> dict[str, str]:
+    """Parse the body of a Google-style `Args:` block into
+    `{arg_name: description}`.
+    The indentation of the first non-blank line is taken as the level at
+    which argument entries start. A line indented deeper than that is a
+    continuation of the current description even when it looks like
+    `word: text`, so nested option lists do not get mistaken for arguments.
+    Blank lines are skipped.
+    """
+    out: dict[str, str] = {}
+    current_name: str | None = None
+    current_desc: list[str] = []
+    entry_indent: int | None = None
+    for line in lines:
+        text = line.strip()
+        if not text:
+            continue
+        indent = len(line) - len(line.lstrip())
+        if entry_indent is None:
+            entry_indent = indent
+        # Only lines at (or left of) the entry level may open a new argument.
+        m = _ARG_LINE_RE.match(line) if indent <= entry_indent else None
+        if m:
+            if current_name is not None:
+                out[current_name] = " ".join(current_desc).strip()
+            current_name = m.group(1)
+            current_desc = [m.group(2).strip()]
+        elif current_name is not None:
+            current_desc.append(text)
+    if current_name is not None:
+        out[current_name] = " ".join(current_desc).strip()
+    return out
+def _pascal_case(s: str) -> str:
+    """Convert `snake_case` or `kebab-case` to `PascalCase`.
+    Words are split on runs of underscores, hyphens, or whitespace. Only
+    the first character of each word is upper-cased; the rest is kept
+    as written.
+    """
+    pieces: list[str] = []
+    for word in re.split(r"[_\-\s]+", s):
+        # Leading or trailing separators produce empty strings; drop them.
+        if word:
+            pieces.append(word[0].upper() + word[1:])
+    return "".join(pieces)
+__all__ = ["tool"]

agentforge/_tools/file_read.py ADDED Viewed

@@ -0,0 +1,112 @@
+"""`file_read` — sandboxed file-reading tool (feat-004).
+Reads a file from a configurable working directory with a size cap.
+The default instance is sandboxed to the process's current working
+directory at import time and caps reads at 1 MiB; users who want
+different limits construct their own:
+    custom = FileReadTool(work_dir="/srv/data", max_bytes=10 * 1024 * 1024)
+    agent = Agent(tools=[custom, ...])
+Sandbox enforcement:
+  - Path is resolved against `work_dir` then checked: the resolved
+    real path must be inside `work_dir` (no `../` escape, no
+    absolute paths that escape the sandbox).
+  - Symlinks are followed, but the target must also be inside the
+    sandbox.
+  - Files larger than `max_bytes` raise `ValueError` before reading.
+Capabilities: `{"filesystem"}`.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any, ClassVar
+from agentforge_core.contracts.tool import Tool
+from pydantic import BaseModel, Field
+_DEFAULT_MAX_BYTES = 1 * 1024 * 1024  # 1 MiB
+class _FileReadInput(BaseModel):
+    """Input schema for `file_read`."""
+    # The only input: a path string, checked against the sandbox in `run`.
+    path: str = Field(
+        description="Relative path inside the sandbox to read. "
+        "Absolute paths and `..` traversal are rejected.",
+    )
+class FileReadTool(Tool):
+    """Read a file from a sandboxed working directory.
+    `work_dir` defaults to the process's CWD at construction time.
+    `max_bytes` defaults to 1 MiB.
+    """
+    name: ClassVar[str] = "file_read"
+    description: ClassVar[str] = (
+        "Read a UTF-8 text file from the sandbox. Returns the file's "
+        "contents as a string. Path must be relative and stay inside "
+        "the configured working directory."
+    )
+    input_schema: ClassVar[type[BaseModel]] = _FileReadInput
+    capabilities: ClassVar[frozenset[str]] = frozenset({"filesystem"})
+    def __init__(
+        self,
+        *,
+        work_dir: str | Path | None = None,
+        max_bytes: int = _DEFAULT_MAX_BYTES,
+    ) -> None:
+        """Configure the sandbox root and the size cap.
+        Args:
+            work_dir: Sandbox root; `None` means the current working
+                directory at construction time.
+            max_bytes: Largest file size, in bytes, that `run` will return.
+        Raises:
+            ValueError: If `max_bytes` is below 1 or `work_dir` is not a
+                directory.
+        """
+        if max_bytes < 1:
+            msg = f"max_bytes must be >= 1, got {max_bytes}"
+            raise ValueError(msg)
+        # Resolve work_dir to an absolute, real path (follows symlinks)
+        # so containment checks compare apples to apples.
+        self._work_dir = Path(work_dir if work_dir is not None else Path.cwd()).resolve()
+        if not self._work_dir.is_dir():
+            msg = f"work_dir {self._work_dir!r} is not a directory"
+            raise ValueError(msg)
+        self._max_bytes = max_bytes
+    async def run(self, **kwargs: Any) -> str:
+        """Return the UTF-8 text of `kwargs["path"]`, read inside the sandbox.
+        Raises:
+            ValueError: If the path is absolute, resolves outside the
+                sandbox, is not a regular file, or holds more than
+                `max_bytes` bytes.
+            UnicodeDecodeError: If the file is not valid UTF-8.
+        """
+        path_str = kwargs["path"]
+        # Reject explicitly absolute paths up front for a clearer
+        # error than the contained-path check would give.
+        if Path(path_str).is_absolute():
+            msg = f"file_read: absolute paths are not allowed (got {path_str!r})"
+            raise ValueError(msg)
+        # `resolve()` collapses `..` and follows symlinks, so the containment
+        # check below is made against the real target.
+        candidate = (self._work_dir / path_str).resolve()
+        try:
+            candidate.relative_to(self._work_dir)
+        except ValueError as exc:
+            msg = (
+                f"file_read: path {path_str!r} resolves to {candidate!r}, "
+                f"which is outside the sandbox {self._work_dir!r}"
+            )
+            raise ValueError(msg) from exc
+        if not candidate.is_file():
+            msg = f"file_read: {path_str!r} is not a file"
+            raise ValueError(msg)
+        size = candidate.stat().st_size
+        if size > self._max_bytes:
+            msg = f"file_read: {path_str!r} is {size} bytes; max_bytes={self._max_bytes}"
+            raise ValueError(msg)
+        # The stat() above is only a cheap early exit: the file can grow
+        # after it, or report a size that does not match its content. Read at
+        # most one byte past the cap so the limit holds for the actual read.
+        with candidate.open("rb") as handle:
+            data = handle.read(self._max_bytes + 1)
+        if len(data) > self._max_bytes:
+            msg = f"file_read: {path_str!r} is larger than max_bytes={self._max_bytes}"
+            raise ValueError(msg)
+        # Apply the newline translation a text-mode read would have done
+        # (`\r\n` and lone `\r` become `\n`).
+        text: str = data.decode("utf-8").replace("\r\n", "\n").replace("\r", "\n")
+        return text
+# Default instance — sandboxed to CWD at import time, 1 MiB cap.
+file_read = FileReadTool()
+__all__ = ["FileReadTool", "file_read"]

agentforge/_tools/shell.py ADDED Viewed

@@ -0,0 +1,134 @@
+"""`shell` — sandboxed subprocess tool (feat-004).
+Executes a command as a list of arguments via `asyncio.create_subprocess_exec`
+(`shell=False` equivalent — no shell interpretation, no glob expansion,
+no env-var interpolation). Always reads input as `list[str]`, never as
+a single string, so there is no shell-injection vector.
+Capabilities: `{"shell", "destructive"}` — declared up front. Future
+safety guardrails (feat-018) refuse to enable destructive tools without
+explicit operator opt-in.
+The default instance is constructed at import time with a 30-second
+timeout and CWD as the sandbox. Users wanting different limits
+construct their own:
+    custom = ShellTool(work_dir="/srv/jobs", timeout_s=120,
+                       allowed_commands=("ls", "cat"))
+    agent = Agent(tools=[custom, ...])
+Sandbox enforcement:
+  - `command` is a list; argv[0] is the executable.
+  - `allowed_commands` (optional) restricts argv[0] to a whitelist of
+    binary names. The default is `None` → no restriction (deploy with
+    care).
+  - `timeout_s` kills the subprocess if it runs too long.
+  - Working directory pinned to `work_dir`.
+  - Output truncated to `max_output_bytes` (default 64 KiB).
+"""
+from __future__ import annotations
+import asyncio
+from pathlib import Path
+from typing import Any, ClassVar
+from agentforge_core.contracts.tool import Tool
+from pydantic import BaseModel, Field
+_DEFAULT_TIMEOUT_S = 30.0
+_DEFAULT_MAX_OUTPUT_BYTES = 64 * 1024  # 64 KiB
+class _ShellInput(BaseModel):
+    """Input schema for `shell`."""
+    # argv as a list; `min_length=1` makes the schema reject an empty list.
+    command: list[str] = Field(
+        description="The command and arguments as a list, e.g. ['ls', '-la']. "
+        "Strings are not interpreted by a shell — no glob expansion, "
+        "no quoting, no env-var substitution. Pass each argument as "
+        "a separate list element.",
+        min_length=1,
+    )
+class ShellTool(Tool):
+    """Run a sandboxed subprocess via `asyncio.create_subprocess_exec`.
+    `work_dir` defaults to CWD at construction time. `timeout_s`
+    defaults to 30s. `allowed_commands` defaults to None (any
+    command). `max_output_bytes` defaults to 64 KiB.
+    """
+    name: ClassVar[str] = "shell"
+    description: ClassVar[str] = (
+        "Run a command as a list of arguments (no shell interpretation). "
+        "Returns combined stdout+stderr as a string. Capabilities: shell, "
+        "destructive — deploy with caution."
+    )
+    input_schema: ClassVar[type[BaseModel]] = _ShellInput
+    capabilities: ClassVar[frozenset[str]] = frozenset({"shell", "destructive"})
+    def __init__(
+        self,
+        *,
+        work_dir: str | Path | None = None,
+        timeout_s: float = _DEFAULT_TIMEOUT_S,
+        allowed_commands: tuple[str, ...] | None = None,
+        max_output_bytes: int = _DEFAULT_MAX_OUTPUT_BYTES,
+    ) -> None:
+        """Configure the working directory and the execution limits.
+        Args:
+            work_dir: Directory the subprocess runs in; `None` means the
+                current working directory at construction time.
+            timeout_s: Seconds to wait before the subprocess is killed.
+            allowed_commands: Exact argv[0] values that may run; `None`
+                disables the check.
+            max_output_bytes: Cap on the output bytes kept in the result.
+        Raises:
+            ValueError: If `timeout_s` is not positive, `max_output_bytes`
+                is below 1, or `work_dir` is not a directory.
+        """
+        if timeout_s <= 0:
+            msg = f"timeout_s must be > 0, got {timeout_s}"
+            raise ValueError(msg)
+        if max_output_bytes < 1:
+            msg = f"max_output_bytes must be >= 1, got {max_output_bytes}"
+            raise ValueError(msg)
+        self._work_dir = Path(work_dir if work_dir is not None else Path.cwd()).resolve()
+        if not self._work_dir.is_dir():
+            msg = f"work_dir {self._work_dir!r} is not a directory"
+            raise ValueError(msg)
+        self._timeout_s = timeout_s
+        self._allowed = allowed_commands
+        self._max_output_bytes = max_output_bytes
+    async def run(self, **kwargs: Any) -> str:
+        """Run `kwargs["command"]` and return its combined stdout+stderr.
+        A non-zero exit status is reported by prefixing the output with
+        `[exit N]` rather than by raising.
+        Raises:
+            ValueError: If the command list is empty or argv[0] is not in
+                `allowed_commands`.
+            TimeoutError: If the subprocess runs longer than `timeout_s`;
+                it is killed first.
+        """
+        command: list[str] = list(kwargs["command"])
+        if not command:
+            msg = "shell: command list is empty"
+            raise ValueError(msg)
+        if self._allowed is not None and command[0] not in self._allowed:
+            msg = f"shell: command {command[0]!r} is not in allowed_commands ({self._allowed!r})"
+            raise ValueError(msg)
+        # `subprocess_exec` takes argv as separate args (not a list);
+        # *command spreads it. shell=False is the default and only
+        # mode for create_subprocess_exec.
+        proc = await asyncio.create_subprocess_exec(
+            *command,
+            cwd=str(self._work_dir),
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.STDOUT,
+        )
+        try:
+            # NOTE: `communicate()` buffers the whole output in memory; the
+            # size cap below is applied only after the process has finished.
+            stdout_bytes, _ = await asyncio.wait_for(proc.communicate(), timeout=self._timeout_s)
+        except asyncio.TimeoutError:
+            # `asyncio.TimeoutError` is the builtin `TimeoutError` only on
+            # Python 3.11+; naming the asyncio class catches it on older
+            # interpreters too.
+            msg = f"shell: command {command!r} exceeded timeout_s={self._timeout_s}; killed"
+            raise TimeoutError(msg) from None
+        finally:
+            # Reached with a live process on timeout and on task
+            # cancellation alike; never leave the child running.
+            if proc.returncode is None:
+                try:
+                    proc.kill()
+                except ProcessLookupError:
+                    # The process exited between the check and the kill.
+                    pass
+                await proc.wait()
+        if len(stdout_bytes) > self._max_output_bytes:
+            stdout_bytes = stdout_bytes[: self._max_output_bytes] + b"\n... [output truncated]"
+        # Truncation can cut a multi-byte character in half; `replace`
+        # keeps decoding from raising on that.
+        text = stdout_bytes.decode("utf-8", errors="replace")
+        if proc.returncode != 0:
+            return f"[exit {proc.returncode}]\n{text}"
+        return text
+# Default instance — sandboxed to CWD, 30s timeout, no whitelist.
+shell = ShellTool()
+__all__ = ["ShellTool", "shell"]