PyPI - agentforge-py - Versions diffs - 0.2.1__py3-none-any.whl - Mend

agentforge-py 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157) hide show

agentforge/__init__.py +114 -0
agentforge/_testing/__init__.py +19 -0
agentforge/_testing/fake_llm.py +126 -0
agentforge/_testing/fake_tool.py +122 -0
agentforge/_tools/__init__.py +14 -0
agentforge/_tools/calculator.py +102 -0
agentforge/_tools/decorator.py +300 -0
agentforge/_tools/file_read.py +112 -0
agentforge/_tools/shell.py +134 -0
agentforge/_tools/web_search.py +207 -0
agentforge/agent.py +817 -0
agentforge/auth.py +42 -0
agentforge/cli/__init__.py +18 -0
agentforge/cli/_build.py +323 -0
agentforge/cli/_scaffold_state.py +250 -0
agentforge/cli/_shared_scaffold.py +174 -0
agentforge/cli/config_cmd.py +174 -0
agentforge/cli/db_cmd.py +262 -0
agentforge/cli/debug_cmd.py +168 -0
agentforge/cli/docs_cmd.py +217 -0
agentforge/cli/eval_cmd.py +181 -0
agentforge/cli/health_cmd.py +139 -0
agentforge/cli/list_modules.py +85 -0
agentforge/cli/main.py +81 -0
agentforge/cli/manifest_apply.py +368 -0
agentforge/cli/module_cmd.py +247 -0
agentforge/cli/new_cmd.py +171 -0
agentforge/cli/run_cmd.py +234 -0
agentforge/cli/upgrade_cmd.py +230 -0
agentforge/config/__init__.py +45 -0
agentforge/eval/__init__.py +18 -0
agentforge/eval/consistency.py +107 -0
agentforge/eval/coverage.py +100 -0
agentforge/eval/format_compliance.py +107 -0
agentforge/eval/regression.py +143 -0
agentforge/findings.py +166 -0
agentforge/guardrails/__init__.py +32 -0
agentforge/guardrails/allowlist.py +49 -0
agentforge/guardrails/capability_check.py +58 -0
agentforge/guardrails/engine.py +289 -0
agentforge/guardrails/pii_redact_basic.py +61 -0
agentforge/guardrails/prompt_injection_basic.py +90 -0
agentforge/memory/__init__.py +16 -0
agentforge/memory/in_memory.py +130 -0
agentforge/memory/in_memory_graph.py +262 -0
agentforge/memory/in_memory_vector.py +167 -0
agentforge/pipeline/__init__.py +26 -0
agentforge/pipeline/engine.py +189 -0
agentforge/pipeline/errors.py +19 -0
agentforge/pipeline/tool.py +93 -0
agentforge/py.typed +0 -0
agentforge/recording.py +189 -0
agentforge/renderers/__init__.py +28 -0
agentforge/renderers/_defaults.py +32 -0
agentforge/renderers/markdown.py +44 -0
agentforge/renderers/patch_applier.py +46 -0
agentforge/renderers/registry.py +108 -0
agentforge/renderers/scorecard.py +59 -0
agentforge/renderers/span_table.py +71 -0
agentforge/replay.py +260 -0
agentforge/resolver_register.py +41 -0
agentforge/retrieval.py +410 -0
agentforge/runtime.py +63 -0
agentforge/strategies/__init__.py +27 -0
agentforge/strategies/_base.py +280 -0
agentforge/strategies/_plan.py +93 -0
agentforge/strategies/multi_agent.py +541 -0
agentforge/strategies/plan_execute.py +506 -0
agentforge/strategies/react.py +237 -0
agentforge/strategies/tot.py +472 -0
agentforge/templates/_shared/.cursorrules +12 -0
agentforge/templates/_shared/.github/copilot-instructions.md +13 -0
agentforge/templates/_shared/.gitkeep +0 -0
agentforge/templates/_shared/AGENTS.md.tmpl +123 -0
agentforge/templates/_shared/CLAUDE.md +13 -0
agentforge/templates/_shared/docs/runbooks/01-set-up-new-agent.md.tmpl +67 -0
agentforge/templates/_shared/docs/runbooks/02-add-a-tool.md +67 -0
agentforge/templates/_shared/docs/runbooks/03-add-a-pipeline-task.md +69 -0
agentforge/templates/_shared/docs/runbooks/04-pick-reasoning-strategy.md +67 -0
agentforge/templates/_shared/docs/runbooks/05-write-prompts.md +75 -0
agentforge/templates/_shared/docs/runbooks/06-test-your-agent.md +75 -0
agentforge/templates/_shared/docs/runbooks/07-debug-a-run.md +70 -0
agentforge/templates/_shared/docs/runbooks/08-add-memory.md +75 -0
agentforge/templates/_shared/docs/runbooks/09-add-mcp.md +78 -0
agentforge/templates/_shared/docs/runbooks/10-add-evaluators.md +76 -0
agentforge/templates/_shared/docs/runbooks/11-add-safety-guardrails.md +83 -0
agentforge/templates/_shared/docs/runbooks/12-add-observability.md +77 -0
agentforge/templates/_shared/docs/runbooks/13-configure-multi-provider.md +91 -0
agentforge/templates/_shared/docs/runbooks/14-deploy-your-agent.md +70 -0
agentforge/templates/_shared/docs/runbooks/15-upgrade-your-agent.md +67 -0
agentforge/templates/_shared/docs/runbooks/16-configuration-reference.md +81 -0
agentforge/templates/_shared/docs/runbooks/17-add-reranker.md +78 -0
agentforge/templates/_shared/docs/runbooks/18-add-hybrid-search.md +78 -0
agentforge/templates/_shared/docs/runbooks/19-add-graphrag.md +83 -0
agentforge/templates/_shared/docs/runbooks/20-apply-schema-migrations.md +92 -0
agentforge/templates/_shared/docs/runbooks/21-use-streaming-guardrails.md +82 -0
agentforge/templates/_shared/docs/runbooks/README.md.tmpl +68 -0
agentforge/templates/code-reviewer/.env.example +8 -0
agentforge/templates/code-reviewer/.gitignore +7 -0
agentforge/templates/code-reviewer/README.md +12 -0
agentforge/templates/code-reviewer/agentforge.yaml +23 -0
agentforge/templates/code-reviewer/copier.yml +34 -0
agentforge/templates/code-reviewer/pyproject.toml +18 -0
agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
agentforge/templates/docs-qa/.env.example +8 -0
agentforge/templates/docs-qa/.gitignore +7 -0
agentforge/templates/docs-qa/README.md +14 -0
agentforge/templates/docs-qa/agentforge.yaml +19 -0
agentforge/templates/docs-qa/copier.yml +31 -0
agentforge/templates/docs-qa/pyproject.toml +18 -0
agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
agentforge/templates/minimal/.env.example +11 -0
agentforge/templates/minimal/.gitignore +10 -0
agentforge/templates/minimal/README.md +28 -0
agentforge/templates/minimal/agentforge.yaml +10 -0
agentforge/templates/minimal/copier.yml +52 -0
agentforge/templates/minimal/pyproject.toml +18 -0
agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/main.py +34 -0
agentforge/templates/patch-bot/.env.example +8 -0
agentforge/templates/patch-bot/.gitignore +7 -0
agentforge/templates/patch-bot/README.md +13 -0
agentforge/templates/patch-bot/agentforge.yaml +15 -0
agentforge/templates/patch-bot/copier.yml +31 -0
agentforge/templates/patch-bot/pyproject.toml +18 -0
agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
agentforge/templates/research/.env.example +8 -0
agentforge/templates/research/.gitignore +7 -0
agentforge/templates/research/README.md +14 -0
agentforge/templates/research/agentforge.yaml +17 -0
agentforge/templates/research/copier.yml +31 -0
agentforge/templates/research/pyproject.toml +18 -0
agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/main.py +31 -0
agentforge/templates/triage/.env.example +8 -0
agentforge/templates/triage/.gitignore +7 -0
agentforge/templates/triage/README.md +14 -0
agentforge/templates/triage/agentforge.yaml +25 -0
agentforge/templates/triage/copier.yml +31 -0
agentforge/templates/triage/pyproject.toml +18 -0
agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/main.py +30 -0
agentforge/testing/__init__.py +69 -0
agentforge/testing/conformance.py +40 -0
agentforge/testing/factory.py +89 -0
agentforge/testing/fixtures.py +42 -0
agentforge/testing/llm.py +235 -0
agentforge/testing/recording.py +177 -0
agentforge/tools/__init__.py +41 -0
agentforge_py-0.2.1.dist-info/METADATA +158 -0
agentforge_py-0.2.1.dist-info/RECORD +157 -0
agentforge_py-0.2.1.dist-info/WHEEL +4 -0
agentforge_py-0.2.1.dist-info/entry_points.txt +2 -0
agentforge_py-0.2.1.dist-info/licenses/LICENSE +202 -0

agentforge/__init__.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""AgentForge — open-source plug-and-play framework for production AI agents.
+This package is the default runtime. It ships:
+  - The `Agent` orchestrator (locked constructor surface per feat-001).
+  - `InMemoryStore` — process-local default `MemoryStore` so a fresh
+    agent has persistence wired without external infra.
+  - The configuration loader (`load_config`).
+  - The reasoning-strategy infrastructure: `RuntimeContext`,
+    `StrategyBase`, `get_runtime`, `ReActLoop` (feat-002).
+For provider clients, persistence drivers, MCP, observability backends,
+and safety modules, install the corresponding `agentforge-<X>` packages
+or use the `agentforge[<extra>]` install (per ADR-0003).
+See the project docs at `docs/README.md` (in the design workspace) and
+the per-feature specs under `docs/features/`.
+"""
+from __future__ import annotations
+from agentforge_core import FallbackChain
+# feat-018: importing `agentforge.guardrails` here triggers the
+# module-side `@register(...)` decorators on `PromptInjectionBasic`,
+# `PIIRedactBasic`, `CapabilityCheck`, and `Allowlist` so they're
+# resolvable by name from `agentforge.yaml` without an explicit
+# import in the consumer.
+import agentforge.guardrails  # noqa: F401
+from agentforge._tools import tool
+from agentforge.agent import Agent
+from agentforge.auth import EnvBearerAuth
+from agentforge.config import AgentForgeConfig, load_config
+from agentforge.findings import (
+    MultiSpanFinding,
+    NarrativeFinding,
+    Patch,
+    PatchFinding,
+    SimpleFinding,
+    Span,
+)
+from agentforge.memory import InMemoryGraphStore, InMemoryStore, InMemoryVectorStore
+from agentforge.pipeline import (
+    Pipeline,
+    PipelineFailure,
+    PipelineFindingsTool,
+    PipelineResult,
+    Task,
+)
+from agentforge.renderers import (
+    MarkdownRenderer,
+    MissingRendererError,
+    PatchApplierRenderer,
+    RendererRegistry,
+    ScorecardRenderer,
+    SpanTableRenderer,
+)
+from agentforge.resolver_register import register_task
+from agentforge.retrieval import Retriever
+from agentforge.runtime import RUNTIME_KEY, RuntimeContext
+from agentforge.strategies import (
+    MultiAgentSupervisor,
+    Plan,
+    PlanExecuteLoop,
+    PlanStep,
+    ReActLoop,
+    StrategyBase,
+    TreeOfThoughts,
+    get_runtime,
+)
+# Version string of this package; also exported through `__all__` below.
+__version__ = "0.2.1"
+# Public API of the top-level `agentforge` package: the classes,
+# functions, and constants imported by name above, plus `__version__`.
+# Entries are kept in a fixed order — upper-case constants first, then
+# CamelCase names, then the remaining names — each group alphabetical.
+__all__ = [
+    "RUNTIME_KEY",
+    "Agent",
+    "AgentForgeConfig",
+    "EnvBearerAuth",
+    "FallbackChain",
+    "InMemoryGraphStore",
+    "InMemoryStore",
+    "InMemoryVectorStore",
+    "MarkdownRenderer",
+    "MissingRendererError",
+    "MultiAgentSupervisor",
+    "MultiSpanFinding",
+    "NarrativeFinding",
+    "Patch",
+    "PatchApplierRenderer",
+    "PatchFinding",
+    "Pipeline",
+    "PipelineFailure",
+    "PipelineFindingsTool",
+    "PipelineResult",
+    "Plan",
+    "PlanExecuteLoop",
+    "PlanStep",
+    "ReActLoop",
+    "RendererRegistry",
+    "Retriever",
+    "RuntimeContext",
+    "ScorecardRenderer",
+    "SimpleFinding",
+    "Span",
+    "SpanTableRenderer",
+    "StrategyBase",
+    "Task",
+    "TreeOfThoughts",
+    "__version__",
+    "get_runtime",
+    "load_config",
+    "register_task",
+    "tool",
+]

agentforge/_testing/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""Local test helpers.
+Private package (underscore prefix) — these helpers exist only to
+support feat-002's tests (and other early features) until feat-016
+ships the full public testing API at `agentforge.testing`.
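+Helpers here:
+- `FakeLLMClient` — minimal scripted-response `LLMClient`. Replaced
+  by feat-016's `MockLLMClient` (richer: recording / replay /
+  capability simulation).
+- `echo_response` — keyword-only builder for an `LLMResponse` with
+  default field values.
+- `FakeTool` — scripted-response `Tool` stub; build instances with
+  `FakeTool.fake(name, response_or_fn)`.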
+"""
+from __future__ import annotations
+from agentforge._testing.fake_llm import FakeLLMClient, echo_response
+from agentforge._testing.fake_tool import FakeTool
+__all__ = ["FakeLLMClient", "FakeTool", "echo_response"]

agentforge/_testing/fake_llm.py ADDED Viewed

@@ -0,0 +1,126 @@
+"""`FakeLLMClient` — minimal scripted-response `LLMClient` for unit tests.
+Used by feat-002's strategy tests. Replaced by feat-016's full
+`MockLLMClient` (which supports recording / replay / capability
+simulation) when that lands; until then, this class is enough to
+drive the four reasoning strategies through their unit and
+integration tests.
+Usage:
+    fake = FakeLLMClient(
+        responses=[
+            LLMResponse(content="thought 1", stop_reason="tool_use",
+                        tool_calls=(ToolCall(id="t1", name="search",
+                                             arguments={"q": "x"}),),
+                        usage=TokenUsage(input_tokens=10, output_tokens=5),
+                        cost_usd=0.001, model="m", provider="p"),
+            LLMResponse(content="final answer", stop_reason="end_turn",
+                        usage=TokenUsage(input_tokens=12, output_tokens=8),
+                        cost_usd=0.002, model="m", provider="p"),
+        ],
+    )
+    # When the strategy under test calls fake.call(...) twice,
+    # it gets the two scripted responses in order.
+Responses can also be callables (called with the call's args) for
+dynamic scripting:
+    FakeLLMClient(responses=[lambda system, messages, tools=None: ...])
+"""
+from __future__ import annotations
+from collections.abc import Callable
+from typing import Any
+from agentforge_core.contracts.llm import LLMClient
+from agentforge_core.values.messages import LLMResponse, Message, TokenUsage, ToolSpec
+# One entry in a `FakeLLMClient` script: either a ready-made
+# `LLMResponse` or a callable that returns one.
+ResponseSpec = LLMResponse | Callable[..., LLMResponse]
+class FakeLLMClient(LLMClient):
+    """Test double for `LLMClient` that replays a fixed script.
+    The script is a list whose entries are either ready-made
+    `LLMResponse` objects or callables that produce one from the
+    call's arguments. Each `.call()` consumes the next entry in
+    order; calling after the script has run out raises
+    `RuntimeError`.
+    """
+    def __init__(
+        self,
+        responses: list[ResponseSpec] | None = None,
+        *,
+        capabilities: set[str] | None = None,
+    ) -> None:
+        # Both inputs are copied so the caller's own containers are
+        # never shared with the fake.
+        self._closed: bool = False
+        self._call_count: int = 0
+        self._captured: list[tuple[str, list[Message], list[ToolSpec] | None]] = []
+        self._responses: list[ResponseSpec] = list(responses) if responses else []
+        self._capabilities: set[str] = set(capabilities) if capabilities else set()
+    @property
+    def call_count(self) -> int:
+        """How many times `.call()` has consumed a scripted entry."""
+        return self._call_count
+    @property
+    def captured(
+        self,
+    ) -> list[tuple[str, list[Message], list[ToolSpec] | None]]:
+        """The `(system, messages, tools)` triples passed to `.call()`,
+        oldest first. A new list is returned on every access, so
+        tests can inspect it without touching the internal record."""
+        return list(self._captured)
+    async def call(
+        self,
+        system: str,
+        messages: list[Message],
+        tools: list[ToolSpec] | None = None,
+    ) -> LLMResponse:
+        """Return the next scripted response.
+        Raises `RuntimeError` if the client was closed or the script
+        is exhausted.
+        """
+        if self._closed:
+            raise RuntimeError("FakeLLMClient.close() was already called")
+        position = self._call_count
+        if position >= len(self._responses):
+            raise RuntimeError(
+                f"FakeLLMClient exhausted after {self._call_count} calls; "
+                f"add more scripted responses or check the strategy's loop."
+            )
+        scripted = self._responses[position]
+        # Record the call (with a snapshot of `messages`) and advance
+        # the cursor before producing the response, so both happen
+        # even when a scripted callable raises.
+        self._captured.append((system, list(messages), tools))
+        self._call_count = position + 1
+        if not callable(scripted):
+            return scripted
+        return scripted(system=system, messages=messages, tools=tools)
+    async def close(self) -> None:
+        """Mark the client closed; later `.call()`s raise."""
+        self._closed = True
+    def capabilities(self) -> set[str]:
+        """Return a copy of the capability names given at construction."""
+        return set(self._capabilities)
+def echo_response(
+    *,
+    content: str = "ok",
+    stop_reason: str = "end_turn",
+    cost_usd: float = 0.0,
+    input_tokens: int = 1,
+    output_tokens: int = 1,
+    **_: Any,
+) -> LLMResponse:
+    """Build an `LLMResponse` from keyword overrides.
+    Every field has a default, so a test that only cares about one
+    aspect of the response names just that field. Unrecognised
+    keyword arguments are accepted and ignored; `model` and
+    `provider` are always `"fake"`.
+    """
+    token_usage = TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens)
+    return LLMResponse(
+        model="fake",
+        provider="fake",
+        content=content,
+        stop_reason=stop_reason,  # type: ignore[arg-type]
+        usage=token_usage,
+        cost_usd=cost_usd,
+    )

agentforge/_testing/fake_tool.py ADDED Viewed

@@ -0,0 +1,122 @@
+"""`FakeTool` — minimal scripted-response `Tool` for unit tests
+(feat-004 chunk 5).
+Replaces any tool with a stub during tests. Two construction forms:
+    from agentforge._testing import FakeTool
+    # 1. Static return value
+    web_search = FakeTool.fake("web_search", "stub result")
+    # 2. Callable that computes the response from the call args
+    web_search = FakeTool.fake(
+        "web_search",
+        lambda **kwargs: f"results for {kwargs['query']!r}",
+    )
+The fake honours the same locked `Tool` ABC: it has a `name`,
+`description`, `input_schema` (a permissive `dict`-shaped model that
+accepts any kwargs), and a `run(**kwargs)` method. `Agent(tools=
+[fake, ...])` works without other changes.
+Replaced by feat-016's richer testing API; this is the minimum
+surface to support feat-004 / feat-002 tests today.
+"""
+from __future__ import annotations
+import asyncio
+from collections.abc import Awaitable, Callable
+from typing import Any, ClassVar
+from agentforge_core.contracts.tool import Tool
+from pydantic import BaseModel, ConfigDict
+class _PermissiveInput(BaseModel):
+    """Input schema for `FakeTool` — accepts any kwargs.
+    Real `Tool` implementations declare a strict Pydantic model so
+    bad LLM tool-calls are rejected at the dispatch boundary; the
+    fake intentionally relaxes this so test code can pass arbitrary
+    kwargs without first defining a schema.
+    """
+    # No fields are declared; `extra="allow"` makes the model keep
+    # undeclared keys instead of rejecting them.
+    model_config = ConfigDict(extra="allow")
+# Type of a callable `response` accepted by `FakeTool.fake`: any
+# signature, returning either a value or an awaitable of one.
+_FakeFn = Callable[..., Any] | Callable[..., Awaitable[Any]]
+class FakeTool(Tool):
+    """Test-only `Tool` that returns scripted responses.
+    Construct via `FakeTool.fake(name, response_or_fn)` rather than
+    the bare class so the per-instance `name` / `description` work
+    without subclassing.
+    """
+    name: ClassVar[str] = "fake"
+    description: ClassVar[str] = "Test-only stub tool."
+    input_schema: ClassVar[type[BaseModel]] = _PermissiveInput
+    capabilities: ClassVar[frozenset[str]] = frozenset()
+    calls: list[dict[str, Any]]
+    """Recorded `run` invocation kwargs, one dict per call. Only
+    `fake()`-built tools define this attribute (a fresh list on the
+    synthesized class); the bare class merely declares its type."""
+    @classmethod
+    def fake(
+        cls,
+        name: str,
+        response: Any | _FakeFn,
+        *,
+        description: str | None = None,
+        capabilities: frozenset[str] | set[str] = frozenset(),
+    ) -> FakeTool:
+        """Build a fake tool with the given name and response.
+        `response` can be:
+          - A static value (returned as-is from every `run` call).
+            Classes count as static values and are never called.
+          - A sync callable: `fn(**kwargs) -> Any`
+          - An async callable: `async fn(**kwargs) -> Any`, including
+            callable objects with an async `__call__` and sync
+            wrappers that return an awaitable.
+        Records every call in `self.calls` for assertions.
+        Args:
+            name: Tool name exposed by the fake.
+            response: Static value or callable, as described above.
+            description: Tool description; defaults to
+                `"Fake {name} tool."`.
+            capabilities: Capability names the fake declares.
+        Returns:
+            A single instance of a freshly synthesized `FakeTool`
+            subclass.
+        """
+        # Local import: only this method needs `inspect`.
+        import inspect
+        # Synthesize a class so `name` / `description` / `capabilities`
+        # become per-fake. type() avoids subclass-scope dance from the
+        # @tool decorator.
+        is_async = _is_async_callable(response)
+        is_callable = callable(response) and not isinstance(response, type)
+        async def _run(self: FakeTool, **kwargs: Any) -> Any:
+            self.calls.append(dict(kwargs))
+            if not is_callable:
+                return response
+            result = response(**kwargs)
+            # `is_async` only recognises `async def` functions (and
+            # partials of them). Checking the returned object as well
+            # catches async callables that test cannot see, so the
+            # caller never receives an un-awaited coroutine.
+            if is_async or inspect.isawaitable(result):
+                return await result
+            return result
+        cls_namespace: dict[str, Any] = {
+            "name": name,
+            "description": description or f"Fake {name} tool.",
+            "input_schema": _PermissiveInput,
+            "capabilities": frozenset(capabilities),
+            "run": _run,
+            "calls": [],
+        }
+        synthesized = type(f"Fake{name.title()}Tool", (cls,), cls_namespace)
+        instance: FakeTool = synthesized()
+        return instance
+    async def run(self, **kwargs: Any) -> Any:  # noqa: ARG002 — bare-class fallback ignores kwargs
+        """Default `run` body — overridden by `fake()` instances; the
+        bare-class fallback returns the empty string."""
+        return ""
+def _is_async_callable(obj: Any) -> bool:
+    """True for `async def` functions and partials wrapping them.
+    Any other object — including non-callables — yields False.
+    """
+    # `asyncio.iscoroutinefunction` is deprecated since Python 3.14 and
+    # emits a DeprecationWarning; `inspect.iscoroutinefunction` is the
+    # supported equivalent. Imported locally so the helper does not
+    # rely on a module-level `inspect` import.
+    import inspect
+    return inspect.iscoroutinefunction(obj)
+__all__ = ["FakeTool"]

agentforge/_tools/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Internal tooling module (feat-004).
+The `@tool` decorator and the four shipped default tools
+(`calculator`, `file_read`, `web_search`, `shell`) live under this
+underscore-prefixed package; the public surface is re-exported from
+`agentforge` (`from agentforge import tool`) and from
+`agentforge.tools` (`from agentforge.tools import calculator`).
+"""
+from __future__ import annotations
+from agentforge._tools.decorator import tool
+__all__ = ["tool"]

agentforge/_tools/calculator.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""`calculator` — arithmetic expression tool (feat-004).
+Evaluates pure-arithmetic expressions via Python's AST module
+(`ast.parse` + recursive walker). **Does not use `eval()`** — only a
+closed set of node types is allowed, so the tool can't be tricked
+into running arbitrary Python.
+Supported:
+  - Numeric literals (int, float)
+  - Binary ops: `+`, `-`, `*`, `/`, `//`, `%`, `**`
+  - Unary ops: `+`, `-`
+  - Parenthesisation
+Rejected (raises `ValueError`):
+  - Names (variables), attribute access, function calls
+  - Subscripts, list / dict / set literals
+  - Comprehensions, lambdas, walrus, anything statement-y
+Capabilities: empty (pure computation, no side effects).
+"""
+from __future__ import annotations
+import ast
+import operator
+from collections.abc import Callable
+from agentforge._tools.decorator import tool
+_Number = int | float
+_BinaryFn = Callable[[_Number, _Number], _Number]
+_UnaryFn = Callable[[_Number], _Number]
+# Cap on the estimated bit length of an integer power. An int result
+# this large cannot be converted to a float anyway; the cap exists so
+# inputs such as `9 ** 9 ** 9` are refused up front instead of
+# consuming unbounded CPU time and memory.
+_MAX_POW_RESULT_BITS = 1_000_000
+def _bounded_pow(base: _Number, exponent: _Number) -> _Number:
+    """`base ** exponent`, refusing oversized or non-real results.
+    Raises:
+        ValueError: If an int-to-positive-int power would exceed
+            `_MAX_POW_RESULT_BITS` bits, or if the result is complex
+            (negative base with a fractional exponent).
+    """
+    if (
+        isinstance(base, int)
+        and isinstance(exponent, int)
+        and exponent > 0
+        and abs(base) > 1
+        # bit_length(base) * exponent is an upper bound on the size of
+        # the result, computed without performing the power.
+        and base.bit_length() * exponent > _MAX_POW_RESULT_BITS
+    ):
+        msg = "calculator: result of '**' is too large to compute"
+        raise ValueError(msg)
+    result = operator.pow(base, exponent)
+    if isinstance(result, complex):
+        msg = "calculator: result of '**' is not a real number"
+        raise ValueError(msg)
+    return result
+# Allowed binary operators, keyed by AST operator node type.
+_BINARY_OPS: dict[type[ast.operator], _BinaryFn] = {
+    ast.Add: operator.add,
+    ast.Sub: operator.sub,
+    ast.Mult: operator.mul,
+    ast.Div: operator.truediv,
+    ast.FloorDiv: operator.floordiv,
+    ast.Mod: operator.mod,
+    ast.Pow: _bounded_pow,
+}
+# Allowed unary operators, keyed by AST unary-operator node type.
+_UNARY_OPS: dict[type[ast.unaryop], _UnaryFn] = {
+    ast.UAdd: operator.pos,
+    ast.USub: operator.neg,
+}
+def _evaluate(node: ast.AST) -> _Number:
+    """Walk an AST node, evaluating only the closed set of allowed
+    arithmetic node types. Raise `ValueError` on anything else.
+    Args:
+        node: An `ast.Expression` (as produced by
+            `ast.parse(..., mode="eval")`) or any node beneath it.
+    Returns:
+        The numeric value of the sub-expression rooted at `node`.
+    Raises:
+        ValueError: For a non-numeric literal (bools included), an
+            operator missing from the tables above, any other node
+            type, or a power rejected by `_bounded_pow`.
+    """
+    if isinstance(node, ast.Expression):
+        return _evaluate(node.body)
+    if isinstance(node, ast.Constant):
+        # bool is a subclass of int, so it must be excluded explicitly.
+        if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
+            return node.value
+        msg = f"calculator: literal {node.value!r} is not a number"
+        raise ValueError(msg)
+    if isinstance(node, ast.BinOp):
+        bop_type = type(node.op)
+        bop_fn = _BINARY_OPS.get(bop_type)
+        if bop_fn is None:
+            msg = f"calculator: binary operator {bop_type.__name__!r} not allowed"
+            raise ValueError(msg)
+        return bop_fn(_evaluate(node.left), _evaluate(node.right))
+    if isinstance(node, ast.UnaryOp):
+        uop_type = type(node.op)
+        uop_fn = _UNARY_OPS.get(uop_type)
+        if uop_fn is None:
+            msg = f"calculator: unary operator {uop_type.__name__!r} not allowed"
+            raise ValueError(msg)
+        return uop_fn(_evaluate(node.operand))
+    msg = f"calculator: AST node {type(node).__name__!r} not allowed"
+    raise ValueError(msg)
+# Raises `ValueError` when the expression cannot be parsed, and
+# propagates `ValueError` from `_evaluate` for disallowed syntax.
+# The docstring below is left exactly as written.
+@tool
+def calculator(expression: str) -> float:
+    """Evaluate an arithmetic expression and return the result.
+    Supports `+`, `-`, `*`, `/`, `//`, `%`, `**` and parentheses.
+    Variables, function calls, and any non-arithmetic syntax are
+    rejected — this is a calculator, not a Python interpreter.
+    Args:
+        expression: The arithmetic expression to evaluate, e.g.
+            `"(1 + 2) * 3"` or `"2 ** 10"`.
+    Returns:
+        The numeric result as a float (int values are coerced to
+        float for a uniform return type).
+    """
+    try:
+        # In "eval" mode `ast.parse` reports leading spaces or tabs as
+        # an "unexpected indent" syntax error. Surrounding whitespace
+        # never changes an expression's value, so trim it first; the
+        # error message still shows the caller's original text.
+        tree = ast.parse(expression.strip(), mode="eval")
+    except SyntaxError as exc:
+        msg = f"calculator: cannot parse expression {expression!r}: {exc.msg}"
+        raise ValueError(msg) from exc
+    result = _evaluate(tree)
+    return float(result)
+__all__ = ["calculator"]