@checkstack/ai-backend 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +99 -0
- package/package.json +6 -4
- package/src/agent-runner.test.ts +24 -24
- package/src/chat/agent-loop.test.ts +10 -10
- package/src/chat/auto-apply.test.ts +2 -2
- package/src/chat/chat-service.streamturn.test.ts +16 -1
- package/src/chat/system-prompt.test.ts +11 -0
- package/src/chat/system-prompt.ts +34 -5
- package/src/extension-points.ts +89 -0
- package/src/generated/docs-index.ts +18 -3
- package/src/hardening/handler-authz.test.ts +11 -11
- package/src/index.ts +46 -1
- package/src/mcp/server.test.ts +13 -13
- package/src/propose-apply/service.test.ts +13 -13
- package/src/registry-wiring.test.ts +17 -9
- package/src/registry-wiring.ts +29 -1
- package/src/resolver.test.ts +8 -8
- package/src/system-signals-contributor.test.ts +162 -0
- package/src/system-signals-contributor.ts +129 -0
- package/src/tool-name.test.ts +42 -0
- package/src/tool-name.ts +37 -0
- package/src/tool-registry.ts +14 -4
- package/src/tools/docs-tools.test.ts +1 -1
- package/src/tools/system-issues.test.ts +236 -0
- package/src/tools/system-issues.ts +209 -0
- package/src/tools/tool-set.e2e.test.ts +1 -1
- package/tsconfig.json +6 -0
|
@@ -122,7 +122,7 @@ export const DOCS_INDEX: readonly DocsIndexEntry[] = [
|
|
|
122
122
|
"State and scale",
|
|
123
123
|
"Related"
|
|
124
124
|
],
|
|
125
|
-
"content": "Checkstack exposes a Model Context Protocol (MCP) server so external tooling can call the same read-only tools the in-app agent uses. The server speaks Streamable HTTP (not the deprecated HTTP+SSE transport) and is mounted at `/api/ai/mcp`. Every tool call is authorized as the narrowed OAuth principal, server-side, so the model can never reach a tool its token does not allow.\n\n## Transport and discovery\n\nThe endpoint is a JSON-RPC 2.0 handler over HTTP POST. It implements the read-only surface:\n\n- `initialize` returns the protocol version and a session id (`Mcp-Session-Id` header).\n- `tools/list` returns the tools the authenticated principal may call.\n- `tools/call` invokes a tool and returns its result as a text content block.\n\nOAuth discovery and registration live under the better-auth mount (see [OAuth and scopes](/checkstack/developer-guide/ai/oauth-and-scopes/)):\n\n- `/.well-known/oauth-authorization-server`\n- `/.well-known/oauth-protected-resource`\n- `/api/auth/mcp/register` (Dynamic Client Registration)\n\n## Auth flow\n\nA client obtains an opaque OAuth access token (via the authorization code flow, after consent), then calls the MCP endpoint with `Authorization: Bearer <token>`. On every request:\n\n1. The token is introspected and narrowed to a live principal (the narrow-only model).\n2. `tools/list` is filtered by the resolver, so the client only ever sees tools the principal may call.\n3. `tools/call` re-enters the live router as that principal, forwarding the same bearer token, so the handler re-checks authorization. The resolver gate also refuses an out-of-scope tool before re-entry.\n\n> [!IMPORTANT]\n> Authorization is enforced in the handler, never by the model. The model is treated as an untrusted caller that happens to be good at picking arguments. A `tools/call` for a tool outside the token's scopes is refused server-side, not merely hidden from `tools/list`.\n\n### Read-only is structural\n\nA bare `tools/call` may only ever run a `read`-effect tool. The handler checks the resolved tool's effect after the access gate: a `mutate` or `destructive` tool is refused with a 403 (JSON-RPC error) and the live router is never re-entered. Mutating tools are also excluded from `tools/list`, so the model never sees a tool it could only ever be refused. Mutating and destructive tools reach MCP only through the two-step [propose and apply](/checkstack/developer-guide/ai/propose-apply/) flow, where the single-use proposal token is the consent gate. This makes the read-only-over-MCP guarantee a property of the handler, independent of which tools happen to be registered.\n\n## The read-only tool surface\n\nThe Phase 2 surface is the projected read-only tools: `incident.list`, `healthcheck.status`, and `anomaly.
|
|
125
|
+
"content": "Checkstack exposes a Model Context Protocol (MCP) server so external tooling can call the same read-only tools the in-app agent uses. The server speaks Streamable HTTP (not the deprecated HTTP+SSE transport) and is mounted at `/api/ai/mcp`. Every tool call is authorized as the narrowed OAuth principal, server-side, so the model can never reach a tool its token does not allow.\n\n## Transport and discovery\n\nThe endpoint is a JSON-RPC 2.0 handler over HTTP POST. It implements the read-only surface:\n\n- `initialize` returns the protocol version and a session id (`Mcp-Session-Id` header).\n- `tools/list` returns the tools the authenticated principal may call.\n- `tools/call` invokes a tool and returns its result as a text content block.\n\nOAuth discovery and registration live under the better-auth mount (see [OAuth and scopes](/checkstack/developer-guide/ai/oauth-and-scopes/)):\n\n- `/.well-known/oauth-authorization-server`\n- `/.well-known/oauth-protected-resource`\n- `/api/auth/mcp/register` (Dynamic Client Registration)\n\n## Auth flow\n\nA client obtains an opaque OAuth access token (via the authorization code flow, after consent), then calls the MCP endpoint with `Authorization: Bearer <token>`. On every request:\n\n1. The token is introspected and narrowed to a live principal (the narrow-only model).\n2. `tools/list` is filtered by the resolver, so the client only ever sees tools the principal may call.\n3. `tools/call` re-enters the live router as that principal, forwarding the same bearer token, so the handler re-checks authorization. The resolver gate also refuses an out-of-scope tool before re-entry.\n\n> [!IMPORTANT]\n> Authorization is enforced in the handler, never by the model. The model is treated as an untrusted caller that happens to be good at picking arguments. A `tools/call` for a tool outside the token's scopes is refused server-side, not merely hidden from `tools/list`.\n\n### Read-only is structural\n\nA bare `tools/call` may only ever run a `read`-effect tool. The handler checks the resolved tool's effect after the access gate: a `mutate` or `destructive` tool is refused with a 403 (JSON-RPC error) and the live router is never re-entered. Mutating tools are also excluded from `tools/list`, so the model never sees a tool it could only ever be refused. Mutating and destructive tools reach MCP only through the two-step [propose and apply](/checkstack/developer-guide/ai/propose-apply/) flow, where the single-use proposal token is the consent gate. This makes the read-only-over-MCP guarantee a property of the handler, independent of which tools happen to be registered.\n\n## The read-only tool surface\n\nThe Phase 2 surface is the projected read-only tools: `incident.list`, `healthcheck.status`, and `anomaly.list`. Each is a projection of an existing oRPC read procedure, so its input schema and access rules come straight from the source procedure and never drift.\n\n## Connecting a client\n\nPoint any MCP client that supports OAuth and Streamable HTTP at the endpoint:\n\n```bash\n# 1. Discover the authorization server.\ncurl https://your-checkstack/.well-known/oauth-protected-resource\n\n# 2. After the OAuth flow yields a token, list tools.\ncurl -X POST https://your-checkstack/api/ai/mcp \\\n -H \"authorization: Bearer $TOKEN\" \\\n -H \"content-type: application/json\" \\\n -d '{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"tools/list\"}'\n\n# 3. Call a read-only tool.\ncurl -X POST https://your-checkstack/api/ai/mcp \\\n -H \"authorization: Bearer $TOKEN\" \\\n -H \"content-type: application/json\" \\\n -d '{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/call\",\"params\":{\"name\":\"incident.list\",\"arguments\":{}}}'\n```\n\n## State and scale\n\nThe only pod-local state is the live MCP connection registry, which tracks connections terminated on this pod for bookkeeping. It is never a source of truth: a principal's rights are re-derived from the durable OAuth token on every request, and the rate-limit counters and token state live in shared Postgres. So any pod answers the same way for the same token.\n\n## Related\n\nThe MCP server resolves its tools through the [tool registry](/checkstack/developer-guide/ai/tool-registry/), authenticates via [OAuth and scopes](/checkstack/developer-guide/ai/oauth-and-scopes/), runs mutating tools only through [propose and apply](/checkstack/developer-guide/ai/propose-apply/), and shares its spine with the [internal chat](/checkstack/developer-guide/ai/chat/). The wire behaviour (initialize / tools-list / tools-call, an out-of-scope tool refused with 403 without re-entering the router, and a mutating tool refused by the structural effect-gate) is exercised by `core/ai-backend/src/mcp/server.test.ts` and the env-gated `core/ai-backend/src/mcp/mcp-conformance.it.test.ts`. See the [AI platform overview](/checkstack/developer-guide/ai/) for the full security model.",
|
|
126
126
|
"truncated": false
|
|
127
127
|
},
|
|
128
128
|
{
|
|
@@ -169,7 +169,7 @@ export const DOCS_INDEX: readonly DocsIndexEntry[] = [
|
|
|
169
169
|
"State and scale",
|
|
170
170
|
"Related"
|
|
171
171
|
],
|
|
172
|
-
"content": "Every AI tool declares an `effect`: `read`, `mutate`, or `destructive`. Read tools run directly. Mutating and destructive tools never run directly: they go through a transport-agnostic two-step flow where `propose` runs a dry-run and returns a single-use token, and `apply` consumes the token and commits. This is how the platform keeps a model from silently changing state, and it works identically in the in-app chat and over MCP.\n\n## The two steps\n\n1. `propose(toolName, input)` resolves the tool, re-checks authorization against the tool's `requiredAccessRules`, and runs the tool's `dryRun`. The dry-run validates the input without mutating anything (it reuses the mature validation paths, e.g. the automation plugin's `validateDefinition` or the health-check plugin's `validateConfiguration`). On success it persists a `proposed` audit row and returns a proposal token plus a human-readable summary and the validated payload.\n2. `apply(token)` parses the token, fetches the proposal row, verifies the nonce in constant time, checks the TTL and status, re-checks authorization (the principal's rights may have changed since `propose`), then atomically transitions the row to `applied` and runs the tool's `execute`. `apply` executes ONLY the server-stored `proposedPayload` captured at `propose` time; it never accepts caller-supplied arguments. As a belt-and-suspenders guard the stored payload is re-parsed against the tool's input schema immediately before `execute`, so a payload that no longer satisfies an evolved schema is rejected rather than run.\n\nIn chat, the summary and payload render a confirm card between the two steps. Over MCP, `propose` returns the token and the client calls `apply` as a follow-up (MCP client elicitation is not universal, so the token is the consent gate).\n\n```ts\n// Step 1 — never mutates.\nconst proposal = await ai.proposeTool({\n toolName: \"automation.propose\",\n input: { name: \"Page on outage\", definition: draftDefinition },\n});\n// proposal = { token, summary, payload, toolCallId, expiresAt }\n\n// Step 2 — a human has reviewed `proposal.summary` / `proposal.payload`.\nconst applied = await ai.applyTool({ token: proposal.token });\n// applied = { toolCallId, result }\n```\n\n## The proposal token\n\nThe token format is `propose:<rowId>.<nonce>`. The `proposed` audit row IS the token store: there is no separate ephemeral table.\n\n- The `nonce` is 32 random bytes (hex) stored on the row and compared in constant time at `apply`.\n- The TTL is 10 minutes. A token older than that is rejected even if its row was not yet swept.\n- `apply` is single-use and atomic: it runs one `UPDATE ... WHERE id = ? AND status = 'proposed' AND proposal_expires_at > now()`. Exactly one caller wins the `proposed -> applied` transition, so a second `apply` (even a concurrent one) is rejected.\n\nA background sweep flips expired `proposed` rows to `expired`, keeping them as audit history. The sweep is hygiene only; correctness never depends on it because `apply` rejects an expired token regardless of the swept status.\n\n> [!IMPORTANT]\n> Authorization is re-checked at both `propose` and `apply`. A rule the principal has lost between the two steps blocks `apply`. Service principals can never drive the registry, so a proposal is always bound to a real user or application.\n\n## The flagship flow: automation.propose\n\n`automation.propose` is a hand-authored tool that now lives in and registers from **automation-backend** via `aiToolExtensionPoint` (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)); `ai-backend` no longer owns it. The model authors a structured draft automation definition; the tool validates it against the live trigger and action registries (the automation plugin's `validateDefinition` dry-run) and returns the validated draft. It never creates an automation at `propose` time. A human reviews the draft (in chat, the confirm card deep-links into the collapsed-card automation editor seeded with the draft) and applies it; only then does `apply` call `createAutomation`.\n\n```ts\n// effect: \"mutate\"; requiredAccessRules: [\"automation.automation.manage\"]\n// dryRun -> validateDefinition (no mutation); returns the validated draft + YAML\n// execute -> createAutomation (reached only via apply)\n```\n\n## healthcheck.propose\n\n`healthcheck.propose` mirrors `automation.propose` for health checks (it is what completes the end-to-end \"create a script health check\" flow, see [Context tools](/checkstack/developer-guide/ai/context-tools/)). Like the automation tools it now lives in and registers from **healthcheck-backend** via `aiToolExtensionPoint` (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)) rather than from `ai-backend`. The model authors a structured draft configuration; the tool's `dryRun` deep-validates it via the `healthCheckContract.validateConfiguration` RPC. That RPC runs the SAME strategy/collector resolution plus migrate-then-validate-strict logic the create and GitOps-apply paths use, so propose-time errors are identical to apply-time errors: it confirms the `strategyId` and every collector id exist, and validates each config against its registered schema (wrong types, missing required fields, AND unknown/typo'd keys), not just required-field presence. It returns `{ valid, errors: [{ path, message }] }` and persists nothing. On success the tool resolves the strategy/collector display names (`getStrategies` / `getCollectors`) and renders a confirm card describing the strategy, collectors, interval, and any inline script source. It never creates a health check at `propose` time; `apply` calls `healthCheckContract.createConfiguration`.\n\n```ts\n// effect: \"mutate\"; requiredAccessRules: [\"healthcheck.healthcheck.manage\"]\n// dryRun -> validateConfiguration (deep, no mutation); returns { valid, errors }\n// execute -> createConfiguration (reached only via apply)\n```\n\n`validateConfiguration` is itself gated by `healthcheck.healthcheck.manage` (the privilege the create form requires) and is the health-check mirror of automation's `validateDefinition`. The shared validator (`collectConfigurationIssues` / `validateVersionedConfigStrict` in `healthcheck-backend`) is the single migrate-then-validate-strict implementation behind both the RPC and the GitOps reconcile path, so the editor, the AI propose tool, and GitOps all agree on what counts as valid.\n\nCreating a health-check configuration is a non-destructive create, so it is `mutate` (not `destructive`): it auto-applies in `auto` mode and is confirm-gated in `approve` mode, exactly like `automation.propose`. A single `requiredAccessRules` of `healthcheck.healthcheck.manage` keeps the framework's all-of (AND) gate correct, and the propose/apply service re-checks `isAllowed` at both `propose` and `apply`.\n\n> [!NOTE]\n> A newly created health check does not execute until it is assigned to a system. The `healthcheck.propose` summary and description say so, so the assistant tells the operator to assign it after applying.\n\n## Full CRUD: update and delete tools\n\nBeyond create, the assistant has update and delete tools for both resource types, so it can manage existing objects, not only author new ones. These tools register from their owning plugins too: `automation.update` / `automation.delete` from **automation-backend** and `healthcheck.update` / `healthcheck.delete` from **healthcheck-backend**, both via `aiToolExtensionPoint` (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)). They follow the same propose/apply gate.\n\n- `healthcheck.update` / `automation.update` (`effect: \"mutate\"`): take an id plus a partial body. `healthcheck.update` merges the body over the live config and deep-validates the RESULT (the same `validateConfiguration` path as create, including assertion field/operator validation); `automation.update` validates a provided `definition` via `validateDefinition`. Like the propose tools they auto-apply in `auto` mode and confirm in `approve` mode.\n- `healthcheck.delete` / `automation.delete` (`effect: \"destructive\"`): take an id; `dryRun` resolves the target so the confirm card names exactly what is removed. Being destructive, they ALWAYS route through the confirm card in BOTH modes - they can never auto-apply (the `decideToolDisposition` invariant, regression-guarded by each owning plugin's own tests). All four are gated by the same `*.manage` rule as create and re-checked at `propose` and `apply`.\n\n```ts\n// healthcheck.update / automation.update -> effect: \"mutate\" (auto-applies in auto mode)\n// healthcheck.delete / automation.delete -> effect: \"destructive\" (ALWAYS confirm-gated)\n```\n\n## Always-visible changes: diffs and the applied card\n\nA change is always shown to the operator, in BOTH modes. An update tool's `dryRun` computes a before -> after field diff (`computeFieldDiff`) and returns it on the proposal preview; it threads through `ProposeResult` to the chat card. In approve mode the confirm card renders that diff (instead of the full payload) so the operator sees exactly what changes before approving. In auto mode the change auto-applies, but the result is NOT silent: the tool returns an `AutoAppliedResult` (`__applied: true`) carrying the same summary + diff, and the chat renders a read-only \"Applied\" card so the operator still sees what was created or changed. A create has no diff (the whole payload is new), so its card shows the created object.\n\n```ts\n// dryRun -> AiProposalPreview { summary, payload, diff? }\n// approve mode: ConfirmCardResult { __confirm, ..., diff? } -> confirm card + diff\n// auto mode: AutoAppliedResult { __applied, summary, result, diff? } -> read-only applied card\n```\n\n## Authorization: every tool call runs as the originating user\n\nThe model is an untrusted caller. A tool must never let it reach data or mutations the human behind the conversation could not reach directly, even when the request happens to go through a tool or an MCP function. Two layers enforce this:\n\n1. **The resolver gate decides what is OFFERED.** A tool is surfaced to the model only when the principal satisfies its `requiredAccessRules` (`resolveTools` / `isAllowed`). The model is never handed a tool the principal lacks.\n2. **A user-scoped RPC client decides what actually RUNS.** Both `dryRun` and `execute` receive an `rpcClient` bound to the ORIGINATING user (built from the request's own session cookie / bearer). Any plugin procedure it calls re-enters the live router AS THAT USER, so the handler runs `autoAuthMiddleware` - access rules AND per-resource/team `instanceAccess` scope - exactly as a direct UI/RPC call. A tool MUST use this client for plugin calls and MUST NEVER capture a trusted service client: the trusted client short-circuits every principal check, so calling it would let the model read or mutate team-scoped resources the user cannot reach - a privilege escalation.\n\nBecause the second layer re-enters as the user, a tool can never broaden access beyond the user's own permissions, even for resources gated to a specific team. When a call is refused, the propose/apply service names the missing rule in the error (`Forbidden: <tool> (missing permission: <rules>)`), so the assistant can tell the operator exactly which permission a read, mutation, or delete needs.\n\nTools register from many plugins (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)) rather than from a single central spot in `ai-backend`, and every registered tool, wherever it is owned, falls into one category:\n\n- **Mutating tools** (`effect !== \"read\"`, e.g. `automation.propose`, `healthcheck.propose`/`.update`/`.delete`, `incident.create`/`.update`/`.delete`/`.addUpdate`/`.resolve`/`.addLink`/`.removeLink`, `maintenance.create`/`.update`/`.delete`/`.addUpdate`/`.close`/`.addLink`/`.removeLink`, `catalog.createSystem`/`.updateSystem`/`.deleteSystem`/`.createGroup`/`.updateGroup`/`.deleteGroup`/`.addSystemToGroup`/`.removeSystemFromGroup`) route through the propose/apply service, which re-checks `isAllowed` at both `propose` and `apply`, then runs `dryRun`/`execute` with the user-scoped client (so the underlying create/update/delete RPC enforces handler authz as the user). Each is owned by its plugin and registered via `aiToolExtensionPoint`. A `create`/`update`/`addUpdate`/`resolve`/`close`/`addLink` is `mutate`; a `delete`/`removeLink` is `destructive` (always confirm-gated). Update tools dry-run against the live record and surface a before -> after diff on the card.\n- **Composite read tools** (`getScriptContext`, `testScript`, `listCapabilities`, `getCapabilitySchema`, plus `ai-backend`'s own `ai.searchDocs` / `ai.getDoc` / `ai.probeUrl`) run their own `execute` with the user-scoped client; the resolver gate plus that user-scoped fan-out are the authorization authority.\n- **Projected read tools** (`incident.list` / `incident.get`, `healthcheck.status`, `anomaly.explain`, `maintenance.list` / `maintenance.get`, `catalog.listSystems` / `catalog.listGroups`, `slo.listObjectives`, `dependency.list`) are exposed by their owning plugins via `aiToolProjectionExtensionPoint`; `ai-backend` collects their routing in an `afterPluginsReady` phase. Each carries its source procedure's own access rules and is routed through the live router as the logged-in principal, so handler-side authz holds. (`catalog.listSystems` in particular lets the assistant resolve a system name to its id before creating an incident, maintenance, or health check.)\n\nThis invariant is regression-guarded per owner. `ai-backend`'s own tools are covered by `core/ai-backend/src/tools/tool-set.e2e.test.ts` and `core/ai-backend/src/hardening/handler-authz.test.ts`, while each plugin tests the tools and projections it registers, so a tool offered to a principal who lacks its rules fails the suite rather than silently bypassing authz.\n\n## Audit log\n\nEvery tool invocation across both transports writes an `ai_tool_calls` row, which doubles as the proposal-token store:\n\n- `read` tools write a row with status `executed`.\n- `propose` writes a `proposed` row; `apply` transitions it to `applied`; a failed `execute` is recorded as `failed`; an unconsumed proposal ages to `expired`.\n- The row stores a SHA-256 `argsHash` of the canonical-JSON arguments, never the raw arguments (they may carry PII or secrets). The `proposedPayload` column holds the validated, ready-to-apply payload captured at `propose` time.\n- The proposer is recorded in `principalKind`/`principalId`. The principal that actually consumes the token at `apply` is recorded separately in `appliedByKind`/`appliedById`. These are normally identical, but a cross-principal apply is RECORDED rather than rejected: the single-use 256-bit token plus the live authorization re-check already hold the security invariant, so the audit log simply attributes the apply to the real applier instead of silently crediting the proposer.\n\nThe platform emits an `ai.toolCalled` hook on the shared event bus for each call, carrying only metadata (`principalKind`, `principalId`, `transport`, `toolName`, `effect`, `status`) and never arguments or results. Subscribers react to the fact of a call, not its contents.\n\n## Per-principal tool rate-limit budgets\n\nEvery tool invocation across both transports is also rate-limited per principal. The budget is a shared-Postgres rolling-window counter over `ai_tool_calls`: before a tool runs, the platform counts the rows the principal has written in the trailing window (using the `ai_tool_calls_principal_created_idx` index) and refuses the call once the count meets the cap.\n\n```ts\n// Enforced before execution on BOTH transports (MCP tools/call + the chat loop).\nawait enforceToolBudget({ db, principal, max: 60, windowMs: 60_000 });\n// throws ToolBudgetExceededError when over budget\n```\n\nBecause the count is read from the same shared table every pod writes to, the cap holds across all pods. An in-memory per-pod limiter would let N pods each allow the cap (N times the intended limit), which a single-process test would never catch, so the limiter is Postgres-backed by design. This mirrors the Phase 2 DCR rate-limiter pattern. Over MCP an over-budget call returns a JSON-RPC rate-limited error (HTTP 429); in chat it surfaces as a friendly error in the stream.\n\n## State and scale\n\nThe audit log, the proposal tokens, and the rate-limit budget counter all live in shared Postgres. A token proposed on one pod is consumable on any other; an expired token is rejected on every pod; the budget count is identical on every pod. No proposal, audit, or budget state is pod-local.\n\n## Related\n\nProposable tools come from the [tool registry](/checkstack/developer-guide/ai/tool-registry/); the [internal chat](/checkstack/developer-guide/ai/chat/) renders the proposal as a confirm card and the [MCP server](/checkstack/developer-guide/ai/mcp-server/) returns it for a follow-up `apply`. The token lifecycle (single-use, expiry, constant-time nonce) is regression-guarded in `core/ai-backend/src/propose-apply/`, and the per-principal budget is verified cross-pod in `core/ai-backend/src/rate-limit/tool-budget.it.test.ts`. See the [AI platform overview](/checkstack/developer-guide/ai/) for the full security model.\n\nIn chat, the [permission mode](/checkstack/developer-guide/ai/permission-mode/) decides whether a `mutate` tool's proposal is auto-applied server-side (`auto`) or surfaced as a confirm card (`approve`). It reuses this exact apply path, so a destructive tool always requires a human apply regardless of the mode.",
|
|
172
|
+
"content": "Every AI tool declares an `effect`: `read`, `mutate`, or `destructive`. Read tools run directly. Mutating and destructive tools never run directly: they go through a transport-agnostic two-step flow where `propose` runs a dry-run and returns a single-use token, and `apply` consumes the token and commits. This is how the platform keeps a model from silently changing state, and it works identically in the in-app chat and over MCP.\n\n## The two steps\n\n1. `propose(toolName, input)` resolves the tool, re-checks authorization against the tool's `requiredAccessRules`, and runs the tool's `dryRun`. The dry-run validates the input without mutating anything (it reuses the mature validation paths, e.g. the automation plugin's `validateDefinition` or the health-check plugin's `validateConfiguration`). On success it persists a `proposed` audit row and returns a proposal token plus a human-readable summary and the validated payload.\n2. `apply(token)` parses the token, fetches the proposal row, verifies the nonce in constant time, checks the TTL and status, re-checks authorization (the principal's rights may have changed since `propose`), then atomically transitions the row to `applied` and runs the tool's `execute`. `apply` executes ONLY the server-stored `proposedPayload` captured at `propose` time; it never accepts caller-supplied arguments. As a belt-and-suspenders guard the stored payload is re-parsed against the tool's input schema immediately before `execute`, so a payload that no longer satisfies an evolved schema is rejected rather than run.\n\nIn chat, the summary and payload render a confirm card between the two steps. Over MCP, `propose` returns the token and the client calls `apply` as a follow-up (MCP client elicitation is not universal, so the token is the consent gate).\n\n```ts\n// Step 1 — never mutates.\nconst proposal = await ai.proposeTool({\n toolName: \"automation.propose\",\n input: { name: \"Page on outage\", definition: draftDefinition },\n});\n// proposal = { token, summary, payload, toolCallId, expiresAt }\n\n// Step 2 — a human has reviewed `proposal.summary` / `proposal.payload`.\nconst applied = await ai.applyTool({ token: proposal.token });\n// applied = { toolCallId, result }\n```\n\n## The proposal token\n\nThe token format is `propose:<rowId>.<nonce>`. The `proposed` audit row IS the token store: there is no separate ephemeral table.\n\n- The `nonce` is 32 random bytes (hex) stored on the row and compared in constant time at `apply`.\n- The TTL is 10 minutes. A token older than that is rejected even if its row was not yet swept.\n- `apply` is single-use and atomic: it runs one `UPDATE ... WHERE id = ? AND status = 'proposed' AND proposal_expires_at > now()`. Exactly one caller wins the `proposed -> applied` transition, so a second `apply` (even a concurrent one) is rejected.\n\nA background sweep flips expired `proposed` rows to `expired`, keeping them as audit history. The sweep is hygiene only; correctness never depends on it because `apply` rejects an expired token regardless of the swept status.\n\n> [!IMPORTANT]\n> Authorization is re-checked at both `propose` and `apply`. A rule the principal has lost between the two steps blocks `apply`. Service principals can never drive the registry, so a proposal is always bound to a real user or application.\n\n## The flagship flow: automation.propose\n\n`automation.propose` is a hand-authored tool that now lives in and registers from **automation-backend** via `aiToolExtensionPoint` (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)); `ai-backend` no longer owns it. The model authors a structured draft automation definition; the tool validates it against the live trigger and action registries (the automation plugin's `validateDefinition` dry-run) and returns the validated draft. It never creates an automation at `propose` time. A human reviews the draft (in chat, the confirm card deep-links into the collapsed-card automation editor seeded with the draft) and applies it; only then does `apply` call `createAutomation`.\n\n```ts\n// effect: \"mutate\"; requiredAccessRules: [\"automation.automation.manage\"]\n// dryRun -> validateDefinition (no mutation); returns the validated draft + YAML\n// execute -> createAutomation (reached only via apply)\n```\n\n## healthcheck.propose\n\n`healthcheck.propose` mirrors `automation.propose` for health checks (it is what completes the end-to-end \"create a script health check\" flow, see [Context tools](/checkstack/developer-guide/ai/context-tools/)). Like the automation tools it now lives in and registers from **healthcheck-backend** via `aiToolExtensionPoint` (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)) rather than from `ai-backend`. The model authors a structured draft configuration; the tool's `dryRun` deep-validates it via the `healthCheckContract.validateConfiguration` RPC. That RPC runs the SAME strategy/collector resolution plus migrate-then-validate-strict logic the create and GitOps-apply paths use, so propose-time errors are identical to apply-time errors: it confirms the `strategyId` and every collector id exist, and validates each config against its registered schema (wrong types, missing required fields, AND unknown/typo'd keys), not just required-field presence. It returns `{ valid, errors: [{ path, message }] }` and persists nothing. On success the tool resolves the strategy/collector display names (`getStrategies` / `getCollectors`) and renders a confirm card describing the strategy, collectors, interval, and any inline script source. It never creates a health check at `propose` time; `apply` calls `healthCheckContract.createConfiguration`.\n\n```ts\n// effect: \"mutate\"; requiredAccessRules: [\"healthcheck.healthcheck.manage\"]\n// dryRun -> validateConfiguration (deep, no mutation); returns { valid, errors }\n// execute -> createConfiguration (reached only via apply)\n```\n\n`validateConfiguration` is itself gated by `healthcheck.healthcheck.manage` (the privilege the create form requires) and is the health-check mirror of automation's `validateDefinition`. The shared validator (`collectConfigurationIssues` / `validateVersionedConfigStrict` in `healthcheck-backend`) is the single migrate-then-validate-strict implementation behind both the RPC and the GitOps reconcile path, so the editor, the AI propose tool, and GitOps all agree on what counts as valid.\n\nCreating a health-check configuration is a non-destructive create, so it is `mutate` (not `destructive`): it auto-applies in `auto` mode and is confirm-gated in `approve` mode, exactly like `automation.propose`. A single `requiredAccessRules` of `healthcheck.healthcheck.manage` keeps the framework's all-of (AND) gate correct, and the propose/apply service re-checks `isAllowed` at both `propose` and `apply`.\n\n> [!NOTE]\n> A newly created health check does not execute until it is assigned to a system. The `healthcheck.propose` summary and description say so, so the assistant tells the operator to assign it after applying.\n\n## Full CRUD: update and delete tools\n\nBeyond create, the assistant has update and delete tools for both resource types, so it can manage existing objects, not only author new ones. These tools register from their owning plugins too: `automation.update` / `automation.delete` from **automation-backend** and `healthcheck.update` / `healthcheck.delete` from **healthcheck-backend**, both via `aiToolExtensionPoint` (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)). They follow the same propose/apply gate.\n\n- `healthcheck.update` / `automation.update` (`effect: \"mutate\"`): take an id plus a partial body. `healthcheck.update` merges the body over the live config and deep-validates the RESULT (the same `validateConfiguration` path as create, including assertion field/operator validation); `automation.update` validates a provided `definition` via `validateDefinition`. Like the propose tools they auto-apply in `auto` mode and confirm in `approve` mode.\n- `healthcheck.delete` / `automation.delete` (`effect: \"destructive\"`): take an id; `dryRun` resolves the target so the confirm card names exactly what is removed. Being destructive, they ALWAYS route through the confirm card in BOTH modes - they can never auto-apply (the `decideToolDisposition` invariant, regression-guarded by each owning plugin's own tests). All four are gated by the same `*.manage` rule as create and re-checked at `propose` and `apply`.\n\n```ts\n// healthcheck.update / automation.update -> effect: \"mutate\" (auto-applies in auto mode)\n// healthcheck.delete / automation.delete -> effect: \"destructive\" (ALWAYS confirm-gated)\n```\n\n## Always-visible changes: diffs and the applied card\n\nA change is always shown to the operator, in BOTH modes. An update tool's `dryRun` computes a before -> after field diff (`computeFieldDiff`) and returns it on the proposal preview; it threads through `ProposeResult` to the chat card. In approve mode the confirm card renders that diff (instead of the full payload) so the operator sees exactly what changes before approving. In auto mode the change auto-applies, but the result is NOT silent: the tool returns an `AutoAppliedResult` (`__applied: true`) carrying the same summary + diff, and the chat renders a read-only \"Applied\" card so the operator still sees what was created or changed. A create has no diff (the whole payload is new), so its card shows the created object.\n\n```ts\n// dryRun -> AiProposalPreview { summary, payload, diff? }\n// approve mode: ConfirmCardResult { __confirm, ..., diff? } -> confirm card + diff\n// auto mode: AutoAppliedResult { __applied, summary, result, diff? } -> read-only applied card\n```\n\n## Authorization: every tool call runs as the originating user\n\nThe model is an untrusted caller. A tool must never let it reach data or mutations the human behind the conversation could not reach directly, even when the request happens to go through a tool or an MCP function. Two layers enforce this:\n\n1. **The resolver gate decides what is OFFERED.** A tool is surfaced to the model only when the principal satisfies its `requiredAccessRules` (`resolveTools` / `isAllowed`). The model is never handed a tool the principal lacks.\n2. **A user-scoped RPC client decides what actually RUNS.** Both `dryRun` and `execute` receive an `rpcClient` bound to the ORIGINATING user (built from the request's own session cookie / bearer). Any plugin procedure it calls re-enters the live router AS THAT USER, so the handler runs `autoAuthMiddleware` - access rules AND per-resource/team `instanceAccess` scope - exactly as a direct UI/RPC call. A tool MUST use this client for plugin calls and MUST NEVER capture a trusted service client: the trusted client short-circuits every principal check, so calling it would let the model read or mutate team-scoped resources the user cannot reach - a privilege escalation.\n\nBecause the second layer re-enters as the user, a tool can never broaden access beyond the user's own permissions, even for resources gated to a specific team. When a call is refused, the propose/apply service names the missing rule in the error (`Forbidden: <tool> (missing permission: <rules>)`), so the assistant can tell the operator exactly which permission a read, mutation, or delete needs.\n\nTools register from many plugins (see [Registering tools](/checkstack/developer-guide/ai/registering-tools/)) rather than from a single central spot in `ai-backend`, and every registered tool, wherever it is owned, falls into one category:\n\n- **Mutating tools** (`effect !== \"read\"`, e.g. `automation.propose`, `healthcheck.propose`/`.update`/`.delete`, `incident.create`/`.update`/`.delete`/`.addUpdate`/`.resolve`/`.addLink`/`.removeLink`, `maintenance.create`/`.update`/`.delete`/`.addUpdate`/`.close`/`.addLink`/`.removeLink`, `catalog.createSystem`/`.updateSystem`/`.deleteSystem`/`.createGroup`/`.updateGroup`/`.deleteGroup`/`.addSystemToGroup`/`.removeSystemFromGroup`) route through the propose/apply service, which re-checks `isAllowed` at both `propose` and `apply`, then runs `dryRun`/`execute` with the user-scoped client (so the underlying create/update/delete RPC enforces handler authz as the user). Each is owned by its plugin and registered via `aiToolExtensionPoint`. A `create`/`update`/`addUpdate`/`resolve`/`close`/`addLink` is `mutate`; a `delete`/`removeLink` is `destructive` (always confirm-gated). Update tools dry-run against the live record and surface a before -> after diff on the card.\n- **Composite read tools** (`getScriptContext`, `testScript`, `listCapabilities`, `getCapabilitySchema`, plus `ai-backend`'s own `ai.searchDocs` / `ai.getDoc` / `ai.probeUrl`) run their own `execute` with the user-scoped client; the resolver gate plus that user-scoped fan-out are the authorization authority.\n- **Projected read tools** (`incident.list` / `incident.get`, `healthcheck.status`, `anomaly.list`, `maintenance.list` / `maintenance.get`, `catalog.listSystems` / `catalog.listGroups`, `slo.listObjectives`, `dependency.list`) are exposed by their owning plugins via `aiToolProjectionExtensionPoint`; `ai-backend` collects their routing in an `afterPluginsReady` phase. Each carries its source procedure's own access rules and is routed through the live router as the logged-in principal, so handler-side authz holds. (`catalog.listSystems` in particular lets the assistant resolve a system name to its id before creating an incident, maintenance, or health check.)\n\nThis invariant is regression-guarded per owner. `ai-backend`'s own tools are covered by `core/ai-backend/src/tools/tool-set.e2e.test.ts` and `core/ai-backend/src/hardening/handler-authz.test.ts`, while each plugin tests the tools and projections it registers, so a tool offered to a principal who lacks its rules fails the suite rather than silently bypassing authz.\n\n## Audit log\n\nEvery tool invocation across both transports writes an `ai_tool_calls` row, which doubles as the proposal-token store:\n\n- `read` tools write a row with status `executed`.\n- `propose` writes a `proposed` row; `apply` transitions it to `applied`; a failed `execute` is recorded as `failed`; an unconsumed proposal ages to `expired`.\n- The row stores a SHA-256 `argsHash` of the canonical-JSON arguments, never the raw arguments (they may carry PII or secrets). The `proposedPayload` column holds the validated, ready-to-apply payload captured at `propose` time.\n- The proposer is recorded in `principalKind`/`principalId`. The principal that actually consumes the token at `apply` is recorded separately in `appliedByKind`/`appliedById`. These are normally identical, but a cross-principal apply is RECORDED rather than rejected: the single-use 256-bit token plus the live authorization re-check already hold the security invariant, so the audit log simply attributes the apply to the real applier instead of silently crediting the proposer.\n\nThe platform emits an `ai.toolCalled` hook on the shared event bus for each call, carrying only metadata (`principalKind`, `principalId`, `transport`, `toolName`, `effect`, `status`) and never arguments or results. Subscribers react to the fact of a call, not its contents.\n\n## Per-principal tool rate-limit budgets\n\nEvery tool invocation across both transports is also rate-limited per principal. The budget is a shared-Postgres rolling-window counter over `ai_tool_calls`: before a tool runs, the platform counts the rows the principal has written in the trailing window (using the `ai_tool_calls_principal_created_idx` index) and refuses the call once the count meets the cap.\n\n```ts\n// Enforced before execution on BOTH transports (MCP tools/call + the chat loop).\nawait enforceToolBudget({ db, principal, max: 60, windowMs: 60_000 });\n// throws ToolBudgetExceededError when over budget\n```\n\nBecause the count is read from the same shared table every pod writes to, the cap holds across all pods. An in-memory per-pod limiter would let N pods each allow the cap (N times the intended limit), which a single-process test would never catch, so the limiter is Postgres-backed by design. This mirrors the Phase 2 DCR rate-limiter pattern. Over MCP an over-budget call returns a JSON-RPC rate-limited error (HTTP 429); in chat it surfaces as a friendly error in the stream.\n\n## State and scale\n\nThe audit log, the proposal tokens, and the rate-limit budget counter all live in shared Postgres. A token proposed on one pod is consumable on any other; an expired token is rejected on every pod; the budget count is identical on every pod. No proposal, audit, or budget state is pod-local.\n\n## Related\n\nProposable tools come from the [tool registry](/checkstack/developer-guide/ai/tool-registry/); the [internal chat](/checkstack/developer-guide/ai/chat/) renders the proposal as a confirm card and the [MCP server](/checkstack/developer-guide/ai/mcp-server/) returns it for a follow-up `apply`. The token lifecycle (single-use, expiry, constant-time nonce) is regression-guarded in `core/ai-backend/src/propose-apply/`, and the per-principal budget is verified cross-pod in `core/ai-backend/src/rate-limit/tool-budget.it.test.ts`. See the [AI platform overview](/checkstack/developer-guide/ai/) for the full security model.\n\nIn chat, the [permission mode](/checkstack/developer-guide/ai/permission-mode/) decides whether a `mutate` tool's proposal is auto-applied server-side (`auto`) or surfaced as a confirm card (`approve`). It reuses this exact apply path, so a destructive tool always requires a human apply regardless of the mode.",
|
|
173
173
|
"truncated": false
|
|
174
174
|
},
|
|
175
175
|
{
|
|
@@ -185,6 +185,21 @@ export const DOCS_INDEX: readonly DocsIndexEntry[] = [
|
|
|
185
185
|
"content": "AI tools are owned by the plugins whose domain they act on, not by `ai-backend`. A plugin contributes tools by registering them through `ai-backend`'s extension points from its own `init` (or `register`). The dependency direction is always plugin -> `@checkstack/ai-backend`; `ai-backend` never depends on a capability plugin's `*-common`. This is the contract an external plugin author follows, and it is exactly how the first-party health-check and automation tools are wired.\n\n## The two extension points\n\n- `aiToolExtensionPoint` - register a hand-authored tool (a `RegisteredAiTool` with its own `execute` / `dryRun`): propose/apply mutations, capability catalogs, script-context tools, URL probes, anything self-contained.\n- `aiToolProjectionExtensionPoint` - expose an existing oRPC read procedure as a read tool. The projected tool inherits the procedure's input schema and access rules verbatim; its execution is routed by the transport (MCP / chat) back through the live router as the principal, so handler-side authz holds.\n\nBoth come from `@checkstack/ai-backend`. Because `ai-backend` registers the extension points in its `register()`, and your plugin depends on `@checkstack/ai-backend`, your plugin loads after it - so `env.getExtensionPoint(...)` resolves.\n\n## Register a hand-authored tool\n\n```ts\nimport { aiToolExtensionPoint, type RegisteredAiTool } from \"@checkstack/ai-backend\";\nimport { pluginMetadata, MyApi, myAccess } from \"@checkstack/my-common\";\nimport { qualifyAccessRuleId } from \"@checkstack/common\";\n\nfunction buildMyAiTools(): RegisteredAiTool[] {\n return [\n {\n name: \"my.doThing\", // already qualified -> kept as-is\n description: \"Do the thing. Requires confirmation before it takes effect.\",\n effect: \"mutate\", // \"read\" | \"mutate\" | \"destructive\"\n input: MyDoThingInputSchema,\n requiredAccessRules: [qualifyAccessRuleId(pluginMetadata, myAccess.manage)],\n // `rpcClient` is the USER-SCOPED client: it re-enters the router as the\n // originating user, so the procedure's own handler authz (access rules +\n // per-resource/team scope) applies. Resolve the plugin client INSIDE the\n // handler from this arg. NEVER capture a trusted service client at factory\n // scope - that bypasses the user's authorization (privilege escalation).\n dryRun: async ({ input, principal, rpcClient }) => ({ summary: \"...\", payload: input }),\n execute: async ({ input, principal, rpcClient }) =>\n rpcClient.forPlugin(MyApi).doThing(input),\n },\n ];\n}\n\n// in registerInit({ init }):\nconst aiToolExt = env.getExtensionPoint(aiToolExtensionPoint);\nfor (const tool of buildMyAiTools()) {\n aiToolExt.registerTool(tool, pluginMetadata);\n}\n```\n\nAn unqualified `name` is auto-qualified to `<pluginId>.<name>` on registration; an already-qualified name (e.g. `my.doThing`) is kept. `mutate`/`destructive` tools route through [propose and apply](/checkstack/developer-guide/ai/propose-apply/) - they never run inline.\n\n> [!IMPORTANT]\n> Always call plugin procedures through the `rpcClient` handed to `dryRun` /\n> `execute`. It is bound to the user who triggered the tool, so original\n> access controls (including team-scoped resources) still apply, scoped to that\n> user. A tool must not broaden what the user can access just because the\n> request arrived via a tool or MCP function.\n\n## Expose a read procedure as a projection\n\n```ts\nimport { aiToolProjectionExtensionPoint, deferredProjectionExecute } from \"@checkstack/ai-backend\";\nimport { pluginMetadata, myContract } from \"@checkstack/my-common\";\n\n// in init (or register):\nenv.getExtensionPoint(aiToolProjectionExtensionPoint).expose({\n procedure: myContract.listThings,\n sourcePluginMetadata: pluginMetadata,\n procedureKey: \"listThings\",\n name: \"my.list\",\n description: \"List things. Read-only.\",\n effect: \"read\",\n execute: deferredProjectionExecute, // routed by the transport, never run directly\n});\n```\n\n`ai-backend` collects each exposed projection's routing (`{ pluginId, procedureKey }`) in its `afterPluginsReady` phase - once every plugin has exposed - and wires it into the MCP transport and the chat read-loop. You never tell `ai-backend` your plugin exists; it discovers your projection through the extension point.\n\n## Why ai-backend stays plugin-agnostic\n\n`ai-backend` is the AI platform: the tool registry + resolver, the projection mechanism, the chat agent loop, the MCP server, propose/apply, and a few genuinely cross-plugin tools (docs grounding, URL probe). It imports no capability plugin's `*-common`. Pure, shareable helpers a tool author needs - `computeFieldDiff`, the capability-summary helpers, `ScriptContextKind` - live in `@checkstack/ai-common`; `resolveScriptContext` and `buildProjectedTool` are exported from `@checkstack/ai-backend`. So a third-party plugin can author rich AI tools (including assertion diffs and script-context grounding) using only the platform packages, and adding or removing a plugin never touches `ai-backend`.",
|
|
186
186
|
"truncated": false
|
|
187
187
|
},
|
|
188
|
+
{
|
|
189
|
+
"slug": "developer-guide/ai/system-issues",
|
|
190
|
+
"title": "The system.issues tool and system-signals contributors",
|
|
191
|
+
"description": "How the system.issues AI tool aggregates \"needs attention\" signals across plugins, and how a plugin contributes its own problem signals via the systemSignalsExtensionPoint.",
|
|
192
|
+
"headings": [
|
|
193
|
+
"The contributor contract",
|
|
194
|
+
"The per-source access gate",
|
|
195
|
+
"Share the deriver with the frontend",
|
|
196
|
+
"Register the contributor",
|
|
197
|
+
"State and scale",
|
|
198
|
+
"Why ai-backend stays plugin-agnostic"
|
|
199
|
+
],
|
|
200
|
+
"content": "`system.issues` is the single \"what is wrong right now\" read tool. In ONE call it returns every current problem across all systems - failing health checks, breaching or at-risk SLOs, active anomalies, open incidents, active maintenances, and dependency problems - grouped by system. The model is told to reach for it FIRST whenever asked whether there are issues, what is down, or for an overall health overview, before any per-domain tool.\n\nThe tool itself owns no domain knowledge. It fans out across every backend `SystemSignalsContributor` that plugins register through the `systemSignalsExtensionPoint`, merges their per-system maps, and shapes the result for the model. `ai-backend` imports no capability plugin's `*-common` to do this - the dependency direction is always plugin -> `@checkstack/ai-backend`, exactly like [registering tools](/checkstack/developer-guide/ai/registering-tools/).\n\n## The contributor contract\n\nA contributor returns problem signals for ALL systems globally, keyed by systemId, scoped to what the calling principal may see. This mirrors the frontend `SystemSignalsSlot`: where a frontend plugin's React filler computes per-system `SystemSignal[]` from a bulk RPC, a backend plugin registers a contributor that computes the same signals server-side for the aggregator.\n\n```ts\nimport type { AuthUser } from \"@checkstack/backend-api\";\nimport type { SystemSignalsMap } from \"@checkstack/catalog-common\";\n\ninterface SystemSignalsContribution {\n /** False when the principal lacks this source's access (signals are empty). */\n accessible: boolean;\n signals: SystemSignalsMap;\n}\n\ninterface SystemSignalsContributor {\n /** Stable id of the source, e.g. \"incident\" / \"slo\" / \"healthcheck\". */\n sourceId: string;\n /**\n * Return problem signals for ALL systems globally, keyed by systemId, scoped\n * to what `principal` may see, plus whether the principal could read this\n * source at all. When access is denied, return\n * `{ accessible: false, signals: {} }` (never a throw).\n */\n read(context: { principal: AuthUser }): Promise<SystemSignalsContribution>;\n}\n```\n\n`SystemSignalsMap` is `Record<string, SystemSignal[]>` from `@checkstack/catalog-common`. Only systems that currently have a problem appear in the map; healthy systems are simply absent. The aggregator drops the link/icon fields the model does not need (`href`, `accessRule`, `iconName`) and keeps `source` / `tone` / `label` / `detail` / `since`.\n\nReturning `accessible` lets the aggregator tell \"checked and clear\" apart from \"skipped for lack of permission\". The tool output therefore includes `checkedSources`, `inaccessibleSources`, and `failedSources` (a contributor that threw), and the model is instructed to tell the operator when a source could not be checked rather than implying everything is clear.\n\n## The per-source access gate\n\nThe `system.issues` tool is gated by `catalog.system.read`, but that only controls whether the tool runs at all. Per-source visibility - the global rule AND per-system team grants - is applied for you by `createGatedSystemSignalsContributor`. Build your contributor with it instead of hand-rolling the gate: pass your source's read `accessRule`, a `SystemAccessResolver`, and a `readSignals` that returns problem signals for ALL systems globally. The factory then:\n\n- lets a principal holding the global rule (and a trusted `ServiceUser`, mapped to the wildcard) see every system the source reports;\n- filters a real user / application WITHOUT the global rule to the systems its TEAM grants allow - the SAME `getAccessibleResourceIds` instance/team filtering the matching bulk RPC applies - so `system.issues` never under- or over-reports relative to the per-domain UI;\n- returns `{ accessible: false, signals: {} }` (never throws) for any other principal without access, and reports the source as inaccessible.\n\nIt does not call `readSignals` for a principal that can see nothing.\n\n```ts\nimport {\n createGatedSystemSignalsContributor,\n type SystemAccessResolver,\n type SystemSignalsContributor,\n} from \"@checkstack/ai-backend\";\nimport {\n incidentAccess,\n INCIDENT_SIGNAL_SOURCE_ID,\n deriveIncidentSignals,\n} from \"@checkstack/incident-common\";\nimport type { IncidentService } from \"./service\";\n\nexport function createIncidentSignalsContributor({\n service,\n resolver,\n}: {\n service: Pick<IncidentService, \"listOpenIncidentsBySystem\">;\n resolver: SystemAccessResolver;\n}): SystemSignalsContributor {\n return createGatedSystemSignalsContributor({\n sourceId: INCIDENT_SIGNAL_SOURCE_ID,\n accessRule: incidentAccess.incident.read,\n resolver,\n // Global read: problem signals for EVERY system. The factory applies the\n // access gate (global rule + per-system team grants) on top.\n readSignals: async () => {\n const incidentsBySystem = await service.listOpenIncidentsBySystem();\n return deriveIncidentSignals({\n incidentsBySystem,\n systemIds: Object.keys(incidentsBySystem),\n });\n },\n });\n}\n```\n\n## Share the deriver with the frontend\n\nA signal must look the same whether it comes from the backend aggregator or the dashboard's frontend filler. Put the pure mapping - the function that turns domain rows into `SystemSignal[]` - in your plugin's `*-common` package and have BOTH the frontend filler and the backend contributor call it. The deriver stays dependency-free (it imports only types and `resolveRoute`), so it is trivially unit-testable and the two surfaces can never drift.\n\n## Register the contributor\n\nRegister ONE contributor from your plugin's own `init`, after the service it reads is bound, through the same extension point external plugins use.\n\n```ts\nimport {\n systemSignalsExtensionPoint,\n createSystemAccessResolver,\n} from \"@checkstack/ai-backend\";\n\n// in registerInit({ init }), with `rpcClient` from coreServices.rpcClient:\nenv.getExtensionPoint(systemSignalsExtensionPoint).contribute(\n createIncidentSignalsContributor({\n service,\n resolver: createSystemAccessResolver(rpcClient),\n }),\n);\n```\n\n`ai-backend` accumulates every contributor into the same array the `system.issues` tool reads at execute time, so a contributor registered during any plugin's `init` is visible by the time the tool runs.\n\n## State and scale\n\nA contributor's `read` MUST resolve from shared, durable storage - the plugin's own Postgres tables or a derivation of them - so the answer is identical on every pod. Never read from process-local or in-memory state: the tool can execute on whichever pod handles the request, and a value written on one pod would be invisible to another, returning stale or empty issues. This is the same constraint reactive entity reads follow.\n\n## Why ai-backend stays plugin-agnostic\n\nThe aggregator is pure machinery: collect contributors, merge their maps, shape the output. It knows nothing about incidents, SLOs, or health checks. Each domain owns its source id, its access gate, its global query, and its shared deriver. Adding or removing a plugin never touches `ai-backend` - the new source simply appears in (or disappears from) the aggregated answer.",
|
|
201
|
+
"truncated": false
|
|
202
|
+
},
|
|
188
203
|
{
|
|
189
204
|
"slug": "developer-guide/ai/tool-registry",
|
|
190
205
|
"title": "Tool registry",
|
|
@@ -3019,4 +3034,4 @@ export const DOCS_INDEX: readonly DocsIndexEntry[] = [
|
|
|
3019
3034
|
];
|
|
3020
3035
|
|
|
3021
3036
|
/** A content hash of the source tree, so a CI check can detect drift. */
|
|
3022
|
-
export const DOCS_INDEX_HASH = "
|
|
3037
|
+
export const DOCS_INDEX_HASH = "4c5b709025cfd5c8807d5a548645a90ff21f0dd1cf5ade4b0b25b8184577fdca";
|
|
@@ -154,7 +154,7 @@ describe("HARDENING: a misbehaving model cannot escape the resolver gate", () =>
|
|
|
154
154
|
test("isAllowed refuses a tool whose rule the principal lacks", () => {
|
|
155
155
|
const registry = createAiToolRegistry();
|
|
156
156
|
let ran = false;
|
|
157
|
-
const adminTool = readTool("
|
|
157
|
+
const adminTool = readTool("ai_secrets", "ai.tools.manage", () => {
|
|
158
158
|
ran = true;
|
|
159
159
|
});
|
|
160
160
|
registry.register(adminTool);
|
|
@@ -168,7 +168,7 @@ describe("HARDENING: a misbehaving model cannot escape the resolver gate", () =>
|
|
|
168
168
|
|
|
169
169
|
test("a service principal (no access rules) is refused every tool", () => {
|
|
170
170
|
const registry = createAiToolRegistry();
|
|
171
|
-
const tool = readTool("
|
|
171
|
+
const tool = readTool("incident_list", "incident.incident.read", () => {});
|
|
172
172
|
registry.register(tool);
|
|
173
173
|
const resolver = createAiToolResolver({ registry });
|
|
174
174
|
const service: AuthUser = { type: "service", pluginId: "svc" };
|
|
@@ -181,7 +181,7 @@ describe("HARDENING: propose refuses a model-picked out-of-scope tool BEFORE dry
|
|
|
181
181
|
const registry = createAiToolRegistry();
|
|
182
182
|
let dryRan = false;
|
|
183
183
|
let executed = false;
|
|
184
|
-
const tool = mutateTool("
|
|
184
|
+
const tool = mutateTool("billing_refund", "billing.billing.manage", {
|
|
185
185
|
onDryRun: () => {
|
|
186
186
|
dryRan = true;
|
|
187
187
|
},
|
|
@@ -200,7 +200,7 @@ describe("HARDENING: propose refuses a model-picked out-of-scope tool BEFORE dry
|
|
|
200
200
|
await expect(
|
|
201
201
|
service.propose({
|
|
202
202
|
principal: limited, // lacks billing.billing.manage
|
|
203
|
-
toolName: "
|
|
203
|
+
toolName: "billing_refund",
|
|
204
204
|
input: { amount: 100 },
|
|
205
205
|
transport: "chat",
|
|
206
206
|
rpcClient,
|
|
@@ -217,7 +217,7 @@ describe("HARDENING: bad model-supplied args are rejected (no execution on garba
|
|
|
217
217
|
test("propose rejects args that fail the tool's own zod schema", async () => {
|
|
218
218
|
const registry = createAiToolRegistry();
|
|
219
219
|
let dryRan = false;
|
|
220
|
-
const tool = mutateTool("
|
|
220
|
+
const tool = mutateTool("incident_escalate", "incident.incident.read", {
|
|
221
221
|
onDryRun: () => {
|
|
222
222
|
dryRan = true;
|
|
223
223
|
},
|
|
@@ -237,7 +237,7 @@ describe("HARDENING: bad model-supplied args are rejected (no execution on garba
|
|
|
237
237
|
await expect(
|
|
238
238
|
service.propose({
|
|
239
239
|
principal: limited,
|
|
240
|
-
toolName: "
|
|
240
|
+
toolName: "incident_escalate",
|
|
241
241
|
input: { amount: -5 },
|
|
242
242
|
transport: "chat",
|
|
243
243
|
rpcClient,
|
|
@@ -253,9 +253,9 @@ describe("HARDENING: scope-narrowing can never WIDEN the surfaced toolset", () =
|
|
|
253
253
|
// only ever shrink the visible tools — never add one the principal lacks.
|
|
254
254
|
test("narrowing the principal's rules monotonically shrinks the visible tools", () => {
|
|
255
255
|
const registry = createAiToolRegistry();
|
|
256
|
-
registry.register(readTool("
|
|
257
|
-
registry.register(readTool("
|
|
258
|
-
registry.register(readTool("
|
|
256
|
+
registry.register(readTool("incident_list", "incident.incident.read", () => {}));
|
|
257
|
+
registry.register(readTool("hc_status", "healthcheck.config.read", () => {}));
|
|
258
|
+
registry.register(readTool("ai_secrets", "ai.tools.manage", () => {}));
|
|
259
259
|
const resolver = createAiToolResolver({ registry });
|
|
260
260
|
|
|
261
261
|
const wide: AuthUser = {
|
|
@@ -274,8 +274,8 @@ describe("HARDENING: scope-narrowing can never WIDEN the surfaced toolset", () =
|
|
|
274
274
|
|
|
275
275
|
// Narrowed is a strict subset — never a superset.
|
|
276
276
|
for (const name of narrowNames) expect(wideNames.has(name)).toBe(true);
|
|
277
|
-
expect(narrowNames.has("
|
|
278
|
-
expect(narrowNames.has("
|
|
277
|
+
expect(narrowNames.has("hc_status")).toBe(false);
|
|
278
|
+
expect(narrowNames.has("ai_secrets")).toBe(false);
|
|
279
279
|
// And the narrowing never invented a tool outside the wide set.
|
|
280
280
|
expect([...narrowNames].every((n) => wideNames.has(n))).toBe(true);
|
|
281
281
|
});
|
package/src/index.ts
CHANGED
|
@@ -18,11 +18,16 @@ import type { OpenAiCompatibleConnection } from "@checkstack/ai-common";
|
|
|
18
18
|
import {
|
|
19
19
|
aiToolExtensionPoint,
|
|
20
20
|
aiToolProjectionExtensionPoint,
|
|
21
|
+
systemSignalsExtensionPoint,
|
|
21
22
|
} from "./extension-points";
|
|
22
23
|
import { createAiToolRegistry } from "./tool-registry";
|
|
23
24
|
import { createAiToolResolver } from "./resolver";
|
|
24
|
-
import {
|
|
25
|
+
import {
|
|
26
|
+
createRegistryExtensionPoints,
|
|
27
|
+
createSystemSignalsExtensionPoint,
|
|
28
|
+
} from "./registry-wiring";
|
|
25
29
|
import { buildCompositeTools } from "./tools/composite-tools";
|
|
30
|
+
import { createSystemIssuesTool } from "./tools/system-issues";
|
|
26
31
|
import { createOpenAiCompatibleProvider } from "./openai-provider";
|
|
27
32
|
import { createAiRouter } from "./router";
|
|
28
33
|
import { createMcpRequestHandler } from "./mcp/server";
|
|
@@ -76,6 +81,13 @@ export default createBackendPlugin({
|
|
|
76
81
|
const { toolExtensionPoint, projectionExtensionPoint, exposedProjections } =
|
|
77
82
|
createRegistryExtensionPoints({ registry });
|
|
78
83
|
|
|
84
|
+
// System-signals contributors: each plugin that owns a kind of problem state
|
|
85
|
+
// registers ONE contributor from its OWN init; the `system.issues` composite
|
|
86
|
+
// tool fans out across this SAME array at execute time. ai-backend imports no
|
|
87
|
+
// plugin's `*-common` to collect them.
|
|
88
|
+
const { systemSignalsExtensionPoint: systemSignalsExt, contributors } =
|
|
89
|
+
createSystemSignalsExtensionPoint();
|
|
90
|
+
|
|
79
91
|
// Path 1 — hand-authored composite tools.
|
|
80
92
|
env.registerExtensionPoint(aiToolExtensionPoint, toolExtensionPoint);
|
|
81
93
|
// Path 2 — opt-in projection of an existing oRPC procedure. Plugins call
|
|
@@ -85,6 +97,7 @@ export default createBackendPlugin({
|
|
|
85
97
|
aiToolProjectionExtensionPoint,
|
|
86
98
|
projectionExtensionPoint,
|
|
87
99
|
);
|
|
100
|
+
env.registerExtensionPoint(systemSignalsExtensionPoint, systemSignalsExt);
|
|
88
101
|
|
|
89
102
|
// Live MCP connection registry — the ONE allowed pod-local thing
|
|
90
103
|
// (declareNonReactiveState({ reason: "bookkeeping" }), decision 9). Created
|
|
@@ -175,6 +188,18 @@ export default createBackendPlugin({
|
|
|
175
188
|
toolExt.registerTool(tool, pluginMetadata);
|
|
176
189
|
}
|
|
177
190
|
|
|
191
|
+
// The `system.issues` aggregator: ONE "what are the current issues"
|
|
192
|
+
// read tool that fans out across every registered SystemSignalsContributor
|
|
193
|
+
// and merges their global maps. Registered through the same extension
|
|
194
|
+
// point external plugins use; gated by catalog.system.read (per-source
|
|
195
|
+
// access is enforced inside each contributor). The `contributors` array
|
|
196
|
+
// is the live register-scope array, read at execute time, so plugins that
|
|
197
|
+
// contribute during their own init are seen.
|
|
198
|
+
toolExt.registerTool(
|
|
199
|
+
createSystemIssuesTool({ contributors }),
|
|
200
|
+
pluginMetadata,
|
|
201
|
+
);
|
|
202
|
+
|
|
178
203
|
|
|
179
204
|
// Register the OpenAI-compatible integration provider so it appears in
|
|
180
205
|
// the generic Connections settings UI (DynamicForm-driven). Done at
|
|
@@ -408,11 +433,31 @@ export default createBackendPlugin({
|
|
|
408
433
|
export {
|
|
409
434
|
aiToolExtensionPoint,
|
|
410
435
|
aiToolProjectionExtensionPoint,
|
|
436
|
+
systemSignalsExtensionPoint,
|
|
437
|
+
principalGrantedRuleIds,
|
|
411
438
|
} from "./extension-points";
|
|
412
439
|
export type {
|
|
413
440
|
AiToolExtensionPoint,
|
|
414
441
|
AiToolProjectionExtensionPoint,
|
|
442
|
+
SystemSignalsExtensionPoint,
|
|
443
|
+
SystemSignalsContributor,
|
|
444
|
+
SystemSignalsContribution,
|
|
415
445
|
} from "./extension-points";
|
|
446
|
+
export {
|
|
447
|
+
createGatedSystemSignalsContributor,
|
|
448
|
+
createSystemAccessResolver,
|
|
449
|
+
} from "./system-signals-contributor";
|
|
450
|
+
export type { SystemAccessResolver } from "./system-signals-contributor";
|
|
451
|
+
export {
|
|
452
|
+
createSystemIssuesTool,
|
|
453
|
+
mergeSystemSignalsMaps,
|
|
454
|
+
collectSystemSignals,
|
|
455
|
+
toSystemIssuesOutput,
|
|
456
|
+
SystemIssuesInputSchema,
|
|
457
|
+
SystemIssuesOutputSchema,
|
|
458
|
+
type SystemIssuesInput,
|
|
459
|
+
type SystemIssuesOutput,
|
|
460
|
+
} from "./tools/system-issues";
|
|
416
461
|
export {
|
|
417
462
|
createAiToolRegistry,
|
|
418
463
|
type AiToolRegistry,
|
package/src/mcp/server.test.ts
CHANGED
|
@@ -47,12 +47,12 @@ function buildHandler({
|
|
|
47
47
|
}) => Promise<void>;
|
|
48
48
|
}) {
|
|
49
49
|
const registry = createAiToolRegistry();
|
|
50
|
-
const incidentTool = readTool("
|
|
51
|
-
const adminTool = readTool("
|
|
50
|
+
const incidentTool = readTool("incident_list", "incident.incident.read");
|
|
51
|
+
const adminTool = readTool("ai_secrets", "ai.tools.manage");
|
|
52
52
|
// A mutating tool the limited principal IS allowed for (same access rule as
|
|
53
53
|
// incident.list). The ONLY thing that may refuse a bare tools/call for it is
|
|
54
54
|
// the structural effect-gate, not the resolver.
|
|
55
|
-
const mutating = mutateTool("
|
|
55
|
+
const mutating = mutateTool("incident_close", "incident.incident.read");
|
|
56
56
|
registry.register(incidentTool);
|
|
57
57
|
registry.register(adminTool);
|
|
58
58
|
registry.register(mutating);
|
|
@@ -121,8 +121,8 @@ describe("MCP server (read-only Streamable-HTTP)", () => {
|
|
|
121
121
|
);
|
|
122
122
|
const json = await res.json();
|
|
123
123
|
const names = json.result.tools.map((t: { name: string }) => t.name);
|
|
124
|
-
expect(names).toEqual(["
|
|
125
|
-
expect(names).not.toContain("
|
|
124
|
+
expect(names).toEqual(["incident_list"]);
|
|
125
|
+
expect(names).not.toContain("ai_secrets");
|
|
126
126
|
});
|
|
127
127
|
|
|
128
128
|
test("tools/list returns 401 for an unauthenticated caller", async () => {
|
|
@@ -150,7 +150,7 @@ describe("MCP server (read-only Streamable-HTTP)", () => {
|
|
|
150
150
|
jsonrpc: "2.0",
|
|
151
151
|
id: 4,
|
|
152
152
|
method: "tools/call",
|
|
153
|
-
params: { name: "
|
|
153
|
+
params: { name: "ai_secrets", arguments: {} },
|
|
154
154
|
}),
|
|
155
155
|
);
|
|
156
156
|
expect(res.status).toBe(403);
|
|
@@ -175,7 +175,7 @@ describe("MCP server (read-only Streamable-HTTP)", () => {
|
|
|
175
175
|
jsonrpc: "2.0",
|
|
176
176
|
id: 5,
|
|
177
177
|
method: "tools/call",
|
|
178
|
-
params: { name: "
|
|
178
|
+
params: { name: "incident_list", arguments: { status: "open" } },
|
|
179
179
|
},
|
|
180
180
|
"tok-123",
|
|
181
181
|
),
|
|
@@ -205,7 +205,7 @@ describe("MCP server (read-only Streamable-HTTP)", () => {
|
|
|
205
205
|
jsonrpc: "2.0",
|
|
206
206
|
id: 7,
|
|
207
207
|
method: "tools/call",
|
|
208
|
-
params: { name: "
|
|
208
|
+
params: { name: "incident_close", arguments: {} },
|
|
209
209
|
}),
|
|
210
210
|
);
|
|
211
211
|
expect(res.status).toBe(403);
|
|
@@ -221,8 +221,8 @@ describe("MCP server (read-only Streamable-HTTP)", () => {
|
|
|
221
221
|
);
|
|
222
222
|
const json = await res.json();
|
|
223
223
|
const names = json.result.tools.map((t: { name: string }) => t.name);
|
|
224
|
-
expect(names).toContain("
|
|
225
|
-
expect(names).not.toContain("
|
|
224
|
+
expect(names).toContain("incident_list");
|
|
225
|
+
expect(names).not.toContain("incident_close");
|
|
226
226
|
});
|
|
227
227
|
|
|
228
228
|
// §14.5: per-principal tool budget enforced on tools/call (shared-Postgres).
|
|
@@ -241,7 +241,7 @@ describe("MCP server (read-only Streamable-HTTP)", () => {
|
|
|
241
241
|
jsonrpc: "2.0",
|
|
242
242
|
id: 9,
|
|
243
243
|
method: "tools/call",
|
|
244
|
-
params: { name: "
|
|
244
|
+
params: { name: "incident_list", arguments: {} },
|
|
245
245
|
}),
|
|
246
246
|
);
|
|
247
247
|
expect(res.status).toBe(429);
|
|
@@ -264,12 +264,12 @@ describe("MCP server (read-only Streamable-HTTP)", () => {
|
|
|
264
264
|
jsonrpc: "2.0",
|
|
265
265
|
id: 10,
|
|
266
266
|
method: "tools/call",
|
|
267
|
-
params: { name: "
|
|
267
|
+
params: { name: "incident_list", arguments: { status: "open" } },
|
|
268
268
|
}),
|
|
269
269
|
);
|
|
270
270
|
expect(res.status).toBe(200);
|
|
271
271
|
expect(recorded).toHaveLength(1);
|
|
272
|
-
expect(recorded[0]?.toolName).toBe("
|
|
272
|
+
expect(recorded[0]?.toolName).toBe("incident_list");
|
|
273
273
|
// The args hash is a SHA-256 hex digest, never the raw args.
|
|
274
274
|
expect(recorded[0]?.argsHash).toMatch(/^[0-9a-f]{64}$/);
|
|
275
275
|
});
|
|
@@ -135,7 +135,7 @@ function mutatingTool(
|
|
|
135
135
|
): RegisteredAiTool<{ value: string }, { created: string }> {
|
|
136
136
|
let executed = 0;
|
|
137
137
|
const tool: RegisteredAiTool<{ value: string }, { created: string }> = {
|
|
138
|
-
name: "
|
|
138
|
+
name: "demo_mutate",
|
|
139
139
|
description: "demo mutating tool",
|
|
140
140
|
effect: "mutate",
|
|
141
141
|
input: ManageInput,
|
|
@@ -200,7 +200,7 @@ describe("propose/apply lifecycle (matrix #11)", () => {
|
|
|
200
200
|
|
|
201
201
|
const proposal = await service.propose({
|
|
202
202
|
principal: allowed,
|
|
203
|
-
toolName: "
|
|
203
|
+
toolName: "demo_mutate",
|
|
204
204
|
input: { value: "alpha" },
|
|
205
205
|
transport: "chat",
|
|
206
206
|
});
|
|
@@ -222,7 +222,7 @@ describe("propose/apply lifecycle (matrix #11)", () => {
|
|
|
222
222
|
|
|
223
223
|
const proposal = await service.propose({
|
|
224
224
|
principal: allowed,
|
|
225
|
-
toolName: "
|
|
225
|
+
toolName: "demo_mutate",
|
|
226
226
|
input: { value: "beta" },
|
|
227
227
|
transport: "chat",
|
|
228
228
|
});
|
|
@@ -239,7 +239,7 @@ describe("propose/apply lifecycle (matrix #11)", () => {
|
|
|
239
239
|
const { service } = setup(tool);
|
|
240
240
|
const proposal = await service.propose({
|
|
241
241
|
principal: allowed,
|
|
242
|
-
toolName: "
|
|
242
|
+
toolName: "demo_mutate",
|
|
243
243
|
input: { value: "gamma" },
|
|
244
244
|
transport: "chat",
|
|
245
245
|
});
|
|
@@ -265,7 +265,7 @@ describe("propose/apply authorization (matrix #11 / decision 5)", () => {
|
|
|
265
265
|
await expect(
|
|
266
266
|
service.propose({
|
|
267
267
|
principal: notAllowed,
|
|
268
|
-
toolName: "
|
|
268
|
+
toolName: "demo_mutate",
|
|
269
269
|
input: { value: "x" },
|
|
270
270
|
transport: "chat",
|
|
271
271
|
}),
|
|
@@ -277,7 +277,7 @@ describe("propose/apply authorization (matrix #11 / decision 5)", () => {
|
|
|
277
277
|
const { service } = setup(tool);
|
|
278
278
|
const proposal = await service.propose({
|
|
279
279
|
principal: allowed,
|
|
280
|
-
toolName: "
|
|
280
|
+
toolName: "demo_mutate",
|
|
281
281
|
input: { value: "x" },
|
|
282
282
|
transport: "chat",
|
|
283
283
|
});
|
|
@@ -294,7 +294,7 @@ describe("propose/apply authorization (matrix #11 / decision 5)", () => {
|
|
|
294
294
|
await expect(
|
|
295
295
|
service.propose({
|
|
296
296
|
principal: allowed,
|
|
297
|
-
toolName: "
|
|
297
|
+
toolName: "demo_mutate",
|
|
298
298
|
input: { value: "x" },
|
|
299
299
|
transport: "chat",
|
|
300
300
|
}),
|
|
@@ -306,7 +306,7 @@ describe("propose/apply authorization (matrix #11 / decision 5)", () => {
|
|
|
306
306
|
await expect(
|
|
307
307
|
service.propose({
|
|
308
308
|
principal: { type: "service", pluginId: "x" },
|
|
309
|
-
toolName: "
|
|
309
|
+
toolName: "demo_mutate",
|
|
310
310
|
input: { value: "x" },
|
|
311
311
|
transport: "chat",
|
|
312
312
|
}),
|
|
@@ -320,7 +320,7 @@ describe("propose does NOT mutate (matrix #12)", () => {
|
|
|
320
320
|
const { service } = setup(tool);
|
|
321
321
|
await service.propose({
|
|
322
322
|
principal: allowed,
|
|
323
|
-
toolName: "
|
|
323
|
+
toolName: "demo_mutate",
|
|
324
324
|
input: { value: "x" },
|
|
325
325
|
transport: "chat",
|
|
326
326
|
});
|
|
@@ -334,7 +334,7 @@ describe("audit rows (matrix #13)", () => {
|
|
|
334
334
|
const { service, store } = setup(tool);
|
|
335
335
|
const proposal = await service.propose({
|
|
336
336
|
principal: allowed,
|
|
337
|
-
toolName: "
|
|
337
|
+
toolName: "demo_mutate",
|
|
338
338
|
input: { value: "x" },
|
|
339
339
|
transport: "chat",
|
|
340
340
|
});
|
|
@@ -358,7 +358,7 @@ describe("audit rows (matrix #13)", () => {
|
|
|
358
358
|
// Proposed by u1.
|
|
359
359
|
const proposal = await service.propose({
|
|
360
360
|
principal: allowed,
|
|
361
|
-
toolName: "
|
|
361
|
+
toolName: "demo_mutate",
|
|
362
362
|
input: { value: "x" },
|
|
363
363
|
transport: "chat",
|
|
364
364
|
});
|
|
@@ -386,7 +386,7 @@ describe("audit rows (matrix #13)", () => {
|
|
|
386
386
|
const { service, store } = setup(tool);
|
|
387
387
|
const proposal = await service.propose({
|
|
388
388
|
principal: allowed,
|
|
389
|
-
toolName: "
|
|
389
|
+
toolName: "demo_mutate",
|
|
390
390
|
input: { value: "x" },
|
|
391
391
|
transport: "chat",
|
|
392
392
|
});
|
|
@@ -411,7 +411,7 @@ describe("audit rows (matrix #13)", () => {
|
|
|
411
411
|
const { service, store } = setup(tool, () => current);
|
|
412
412
|
const proposal = await service.propose({
|
|
413
413
|
principal: allowed,
|
|
414
|
-
toolName: "
|
|
414
|
+
toolName: "demo_mutate",
|
|
415
415
|
input: { value: "x" },
|
|
416
416
|
transport: "chat",
|
|
417
417
|
});
|
|
@@ -30,7 +30,7 @@ function handAuthoredTool(): RegisteredAiTool {
|
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
describe("createRegistryExtensionPoints (end-to-end registration)", () => {
|
|
33
|
-
test("registerTool qualifies an unqualified name
|
|
33
|
+
test("registerTool qualifies an unqualified name, registered provider-safe", () => {
|
|
34
34
|
const registry = createAiToolRegistry();
|
|
35
35
|
const { toolExtensionPoint } = createRegistryExtensionPoints({ registry });
|
|
36
36
|
|
|
@@ -39,11 +39,15 @@ describe("createRegistryExtensionPoints (end-to-end registration)", () => {
|
|
|
39
39
|
definePluginMetadata({ pluginId: "automation" }),
|
|
40
40
|
);
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
// Qualified to `automation.propose`, then normalized to the provider-safe
|
|
43
|
+
// name set (the "." the provider rejects becomes "_").
|
|
44
|
+
expect(registry.hasTool("automation_propose")).toBe(true);
|
|
45
|
+
expect(registry.getTool("automation_propose")?.effect).toBe("mutate");
|
|
46
|
+
// The dotted form is NOT a key (the provider would never send it).
|
|
47
|
+
expect(registry.hasTool("automation.propose")).toBe(false);
|
|
44
48
|
});
|
|
45
49
|
|
|
46
|
-
test("registerTool leaves an already-qualified name unchanged", () => {
|
|
50
|
+
test("registerTool leaves an already-qualified name unchanged (modulo sanitization)", () => {
|
|
47
51
|
const registry = createAiToolRegistry();
|
|
48
52
|
const { toolExtensionPoint } = createRegistryExtensionPoints({ registry });
|
|
49
53
|
|
|
@@ -52,8 +56,10 @@ describe("createRegistryExtensionPoints (end-to-end registration)", () => {
|
|
|
52
56
|
definePluginMetadata({ pluginId: "different" }),
|
|
53
57
|
);
|
|
54
58
|
|
|
55
|
-
|
|
56
|
-
|
|
59
|
+
// Already qualified, so it is not re-prefixed with "different"; only "."
|
|
60
|
+
// is sanitized to "_".
|
|
61
|
+
expect(registry.hasTool("automation_propose")).toBe(true);
|
|
62
|
+
expect(registry.hasTool("different_automation_propose")).toBe(false);
|
|
57
63
|
});
|
|
58
64
|
|
|
59
65
|
test("expose builds and registers a projected tool from a contract procedure", () => {
|
|
@@ -72,7 +78,8 @@ describe("createRegistryExtensionPoints (end-to-end registration)", () => {
|
|
|
72
78
|
execute: () => Promise.resolve({}),
|
|
73
79
|
});
|
|
74
80
|
|
|
75
|
-
|
|
81
|
+
// The authored name "incident.list" is normalized to the provider-safe key.
|
|
82
|
+
const tool = registry.getTool("incident_list");
|
|
76
83
|
expect(tool).toBeDefined();
|
|
77
84
|
// Access rules read verbatim from the source procedure, qualified.
|
|
78
85
|
expect(tool?.requiredAccessRules).toEqual(["incident.incident.read"]);
|
|
@@ -98,9 +105,10 @@ describe("createRegistryExtensionPoints (end-to-end registration)", () => {
|
|
|
98
105
|
execute: () => Promise.resolve({}),
|
|
99
106
|
});
|
|
100
107
|
|
|
108
|
+
// Registry keys/names are the provider-safe form of each authored name.
|
|
101
109
|
expect(registry.getTools().map((t) => t.name).sort()).toEqual([
|
|
102
|
-
"
|
|
103
|
-
"
|
|
110
|
+
"automation_propose",
|
|
111
|
+
"incident_list",
|
|
104
112
|
]);
|
|
105
113
|
});
|
|
106
114
|
|