npm - @checkstack/ai-backend - Versions diffs - 0.1.6 → 0.3.0 - Mend

@checkstack/ai-backend 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +99 -0
package/package.json +6 -4
package/src/agent-runner.test.ts +24 -24
package/src/chat/agent-loop.test.ts +10 -10
package/src/chat/auto-apply.test.ts +2 -2
package/src/chat/chat-service.streamturn.test.ts +16 -1
package/src/chat/system-prompt.test.ts +11 -0
package/src/chat/system-prompt.ts +34 -5
package/src/extension-points.ts +89 -0
package/src/generated/docs-index.ts +18 -3
package/src/hardening/handler-authz.test.ts +11 -11
package/src/index.ts +46 -1
package/src/mcp/server.test.ts +13 -13
package/src/propose-apply/service.test.ts +13 -13
package/src/registry-wiring.test.ts +17 -9
package/src/registry-wiring.ts +29 -1
package/src/resolver.test.ts +8 -8
package/src/system-signals-contributor.test.ts +162 -0
package/src/system-signals-contributor.ts +129 -0
package/src/tool-name.test.ts +42 -0
package/src/tool-name.ts +37 -0
package/src/tool-registry.ts +14 -4
package/src/tools/docs-tools.test.ts +1 -1
package/src/tools/system-issues.test.ts +236 -0
package/src/tools/system-issues.ts +209 -0
package/src/tools/tool-set.e2e.test.ts +1 -1
package/tsconfig.json +6 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,104 @@
 # @checkstack/ai-backend
+## 0.3.0
+### Minor Changes
+- 0b6f01b: feat(ai): add the system.issues aggregator tool and system-signals extension point
+  `ai-backend` gains a new read tool, `system.issues`, that returns ALL current
+  system issues - failing health checks, breaching or at-risk SLOs, active
+  anomalies, open incidents, active maintenances, and dependency problems -
+  aggregated across every system in ONE call. The assistant is steered to reach
+  for it FIRST whenever asked whether there are issues, what is down, or for an
+  overall health overview, instead of polling each per-domain tool. The tool is
+  gated by `catalog.system.read`.
+  The tool owns no domain knowledge. A new backend `systemSignalsExtensionPoint`
+  lets any plugin register ONE `SystemSignalsContributor` from its own `init`; the
+  tool fans out across every contributor and merges their per-system maps. Each
+  contributor enforces its OWN per-source access gate - returning an empty map
+  (never throwing) when the principal lacks access - and reads from shared, durable
+  storage so the answer is identical on every pod. `ai-backend` imports no
+  capability plugin's `*-common` to collect signals; the dependency direction stays
+  plugin -> `@checkstack/ai-backend`.
+  The maintenance plugin now registers a `system.issues` contributor (sourceId
+  `maintenance`) from its backend `init`, surfacing in-progress maintenances
+  alongside the other sources. The contributor enforces its own
+  `maintenance.read` gate and reads active maintenances for all systems globally
+  via a new `getActiveMaintenancesBySystem` service method. The row->signal mapping
+  is extracted into a new pure `deriveMaintenanceSignals` deriver in
+  `@checkstack/maintenance-common`, shared by the backend contributor and the
+  frontend `MaintenanceSignalsFiller` so the two surfaces stay in lockstep.
+  The new `systemSignalsExtensionPoint`, `SystemSignalsContributor`,
+  `SystemSignalsExtensionPoint`, and the `system.issues` tool factory plus its
+  pure helpers (`mergeSystemSignalsMaps`, `collectSystemSignals`,
+  `toSystemIssuesOutput`, schemas) are exported from `@checkstack/ai-backend`.
+### Patch Changes
+- dbb76a2: fix(ai): guide the assistant to find all issues and fix the anomaly tool
+  Two assistant problems reported in production:
+  1. Asked "are there any issues?", the model answered from a single source (an
+     SLO breach) and missed a system with a failing health check. The chat
+     system prompt now instructs the model to check ALL issue sources before
+     answering - failing health checks (`healthcheck_status`), breaching/at-risk
+     SLOs (`slo_listObjectives`), active anomalies (`anomaly_list`), and open
+     incidents (`incident_list`) - and not to stop after the first source. It
+     also tells the model that `systemId` must be a real system UUID (resolve a
+     name via the catalog tool first) and to never invent ids or filter values.
+  2. The anomaly tool was named `anomaly.explain` but actually LISTS anomalies
+     with optional filters. The misleading name led the model to pass a
+     non-existent filter value ("Type validation failed") and a system
+     name/anomaly id as `systemId` ("a value was malformed"). Renamed to
+     `anomaly.list` with a description that spells out the optional filters and
+     their valid enum values (state: suspicious|anomaly|recovered, kind:
+     spike|drift, suppression: active|suppressed|all) and that `systemId` is a
+     system UUID.
+  Also sharpened the `healthcheck.status` and `slo.listObjectives` tool
+  descriptions to be use-case oriented ("use when asked what is failing /
+  breaching").
+  BREAKING: the anomaly read tool's name changes from `anomaly_explain` to
+  `anomaly_list` over the MCP `tools/list` surface. MCP clients referencing it by
+  the old name must update.
+  - @checkstack/sdk@0.103.1
+  - @checkstack/backend-api@0.21.6
+  - @checkstack/integration-backend@0.4.6
+## 0.2.0
+### Minor Changes
+- 2428bfc: fix(ai): make AI tool names provider-safe (no "." in names)
+  LLM providers (and the MCP spec) require tool names to match
+  `^[a-zA-Z0-9_-]+$`, but our tool names are qualified as `<plugin>.<tool>`
+  (e.g. `incident.list`, `dependency.list`). The "." caused the model backend to
+  reject the tool list, so chat tool-calling failed after deploy.
+  Tool names are now normalized to a provider-safe form at the single
+  registration chokepoint (the tool registry) and in the projection-routing
+  table: the "." namespace separator is mapped to "\_" (so `incident.list`
+  becomes `incident_list`). The registry key, the name serialized out to the
+  model / MCP client, and the name the model echoes back in a tool call are all
+  the same normalized string, so the round-trip needs no reverse lookup. Any
+  other illegal character is an authoring mistake and is now rejected at
+  registration rather than silently rewritten.
+  BREAKING: AI tool names exposed over the MCP `tools/list` endpoint change from
+  the dotted form (`incident.list`) to the underscored form (`incident_list`).
+  MCP clients that referenced tools by their dotted names must update to the
+  underscored names. (Chat was already broken by the provider rejection, so this
+  only changes the working MCP surface.)
 ## 0.1.6
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@checkstack/ai-backend",
-  "version": "0.1.6",
+  "version": "0.3.0",
   "license": "Elastic-2.0",
   "type": "module",
   "main": "src/index.ts",
@@ -17,11 +17,13 @@
   "dependencies": {
     "@ai-sdk/openai-compatible": "^2.0.48",
     "@checkstack/ai-common": "0.1.3",
-    "@checkstack/backend-api": "0.21.5",
+    "@checkstack/auth-common": "0.8.3",
+    "@checkstack/backend-api": "0.21.6",
+    "@checkstack/catalog-common": "2.3.4",
     "@checkstack/common": "0.15.0",
     "@checkstack/drizzle-helper": "0.0.5",
-    "@checkstack/integration-backend": "0.4.5",
-    "@checkstack/sdk": "0.101.1",
+    "@checkstack/integration-backend": "0.4.6",
+    "@checkstack/sdk": "0.103.1",
     "@orpc/client": "^1.14.4",
     "@orpc/contract": "^1.14.4",
     "@orpc/server": "^1.14.4",

package/src/agent-runner.test.ts CHANGED Viewed

@@ -47,14 +47,14 @@ describe("createAgentRunner", () => {
     const registry = createAiToolRegistry();
     const calls: string[] = [];
     registry.register(
-      readTool("plugin.read", async () => {
-        calls.push("plugin.read");
+      readTool("plugin_read", async () => {
+        calls.push("plugin_read");
         return { ok: true };
       }),
     );
     // A destructive tool must NOT be offered.
     registry.register({
-      name: "plugin.delete",
+      name: "plugin_delete",
       description: "delete",
       effect: "destructive",
       input: z.object({}),
@@ -63,7 +63,7 @@ describe("createAgentRunner", () => {
     } as RegisteredAiTool);
     // A projected read (deferred sentinel) must NOT be offered in v1.
     registry.register({
-      name: "plugin.projected",
+      name: "plugin_projected",
       description: "projected",
       effect: "read",
       input: z.object({}),
@@ -77,7 +77,7 @@ describe("createAgentRunner", () => {
     const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
       offeredToolNames = Object.keys(args.tools ?? {});
       // Simulate the model calling the read tool once.
-      const t = (args.tools ?? {})["plugin.read"] as {
+      const t = (args.tools ?? {})["plugin_read"] as {
         execute: (i: unknown) => Promise<unknown>;
       };
       await t.execute({});
@@ -102,11 +102,11 @@ describe("createAgentRunner", () => {
       outputSchema: z.object({ severity: z.string() }),
     });
-    expect(offeredToolNames.sort()).toEqual(["plugin.read"]);
-    expect(calls).toEqual(["plugin.read"]);
+    expect(offeredToolNames.sort()).toEqual(["plugin_read"]);
+    expect(calls).toEqual(["plugin_read"]);
     expect(result.text).toBe("done");
     expect(result.object).toEqual({ severity: "high" });
-    expect(result.toolCalls).toEqual([{ tool: "plugin.read", ok: true }]);
+    expect(result.toolCalls).toEqual([{ tool: "plugin_read", ok: true }]);
   });
   it("hands the model a date-safe schema for tools with Date inputs (no throw)", async () => {
@@ -116,7 +116,7 @@ describe("createAgentRunner", () => {
     // chat. The runner must gate date inputs through dateSafeModelSchema too.
     const registry = createAiToolRegistry();
     registry.register({
-      name: "plugin.history",
+      name: "plugin_history",
       description: "history",
       effect: "read",
       input: z.object({ since: z.date() }),
@@ -130,7 +130,7 @@ describe("createAgentRunner", () => {
       async (args: {
         tools?: Record<string, { inputSchema: unknown }>;
       }) => {
-        const t = (args.tools ?? {})["plugin.history"];
+        const t = (args.tools ?? {})["plugin_history"];
         // Exactly what the SDK does internally to build the model request; this
         // threw before the fix.
         offeredSchema = await asSchema(t.inputSchema as never).jsonSchema;
@@ -161,7 +161,7 @@ describe("createAgentRunner", () => {
   it("offers a projected read tool and routes it through the principal's client", async () => {
     const registry = createAiToolRegistry();
     registry.register({
-      name: "incident.list",
+      name: "incident_list",
       description: "list incidents",
       effect: "read",
       input: z.object({}),
@@ -186,7 +186,7 @@ describe("createAgentRunner", () => {
     let offered: string[] = [];
     const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
       offered = Object.keys(args.tools ?? {});
-      const t = (args.tools ?? {})["incident.list"] as {
+      const t = (args.tools ?? {})["incident_list"] as {
         execute: (i: unknown) => Promise<unknown>;
       };
       await t.execute({ status: "open" });
@@ -197,7 +197,7 @@ describe("createAgentRunner", () => {
       resolver,
       resolveConnection: async () => connection,
       getProjectionRoute: (name) =>
-        name === "incident.list"
+        name === "incident_list"
           ? { pluginId: "incident", procedureKey: "listIncidents" }
           : undefined,
       modelFns: { generateText: generateText as never },
@@ -210,15 +210,15 @@ describe("createAgentRunner", () => {
       prompt: "go",
     });
-    expect(offered).toEqual(["incident.list"]);
+    expect(offered).toEqual(["incident_list"]);
     expect(procCalls).toEqual([{ status: "open" }]);
-    expect(result.toolCalls).toEqual([{ tool: "incident.list", ok: true }]);
+    expect(result.toolCalls).toEqual([{ tool: "incident_list", ok: true }]);
   });
   it("records a tool failure and surfaces it to the model instead of aborting", async () => {
     const registry = createAiToolRegistry();
     registry.register(
-      readTool("plugin.boom", async () => {
+      readTool("plugin_boom", async () => {
         throw new Error("missing access: plugin.read");
       }),
     );
@@ -226,7 +226,7 @@ describe("createAgentRunner", () => {
     let toolResult: unknown;
     const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
-      const t = (args.tools ?? {})["plugin.boom"] as {
+      const t = (args.tools ?? {})["plugin_boom"] as {
         execute: (i: unknown) => Promise<unknown>;
       };
       toolResult = await t.execute({});
@@ -247,15 +247,15 @@ describe("createAgentRunner", () => {
     });
     expect(toolResult).toEqual({ error: "missing access: plugin.read" });
-    expect(result.toolCalls).toEqual([{ tool: "plugin.boom", ok: false }]);
+    expect(result.toolCalls).toEqual([{ tool: "plugin_boom", ok: false }]);
     expect(result.object).toBeUndefined();
   });
   it("calls recordToolCall for each invocation (ok and failure)", async () => {
     const registry = createAiToolRegistry();
-    registry.register(readTool("plugin.ok", async () => ({ ok: true })));
+    registry.register(readTool("plugin_ok", async () => ({ ok: true })));
     registry.register(
-      readTool("plugin.boom", async () => {
+      readTool("plugin_boom", async () => {
         throw new Error("nope");
       }),
     );
@@ -273,8 +273,8 @@ describe("createAgentRunner", () => {
     const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
       const tools = args.tools ?? {};
-      await (tools["plugin.ok"] as { execute: (i: unknown) => Promise<unknown> }).execute({});
-      await (tools["plugin.boom"] as { execute: (i: unknown) => Promise<unknown> }).execute({});
+      await (tools["plugin_ok"] as { execute: (i: unknown) => Promise<unknown> }).execute({});
+      await (tools["plugin_boom"] as { execute: (i: unknown) => Promise<unknown> }).execute({});
       return { text: "x", usage: {} };
     });
@@ -287,12 +287,12 @@ describe("createAgentRunner", () => {
     await runner({ principal, rpcClient, connectionId: "c", prompt: "go" });
     expect(recorded).toContainEqual({
-      toolName: "plugin.ok",
+      toolName: "plugin_ok",
       ok: true,
       effect: "read",
     });
     expect(recorded).toContainEqual({
-      toolName: "plugin.boom",
+      toolName: "plugin_boom",
       ok: false,
       effect: "read",
     });

package/src/chat/agent-loop.test.ts CHANGED Viewed

@@ -28,9 +28,9 @@ function tool(
 function setup() {
   const registry = createAiToolRegistry();
-  const read = tool("incident.list", "read", "incident.incident.read");
-  const mutate = tool("automation.propose", "mutate", "automation.automation.manage");
-  const destroy = tool("incident.delete", "destructive", "incident.incident.manage");
+  const read = tool("incident_list", "read", "incident.incident.read");
+  const mutate = tool("automation_propose", "mutate", "automation.automation.manage");
+  const destroy = tool("incident_delete", "destructive", "incident.incident.manage");
   registry.register(read);
   registry.register(mutate);
   registry.register(destroy);
@@ -57,15 +57,15 @@ describe("agent loop tool gating (matrix #14)", () => {
   test("the loop only offers resolver-allowed tools", () => {
     const { resolver } = setup();
     const offered = offeredTools({ principal: limited, resolver }).map((t) => t.name);
-    expect(offered).toEqual(["incident.list"]);
-    expect(offered).not.toContain("automation.propose");
-    expect(offered).not.toContain("incident.delete");
+    expect(offered).toEqual(["incident_list"]);
+    expect(offered).not.toContain("automation_propose");
+    expect(offered).not.toContain("incident_delete");
   });
   test("a model-requested tool OUTSIDE the principal's set is refused server-side", () => {
     const { resolver, registry } = setup();
     const d = disposeAgentTool({
-      toolName: "automation.propose",
+      toolName: "automation_propose",
       principal: limited,
       resolver,
       getTool: (n) => registry.getTool(n),
@@ -87,7 +87,7 @@ describe("agent loop tool gating (matrix #14)", () => {
   test("a read tool auto-runs", () => {
     const { resolver, registry } = setup();
     const d = disposeAgentTool({
-      toolName: "incident.list",
+      toolName: "incident_list",
       principal: limited,
       resolver,
       getTool: (n) => registry.getTool(n),
@@ -98,7 +98,7 @@ describe("agent loop tool gating (matrix #14)", () => {
   test("a mutate tool requires a confirm card (never silently mutates)", () => {
     const { resolver, registry } = setup();
     const d = disposeAgentTool({
-      toolName: "automation.propose",
+      toolName: "automation_propose",
       principal: power,
       resolver,
       getTool: (n) => registry.getTool(n),
@@ -109,7 +109,7 @@ describe("agent loop tool gating (matrix #14)", () => {
   test("a destructive tool requires a confirm card", () => {
     const { resolver, registry } = setup();
     const d = disposeAgentTool({
-      toolName: "incident.delete",
+      toolName: "incident_delete",
       principal: power,
       resolver,
       getTool: (n) => registry.getTool(n),

package/src/chat/auto-apply.test.ts CHANGED Viewed

@@ -129,7 +129,7 @@ function mutatingTool(): {
     created: input.value,
   }));
   const tool: RegisteredAiTool<{ value: string }, { created: string }> = {
-    name: "demo.mutate",
+    name: "demo_mutate",
     description: "demo mutating tool",
     effect: "mutate",
     input: ManageInput,
@@ -208,7 +208,7 @@ describe("AUTO-mode mutate auto-apply path", () => {
     // proposed -> applied, with the applier stamped. Not a weaker/parallel path.
     const applied = [...store.rows.values()].filter((r) => r.status === "applied");
     expect(applied).toHaveLength(1);
-    expect(applied[0]?.toolName).toBe("demo.mutate");
+    expect(applied[0]?.toolName).toBe("demo_mutate");
     expect(applied[0]?.effect).toBe("mutate");
     expect(applied[0]?.appliedById).toBe("u1");
     expect(applied[0]?.id).toBe(result.toolCallId);

package/src/chat/chat-service.streamturn.test.ts CHANGED Viewed

@@ -1,4 +1,12 @@
-import { describe, expect, test, mock, beforeEach, afterEach } from "bun:test";
+import {
+  describe,
+  expect,
+  test,
+  mock,
+  beforeEach,
+  afterEach,
+  afterAll,
+} from "bun:test";
 import { APICallError, type LanguageModelUsage } from "ai";
 import type { AuthUser } from "@checkstack/backend-api";
 import type { OpenAiCompatibleConnection } from "@checkstack/ai-common";
@@ -210,6 +218,13 @@ afterEach(() => {
   mock.restore();
 });
+// mock.restore() does NOT undo a module mock, so restore the real `ai` module
+// here - otherwise the stubbed streamText/stepCountIs leak into every other
+// ai-backend suite that runs after this file.
+afterAll(() => {
+  mock.module("ai", () => ({ ...realAi }));
+});
 describe("streamTurn topical pre-classifier", () => {
   const turn = {
     principal,

package/src/chat/system-prompt.test.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test";
 import {
   CHAT_SYSTEM_PROMPT,
   DATE_FORMAT_INSTRUCTION,
+  INVESTIGATION_INSTRUCTION,
   buildChatSystemPrompt,
   buildDateTimeContext,
   formatInstantInZone,
@@ -45,6 +46,16 @@ describe("buildChatSystemPrompt", () => {
     expect(prompt).toContain(DATE_FORMAT_INSTRUCTION);
   });
+  test("carries the issue-investigation guidance (check all sources, real ids)", () => {
+    const prompt = buildChatSystemPrompt({ timeZone: "Europe/Berlin" });
+    expect(prompt).toContain(INVESTIGATION_INSTRUCTION);
+    // The concrete behaviours we are fixing must be present in the text.
+    expect(prompt).toContain("system_issues");
+    expect(prompt).toContain("healthcheck_status");
+    expect(prompt).toContain("anomaly_list");
+    expect(prompt).toContain("Do not stop after the first source");
+  });
   test("folds in a valid operator timezone", () => {
     expect(buildChatSystemPrompt({ timeZone: "America/New_York" })).toContain(
       "America/New_York",

package/src/chat/system-prompt.ts CHANGED Viewed

@@ -24,6 +24,33 @@ export const CHAT_SYSTEM_PROMPT =
   "redirect back to Checkstack monitoring and operations. Be concise and " +
   "engineering-focused.";
+/**
+ * How to answer "are there any issues?" thoroughly, and how to pass ids.
+ *
+ * The model tends to answer from the first source that returns something (e.g.
+ * report an SLO breach and stop, missing a failing health check). It also tends
+ * to pass a system NAME, an invented id, or a made-up filter value where a tool
+ * wants a real id/enum - which fails validation. This block spells out the
+ * expected behaviour for both cases: check every source, and pass only real
+ * ids and documented filter values. Tool names are the provider-safe ids the
+ * model is given. `buildChatSystemPrompt` folds this text into the chat system
+ * prompt.
+ */
+export const INVESTIGATION_INSTRUCTION =
+  "When the operator asks whether there are issues/problems, what is wrong, or " +
+  "what is down/failing/breaching, do NOT answer from a single source. Prefer " +
+  "the system_issues tool, which aggregates ALL current problems (failing " +
+  "health checks, breaching/at-risk SLOs, active anomalies, open incidents, " +
+  "active maintenances, dependency problems) across every system in one call. " +
+  "If it is unavailable, instead check ALL of these and report a consolidated " +
+  "summary: failing health checks (healthcheck_status), breaching or at-risk " +
+  "SLOs (slo_listObjectives), active anomalies (anomaly_list), and open " +
+  "incidents (incident_list). Do not stop after the first source that returns " +
+  "something; an empty result from one source does not mean there are no issues " +
+  "in another. " +
+  "Many tools take a systemId, which MUST be a system's UUID: if the operator " +
+  "names a system, first resolve it to its id with the catalog tool, then pass " +
+  "that id. Pass ids and enum filter values EXACTLY as a tool returned or as a " +
+  "tool's description lists them - never invent an id, and never pass a filter " +
+  "value (such as a state) that the tool does not document.";
 /**
  * The date-time wire contract, stated to the model so it emits an offset the
  * first time instead of learning via a rejected tool call. Enforced server-side
@@ -138,9 +165,11 @@ export function buildChatSystemPrompt({
   timeZone?: string;
   now?: Date;
 }): string {
-  return `${CHAT_SYSTEM_PROMPT} ${buildDateTimeContext({
-    timeZone,
-    now,
-    audience: "operator",
-  })}`;
+  return `${CHAT_SYSTEM_PROMPT} ${INVESTIGATION_INSTRUCTION} ${buildDateTimeContext(
+    {
+      timeZone,
+      now,
+      audience: "operator",
+    },
+  )}`;
 }

package/src/extension-points.ts CHANGED Viewed

@@ -1,5 +1,7 @@
 import { createExtensionPoint } from "@checkstack/backend-api";
+import type { AuthUser } from "@checkstack/backend-api";
 import type { PluginMetadata } from "@checkstack/common";
+import type { SystemSignalsMap } from "@checkstack/catalog-common";
 import type { RegisteredAiTool } from "./tool-registry";
 import type { ProjectToolInput } from "./projection";
@@ -39,3 +41,90 @@ export const aiToolProjectionExtensionPoint =
   createExtensionPoint<AiToolProjectionExtensionPoint>(
     "ai.toolProjectionExtensionPoint",
   );
+/**
+ * A single backend contributor of dashboard "needs attention" system signals.
+ *
+ * This mirrors the FRONTEND `SystemSignalsSlot` concept on the backend: where a
+ * frontend plugin's React filler computes per-system `SystemSignal[]` from a
+ * bulk RPC and reports via the slot, a backend plugin registers a contributor
+ * here that returns problem signals for ALL systems globally (keyed by
+ * systemId). The `system.issues` AI tool fans out across every registered
+ * contributor and merges their maps into one "what is wrong right now" answer.
+ *
+ * Access: the `system.issues` tool itself is gated by `catalog.system.read`, but
+ * PER-SOURCE access (and per-system/team scoping) is the contributor's own
+ * responsibility - `read` receives the originating `AuthUser` principal and MUST
+ * return only signals the principal is allowed to see (returning
+ * `{ accessible: false, signals: {} }` when the principal lacks access). The
+ * aggregator never inspects a source's data to decide visibility.
+ */
+export interface SystemSignalsContributor {
+  /**
+   * Stable id of the contributing source, e.g. "incident" / "slo" /
+   * "healthcheck". Surfaced on the aggregated result so the model can attribute
+   * each signal, and used to keep one source's failure from affecting others.
+   */
+  sourceId: string;
+  /**
+   * Return problem signals for ALL systems globally, keyed by systemId, scoped
+   * to what `principal` may see, plus whether the principal could access this
+   * source at all. Systems absent from `signals` have no signal from this
+   * source. MUST resolve from shared, durable storage so the answer is
+   * identical on every pod (state-and-scale rule).
+   *
+   * When the principal lacks access, return `{ accessible: false, signals: {} }`
+   * (NOT a throw) - the aggregator surfaces that as an inaccessible source so
+   * the model can say "I could not check X" instead of implying "no issues".
+   */
+  read(context: {
+    principal: AuthUser;
+  }): Promise<SystemSignalsContribution>;
+}
+/**
+ * One contributor's reply: the signals it found (empty if none or if access was
+ * denied) plus whether the principal could read the source at all. `accessible:
+ * false` means "skipped for lack of permission", which the aggregator reports
+ * distinctly from "checked and found nothing".
+ */
+export interface SystemSignalsContribution {
+  /** `false` when the principal lacked permission to read this source. */
+  accessible: boolean;
+  /** Signals found by this source; empty if none or if access was denied. */
+  signals: SystemSignalsMap;
+}
+/**
+ * Backend extension point for contributing dashboard "needs attention" system
+ * signals to the `system.issues` AI tool. Each plugin that owns a kind of
+ * problem state (failing health checks, breaching/at-risk SLOs, active
+ * anomalies, open incidents, active maintenances, dependency problems)
+ * registers ONE contributor from its own backend `init`. ai-backend collects
+ * every contributor and the `system.issues` tool merges their global maps in a
+ * single call - ai-backend imports no plugin's `*-common` to do so.
+ */
+export interface SystemSignalsExtensionPoint {
+  /** Register this plugin's contributor with the `system.issues` fan-out. */
+  contribute(contributor: SystemSignalsContributor): void;
+}
+/**
+ * Extension-point handle for system-signals contributors, created under the id
+ * "ai.systemSignalsExtensionPoint".
+ */
+export const systemSignalsExtensionPoint =
+  createExtensionPoint<SystemSignalsExtensionPoint>(
+    "ai.systemSignalsExtensionPoint",
+  );
+/**
+ * The access-rule ids a principal holds, for a {@link SystemSignalsContributor}'s
+ * per-source gate. Pass the result to `isAccessRuleSatisfied`.
+ *
+ * Service principals are trusted backend-to-backend callers - the RPC
+ * middleware (`autoAuthMiddleware`) skips access-rule checks for them entirely -
+ * so they are treated here as holding the wildcard `*`, matching that behaviour.
+ * Real users and applications carry their own `accessRules`. Centralising this
+ * keeps every contributor's gate consistent (a service caller sees every source
+ * or none, never a per-source split).
+ *
+ * @param principal - The caller whose granted rule ids are wanted.
+ * @returns `["*"]` when `principal.type` is "service"; otherwise the
+ *   principal's `accessRules`, or an empty list when that field is nullish.
+ */
+export function principalGrantedRuleIds(
+  principal: AuthUser,
+): readonly string[] {
+  if (principal.type === "service") return ["*"];
+  return principal.accessRules ?? [];
+}