@checkstack/ai-backend 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +99 -0
- package/package.json +6 -4
- package/src/agent-runner.test.ts +24 -24
- package/src/chat/agent-loop.test.ts +10 -10
- package/src/chat/auto-apply.test.ts +2 -2
- package/src/chat/chat-service.streamturn.test.ts +16 -1
- package/src/chat/system-prompt.test.ts +11 -0
- package/src/chat/system-prompt.ts +34 -5
- package/src/extension-points.ts +89 -0
- package/src/generated/docs-index.ts +18 -3
- package/src/hardening/handler-authz.test.ts +11 -11
- package/src/index.ts +46 -1
- package/src/mcp/server.test.ts +13 -13
- package/src/propose-apply/service.test.ts +13 -13
- package/src/registry-wiring.test.ts +17 -9
- package/src/registry-wiring.ts +29 -1
- package/src/resolver.test.ts +8 -8
- package/src/system-signals-contributor.test.ts +162 -0
- package/src/system-signals-contributor.ts +129 -0
- package/src/tool-name.test.ts +42 -0
- package/src/tool-name.ts +37 -0
- package/src/tool-registry.ts +14 -4
- package/src/tools/docs-tools.test.ts +1 -1
- package/src/tools/system-issues.test.ts +236 -0
- package/src/tools/system-issues.ts +209 -0
- package/src/tools/tool-set.e2e.test.ts +1 -1
- package/tsconfig.json +6 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,104 @@
|
|
|
1
1
|
# @checkstack/ai-backend
|
|
2
2
|
|
|
3
|
+
## 0.3.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 0b6f01b: feat(ai): add the system.issues aggregator tool and system-signals extension point
|
|
8
|
+
|
|
9
|
+
`ai-backend` gains a new read tool, `system.issues`, that returns ALL current
|
|
10
|
+
system issues - failing health checks, breaching or at-risk SLOs, active
|
|
11
|
+
anomalies, open incidents, active maintenances, and dependency problems -
|
|
12
|
+
aggregated across every system in ONE call. The assistant is steered to reach
|
|
13
|
+
for it FIRST whenever asked whether there are issues, what is down, or for an
|
|
14
|
+
overall health overview, instead of polling each per-domain tool. The tool is
|
|
15
|
+
gated by `catalog.system.read`.
|
|
16
|
+
|
|
17
|
+
The tool owns no domain knowledge. A new backend `systemSignalsExtensionPoint`
|
|
18
|
+
lets any plugin register ONE `SystemSignalsContributor` from its own `init`; the
|
|
19
|
+
tool fans out across every contributor and merges their per-system maps. Each
|
|
20
|
+
contributor enforces its OWN per-source access gate - returning an empty map
|
|
21
|
+
(never throwing) when the principal lacks access - and reads from shared, durable
|
|
22
|
+
storage so the answer is identical on every pod. `ai-backend` imports no
|
|
23
|
+
capability plugin's `*-common` to collect signals; the dependency direction stays
|
|
24
|
+
plugin -> `@checkstack/ai-backend`.
|
|
25
|
+
|
|
26
|
+
The maintenance plugin now registers a `system.issues` contributor (sourceId
|
|
27
|
+
`maintenance`) from its backend `init`, surfacing in-progress maintenances
|
|
28
|
+
alongside the other sources. The contributor enforces its own
|
|
29
|
+
`maintenance.read` gate and reads active maintenances for all systems globally
|
|
30
|
+
via a new `getActiveMaintenancesBySystem` service method. The row->signal mapping
|
|
31
|
+
is extracted into a new pure `deriveMaintenanceSignals` deriver in
|
|
32
|
+
`@checkstack/maintenance-common`, shared by the backend contributor and the
|
|
33
|
+
frontend `MaintenanceSignalsFiller` so the two surfaces stay in lockstep.
|
|
34
|
+
|
|
35
|
+
The new `systemSignalsExtensionPoint`, `SystemSignalsContributor`,
|
|
36
|
+
`SystemSignalsExtensionPoint`, and the `system.issues` tool factory plus its
|
|
37
|
+
pure helpers (`mergeSystemSignalsMaps`, `collectSystemSignals`,
|
|
38
|
+
`toSystemIssuesOutput`, schemas) are exported from `@checkstack/ai-backend`.
|
|
39
|
+
|
|
40
|
+
### Patch Changes
|
|
41
|
+
|
|
42
|
+
- dbb76a2: fix(ai): guide the assistant to find all issues and fix the anomaly tool
|
|
43
|
+
|
|
44
|
+
Two assistant problems reported in production:
|
|
45
|
+
|
|
46
|
+
1. Asked "are there any issues?", the model answered from a single source (an
|
|
47
|
+
SLO breach) and missed a system with a failing health check. The chat
|
|
48
|
+
system prompt now instructs the model to check ALL issue sources before
|
|
49
|
+
answering - failing health checks (`healthcheck_status`), breaching/at-risk
|
|
50
|
+
SLOs (`slo_listObjectives`), active anomalies (`anomaly_list`), and open
|
|
51
|
+
incidents (`incident_list`) - and not to stop after the first source. It
|
|
52
|
+
also tells the model that `systemId` must be a real system UUID (resolve a
|
|
53
|
+
name via the catalog tool first) and to never invent ids or filter values.
|
|
54
|
+
|
|
55
|
+
2. The anomaly tool was named `anomaly.explain` but actually LISTS anomalies
|
|
56
|
+
with optional filters. The misleading name led the model to pass a
|
|
57
|
+
non-existent filter value ("Type validation failed") and a system
|
|
58
|
+
name/anomaly id as `systemId` ("a value was malformed"). Renamed to
|
|
59
|
+
`anomaly.list` with a description that spells out the optional filters and
|
|
60
|
+
their valid enum values (state: suspicious|anomaly|recovered, kind:
|
|
61
|
+
spike|drift, suppression: active|suppressed|all) and that `systemId` is a
|
|
62
|
+
system UUID.
|
|
63
|
+
|
|
64
|
+
Also sharpened the `healthcheck.status` and `slo.listObjectives` tool
|
|
65
|
+
descriptions to be use-case oriented ("use when asked what is failing /
|
|
66
|
+
breaching").
|
|
67
|
+
|
|
68
|
+
BREAKING: the anomaly read tool's name changes from `anomaly_explain` to
|
|
69
|
+
`anomaly_list` over the MCP `tools/list` surface. MCP clients referencing it by
|
|
70
|
+
the old name must update.
|
|
71
|
+
|
|
72
|
+
- @checkstack/sdk@0.103.1
|
|
73
|
+
- @checkstack/backend-api@0.21.6
|
|
74
|
+
- @checkstack/integration-backend@0.4.6
|
|
75
|
+
|
|
76
|
+
## 0.2.0
|
|
77
|
+
|
|
78
|
+
### Minor Changes
|
|
79
|
+
|
|
80
|
+
- 2428bfc: fix(ai): make AI tool names provider-safe (no "." in names)
|
|
81
|
+
|
|
82
|
+
LLM providers (and the MCP spec) require tool names to match
|
|
83
|
+
`^[a-zA-Z0-9_-]+$`, but our tool names are qualified as `<plugin>.<tool>`
|
|
84
|
+
(e.g. `incident.list`, `dependency.list`). The "." caused the model backend to
|
|
85
|
+
reject the tool list, so chat tool-calling failed after deploy.
|
|
86
|
+
|
|
87
|
+
Tool names are now normalized to a provider-safe form at the single
|
|
88
|
+
registration chokepoint (the tool registry) and in the projection-routing
|
|
89
|
+
table: the "." namespace separator is mapped to "\_" (so `incident.list`
|
|
90
|
+
becomes `incident_list`). The registry key, the name serialized out to the
|
|
91
|
+
model / MCP client, and the name the model echoes back in a tool call are all
|
|
92
|
+
the same normalized string, so the round-trip needs no reverse lookup. Any
|
|
93
|
+
other illegal character is an authoring mistake and is now rejected at
|
|
94
|
+
registration rather than silently rewritten.
|
|
95
|
+
|
|
96
|
+
BREAKING: AI tool names exposed over the MCP `tools/list` endpoint change from
|
|
97
|
+
the dotted form (`incident.list`) to the underscored form (`incident_list`).
|
|
98
|
+
MCP clients that referenced tools by their dotted names must update to the
|
|
99
|
+
underscored names. (Chat was already broken by the provider rejection, so this
|
|
100
|
+
only changes the working MCP surface.)
|
|
101
|
+
|
|
3
102
|
## 0.1.6
|
|
4
103
|
|
|
5
104
|
### Patch Changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@checkstack/ai-backend",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"license": "Elastic-2.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.ts",
|
|
@@ -17,11 +17,13 @@
|
|
|
17
17
|
"dependencies": {
|
|
18
18
|
"@ai-sdk/openai-compatible": "^2.0.48",
|
|
19
19
|
"@checkstack/ai-common": "0.1.3",
|
|
20
|
-
"@checkstack/
|
|
20
|
+
"@checkstack/auth-common": "0.8.3",
|
|
21
|
+
"@checkstack/backend-api": "0.21.6",
|
|
22
|
+
"@checkstack/catalog-common": "2.3.4",
|
|
21
23
|
"@checkstack/common": "0.15.0",
|
|
22
24
|
"@checkstack/drizzle-helper": "0.0.5",
|
|
23
|
-
"@checkstack/integration-backend": "0.4.
|
|
24
|
-
"@checkstack/sdk": "0.
|
|
25
|
+
"@checkstack/integration-backend": "0.4.6",
|
|
26
|
+
"@checkstack/sdk": "0.103.1",
|
|
25
27
|
"@orpc/client": "^1.14.4",
|
|
26
28
|
"@orpc/contract": "^1.14.4",
|
|
27
29
|
"@orpc/server": "^1.14.4",
|
package/src/agent-runner.test.ts
CHANGED
|
@@ -47,14 +47,14 @@ describe("createAgentRunner", () => {
|
|
|
47
47
|
const registry = createAiToolRegistry();
|
|
48
48
|
const calls: string[] = [];
|
|
49
49
|
registry.register(
|
|
50
|
-
readTool("
|
|
51
|
-
calls.push("
|
|
50
|
+
readTool("plugin_read", async () => {
|
|
51
|
+
calls.push("plugin_read");
|
|
52
52
|
return { ok: true };
|
|
53
53
|
}),
|
|
54
54
|
);
|
|
55
55
|
// A destructive tool must NOT be offered.
|
|
56
56
|
registry.register({
|
|
57
|
-
name: "
|
|
57
|
+
name: "plugin_delete",
|
|
58
58
|
description: "delete",
|
|
59
59
|
effect: "destructive",
|
|
60
60
|
input: z.object({}),
|
|
@@ -63,7 +63,7 @@ describe("createAgentRunner", () => {
|
|
|
63
63
|
} as RegisteredAiTool);
|
|
64
64
|
// A projected read (deferred sentinel) must NOT be offered in v1.
|
|
65
65
|
registry.register({
|
|
66
|
-
name: "
|
|
66
|
+
name: "plugin_projected",
|
|
67
67
|
description: "projected",
|
|
68
68
|
effect: "read",
|
|
69
69
|
input: z.object({}),
|
|
@@ -77,7 +77,7 @@ describe("createAgentRunner", () => {
|
|
|
77
77
|
const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
|
|
78
78
|
offeredToolNames = Object.keys(args.tools ?? {});
|
|
79
79
|
// Simulate the model calling the read tool once.
|
|
80
|
-
const t = (args.tools ?? {})["
|
|
80
|
+
const t = (args.tools ?? {})["plugin_read"] as {
|
|
81
81
|
execute: (i: unknown) => Promise<unknown>;
|
|
82
82
|
};
|
|
83
83
|
await t.execute({});
|
|
@@ -102,11 +102,11 @@ describe("createAgentRunner", () => {
|
|
|
102
102
|
outputSchema: z.object({ severity: z.string() }),
|
|
103
103
|
});
|
|
104
104
|
|
|
105
|
-
expect(offeredToolNames.sort()).toEqual(["
|
|
106
|
-
expect(calls).toEqual(["
|
|
105
|
+
expect(offeredToolNames.sort()).toEqual(["plugin_read"]);
|
|
106
|
+
expect(calls).toEqual(["plugin_read"]);
|
|
107
107
|
expect(result.text).toBe("done");
|
|
108
108
|
expect(result.object).toEqual({ severity: "high" });
|
|
109
|
-
expect(result.toolCalls).toEqual([{ tool: "
|
|
109
|
+
expect(result.toolCalls).toEqual([{ tool: "plugin_read", ok: true }]);
|
|
110
110
|
});
|
|
111
111
|
|
|
112
112
|
it("hands the model a date-safe schema for tools with Date inputs (no throw)", async () => {
|
|
@@ -116,7 +116,7 @@ describe("createAgentRunner", () => {
|
|
|
116
116
|
// chat. The runner must gate date inputs through dateSafeModelSchema too.
|
|
117
117
|
const registry = createAiToolRegistry();
|
|
118
118
|
registry.register({
|
|
119
|
-
name: "
|
|
119
|
+
name: "plugin_history",
|
|
120
120
|
description: "history",
|
|
121
121
|
effect: "read",
|
|
122
122
|
input: z.object({ since: z.date() }),
|
|
@@ -130,7 +130,7 @@ describe("createAgentRunner", () => {
|
|
|
130
130
|
async (args: {
|
|
131
131
|
tools?: Record<string, { inputSchema: unknown }>;
|
|
132
132
|
}) => {
|
|
133
|
-
const t = (args.tools ?? {})["
|
|
133
|
+
const t = (args.tools ?? {})["plugin_history"];
|
|
134
134
|
// Exactly what the SDK does internally to build the model request; this
|
|
135
135
|
// threw before the fix.
|
|
136
136
|
offeredSchema = await asSchema(t.inputSchema as never).jsonSchema;
|
|
@@ -161,7 +161,7 @@ describe("createAgentRunner", () => {
|
|
|
161
161
|
it("offers a projected read tool and routes it through the principal's client", async () => {
|
|
162
162
|
const registry = createAiToolRegistry();
|
|
163
163
|
registry.register({
|
|
164
|
-
name: "
|
|
164
|
+
name: "incident_list",
|
|
165
165
|
description: "list incidents",
|
|
166
166
|
effect: "read",
|
|
167
167
|
input: z.object({}),
|
|
@@ -186,7 +186,7 @@ describe("createAgentRunner", () => {
|
|
|
186
186
|
let offered: string[] = [];
|
|
187
187
|
const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
|
|
188
188
|
offered = Object.keys(args.tools ?? {});
|
|
189
|
-
const t = (args.tools ?? {})["
|
|
189
|
+
const t = (args.tools ?? {})["incident_list"] as {
|
|
190
190
|
execute: (i: unknown) => Promise<unknown>;
|
|
191
191
|
};
|
|
192
192
|
await t.execute({ status: "open" });
|
|
@@ -197,7 +197,7 @@ describe("createAgentRunner", () => {
|
|
|
197
197
|
resolver,
|
|
198
198
|
resolveConnection: async () => connection,
|
|
199
199
|
getProjectionRoute: (name) =>
|
|
200
|
-
name === "
|
|
200
|
+
name === "incident_list"
|
|
201
201
|
? { pluginId: "incident", procedureKey: "listIncidents" }
|
|
202
202
|
: undefined,
|
|
203
203
|
modelFns: { generateText: generateText as never },
|
|
@@ -210,15 +210,15 @@ describe("createAgentRunner", () => {
|
|
|
210
210
|
prompt: "go",
|
|
211
211
|
});
|
|
212
212
|
|
|
213
|
-
expect(offered).toEqual(["
|
|
213
|
+
expect(offered).toEqual(["incident_list"]);
|
|
214
214
|
expect(procCalls).toEqual([{ status: "open" }]);
|
|
215
|
-
expect(result.toolCalls).toEqual([{ tool: "
|
|
215
|
+
expect(result.toolCalls).toEqual([{ tool: "incident_list", ok: true }]);
|
|
216
216
|
});
|
|
217
217
|
|
|
218
218
|
it("records a tool failure and surfaces it to the model instead of aborting", async () => {
|
|
219
219
|
const registry = createAiToolRegistry();
|
|
220
220
|
registry.register(
|
|
221
|
-
readTool("
|
|
221
|
+
readTool("plugin_boom", async () => {
|
|
222
222
|
throw new Error("missing access: plugin.read");
|
|
223
223
|
}),
|
|
224
224
|
);
|
|
@@ -226,7 +226,7 @@ describe("createAgentRunner", () => {
|
|
|
226
226
|
|
|
227
227
|
let toolResult: unknown;
|
|
228
228
|
const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
|
|
229
|
-
const t = (args.tools ?? {})["
|
|
229
|
+
const t = (args.tools ?? {})["plugin_boom"] as {
|
|
230
230
|
execute: (i: unknown) => Promise<unknown>;
|
|
231
231
|
};
|
|
232
232
|
toolResult = await t.execute({});
|
|
@@ -247,15 +247,15 @@ describe("createAgentRunner", () => {
|
|
|
247
247
|
});
|
|
248
248
|
|
|
249
249
|
expect(toolResult).toEqual({ error: "missing access: plugin.read" });
|
|
250
|
-
expect(result.toolCalls).toEqual([{ tool: "
|
|
250
|
+
expect(result.toolCalls).toEqual([{ tool: "plugin_boom", ok: false }]);
|
|
251
251
|
expect(result.object).toBeUndefined();
|
|
252
252
|
});
|
|
253
253
|
|
|
254
254
|
it("calls recordToolCall for each invocation (ok and failure)", async () => {
|
|
255
255
|
const registry = createAiToolRegistry();
|
|
256
|
-
registry.register(readTool("
|
|
256
|
+
registry.register(readTool("plugin_ok", async () => ({ ok: true })));
|
|
257
257
|
registry.register(
|
|
258
|
-
readTool("
|
|
258
|
+
readTool("plugin_boom", async () => {
|
|
259
259
|
throw new Error("nope");
|
|
260
260
|
}),
|
|
261
261
|
);
|
|
@@ -273,8 +273,8 @@ describe("createAgentRunner", () => {
|
|
|
273
273
|
|
|
274
274
|
const generateText = mock(async (args: { tools?: Record<string, unknown> }) => {
|
|
275
275
|
const tools = args.tools ?? {};
|
|
276
|
-
await (tools["
|
|
277
|
-
await (tools["
|
|
276
|
+
await (tools["plugin_ok"] as { execute: (i: unknown) => Promise<unknown> }).execute({});
|
|
277
|
+
await (tools["plugin_boom"] as { execute: (i: unknown) => Promise<unknown> }).execute({});
|
|
278
278
|
return { text: "x", usage: {} };
|
|
279
279
|
});
|
|
280
280
|
|
|
@@ -287,12 +287,12 @@ describe("createAgentRunner", () => {
|
|
|
287
287
|
await runner({ principal, rpcClient, connectionId: "c", prompt: "go" });
|
|
288
288
|
|
|
289
289
|
expect(recorded).toContainEqual({
|
|
290
|
-
toolName: "
|
|
290
|
+
toolName: "plugin_ok",
|
|
291
291
|
ok: true,
|
|
292
292
|
effect: "read",
|
|
293
293
|
});
|
|
294
294
|
expect(recorded).toContainEqual({
|
|
295
|
-
toolName: "
|
|
295
|
+
toolName: "plugin_boom",
|
|
296
296
|
ok: false,
|
|
297
297
|
effect: "read",
|
|
298
298
|
});
|
|
@@ -28,9 +28,9 @@ function tool(
|
|
|
28
28
|
|
|
29
29
|
function setup() {
|
|
30
30
|
const registry = createAiToolRegistry();
|
|
31
|
-
const read = tool("
|
|
32
|
-
const mutate = tool("
|
|
33
|
-
const destroy = tool("
|
|
31
|
+
const read = tool("incident_list", "read", "incident.incident.read");
|
|
32
|
+
const mutate = tool("automation_propose", "mutate", "automation.automation.manage");
|
|
33
|
+
const destroy = tool("incident_delete", "destructive", "incident.incident.manage");
|
|
34
34
|
registry.register(read);
|
|
35
35
|
registry.register(mutate);
|
|
36
36
|
registry.register(destroy);
|
|
@@ -57,15 +57,15 @@ describe("agent loop tool gating (matrix #14)", () => {
|
|
|
57
57
|
test("the loop only offers resolver-allowed tools", () => {
|
|
58
58
|
const { resolver } = setup();
|
|
59
59
|
const offered = offeredTools({ principal: limited, resolver }).map((t) => t.name);
|
|
60
|
-
expect(offered).toEqual(["
|
|
61
|
-
expect(offered).not.toContain("
|
|
62
|
-
expect(offered).not.toContain("
|
|
60
|
+
expect(offered).toEqual(["incident_list"]);
|
|
61
|
+
expect(offered).not.toContain("automation_propose");
|
|
62
|
+
expect(offered).not.toContain("incident_delete");
|
|
63
63
|
});
|
|
64
64
|
|
|
65
65
|
test("a model-requested tool OUTSIDE the principal's set is refused server-side", () => {
|
|
66
66
|
const { resolver, registry } = setup();
|
|
67
67
|
const d = disposeAgentTool({
|
|
68
|
-
toolName: "
|
|
68
|
+
toolName: "automation_propose",
|
|
69
69
|
principal: limited,
|
|
70
70
|
resolver,
|
|
71
71
|
getTool: (n) => registry.getTool(n),
|
|
@@ -87,7 +87,7 @@ describe("agent loop tool gating (matrix #14)", () => {
|
|
|
87
87
|
test("a read tool auto-runs", () => {
|
|
88
88
|
const { resolver, registry } = setup();
|
|
89
89
|
const d = disposeAgentTool({
|
|
90
|
-
toolName: "
|
|
90
|
+
toolName: "incident_list",
|
|
91
91
|
principal: limited,
|
|
92
92
|
resolver,
|
|
93
93
|
getTool: (n) => registry.getTool(n),
|
|
@@ -98,7 +98,7 @@ describe("agent loop tool gating (matrix #14)", () => {
|
|
|
98
98
|
test("a mutate tool requires a confirm card (never silently mutates)", () => {
|
|
99
99
|
const { resolver, registry } = setup();
|
|
100
100
|
const d = disposeAgentTool({
|
|
101
|
-
toolName: "
|
|
101
|
+
toolName: "automation_propose",
|
|
102
102
|
principal: power,
|
|
103
103
|
resolver,
|
|
104
104
|
getTool: (n) => registry.getTool(n),
|
|
@@ -109,7 +109,7 @@ describe("agent loop tool gating (matrix #14)", () => {
|
|
|
109
109
|
test("a destructive tool requires a confirm card", () => {
|
|
110
110
|
const { resolver, registry } = setup();
|
|
111
111
|
const d = disposeAgentTool({
|
|
112
|
-
toolName: "
|
|
112
|
+
toolName: "incident_delete",
|
|
113
113
|
principal: power,
|
|
114
114
|
resolver,
|
|
115
115
|
getTool: (n) => registry.getTool(n),
|
|
@@ -129,7 +129,7 @@ function mutatingTool(): {
|
|
|
129
129
|
created: input.value,
|
|
130
130
|
}));
|
|
131
131
|
const tool: RegisteredAiTool<{ value: string }, { created: string }> = {
|
|
132
|
-
name: "
|
|
132
|
+
name: "demo_mutate",
|
|
133
133
|
description: "demo mutating tool",
|
|
134
134
|
effect: "mutate",
|
|
135
135
|
input: ManageInput,
|
|
@@ -208,7 +208,7 @@ describe("AUTO-mode mutate auto-apply path", () => {
|
|
|
208
208
|
// proposed -> applied, with the applier stamped. Not a weaker/parallel path.
|
|
209
209
|
const applied = [...store.rows.values()].filter((r) => r.status === "applied");
|
|
210
210
|
expect(applied).toHaveLength(1);
|
|
211
|
-
expect(applied[0]?.toolName).toBe("
|
|
211
|
+
expect(applied[0]?.toolName).toBe("demo_mutate");
|
|
212
212
|
expect(applied[0]?.effect).toBe("mutate");
|
|
213
213
|
expect(applied[0]?.appliedById).toBe("u1");
|
|
214
214
|
expect(applied[0]?.id).toBe(result.toolCallId);
|
|
@@ -1,4 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
describe,
|
|
3
|
+
expect,
|
|
4
|
+
test,
|
|
5
|
+
mock,
|
|
6
|
+
beforeEach,
|
|
7
|
+
afterEach,
|
|
8
|
+
afterAll,
|
|
9
|
+
} from "bun:test";
|
|
2
10
|
import { APICallError, type LanguageModelUsage } from "ai";
|
|
3
11
|
import type { AuthUser } from "@checkstack/backend-api";
|
|
4
12
|
import type { OpenAiCompatibleConnection } from "@checkstack/ai-common";
|
|
@@ -210,6 +218,13 @@ afterEach(() => {
|
|
|
210
218
|
mock.restore();
|
|
211
219
|
});
|
|
212
220
|
|
|
221
|
+
// mock.restore() does NOT undo a module mock, so restore the real `ai` module
|
|
222
|
+
// here - otherwise the stubbed streamText/stepCountIs leak into every other
|
|
223
|
+
// ai-backend suite that runs after this file.
|
|
224
|
+
afterAll(() => {
|
|
225
|
+
mock.module("ai", () => ({ ...realAi }));
|
|
226
|
+
});
|
|
227
|
+
|
|
213
228
|
describe("streamTurn topical pre-classifier", () => {
|
|
214
229
|
const turn = {
|
|
215
230
|
principal,
|
|
@@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test";
|
|
|
2
2
|
import {
|
|
3
3
|
CHAT_SYSTEM_PROMPT,
|
|
4
4
|
DATE_FORMAT_INSTRUCTION,
|
|
5
|
+
INVESTIGATION_INSTRUCTION,
|
|
5
6
|
buildChatSystemPrompt,
|
|
6
7
|
buildDateTimeContext,
|
|
7
8
|
formatInstantInZone,
|
|
@@ -45,6 +46,16 @@ describe("buildChatSystemPrompt", () => {
|
|
|
45
46
|
expect(prompt).toContain(DATE_FORMAT_INSTRUCTION);
|
|
46
47
|
});
|
|
47
48
|
|
|
49
|
+
test("carries the issue-investigation guidance (check all sources, real ids)", () => {
|
|
50
|
+
const prompt = buildChatSystemPrompt({ timeZone: "Europe/Berlin" });
|
|
51
|
+
expect(prompt).toContain(INVESTIGATION_INSTRUCTION);
|
|
52
|
+
// The concrete behaviours we are fixing must be present in the text.
|
|
53
|
+
expect(prompt).toContain("system_issues");
|
|
54
|
+
expect(prompt).toContain("healthcheck_status");
|
|
55
|
+
expect(prompt).toContain("anomaly_list");
|
|
56
|
+
expect(prompt).toContain("Do not stop after the first source");
|
|
57
|
+
});
|
|
58
|
+
|
|
48
59
|
test("folds in a valid operator timezone", () => {
|
|
49
60
|
expect(buildChatSystemPrompt({ timeZone: "America/New_York" })).toContain(
|
|
50
61
|
"America/New_York",
|
|
@@ -24,6 +24,33 @@ export const CHAT_SYSTEM_PROMPT =
|
|
|
24
24
|
"redirect back to Checkstack monitoring and operations. Be concise and " +
|
|
25
25
|
"engineering-focused.";
|
|
26
26
|
|
|
27
|
+
/**
|
|
28
|
+
* How to answer "are there any issues?" thoroughly, and how to pass ids.
|
|
29
|
+
*
|
|
30
|
+
* The model tends to answer from the first source that returns something (e.g.
|
|
31
|
+
* report an SLO breach and stop, missing a failing health check). It also tends
|
|
32
|
+
* to pass a system NAME, an invented id, or a made-up filter value where a tool
|
|
33
|
+
* wants a real id/enum - which fails validation. This block makes both
|
|
34
|
+
* behaviours explicit. Tool names are the provider-safe ids the model is given.
|
|
35
|
+
*/
|
|
36
|
+
export const INVESTIGATION_INSTRUCTION =
|
|
37
|
+
"When the operator asks whether there are issues/problems, what is wrong, or " +
|
|
38
|
+
"what is down/failing/breaching, do NOT answer from a single source. Prefer " +
|
|
39
|
+
"the system_issues tool, which aggregates ALL current problems (failing " +
|
|
40
|
+
"health checks, breaching/at-risk SLOs, active anomalies, open incidents, " +
|
|
41
|
+
"active maintenances, dependency problems) across every system in one call. " +
|
|
42
|
+
"If it is unavailable, instead check ALL of these and report a consolidated " +
|
|
43
|
+
"summary: failing health checks (healthcheck_status), breaching or at-risk " +
|
|
44
|
+
"SLOs (slo_listObjectives), active anomalies (anomaly_list), and open " +
|
|
45
|
+
"incidents (incident_list). Do not stop after the first source that returns " +
|
|
46
|
+
"something; an empty result from one source does not mean there are no issues " +
|
|
47
|
+
"in another. " +
|
|
48
|
+
"Many tools take a systemId, which MUST be a system's UUID: if the operator " +
|
|
49
|
+
"names a system, first resolve it to its id with the catalog tool, then pass " +
|
|
50
|
+
"that id. Pass ids and enum filter values EXACTLY as a tool returned or as a " +
|
|
51
|
+
"tool's description lists them - never invent an id, and never pass a filter " +
|
|
52
|
+
"value (such as a state) that the tool does not document.";
|
|
53
|
+
|
|
27
54
|
/**
|
|
28
55
|
* The date-time wire contract, stated to the model so it emits an offset the
|
|
29
56
|
* first time instead of learning via a rejected tool call. Enforced server-side
|
|
@@ -138,9 +165,11 @@ export function buildChatSystemPrompt({
|
|
|
138
165
|
timeZone?: string;
|
|
139
166
|
now?: Date;
|
|
140
167
|
}): string {
|
|
141
|
-
return `${CHAT_SYSTEM_PROMPT} ${buildDateTimeContext(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
168
|
+
return `${CHAT_SYSTEM_PROMPT} ${INVESTIGATION_INSTRUCTION} ${buildDateTimeContext(
|
|
169
|
+
{
|
|
170
|
+
timeZone,
|
|
171
|
+
now,
|
|
172
|
+
audience: "operator",
|
|
173
|
+
},
|
|
174
|
+
)}`;
|
|
146
175
|
}
|
package/src/extension-points.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { createExtensionPoint } from "@checkstack/backend-api";
|
|
2
|
+
import type { AuthUser } from "@checkstack/backend-api";
|
|
2
3
|
import type { PluginMetadata } from "@checkstack/common";
|
|
4
|
+
import type { SystemSignalsMap } from "@checkstack/catalog-common";
|
|
3
5
|
import type { RegisteredAiTool } from "./tool-registry";
|
|
4
6
|
import type { ProjectToolInput } from "./projection";
|
|
5
7
|
|
|
@@ -39,3 +41,90 @@ export const aiToolProjectionExtensionPoint =
|
|
|
39
41
|
createExtensionPoint<AiToolProjectionExtensionPoint>(
|
|
40
42
|
"ai.toolProjectionExtensionPoint",
|
|
41
43
|
);
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* A single backend contributor of dashboard "needs attention" system signals.
|
|
47
|
+
*
|
|
48
|
+
* This mirrors the FRONTEND `SystemSignalsSlot` concept on the backend: where a
|
|
49
|
+
* frontend plugin's React filler computes per-system `SystemSignal[]` from a
|
|
50
|
+
* bulk RPC and reports via the slot, a backend plugin registers a contributor
|
|
51
|
+
* here that returns problem signals for ALL systems globally (keyed by
|
|
52
|
+
* systemId). The `system.issues` AI tool fans out across every registered
|
|
53
|
+
* contributor and merges their maps into one "what is wrong right now" answer.
|
|
54
|
+
*
|
|
55
|
+
* Access: the `system.issues` tool itself is gated by `catalog.system.read`, but
|
|
56
|
+
* PER-SOURCE access (and per-system/team scoping) is the contributor's own
|
|
57
|
+
* responsibility - `read` receives the originating `AuthUser` principal and MUST
|
|
58
|
+
* return only signals the principal is allowed to see (returning `{ accessible: false, signals: {} }` when the
|
|
59
|
+
* principal lacks access). The aggregator never inspects a source's data to
|
|
60
|
+
* decide visibility.
|
|
61
|
+
*/
|
|
62
|
+
export interface SystemSignalsContributor {
|
|
63
|
+
/**
|
|
64
|
+
* Stable id of the contributing source, e.g. "incident" / "slo" /
|
|
65
|
+
* "healthcheck". Surfaced on the aggregated result so the model can attribute
|
|
66
|
+
* each signal, and used to keep one source's failure from affecting others.
|
|
67
|
+
*/
|
|
68
|
+
sourceId: string;
|
|
69
|
+
/**
|
|
70
|
+
* Return problem signals for ALL systems globally, keyed by systemId, scoped
|
|
71
|
+
* to what `principal` may see, plus whether the principal could access this
|
|
72
|
+
* source at all. Systems absent from `signals` have no signal from this
|
|
73
|
+
* source. MUST resolve from shared, durable storage so the answer is
|
|
74
|
+
* identical on every pod (state-and-scale rule).
|
|
75
|
+
*
|
|
76
|
+
* When the principal lacks access, return `{ accessible: false, signals: {} }`
|
|
77
|
+
* (NOT a throw) - the aggregator surfaces that as an inaccessible source so
|
|
78
|
+
* the model can say "I could not check X" instead of implying "no issues".
|
|
79
|
+
*/
|
|
80
|
+
read(context: {
|
|
81
|
+
principal: AuthUser;
|
|
82
|
+
}): Promise<SystemSignalsContribution>;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* One contributor's reply: the signals it found (empty if none or if access was
|
|
87
|
+
* denied) plus whether the principal could read the source at all. `accessible:
|
|
88
|
+
* false` means "skipped for lack of permission", which the aggregator reports
|
|
89
|
+
* distinctly from "checked and found nothing".
|
|
90
|
+
*/
|
|
91
|
+
export interface SystemSignalsContribution {
|
|
92
|
+
accessible: boolean;
|
|
93
|
+
signals: SystemSignalsMap;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Backend extension point for contributing dashboard "needs attention" system
|
|
98
|
+
* signals to the `system.issues` AI tool. Each plugin that owns a kind of
|
|
99
|
+
* problem state (breaching/at-risk SLOs, failing health checks,
|
|
100
|
+
* active anomalies, open incidents, active maintenances, dependency problems)
|
|
101
|
+
* registers ONE contributor from its own backend `init`. ai-backend collects
|
|
102
|
+
* every contributor and the `system.issues` tool merges their global maps in a
|
|
103
|
+
* single call - ai-backend imports no plugin's `*-common` to do so.
|
|
104
|
+
*/
|
|
105
|
+
export interface SystemSignalsExtensionPoint {
|
|
106
|
+
contribute(contributor: SystemSignalsContributor): void;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
export const systemSignalsExtensionPoint =
|
|
110
|
+
createExtensionPoint<SystemSignalsExtensionPoint>(
|
|
111
|
+
"ai.systemSignalsExtensionPoint",
|
|
112
|
+
);
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* The access-rule ids a principal holds, for a {@link SystemSignalsContributor}'s
|
|
116
|
+
* per-source gate. Pass the result to `isAccessRuleSatisfied`.
|
|
117
|
+
*
|
|
118
|
+
* Service principals are trusted backend-to-backend callers - the RPC
|
|
119
|
+
* middleware (`autoAuthMiddleware`) skips access-rule checks for them entirely -
|
|
120
|
+
* so they are treated here as holding the wildcard `*`, matching that behaviour.
|
|
121
|
+
* Real users and applications carry their own `accessRules`. Centralising this
|
|
122
|
+
* keeps every contributor's gate consistent (a service caller sees every source
|
|
123
|
+
* or none, never a per-source split).
|
|
124
|
+
*/
|
|
125
|
+
export function principalGrantedRuleIds(
|
|
126
|
+
principal: AuthUser,
|
|
127
|
+
): readonly string[] {
|
|
128
|
+
if (principal.type === "service") return ["*"];
|
|
129
|
+
return principal.accessRules ?? [];
|
|
130
|
+
}
|