@checkstack/ai-backend 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +97 -0
- package/drizzle/0000_productive_jackpot.sql +26 -0
- package/drizzle/0001_puzzling_purple_man.sql +26 -0
- package/drizzle/0002_sparkling_paper_doll.sql +15 -0
- package/drizzle/0003_married_senator_kelly.sql +1 -0
- package/drizzle/0004_crazy_miek.sql +2 -0
- package/drizzle/0005_tearful_randall_flagg.sql +1 -0
- package/drizzle/meta/0000_snapshot.json +232 -0
- package/drizzle/meta/0001_snapshot.json +434 -0
- package/drizzle/meta/0002_snapshot.json +551 -0
- package/drizzle/meta/0003_snapshot.json +557 -0
- package/drizzle/meta/0004_snapshot.json +573 -0
- package/drizzle/meta/0005_snapshot.json +574 -0
- package/drizzle/meta/_journal.json +48 -0
- package/drizzle.config.ts +7 -0
- package/package.json +42 -0
- package/src/agent-runner.test.ts +262 -0
- package/src/agent-runner.ts +262 -0
- package/src/chat/agent-loop.test.ts +119 -0
- package/src/chat/agent-loop.ts +73 -0
- package/src/chat/auto-apply.test.ts +237 -0
- package/src/chat/chat-handler.ts +111 -0
- package/src/chat/chat-service.streamturn.test.ts +417 -0
- package/src/chat/chat-service.test.ts +250 -0
- package/src/chat/chat-service.ts +923 -0
- package/src/chat/classifier-service.ts +64 -0
- package/src/chat/classifier.logic.test.ts +92 -0
- package/src/chat/classifier.logic.ts +71 -0
- package/src/chat/conversation-store.it.test.ts +203 -0
- package/src/chat/conversation-store.test.ts +248 -0
- package/src/chat/conversation-store.ts +237 -0
- package/src/chat/decision.logic.test.ts +45 -0
- package/src/chat/decision.logic.ts +54 -0
- package/src/chat/llm-provider.test.ts +63 -0
- package/src/chat/llm-provider.ts +67 -0
- package/src/chat/model-error.logic.test.ts +60 -0
- package/src/chat/model-error.logic.ts +65 -0
- package/src/chat/normalize-messages.logic.test.ts +101 -0
- package/src/chat/normalize-messages.logic.ts +65 -0
- package/src/chat/permission-mode.logic.test.ts +70 -0
- package/src/chat/permission-mode.logic.ts +45 -0
- package/src/chat/read-invoker.ts +72 -0
- package/src/chat/replay.test.ts +174 -0
- package/src/chat/scrub-content.test.ts +183 -0
- package/src/chat/scrub-content.ts +154 -0
- package/src/chat/sdk-tools.test.ts +168 -0
- package/src/chat/sdk-tools.ts +181 -0
- package/src/chat/title-service.test.ts +146 -0
- package/src/chat/title-service.ts +111 -0
- package/src/chat/title.logic.test.ts +98 -0
- package/src/chat/title.logic.ts +102 -0
- package/src/extension-points.ts +41 -0
- package/src/generated/docs-index.ts +3020 -0
- package/src/hardening/handler-authz.test.ts +282 -0
- package/src/hardening/no-secret-leak.test.ts +303 -0
- package/src/hooks.ts +33 -0
- package/src/index.ts +542 -0
- package/src/mcp/connection-registry.test.ts +25 -0
- package/src/mcp/connection-registry.ts +54 -0
- package/src/mcp/mcp-conformance.it.test.ts +128 -0
- package/src/mcp/server.test.ts +285 -0
- package/src/mcp/server.ts +300 -0
- package/src/mcp/tool-invoker.ts +65 -0
- package/src/openai-provider.test.ts +64 -0
- package/src/openai-provider.ts +146 -0
- package/src/projection.test.ts +97 -0
- package/src/projection.ts +132 -0
- package/src/propose-apply/args-hash.test.ts +26 -0
- package/src/propose-apply/args-hash.ts +30 -0
- package/src/propose-apply/service.test.ts +423 -0
- package/src/propose-apply/service.ts +419 -0
- package/src/propose-apply/store.test.ts +136 -0
- package/src/propose-apply/store.ts +224 -0
- package/src/propose-apply/token.test.ts +52 -0
- package/src/propose-apply/token.ts +71 -0
- package/src/rate-limit/spend-ledger.it.test.ts +224 -0
- package/src/rate-limit/spend-ledger.test.ts +176 -0
- package/src/rate-limit/spend-ledger.ts +162 -0
- package/src/rate-limit/tool-budget.it.test.ts +173 -0
- package/src/rate-limit/tool-budget.test.ts +58 -0
- package/src/rate-limit/tool-budget.ts +107 -0
- package/src/registry-wiring.test.ts +131 -0
- package/src/registry-wiring.ts +68 -0
- package/src/resolver.test.ts +156 -0
- package/src/resolver.ts +78 -0
- package/src/router.test.ts +78 -0
- package/src/router.ts +345 -0
- package/src/schema.ts +284 -0
- package/src/serializer.test.ts +88 -0
- package/src/serializer.ts +42 -0
- package/src/tool-registry.ts +58 -0
- package/src/tools/composite-tools.ts +24 -0
- package/src/tools/docs-tools.test.ts +150 -0
- package/src/tools/docs-tools.ts +115 -0
- package/src/tools/probe-url.test.ts +51 -0
- package/src/tools/probe-url.ts +146 -0
- package/src/tools/rank-docs.test.ts +153 -0
- package/src/tools/rank-docs.ts +209 -0
- package/src/tools/script-context-extract.test.ts +93 -0
- package/src/tools/script-context-extract.ts +283 -0
- package/src/tools/ssrf-guard.test.ts +69 -0
- package/src/tools/ssrf-guard.ts +108 -0
- package/src/tools/tool-set.e2e.test.ts +64 -0
- package/src/user-rpc-client.test.ts +45 -0
- package/src/user-rpc-client.ts +60 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { qualifyAccessRuleId } from "@checkstack/common";
|
|
3
|
+
import type {
|
|
4
|
+
AccessRule,
|
|
5
|
+
PluginMetadata,
|
|
6
|
+
ProcedureMetadata,
|
|
7
|
+
} from "@checkstack/common";
|
|
8
|
+
import { isContractProcedure } from "@orpc/contract";
|
|
9
|
+
import type { AnyContractProcedure } from "@orpc/contract";
|
|
10
|
+
import type { AuthUser } from "@checkstack/backend-api";
|
|
11
|
+
import type { AiToolEffect } from "@checkstack/ai-common";
|
|
12
|
+
import type { RegisteredAiTool } from "./tool-registry";
|
|
13
|
+
|
|
14
|
+
/**
 * The narrow slice of an oRPC contract procedure's `~orpc` definition that the
 * projection actually reads. oRPC owns the full definition type; only the two
 * fields touched here are described, so the projection does not lean on
 * undocumented internals.
 */
interface ProjectableProcedureDef {
  /** Procedure metadata; the projection reads its `access` rules. */
  meta?: Partial<ProcedureMetadata>;
  /** Input schema of the procedure, absent when the procedure takes no input. */
  inputSchema?: z.ZodType<unknown>;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Everything {@link buildProjectedTool} needs to turn an existing oRPC contract
 * procedure into an AI tool. The procedure's access rules and input schema are
 * taken from the procedure itself rather than being restated here
 * (decision 2a).
 */
export interface ProjectToolInput<TInput = unknown, TOutput = unknown> {
  /** Contract procedure being projected. */
  procedure: AnyContractProcedure;
  /**
   * Metadata of the plugin that OWNS `procedure`. The procedure's access rules
   * are unqualified; this metadata is what qualifies them into the
   * `<plugin>.<resource>.<level>` IDs that `autoAuthMiddleware` enforces. When
   * one plugin projects a procedure belonging to another plugin, this MUST be
   * the source plugin's metadata — otherwise the gate would check a wrong rule
   * id.
   */
  sourcePluginMetadata: PluginMetadata;
  /** Description shown to the model (procedures often lack good model prose). */
  description: string;
  /** Effect classification — REQUIRED, never inferred from operationType. */
  effect: AiToolEffect;
  /** Explicit tool name; when omitted the name is `<sourcePlugin>.<key>`. */
  name?: string;
  /**
   * Key of the procedure (e.g. "listIncidents"). Feeds the derived tool name
   * whenever `name` is not supplied.
   */
  procedureKey: string;
  /** Executor that invokes the underlying procedure on behalf of the principal. */
  execute(args: { input: TInput; principal: AuthUser }): Promise<TOutput>;
  /** Optional dry-run used by mutate / destructive projections (Phase 3). */
  dryRun?: RegisteredAiTool<TInput, TOutput>["dryRun"];
}
|
|
56
|
+
|
|
57
|
+
/**
 * Placeholder `execute` for plugins that EXPOSE a read-only projection.
 *
 * A projected read tool is not meant to be run through its own `execute`: the
 * MCP transport and the chat read-loop re-enter the live router AS the
 * principal, using the tool's `{ pluginId, procedureKey }` routing, so that
 * handler-side authz holds. This function is the fail-closed fallback for a
 * transport that forgot to route — it rejects instead of silently running as a
 * trusted service principal.
 *
 * @returns A promise that always rejects with an `Error`.
 */
export function deferredProjectionExecute(): Promise<never> {
  const message =
    "Projected read tool execution is routed by the transport (MCP / chat), " +
    "not run via its own execute.";
  const failure = new Error(message);
  return Promise.reject(failure);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Build a {@link RegisteredAiTool} from an existing oRPC contract procedure.
 *
 * - `requiredAccessRules` is the procedure's `~orpc.meta.access` rules,
 *   qualified against the source plugin id — exactly the IDs the resolver and
 *   `autoAuthMiddleware` compare against `principal.accessRules`. A procedure
 *   without access metadata yields an empty list.
 * - `input` is the procedure's `~orpc.inputSchema`; a procedure with no input
 *   projects to an empty-object schema.
 * - `name` is the explicit `name` when given, otherwise
 *   `<sourcePluginMetadata.pluginId>.<procedureKey>`.
 * - `effect` is mandatory and never inferred (a `mutation` operationType is
 *   NOT the same as a destructive effect).
 *
 * @param input - The procedure to project plus the tool-facing metadata.
 * @returns The tool definition, ready to be registered.
 * @throws Error when `procedure` is not an oRPC contract procedure, or when
 *   `effect` is missing (`undefined` or `null`).
 */
export function buildProjectedTool<TInput = unknown, TOutput = unknown>(
  input: ProjectToolInput<TInput, TOutput>,
): RegisteredAiTool<TInput, TOutput> {
  const {
    procedure,
    sourcePluginMetadata,
    description,
    effect,
    name,
    procedureKey,
    execute,
    dryRun,
  } = input;

  if (!isContractProcedure(procedure)) {
    throw new Error(
      `Cannot project ${procedureKey}: value is not an oRPC contract procedure.`,
    );
  }
  // `effect` is required by the type, but untyped (JS / JSON-driven) callers can
  // still omit it or pass `null`; both must fail closed rather than register a
  // tool with no effect classification.
  if (effect === undefined || effect === null) {
    throw new Error(
      `Cannot project ${procedureKey}: 'effect' is required and is never inferred from the operation type.`,
    );
  }

  // Only the two fields declared on ProjectableProcedureDef are read from the
  // oRPC definition.
  const def = (procedure as { "~orpc": ProjectableProcedureDef })["~orpc"];
  const accessRules: AccessRule[] = def.meta?.access ?? [];
  const requiredAccessRules = accessRules.map((rule) =>
    qualifyAccessRuleId(sourcePluginMetadata, rule),
  );

  // No declared input schema means "takes no arguments": project that as an
  // empty object schema.
  const inputSchema: z.ZodType<TInput> =
    (def.inputSchema as z.ZodType<TInput> | undefined) ??
    (z.object({}) as unknown as z.ZodType<TInput>);

  const toolName = name ?? `${sourcePluginMetadata.pluginId}.${procedureKey}`;

  return {
    name: toolName,
    description,
    effect,
    input: inputSchema,
    requiredAccessRules,
    dryRun,
    execute,
  };
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { hashToolArgs } from "./args-hash";
|
|
3
|
+
|
|
4
|
+
describe("hashToolArgs", () => {
  test("is stable across key order (canonical JSON)", () => {
    // Same content, keys listed in a different order at both nesting levels.
    const shuffled = { b: 2, a: 1, nested: { y: 1, x: 2 } };
    const ordered = { a: 1, nested: { x: 2, y: 1 }, b: 2 };
    expect(hashToolArgs(shuffled)).toBe(hashToolArgs(ordered));
  });

  test("preserves array order significance", () => {
    const ascending = hashToolArgs([1, 2, 3]);
    const descending = hashToolArgs([3, 2, 1]);
    expect(ascending).not.toBe(descending);
  });

  test("produces a 64-char hex SHA-256 digest", () => {
    const digest = hashToolArgs({ x: 1 });
    expect(digest).toMatch(/^[0-9a-f]{64}$/);
  });

  test("different content hashes differently", () => {
    const one = hashToolArgs({ x: 1 });
    const two = hashToolArgs({ x: 2 });
    expect(one).not.toBe(two);
  });

  test("ignores undefined properties (treated as absent)", () => {
    const withUndefined = hashToolArgs({ x: 1, y: undefined });
    const without = hashToolArgs({ x: 1 });
    expect(withUndefined).toBe(without);
  });
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
/**
 * Deterministically serialize a value to canonical JSON: object keys are sorted
 * recursively so two semantically-equal argument objects produce the same
 * string regardless of key order. Arrays preserve order (order is significant).
 *
 * Rules, mirroring `JSON.stringify` where it matters:
 * - primitives go through `JSON.stringify`; values it cannot represent
 *   (`undefined`, functions, symbols) become `null`;
 * - objects exposing `toJSON` (e.g. `Date`) are serialized via that projection,
 *   so two different dates never collapse to the same `{}`;
 * - object properties whose value is `undefined` are treated as absent.
 *
 * @param value - Any JSON-like value.
 * @returns The canonical JSON text.
 * @throws TypeError for `bigint` values (propagated from `JSON.stringify`).
 */
function canonicalJson(value: unknown): string {
  if (value === null || typeof value !== "object") {
    // JSON.stringify returns undefined for undefined / functions / symbols.
    return JSON.stringify(value) ?? "null";
  }

  // Honor toJSON exactly like JSON.stringify does. Without this, objects with
  // no own enumerable properties (Date being the common one) all serialize to
  // "{}" and therefore collide.
  const { toJSON } = value as { toJSON?: unknown };
  if (typeof toJSON === "function") {
    const projected: unknown = toJSON.call(value);
    // Guard against a toJSON that returns the object itself (would recurse forever).
    if (projected !== value) {
      return canonicalJson(projected);
    }
  }

  if (Array.isArray(value)) {
    return `[${value.map((item) => canonicalJson(item)).join(",")}]`;
  }

  const members = Object.entries(value as Record<string, unknown>)
    .filter(([, v]) => v !== undefined)
    // filter() already produced a fresh array, so sorting it in place cannot
    // touch the caller's data. Keys are compared by UTF-16 code unit.
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
    .map(([k, v]) => `${JSON.stringify(k)}:${canonicalJson(v)}`);
  return `{${members.join(",")}}`;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Hash tool arguments: SHA-256 over their canonical-JSON form, hex-encoded.
 *
 * The audit row keeps ONLY this digest and never the raw args, because args
 * may carry PII or secrets (§10). Equal args always produce the same digest,
 * and key order does not matter.
 *
 * @param args - The tool arguments to fingerprint.
 * @returns The lowercase hex SHA-256 digest.
 */
export function hashToolArgs(args: unknown): string {
  const canonical = canonicalJson(args);
  const hasher = createHash("sha256");
  hasher.update(canonical);
  return hasher.digest("hex");
}
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import type { AuthUser, RpcClient } from "@checkstack/backend-api";
|
|
4
|
+
import { createAiToolRegistry } from "../tool-registry";
|
|
5
|
+
import type { RegisteredAiTool } from "../tool-registry";
|
|
6
|
+
import { createAiToolResolver } from "../resolver";
|
|
7
|
+
import {
|
|
8
|
+
createProposeApplyService,
|
|
9
|
+
ProposeApplyError,
|
|
10
|
+
} from "./service";
|
|
11
|
+
import { generateProposalNonce } from "./token";
|
|
12
|
+
import type { AiToolCallStore } from "./store";
|
|
13
|
+
import type { AiToolCallRow } from "../schema";
|
|
14
|
+
|
|
15
|
+
/**
 * Test double for `AiToolCallStore`, backed by an in-memory `Map`.
 *
 * It reproduces the atomic single-use consume: only one caller can move a row
 * from `proposed` to `applied`. That lets the lifecycle / single-use / expiry /
 * authz logic be exercised without a real Postgres connection (in the real
 * store the atomicity is expressed by the `WHERE status='proposed'` clause).
 *
 * @param now - Clock used for `createdAt` and as the default "current time".
 * @returns The store, plus its backing `rows` map for direct inspection.
 */
function createFakeStore(now: () => Date): AiToolCallStore & {
  rows: Map<string, AiToolCallRow>;
} {
  const rows = new Map<string, AiToolCallRow>();
  let sequence = 0;

  // Build a row from neutral defaults; any field set in `overrides` wins.
  function makeRow(overrides: Partial<AiToolCallRow>): AiToolCallRow {
    sequence += 1;
    return {
      id: `row-${sequence}`,
      principalKind: "user",
      principalId: "u1",
      transport: "chat",
      conversationId: null,
      toolName: "x",
      effect: "mutate",
      argsHash: "h",
      status: "proposed",
      proposalNonce: null,
      proposalExpiresAt: null,
      resultSnapshot: null,
      proposedPayload: null,
      error: null,
      proposedAt: null,
      appliedAt: null,
      appliedByKind: null,
      appliedById: null,
      createdAt: now(),
      ...overrides,
    };
  }

  // Store a row under its own id and hand it back.
  function save(row: AiToolCallRow): AiToolCallRow {
    rows.set(row.id, row);
    return row;
  }

  return {
    rows,

    async recordExecuted(args) {
      return save(
        makeRow({
          ...args,
          conversationId: args.conversationId ?? null,
          effect: "read",
          status: "executed",
          resultSnapshot: args.resultSnapshot ?? null,
        }),
      );
    },

    async recordFailed(args) {
      return save(
        makeRow({
          ...args,
          conversationId: args.conversationId ?? null,
          status: "failed",
        }),
      );
    },

    async createProposal(args) {
      const nonce = generateProposalNonce();
      const row = save(
        makeRow({
          principalKind: args.principal.kind,
          principalId: args.principal.id,
          transport: args.transport,
          conversationId: args.conversationId ?? null,
          toolName: args.toolName,
          effect: args.effect,
          argsHash: args.argsHash,
          status: "proposed",
          proposalNonce: nonce,
          // Fixed 10-minute TTL (600 000 ms) from the proposal time.
          proposalExpiresAt: new Date((args.now ?? now()).getTime() + 600_000),
          proposedPayload: args.proposedPayload,
          resultSnapshot: args.resultSnapshot ?? null,
          proposedAt: args.now ?? now(),
        }),
      );
      return { row, nonce };
    },

    async consumeProposal({ rowId, applier, now: at = now() }) {
      const current = rows.get(rowId);
      // Atomic single-use: only a still-`proposed`, non-expired row wins.
      if (current === undefined || current.status !== "proposed") {
        return undefined;
      }
      // NOTE(review): expiry is inclusive here (`<=`) but exclusive (`<`) in
      // expireStaleProposals below — confirm which one the real store uses.
      const deadline = current.proposalExpiresAt;
      if (deadline && deadline.getTime() <= at.getTime()) {
        return undefined;
      }
      const applied: AiToolCallRow = {
        ...current,
        status: "applied",
        appliedAt: at,
        appliedByKind: applier.kind,
        appliedById: applier.id,
      };
      rows.set(rowId, applied);
      return applied;
    },

    async getProposal(rowId) {
      return rows.get(rowId);
    },

    async expireStaleProposals(at = now()) {
      let expiredCount = 0;
      for (const [id, row] of rows) {
        // Only pending proposals that carry a deadline can go stale.
        if (row.status !== "proposed" || !row.proposalExpiresAt) {
          continue;
        }
        if (row.proposalExpiresAt.getTime() < at.getTime()) {
          rows.set(id, { ...row, status: "expired" });
          expiredCount += 1;
        }
      }
      return expiredCount;
    },
  };
}
|
|
130
|
+
|
|
131
|
+
// Input schema of the demo mutating tool: an object with one string field, `value`.
const ManageInput = z.object({ value: z.string() });
|
|
132
|
+
|
|
133
|
+
/**
 * Create the demo `mutate` tool used throughout these tests.
 *
 * The default `execute` bumps a private counter, readable afterwards through a
 * non-enumerable `_executed` getter defined on the returned tool, so tests can
 * assert whether (and how often) the real mutation ran.
 *
 * @param over - Fields that replace the defaults (e.g. `effect`, `dryRun`).
 * @returns The tool definition, with the `_executed` getter attached.
 */
function mutatingTool(
  over?: Partial<RegisteredAiTool>,
): RegisteredAiTool<{ value: string }, { created: string }> {
  type DemoTool = RegisteredAiTool<{ value: string }, { created: string }>;

  let runCount = 0;

  const demo: DemoTool = {
    name: "demo.mutate",
    description: "demo mutating tool",
    effect: "mutate",
    input: ManageInput,
    requiredAccessRules: ["demo.demo.manage"],
    // Preview only: describes the change without performing it.
    dryRun: async ({ input }) => {
      return {
        summary: `Would create ${input.value}`,
        payload: { value: input.value },
      };
    },
    // The real mutation; counted so tests can prove it did or did not run.
    execute: async ({ input }) => {
      runCount += 1;
      return { created: input.value };
    },
    // Caller overrides come last so they take precedence over the defaults.
    ...(over as Partial<DemoTool>),
  };

  Object.defineProperty(demo, "_executed", {
    get: () => runCount,
  });
  return demo;
}
|
|
156
|
+
|
|
157
|
+
// User principal that holds the rule the demo tool requires.
const allowed: AuthUser = {
  type: "user",
  id: "u1",
  accessRules: ["demo.demo.manage"],
};

// User principal that holds only an unrelated rule.
const notAllowed: AuthUser = {
  type: "user",
  id: "u2",
  accessRules: ["other.read"],
};
|
|
167
|
+
|
|
168
|
+
// Stub for the user-scoped `rpcClient` argument threaded through
// propose/apply. The demo tool's dryRun/execute never touch the RPC client, so
// any use of it is a test bug and throws immediately.
const rpcClient = {
  forPlugin(): never {
    throw new Error("demo tool must not call the RPC client");
  },
} as unknown as RpcClient;
|
|
175
|
+
|
|
176
|
+
/**
 * Wire a registry, resolver, fake store and propose/apply service around one
 * tool.
 *
 * The returned `service` wraps the real one so that `propose` and `apply` get
 * the `rpcClient` stub injected automatically; individual tests can then stay
 * focused on lifecycle/authz (the demo tool ignores the client). This mirrors
 * how the transports build the per-turn, user-scoped client and hand it to the
 * service.
 *
 * @param tool - The single tool to register.
 * @param clock - Time source handed to the fake store; defaults to wall time.
 */
function setup(tool: RegisteredAiTool, clock = () => new Date()) {
  const registry = createAiToolRegistry();
  registry.register(tool);

  const resolver = createAiToolResolver({ registry });
  const store = createFakeStore(clock);
  const backing = createProposeApplyService({ registry, resolver, store });

  // Caller-facing argument shapes: everything except the injected client.
  type ProposeArgs = Omit<Parameters<typeof backing.propose>[0], "rpcClient">;
  type ApplyArgs = Omit<Parameters<typeof backing.apply>[0], "rpcClient">;

  const service = {
    ...backing,
    propose: (args: ProposeArgs) => backing.propose({ ...args, rpcClient }),
    apply: (args: ApplyArgs) => backing.apply({ ...args, rpcClient }),
  };

  return { registry, resolver, store, service };
}
|
|
195
|
+
|
|
196
|
+
describe("propose/apply lifecycle (matrix #11)", () => {
  test("a valid token applies exactly once; a second apply is rejected", async () => {
    const { service } = setup(mutatingTool());

    const { token } = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "alpha" },
      transport: "chat",
    });
    expect(token.startsWith("propose:")).toBe(true);

    const firstApply = await service.apply({ principal: allowed, token });
    expect(firstApply.result).toEqual({ created: "alpha" });

    // Replaying the same token must fail: the proposal was already consumed.
    const replay = service.apply({ principal: allowed, token });
    await expect(replay).rejects.toMatchObject({ code: "consumed" });
  });

  test("an expired token is rejected", async () => {
    const startedAt = new Date("2026-06-01T00:00:00Z");
    let clockNow = startedAt;
    const { service } = setup(mutatingTool(), () => clockNow);

    const { token } = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "beta" },
      transport: "chat",
    });

    // Move the clock 11 minutes forward, i.e. beyond the 10-minute TTL.
    clockNow = new Date(startedAt.getTime() + 11 * 60_000);

    const lateApply = service.apply({ principal: allowed, token });
    await expect(lateApply).rejects.toMatchObject({ code: "expired" });
  });

  test("a tampered nonce is rejected (constant-time compare)", async () => {
    const { service } = setup(mutatingTool());
    const proposal = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "gamma" },
      transport: "chat",
    });

    // Keep the head of the token, swap the nonce for an equally long run of "f".
    const [head, nonce] = proposal.token.split(".");
    const forgedNonce = "f".repeat(nonce.length);
    const tampered = `${head}.${forgedNonce}`;

    const forgedApply = service.apply({ principal: allowed, token: tampered });
    await expect(forgedApply).rejects.toMatchObject({ code: "invalid_token" });
  });

  test("a malformed token is rejected", async () => {
    const { service } = setup(mutatingTool());

    const garbageApply = service.apply({
      principal: allowed,
      token: "not-a-token",
    });
    await expect(garbageApply).rejects.toMatchObject({ code: "invalid_token" });
  });
});
|
|
261
|
+
|
|
262
|
+
describe("propose/apply authorization (matrix #11 / decision 5)", () => {
  test("propose is refused for a principal lacking the rule", async () => {
    const { service } = setup(mutatingTool());

    const attempt = service.propose({
      principal: notAllowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });
    await expect(attempt).rejects.toMatchObject({ code: "forbidden" });
  });

  test("apply re-checks authz: a rule lost after propose blocks apply", async () => {
    const { service } = setup(mutatingTool());
    const { token } = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });

    // Same user id as the proposer, but with every access rule revoked since.
    const revoked: AuthUser = { type: "user", id: "u1", accessRules: [] };

    const attempt = service.apply({ principal: revoked, token });
    await expect(attempt).rejects.toMatchObject({ code: "forbidden" });
  });

  test("a read tool is not proposable", async () => {
    // Same demo tool, reclassified as a read and stripped of its dry-run.
    const readTool = mutatingTool({ effect: "read", dryRun: undefined });
    const { service } = setup(readTool);

    const attempt = service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });
    await expect(attempt).rejects.toMatchObject({ code: "not_proposable" });
  });

  test("a service principal can never propose", async () => {
    const { service } = setup(mutatingTool());

    const attempt = service.propose({
      principal: { type: "service", pluginId: "x" },
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });
    await expect(attempt).rejects.toBeInstanceOf(ProposeApplyError);
  });
});
|
|
316
|
+
|
|
317
|
+
describe("propose does NOT mutate (matrix #12)", () => {
  test("propose runs dryRun and never calls execute", async () => {
    // Count dry runs with an explicit override so that the "runs dryRun" half
    // of the test name is actually asserted, not just the "never calls
    // execute" half. The override returns the same shape as the default demo
    // dry-run.
    let dryRuns = 0;
    const tool = mutatingTool({
      dryRun: async () => {
        dryRuns += 1;
        return { summary: "Would create x", payload: { value: "x" } };
      },
    });
    const { service } = setup(tool);

    await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });

    // The preview ran at least once ...
    expect(dryRuns).toBeGreaterThan(0);
    // ... and the real mutation never did.
    expect((tool as unknown as { _executed: number })._executed).toBe(0);
  });
});
|
|
330
|
+
|
|
331
|
+
describe("audit rows (matrix #13)", () => {
  test("propose writes a proposed row; apply transitions it to applied", async () => {
    const { service, store } = setup(mutatingTool());
    const { token, toolCallId } = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });

    const afterPropose = store.rows.get(toolCallId);
    expect(afterPropose?.status).toBe("proposed");
    // Inbound args are represented only by their SHA-256 — the raw args are
    // never stored on the audit row. (`proposedPayload` is a separate field
    // that, by design, holds the VALIDATED payload which `apply` commits.)
    expect(afterPropose?.argsHash).toMatch(/^[0-9a-f]{64}$/);
    expect(afterPropose).not.toHaveProperty("args");
    expect(afterPropose).not.toHaveProperty("rawArgs");

    await service.apply({ principal: allowed, token });

    const afterApply = store.rows.get(toolCallId);
    expect(afterApply?.status).toBe("applied");
  });

  test("apply records WHO applied, not the proposer (P3 review item 1)", async () => {
    const { service, store } = setup(mutatingTool());

    // u1 proposes.
    const { token, toolCallId } = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });

    // A DIFFERENT principal, also holding the rule, applies. The invariant
    // (single-use 256-bit token + live authz re-check) still holds, so the
    // apply goes through and is attributed to the real applier.
    const otherApplier: AuthUser = {
      type: "user",
      id: "u-applier",
      accessRules: ["demo.demo.manage"],
    };
    await service.apply({ principal: otherApplier, token });

    const finalRow = store.rows.get(toolCallId);
    expect(finalRow?.status).toBe("applied");
    // Proposer columns are untouched ...
    expect(finalRow?.principalKind).toBe("user");
    expect(finalRow?.principalId).toBe("u1");
    // ... and the applier columns name whoever actually applied.
    expect(finalRow?.appliedByKind).toBe("user");
    expect(finalRow?.appliedById).toBe("u-applier");
  });

  test("apply re-parses the stored payload against the tool schema (P3 review item 2)", async () => {
    // Simulates an evolved schema: by apply time the stored payload no longer
    // satisfies the tool's input schema, so execute must never run.
    const tool = mutatingTool();
    const { service, store } = setup(tool);
    const { token, toolCallId } = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });

    // Overwrite the server-side payload with one the schema rejects
    // (`value` must be a string).
    const stored = store.rows.get(toolCallId);
    if (stored) {
      const corrupted = {
        ...stored,
        proposedPayload: { value: 42 } as unknown as Record<string, unknown>,
      };
      store.rows.set(toolCallId, corrupted);
    }

    const attempt = service.apply({ principal: allowed, token });
    await expect(attempt).rejects.toMatchObject({ code: "execute_failed" });

    // The mutation itself was never invoked.
    expect((tool as unknown as { _executed: number })._executed).toBe(0);
  });

  test("expireStaleProposals flips expired rows to expired status", async () => {
    const startedAt = new Date("2026-06-01T00:00:00Z");
    let clockNow = startedAt;
    const { service, store } = setup(mutatingTool(), () => clockNow);

    const { toolCallId } = await service.propose({
      principal: allowed,
      toolName: "demo.mutate",
      input: { value: "x" },
      transport: "chat",
    });

    // 11 minutes later the 10-minute proposal is stale.
    clockNow = new Date(startedAt.getTime() + 11 * 60_000);
    const expiredCount = await store.expireStaleProposals(clockNow);

    expect(expiredCount).toBe(1);
    expect(store.rows.get(toolCallId)?.status).toBe("expired");
  });
});
|