@suluk/agents 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +33 -0
- package/src/conformance.ts +97 -0
- package/src/context.ts +256 -0
- package/src/index.ts +44 -0
- package/src/lint.ts +118 -0
- package/src/manifest.ts +141 -0
- package/src/model-select.ts +54 -0
- package/src/policy.ts +210 -0
- package/src/project.ts +156 -0
- package/src/resolve.ts +110 -0
- package/src/scope.ts +78 -0
- package/src/skill.ts +39 -0
- package/test/conformance.test.ts +34 -0
- package/test/context.test.ts +167 -0
- package/test/core-boundary.test.ts +38 -0
- package/test/fixtures/conin.ts +112 -0
- package/test/lint.test.ts +62 -0
- package/test/manifest.test.ts +41 -0
- package/test/model-select.test.ts +56 -0
- package/test/policy.test.ts +103 -0
- package/test/project.test.ts +103 -0
- package/test/scope.test.ts +27 -0
- package/test/signing.integration.test.ts +45 -0
- package/tsconfig.json +1 -0
package/src/resolve.ts
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolution helpers for the agent layer (C027). These walk the document and the agent graph BY NAME (C009/C013)
|
|
3
|
+
* — they NEVER touch the DOM→ADA request→operation matcher. A route's `operationRef` is a by-name JSON-pointer
|
|
4
|
+
* into an EXISTING operation; a sub-agent `ref` is a by-name pointer into the same `x-suluk-agents` map.
|
|
5
|
+
*/
|
|
6
|
+
import type { OpenAPIv4Document, Request, SulukAgent } from "@suluk/core";
|
|
7
|
+
|
|
8
|
+
/**
 * Decode a single JSON-Pointer reference token (RFC 6901).
 * `~1` is rewritten to `/` first and `~0` to `~` second, so the escaped form `~01` decodes to the literal `~1`.
 */
const unescapeToken = (t: string) => t.split("~1").join("/").split("~0").join("~");
|
|
10
|
+
|
|
11
|
+
/**
 * Split a local fragment pointer such as `#/a/b~1c/d` into its decoded reference tokens.
 *
 * @param ref the pointer text; anything that is not a string starting with `#/` is rejected.
 * @returns the decoded tokens in order, or null when `ref` is not a local fragment pointer.
 */
export function parsePointer(ref: string): string[] | null {
  const isLocalFragment = typeof ref === "string" && ref.startsWith("#/");
  if (!isLocalFragment) return null;

  const tokens: string[] = [];
  // drop the leading "#/" and decode each "/"-separated token on its own
  for (const raw of ref.slice(2).split("/")) tokens.push(unescapeToken(raw));
  return tokens;
}
|
|
16
|
+
|
|
17
|
+
/** The part of the document an operation was found in: a path item, a webhook, or a job. */
export type OperationLocus = "path" | "webhook" | "job";

/** The outcome of resolving an `operationRef` (produced by `resolveOperationRef`). */
export interface ResolvedOperation {
  /** which of the three operation loci the ref pointed into. */
  locus: OperationLocus;
  /** the container key (path template / webhook name / job name). */
  container: string;
  /** the by-name request handle within a pathItem (paths only). */
  requestName?: string;
  /** the resolved request object; `resolveOperationRef` leaves it unset for the `job` locus. */
  request?: Request;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve a route's `operationRef` to an EXISTING operation. Supports the three operation loci:
 *  - `#/paths/<pathTemplate>/requests/<name>` (a pathItem request — the common case)
 *  - `#/webhooks/<name>` (an incoming webhook operation)
 *  - `#/x-suluk-jobs/<name>` (a non-HTTP job, C025)
 *
 * Every lookup is an OWN-property lookup: a name such as `constructor`, `toString` or `__proto__` must not
 * resolve through `Object.prototype`, otherwise a dangling ref would be reported as an existing operation.
 *
 * @param doc the document to resolve against.
 * @param ref the by-name JSON-pointer (`#/…`) taken from a route.
 * @returns the resolved operation, or null when the ref is not a local pointer, has an unsupported shape, or
 *          dangles (the resolve-lint failure — Conin's MCP-only `run_core_primitive`).
 */
export function resolveOperationRef(doc: OpenAPIv4Document, ref: string): ResolvedOperation | null {
  // true only when `rec` is a non-null object that carries `key` itself (never via the prototype chain)
  const hasOwn = (rec: unknown, key: string): boolean =>
    typeof rec === "object" && rec !== null && Object.prototype.hasOwnProperty.call(rec, key);

  const toks = parsePointer(ref);
  if (!toks) return null;

  if (toks[0] === "paths" && toks.length === 4 && toks[2] === "requests") {
    const pathItem = hasOwn(doc.paths, toks[1]) ? doc.paths?.[toks[1]] : undefined;
    const request = (hasOwn(pathItem?.requests, toks[3]) ? pathItem?.requests?.[toks[3]] : undefined) as Request | undefined;
    return request ? { locus: "path", container: toks[1], requestName: toks[3], request } : null;
  }

  if (toks[0] === "webhooks" && toks.length === 2) {
    const request = hasOwn(doc.webhooks, toks[1]) ? doc.webhooks?.[toks[1]] : undefined;
    return request ? { locus: "webhook", container: toks[1], request } : null;
  }

  if (toks[0] === "x-suluk-jobs" && toks.length === 2) {
    const job = hasOwn(doc["x-suluk-jobs"], toks[1]) ? doc["x-suluk-jobs"]?.[toks[1]] : undefined;
    // a job has no Request object, so only the locus and its name are reported
    return job ? { locus: "job", container: toks[1] } : null;
  }

  return null;
}
|
|
52
|
+
|
|
53
|
+
/** Read the document's `x-suluk-agents` map; when it is null or undefined a fresh empty record is returned. */
export const agentMap = (doc: OpenAPIv4Document): Record<string, SulukAgent> => {
  const declared = doc["x-suluk-agents"];
  return declared ?? {};
};
|
|
55
|
+
|
|
56
|
+
/**
 * Extract the agent key from a sub-agent ref of the form `#/x-suluk-agents/<key>`.
 *
 * @returns the decoded key, or null when the ref is not a local pointer or is not exactly a two-token
 *          pointer into `x-suluk-agents`.
 */
export function subAgentKey(ref: string): string | null {
  const toks = parsePointer(ref);
  if (toks === null) return null;

  const [section, key] = toks;
  const isAgentRef = toks.length === 2 && section === "x-suluk-agents";
  return isAgentRef ? key : null;
}
|
|
61
|
+
|
|
62
|
+
/**
 * List an agent's direct sub-agent entries, one per local handle in `agent.agents`.
 * Each entry carries the local handle, the raw `ref`, and the key decoded from it (null when the ref is not a
 * well-formed agent ref). Existence of the key is NOT checked here — callers lint-check that.
 */
export function childKeys(agent: SulukAgent): { local: string; key: string | null; ref: string }[] {
  const out: { local: string; key: string | null; ref: string }[] = [];
  for (const [local, entry] of Object.entries(agent.agents ?? {})) {
    out.push({ local, key: subAgentKey(entry.ref), ref: entry.ref });
  }
  return out;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Depth-first search for a cycle in the agent graph reachable from `root`, following by-name sub-agent refs.
 * JSON-Schema cannot express acyclicity — this is the author/install lint the C027 gate requires. (Same shape
 * as the shipped builder/compose cycle detection, C021.)
 *
 * @param map  the agent map to walk; refs to keys missing from it are ignored.
 * @param root the key to start from.
 * @returns the keys of the first cycle found, starting and ending with the repeated key (e.g. `[a, b, a]`),
 *          or null when no cycle is reachable.
 */
export function findCycle(map: Record<string, SulukAgent>, root: string): string[] | null {
  const path: string[] = []; // keys on the current DFS path, in visit order
  const active = new Set<string>(); // same keys as `path`, kept for membership tests
  const finished = new Set<string>(); // keys whose whole subtree was explored without a hit

  function visit(node: string): string[] | null {
    // reaching a key that is still on the path is a back-edge → report the loop
    if (active.has(node)) return [...path.slice(path.indexOf(node)), node];
    if (finished.has(node) || !map[node]) return null;

    path.push(node);
    active.add(node);
    for (const { key: next } of childKeys(map[node])) {
      if (!next || !map[next]) continue;
      const cycle = visit(next);
      if (cycle) return cycle;
    }
    path.pop();
    active.delete(node);
    finished.add(node);
    return null;
  }

  return visit(root);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Length of the longest sub-agent chain below `root`. An agent with no resolvable sub-agents (or a key that is
 * missing from `map`) has depth 0. `maxDepth` on an agent must be >= this for its subtree.
 *
 * @param map  the agent map to walk.
 * @param root the key whose subtree is measured.
 * @param seen keys already on the path from the original root; `root` is added to it before descending, and
 *             every child receives its own copy.
 * @returns the depth, or Infinity when a cycle is reachable.
 */
export function subtreeDepth(map: Record<string, SulukAgent>, root: string, seen = new Set<string>()): number {
  if (seen.has(root)) return Infinity;

  const agent = map[root];
  if (!agent) return 0;

  // keep only children whose ref decodes to a key that exists in the map
  const liveKeys: string[] = [];
  for (const c of childKeys(agent)) {
    if (c.key && map[c.key]) liveKeys.push(c.key);
  }
  if (liveKeys.length === 0) return 0;

  seen.add(root);
  return liveKeys.reduce((deepest, k) => Math.max(deepest, 1 + subtreeDepth(map, k, new Set(seen))), 0);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Yield every string value reachable inside `v` together with a path to it (for the request-value-selector
 * D1 scan). Array elements extend the path with `[i]`; object keys extend it with `.key` (or just `key` at
 * the top level). Non-string leaves yield nothing.
 */
export function* deepStrings(v: unknown, path = ""): Generator<{ path: string; value: string }> {
  if (typeof v === "string") {
    yield { path, value: v };
  } else if (Array.isArray(v)) {
    let i = 0;
    for (const item of v) {
      yield* deepStrings(item, `${path}[${i}]`);
      i += 1;
    }
  } else if (v !== null && typeof v === "object") {
    for (const [k, val] of Object.entries(v)) {
      const childPath = path ? `${path}.${k}` : k;
      yield* deepStrings(val, childPath);
    }
  }
}
|
package/src/scope.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scope analysis (C027 security red-line) — least-privilege by construction. A child agent's EFFECTIVE scope is the
|
|
3
|
+
* INTERSECTION of its declared scope and its caller's, NEVER the union: scope cannot ESCALATE across a parent→child
|
|
4
|
+
* hop. This makes the confused-deputy structurally impossible (a child can't reach a tool its caller-chain doesn't
|
|
5
|
+
* grant), and the FULL reachable authz surface is computable from the document with zero requests. `null` = an
|
|
6
|
+
* unconstrained scope (no `scope` declared) — it inherits the caller's; intersection with `null` is the other set.
|
|
7
|
+
*/
|
|
8
|
+
import type { OpenAPIv4Document } from "@suluk/core";
|
|
9
|
+
import { agentMap, childKeys } from "./resolve";
|
|
10
|
+
|
|
11
|
+
/** A permission list, or `null` for an unconstrained scope. */
export type Scope = string[] | null;

/**
 * Intersect two scopes, treating `null` as "unconstrained": ∩(null, X)=X, ∩(X, null)=X, ∩(X, Y)=X∩Y.
 * When one side is null the OTHER side is returned as-is; otherwise the result keeps the order of `a`.
 */
export const intersectScope = (a: Scope, b: Scope): Scope => {
  if (a === null) return b;
  if (b === null) return a;
  return a.filter((perm) => b.includes(perm));
};
|
|
16
|
+
|
|
17
|
+
/** One parent→child edge on which the child declares permissions beyond what the parent grants. */
export interface ScopeEscalation {
  /** the agent whose declared grant is exceeded by a child. */
  parent: string;
  /** the local handle of the offending sub-agent. */
  childLocal: string;
  /** the resolved child agent key. */
  child: string;
  /** the permissions the child declares that the parent does NOT grant (silently dropped under intersection). */
  perms: string[];
}
|
|
27
|
+
|
|
28
|
+
/**
 * Walk the agent tree from `root`, recording each reachable agent's effective scope (its declared scope
 * intersected with its caller's effective scope) and every parent→child edge on which the child declares a
 * permission outside the parent's effective scope.
 *
 * Each agent is expanded once (cycle-guarded; lint rejects cycles independently), so on a DAG an agent's
 * effective scope comes from the first path that reaches it — sufficient for the shallow agent graphs C027
 * ships. Edges are still checked for escalation even when their child was already expanded.
 *
 * @param doc  the document holding the `x-suluk-agents` map.
 * @param root the agent key to start from; it is walked with an unconstrained (`null`) caller scope.
 * @returns `effective` keyed by agent name, and the list of per-edge `escalations`.
 */
export function analyzeScopes(doc: OpenAPIv4Document, root: string): { effective: Record<string, Scope>; escalations: ScopeEscalation[] } {
  const agents = agentMap(doc);
  const effective: Record<string, Scope> = {};
  const escalations: ScopeEscalation[] = [];
  const visited = new Set<string>();

  function visit(key: string, inherited: Scope): void {
    const agent = agents[key];
    if (!agent || visited.has(key)) return;
    visited.add(key);

    const own = intersectScope(agent.scope ?? null, inherited);
    effective[key] = own;

    for (const { local, key: childKey } of childKeys(agent)) {
      if (!childKey) continue;
      const child = agents[childKey];
      if (!child) continue;

      const wanted: Scope = child.scope ?? null;
      // an escalation needs both sides constrained: null on either side means nothing can be exceeded
      if (wanted !== null && own !== null) {
        const perms = wanted.filter((p) => !own.includes(p));
        if (perms.length) escalations.push({ parent: key, childLocal: local, child: childKey, perms });
      }
      visit(childKey, own);
    }
  }

  visit(root, null);
  return { effective, escalations };
}
|
|
59
|
+
|
|
60
|
+
/**
 * A LOCAL author-time escalation check for one agent's direct children: a child may not DECLARE a permission its
 * immediate parent does not grant (under intersection it would be silently dropped — flag the author's confusion /
 * a confused-deputy attempt). Used by the linter; the transitive picture is {@link analyzeScopes}.
 *
 * @param doc       the document holding the `x-suluk-agents` map.
 * @param agentName the parent agent to inspect.
 * @returns one entry per direct child that declares permissions outside the parent's declared scope; empty when
 *          the parent is unknown or unconstrained.
 */
export function localEscalations(doc: OpenAPIv4Document, agentName: string): ScopeEscalation[] {
  const map = agentMap(doc);
  const parent = map[agentName];
  // An absent scope means "unconstrained" (grants everything → no escalation). `== null` covers both an omitted
  // `scope` and an explicit `scope: null`, so a null scope can never reach `grant.includes` below.
  if (!parent || parent.scope == null) return [];
  const grant = parent.scope;
  const out: ScopeEscalation[] = [];
  for (const c of childKeys(parent)) {
    const child = c.key ? map[c.key] : undefined;
    // dangling refs and unconstrained children declare nothing that could exceed the grant
    if (!child || !child.scope) continue;
    const over = child.scope.filter((p) => !grant.includes(p));
    if (over.length) out.push({ parent: agentName, childLocal: c.local, child: c.key!, perms: over });
  }
  return out;
}
|
package/src/skill.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SKILL.md generation + the STALENESS STAMP — the single highest-value, lowest-risk feature the council flagged as
|
|
3
|
+
* genuinely missing today (conin's build-skill.ts has no hash). The skill's instruction TEXT is never the source of
|
|
4
|
+
* truth: a served endpoint is, and SKILL.md is GENERATED from a `contentHash`-pinned snapshot of it. The stamp
|
|
5
|
+
* binds the artifact to that snapshot so drift (the served preprompt changing after a signature/mint) is
|
|
6
|
+
* tool-detectable. Pure: the instructions snapshot is an INPUT — no fetch at generate time.
|
|
7
|
+
*/
|
|
8
|
+
import { createHash } from "node:crypto";
|
|
9
|
+
|
|
10
|
+
/**
 * A short, stable content hash of an instructions snapshot: the literal prefix `sha256-` followed by the first
 * 16 hex characters of the SHA-256 digest of the UTF-8 text.
 */
export function contentHash(instructions: string): string {
  return "sha256-" + createHash("sha256").update(instructions, "utf8").digest("hex").slice(0, 16);
}

/** Everything `renderSkillMd` needs to produce a SKILL.md. */
export interface SkillRenderInput {
  /** written verbatim as the frontmatter `name`. */
  name: string;
  /** written as the frontmatter `description` (emitted as a quoted, escaped scalar). */
  description: string;
  /** the instruction snapshot (the served /v1/instructions content, pinned at generate time). */
  instructions: string;
  /** the URL the snapshot was taken from (recorded in the stamp; not fetched here). */
  source?: string;
  /** recorded in the stamp; `(unversioned)` is written when omitted. */
  version?: string;
}

/**
 * Render a Claude SKILL.md: YAML frontmatter (name + description) + a GENERATED stamp carrying source, the
 * computed contentHash, and version — then the instructions body verbatim. Deterministic in its inputs.
 *
 * The description is emitted as a double-quoted scalar with `"`, `\`, newlines and other control characters
 * escaped, so it cannot corrupt or break out of the frontmatter. A `-->` inside `source` or `version` is
 * neutralised so it cannot close the stamp comment early.
 *
 * @param input name, description, instruction snapshot and optional source/version.
 * @returns the full SKILL.md text, ending with a newline.
 */
export function renderSkillMd(input: SkillRenderInput): string {
  const hash = contentHash(input.instructions);
  // keep stamp fields from terminating the surrounding HTML comment
  const inComment = (s: string) => s.replace(/-->/g, "--&gt;");
  const stamp = [
    "<!-- GENERATED by @suluk/agents — DO NOT EDIT BY HAND. Re-run on release.",
    ` source: ${inComment(input.source ?? "(inline)")}`,
    ` contentHash: ${hash}`,
    ` version: ${inComment(input.version ?? "(unversioned)")} -->`,
  ].join("\n");
  // JSON string syntax is a valid YAML double-quoted scalar; for text without backslashes or control
  // characters it yields exactly `"…"` with only the double quotes escaped.
  const desc = JSON.stringify(input.description);
  return `---\nname: ${input.name}\ndescription: ${desc}\n---\n\n${stamp}\n\n${input.instructions}\n`;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { test, expect, describe } from "bun:test";
|
|
2
|
+
import { reachableSurface, assertServedSubset, verifySkillFreshness, contentHash } from "../src/index";
|
|
3
|
+
import { coninDoc, coninInstructions } from "./fixtures/conin";
|
|
4
|
+
|
|
5
|
+
// Exercises reachableSurface and assertServedSubset against the Conin fixture, starting from the "conin" agent.
describe("C027 conformance — static reachable surface + over-serve auditor", () => {
  test("the full reachable tool surface is statically enumerable (zero requests)", () => {
    const s = reachableSurface(coninDoc, "conin");
    // the expected tool names are listed in alphabetical order
    expect(s.tools).toEqual(["find_comparables", "generate_deliverable", "run_core_primitive", "search_library"]);
    expect(s.agents).toEqual(["coninRetrieval"]);
  });

  test("a served set equal to the surface is conformant", () => {
    // same four names as the surface above, passed in a different order; no findings are expected
    expect(assertServedSubset(coninDoc, "conin", ["generate_deliverable", "run_core_primitive", "search_library", "find_comparables"])).toEqual([]);
  });

  test("NAMED failure: a server that WIDENS the surface (Conin's full-catalog over-serve) is flagged", () => {
    // two of the three served names are not in the surface asserted above → two findings expected
    const findings = assertServedSubset(coninDoc, "conin", ["generate_deliverable", "list_everything", "audit_boq_raw"]);
    expect(findings.map((f) => f.code)).toEqual(["over-serve", "over-serve"]);
    expect(findings[0].detail).toContain("list_everything");
  });
});
|
|
22
|
+
|
|
23
|
+
// Exercises verifySkillFreshness(declaredHash, snapshot) for the three outcomes: fresh, stale and unpinned.
describe("C027 conformance — skill freshness (drift detection)", () => {
  // the instruction snapshot every case below is checked against
  const snap = coninInstructions.operate;
  test("a matching declared hash is fresh", () => {
    // the declared hash is computed from the very snapshot it is compared with
    expect(verifySkillFreshness(contentHash(snap), snap)).toEqual([]);
  });
  test("a drifted served snapshot is caught as stale", () => {
    // an all-zero placeholder hash stands in for a declared hash that no longer matches
    expect(verifySkillFreshness("sha256-0000000000000000", snap).map((f) => f.code)).toEqual(["stale-skill"]);
  });
  test("an unpinned skill (no declared hash) is flagged — drift would be invisible", () => {
    expect(verifySkillFreshness(undefined, snap).map((f) => f.code)).toEqual(["unpinned-skill"]);
  });
});
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import { test, expect, describe } from "bun:test";
|
|
2
|
+
import type { OpenAPIv4Document } from "@suluk/core";
|
|
3
|
+
import { contextReport, suggestUnflatten, SEED_CATALOG } from "../src/index";
|
|
4
|
+
|
|
5
|
+
/**
 * An agent with one resident route (with a schema), one cold-tail route, and a resident skill.
 *
 * @param opts.budget when given, the agent gets a `contextBudget` of that many tokens (basis "estimate");
 *                    when omitted, no `contextBudget` key is written at all.
 * @returns a fresh document on every call, so callers may mutate it freely.
 */
function doc1(opts: { budget?: number } = {}): OpenAPIv4Document {
  return {
    openapi: "4.0.0-candidate",
    info: { title: "x", version: "0" },
    paths: {
      "v1/a": { requests: { opA: { method: "post", responses: { ok: { status: 200 } }, contentSchema: { type: "object", properties: { foo: { type: "string" }, bar: { type: "number" } } } } } },
      "v1/b": { requests: { opB: { method: "get", responses: { ok: { status: 200 } } } } },
    },
    "x-suluk-agents": {
      one: {
        description: "agent one",
        // spread an empty object when no budget was requested so the key is absent rather than undefined
        ...(opts.budget !== undefined ? { contextBudget: { tokens: opts.budget, basis: "estimate" } } : {}),
        skills: { guide: { model: ["anthropic/claude-opus-4"] } }, // resident skill (no tier)
        routes: {
          tool_a: { operationRef: "#/paths/v1~1a/requests/opA" }, // resident
          tool_b: { operationRef: "#/paths/v1~1b/requests/opB", tier: "cold-tail" }, // cold-tail
        },
      },
    },
  } as OpenAPIv4Document;
}
|
|
27
|
+
|
|
28
|
+
// Exercises contextReport / suggestUnflatten on the single-agent doc1 fixture.
describe("C027 context-budget analyzer", () => {
  test("cold-tail tools are NOT counted in the default load (the tiering's whole point)", () => {
    const load = contextReport(doc1()).loads[0];
    expect(load.tools.find((t) => t.name === "tool_b")!.tier).toBe("cold-tail");
    expect(load.tools.filter((t) => t.tier === "resident").map((t) => t.name)).toEqual(["tool_a"]);
    expect(load.coldTailTokens).toBeGreaterThan(0);
    // the total is the sum of exactly these three parts; coldTailTokens is not one of them
    expect(load.totalTokens).toBe(load.instructionsTokens + load.residentToolTokens + load.overheadTokens);
    // tool_b's cost is in coldTailTokens, not the resident surface
    // NOTE(review): this holds whenever coldTailTokens > 0, which is already asserted above.
    expect(load.residentToolTokens).toBeLessThan(load.residentToolTokens + load.coldTailTokens);
  });

  test("instruction sizing: unmeasured without a snapshot, measured with one", () => {
    expect(contextReport(doc1()).findings.some((f) => f.code === "unmeasured-instructions")).toBe(true);
    // the snapshot is keyed "<agent>/<skill>" — here agent "one", skill "guide"
    const measured = contextReport(doc1(), { instructions: { "one/guide": "x".repeat(400) } });
    expect(measured.loads[0].instructionsTokens).toBe(100); // 400 chars / 4
    expect(measured.loads[0].instructionsMeasured).toBe(true);
    expect(measured.findings.some((f) => f.code === "unmeasured-instructions")).toBe(false);
  });

  test("over-window: a tiny model window flags no-fitting-model + an unflatten suggestion", () => {
    const r = contextReport(doc1(), { modelWindows: { "anthropic/claude-opus-4": 200 } });
    expect(r.loads[0].modelWindow).toBe(200);
    expect(r.findings.some((f) => f.code === "no-fitting-model")).toBe(true); // overhead alone > 200
    const sug = r.suggestions.find((s) => s.agent === "one")!;
    expect(sug).toBeDefined();
    expect(sug.moveToColdTail).toContain("tool_a");
  });

  test("over-budget + flat-agent-overloaded when the resident surface dwarfs a small budget", () => {
    const r = contextReport(doc1({ budget: 100 }));
    expect(r.findings.some((f) => f.code === "context-over-budget")).toBe(true);
    expect(r.findings.some((f) => f.code === "flat-agent-overloaded")).toBe(true);
  });

  test("comfortably under budget ⇒ no over-findings, no suggestion", () => {
    const r = contextReport(doc1({ budget: 100_000 }));
    expect(r.findings.some((f) => ["context-over-window", "context-over-budget", "flat-agent-overloaded"].includes(f.code))).toBe(false);
    expect(r.suggestions).toEqual([]);
  });

  test("empty layer (no skills, no routes) is flagged to fill — the 'layers first' flow", () => {
    // an agent that carries nothing but a description
    const d = { openapi: "4.0.0-candidate", info: { title: "x", version: "0" }, paths: {}, "x-suluk-agents": { hollow: { description: "reserved layer" } } } as OpenAPIv4Document;
    expect(contextReport(d).findings.some((f) => f.code === "empty-layer")).toBe(true);
  });

  test("suggestUnflatten moves the biggest resident tools first and reports the saving", () => {
    const load = contextReport(doc1({ budget: 100 })).loads[0];
    const s = suggestUnflatten(load)!;
    expect(s.moveToColdTail.length).toBeGreaterThan(0);
    expect(s.wouldSaveTokens).toBeGreaterThan(0);
    expect(suggestUnflatten(load, 1_000_000)).toBeNull(); // not over a generous target
  });
});
|
|
81
|
+
|
|
82
|
+
/**
 * A parent with no own work delegating to one small leaf — the flatten cases.
 * `top` has only a description, `maxDepth: 1` and one sub-agent ref; `worker` has one skill, one route and an
 * empty `agents` map. A fresh document is built on every call, so tests may mutate the result.
 */
function layered(): OpenAPIv4Document {
  return {
    openapi: "4.0.0-candidate",
    info: { title: "x", version: "0" },
    paths: { "v1/s": { requests: { search: { method: "get", responses: { ok: { status: 200 } } } } } },
    "x-suluk-agents": {
      top: { description: "orchestrator with no own tools", maxDepth: 1, agents: { leaf: { ref: "#/x-suluk-agents/worker" } } },
      worker: { description: "a thin leaf", maxDepth: 0, skills: { go: { model: ["google/gemini-2.5-flash"] } }, routes: { search: { operationRef: "#/paths/v1~1s/requests/search" } }, agents: {} },
    },
  } as OpenAPIv4Document;
}
|
|
94
|
+
|
|
95
|
+
// Exercises the flatten-related output of contextReport on the two-agent `layered` fixture (top → worker).
describe("C027 flatten (the dual of unflatten) — collapse a thin/redundant layer up", () => {
  test("a passthrough agent (no own work, one child) is flagged", () => {
    const r = contextReport(layered());
    expect(r.findings.some((f) => f.code === "passthrough-agent" && f.agent === "top")).toBe(true);
  });

  test("a thin leaf reached by one parent, merging within budget, is a flatten candidate", () => {
    const r = contextReport(layered());
    const flat = r.flatten.find((f) => f.parent === "top" && f.child === "worker");
    expect(flat).toBeDefined();
    expect(flat!.fitsTarget).toBe(true);
    expect(flat!.savedHopOverhead).toBeGreaterThan(0);
    expect(r.findings.some((f) => f.code === "flattenable-layer")).toBe(true);
  });

  test("NOT flattenable when merging would blow the parent's target (the layer earns its keep)", () => {
    const d = layered();
    // give the worker a big resident tool so inlining it would exceed a tight parent budget
    // (the schema has 25 generated string properties p0…p24, each with a description)
    d.paths["v1/big"] = { requests: { bigOp: { method: "post", responses: { ok: { status: 200 } }, contentSchema: { type: "object", properties: Object.fromEntries(Array.from({ length: 25 }, (_, i) => [`p${i}`, { type: "string", description: "a descriptive field" }])) } } } };
    d["x-suluk-agents"]!.worker.routes!.big = { operationRef: "#/paths/v1~1big/requests/bigOp" };
    d["x-suluk-agents"]!.top.contextBudget = { tokens: 500, basis: "estimate" }; // top (460) fits; merged (>600) does not
    const r = contextReport(d);
    expect(r.flatten.some((f) => f.parent === "top")).toBe(false);
  });
});
|
|
120
|
+
|
|
121
|
+
// Exercises how an agent's `thinking` settings affect the peakTokens / minWindowRequired reported by contextReport.
describe("C029 thinking — round-accretion folds into the load the analyzer checks", () => {
  test("a multi-round PEAK can exceed a window the single-shot base fits (the fixed blindspot)", () => {
    const d = doc1();
    d["x-suluk-agents"]!.one.thinking = { maxRounds: 6 };
    // 600 is chosen to sit between the single-shot total and the 6-round peak (both asserted below)
    const r = contextReport(d, { modelWindows: { "anthropic/claude-opus-4": 600 } });
    const load = r.loads[0];
    expect(load.maxRounds).toBe(6);
    expect(load.peakTokens).toBeGreaterThan(load.totalTokens);
    expect(load.totalTokens).toBeLessThan(600); // single-shot fits
    expect(load.peakTokens).toBeGreaterThan(600); // the 6-round peak does not
    expect(load.minWindowRequired).toBe(load.peakTokens);
    expect(r.findings.some((f) => f.code === "no-fitting-model")).toBe(true);
    expect(r.findings.some((f) => f.code === "thinking-context-growth")).toBe(true);
  });

  test("an explicit thinking budget is used as the accretion", () => {
    const d = doc1();
    d["x-suluk-agents"]!.one.thinking = { maxRounds: 3, budget: { tokens: 5000, basis: "estimate" } };
    const load = contextReport(d).loads[0];
    expect(load.thinkingBudget).toBe(5000);
    // the peak is the single-shot total plus the declared budget
    expect(load.peakTokens).toBe(load.totalTokens + 5000);
  });

  test("no thinking ⇒ peak equals single-shot (backward-compatible)", () => {
    const load = contextReport(doc1()).loads[0];
    expect(load.peakTokens).toBe(load.totalTokens);
    expect(load.maxRounds).toBeUndefined();
  });
});
|
|
150
|
+
|
|
151
|
+
// Exercises the per-model fit entries (`modelFit`) that contextReport attaches to each load.
describe("C027 model fit — which declared models are expected to work", () => {
  test("each candidate model is checked for window fit; minWindowRequired is the load", () => {
    // windows now come from the catalog (DEFAULT_WINDOWS deleted) — opus is 200k in the seed
    const load = contextReport(doc1(), { catalog: SEED_CATALOG }).loads[0];
    expect(load.minWindowRequired).toBe(load.totalTokens);
    const fit = load.modelFit.find((f) => f.model === "anthropic/claude-opus-4")!;
    expect(fit.window).toBe(200_000);
    expect(fit.fits).toBe(true);
    // headroom is the window minus the load
    expect(fit.headroom).toBe(200_000 - load.totalTokens);
  });

  // NOTE(review): the title says "model-too-small" but the finding code asserted below is "no-fitting-model" —
  // confirm which of the two is intended.
  test("model-too-small names a declared model that cannot hold the agent", () => {
    const r = contextReport(doc1(), { modelWindows: { "anthropic/claude-opus-4": 100 } });
    expect(r.findings.some((f) => f.code === "no-fitting-model" && f.agent === "one")).toBe(true);
    expect(r.loads[0].modelFit[0].fits).toBe(false);
  });
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { test, expect, describe } from "bun:test";
|
|
2
|
+
import { readdirSync, readFileSync, statSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* The C027 MODULE-BOUNDARY INVARIANT — D1 enforced by build, not discipline. @suluk/core's matcher must never be
|
|
7
|
+
* able to consult an agent field, and the structural guarantee of that is: @suluk/core NEVER imports @suluk/agents.
|
|
8
|
+
* This is a maintained tripwire — if anyone ever wires the agent layer into core, this fails loud.
|
|
9
|
+
*/
|
|
10
|
+
/** Recursively collect the paths of every file ending in `.ts` under `dir`, in directory-listing order. */
function walk(dir: string): string[] {
  const found: string[] = [];
  for (const entry of readdirSync(dir)) {
    const full = join(dir, entry);
    if (statSync(full).isDirectory()) {
      found.push(...walk(full));
    } else if (full.endsWith(".ts")) {
      found.push(full);
    }
  }
  return found;
}
|
|
16
|
+
|
|
17
|
+
// Tripwire: scans the sibling core package's sources for any import of the agent layer.
describe("C027 module boundary (D1 by build)", () => {
  const coreSrc = join(import.meta.dir, "..", "..", "core", "src");

  // Matches a module specifier that targets the agents package, however it is brought in:
  //   `… from "@suluk/agents"`, `import "@suluk/agents"`, `import("@suluk/agents")`, `require("@suluk/agents")`,
  //   a relative `…/agents` path, and any SUBPATH of those (e.g. `@suluk/agents/lint`, `../agents/src/resolve`).
  // A different package that merely starts with the same text (e.g. `@suluk/agents-x`) is not matched.
  const agentsImport = /(?:from\s*|import\s*\(?\s*|require\(\s*)["'](?:@suluk\/agents|[./]+agents)(?:\/[^"']*)?["']/;

  test("@suluk/core imports nothing from @suluk/agents (the dependency is one-way)", () => {
    const offenders: string[] = [];
    for (const f of walk(coreSrc)) {
      const text = readFileSync(f, "utf8");
      if (agentsImport.test(text)) {
        offenders.push(f);
      }
    }
    expect(offenders).toEqual([]);
  });

  test("@suluk/core's matcher source (ada.ts) never references x-suluk-agents or x-suluk-policy", () => {
    const ada = readFileSync(join(coreSrc, "ada.ts"), "utf8");
    expect(ada.includes("x-suluk-agents")).toBe(false);
    expect(ada.includes("x-suluk-policy")).toBe(false);
    // the bare words are checked too, which also covers the two extension keys above
    expect(ada.includes("agents")).toBe(false);
    expect(ada.includes("policy")).toBe(false);
  });
});
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conin as an `x-suluk-agents` fixture (C027) — the live, un-standardized cowpath, serialized. A FLAT two-tier
|
|
3
|
+
* agent: an orchestrator (`conin`) with a model-bearing skill + two deterministic routes + one sub-agent (the
|
|
4
|
+
* untrusted retrieval tier `coninRetrieval`). Plus the NAMED conformance-failure variants the council requires
|
|
5
|
+
* tracked (not laundered): Conin's MCP-only primitive (a dangling operationRef), a cycle, a missing depth bound,
|
|
6
|
+
* and a forbidden request-value selector.
|
|
7
|
+
*/
|
|
8
|
+
import type { OpenAPIv4Document, Request, HttpMethod } from "@suluk/core";
|
|
9
|
+
|
|
10
|
+
/** Build a minimal request fixture: the given method plus a single `ok` response with status 200. */
function req(method: HttpMethod): Request {
  return { method, responses: { ok: { status: 200 } } };
}
|
|
11
|
+
|
|
12
|
+
/**
 * Baseline Conin fixture — the valid, installable agent document.
 *
 * `x-suluk-agents` holds two entries: the `conin` orchestrator and the `coninRetrieval` sub-agent it refers to.
 * Each of the four routes carries an `operationRef` naming a request that is declared under `paths` in this
 * same document.
 */
export const coninDoc: OpenAPIv4Document = {
  openapi: "4.0.0-candidate",
  info: {
    title: "Conin — Construction Intelligence",
    version: "1.0.0",
  },
  paths: {
    "v1/deliverables": {
      requests: { generateDeliverable: req("post") },
    },
    "v1/primitives": {
      requests: { runCorePrimitive: req("post") },
    },
    "v1/library/search": {
      requests: { searchLibrary: req("get") },
    },
    "v1/comparables": {
      requests: { findComparables: req("get") },
    },
  },
  "x-suluk-agents": {
    conin: {
      description: "Construction-intelligence orchestrator: messy project docs → provenance-graded deliverables. Use to cost/value/certify/reconcile a MENA capital project.",
      // The orchestrator has to GRANT everything its sub-tree uses, including the retrieval child's
      // library:read. A child's effective scope is INTERSECTION(child, caller), so anything missing from
      // this list would be silently dropped.
      scope: ["project:read", "deliverable:write", "library:read"],
      maxDepth: 1,
      skills: {
        operate: {
          model: ["anthropic/claude-opus-4", "google/gemini-2.5-flash"],
          tier: "cold-tail",
          whenToUse: "Always loaded first; governs deterministic-first + SOURCED/ASSUMED grading; routes to a deliverable kind.",
          trust: "author-declared",
          scope: ["project:read"],
          provenance: {
            source: "https://construction-intelligence.saastemly.com/v1/instructions",
            contentHash: "sha256-9f2c0000deadbeef",
            version: "2026-06-11",
          },
        },
      },
      routes: {
        generate_deliverable: {
          operationRef: "#/paths/v1~1deliverables/requests/generateDeliverable",
          guarantee: "same-in-same-out",
          scope: ["deliverable:write"],
        },
        run_core_primitive: {
          operationRef: "#/paths/v1~1primitives/requests/runCorePrimitive",
          guarantee: "same-in-same-out",
          scope: ["project:read"],
        },
      },
      agents: {
        retrieval: { ref: "#/x-suluk-agents/coninRetrieval" },
      },
    },
    coninRetrieval: {
      description: "Untrusted retrieval tier: search_library / find_comparables. Returns ASSUMED-grade material only; never emits a graded figure.",
      scope: ["library:read"],
      maxDepth: 0,
      trustBoundary: "untrusted",
      skills: {
        search: {
          model: ["google/gemini-2.5-flash"],
          tier: "resident",
          whenToUse: "Find comparables/evidence; returns ASSUMED-grade material only.",
          provenance: {
            source: "https://construction-intelligence.saastemly.com/v1/instructions#retrieval",
            contentHash: "sha256-1a7d0000feedface",
            version: "2026-06-11",
          },
        },
      },
      routes: {
        search_library: {
          operationRef: "#/paths/v1~1library~1search/requests/searchLibrary",
          guarantee: "idempotent",
          scope: ["library:read"],
        },
        find_comparables: {
          operationRef: "#/paths/v1~1comparables/requests/findComparables",
          guarantee: "idempotent",
          scope: ["library:read"],
        },
      },
      agents: {},
    },
  },
};
|
|
66
|
+
|
|
67
|
+
/**
 * Pinned instruction snapshots handed to a projector (never fetched at generate time).
 * Keys match the skill names used in `coninDoc` (`operate`, `search`).
 */
export const coninInstructions: Record<string, string> = {
  // Orchestrator skill text.
  operate:
    "You are Conin. Deterministic-first: every NUMBER comes from a deterministic tool; the LLM only routes. Grade every figure SOURCED or ASSUMED.",
  // Retrieval-tier skill text.
  search:
    "Retrieval tier. Find comparables and evidence. You return ASSUMED-grade material only — never a graded figure.",
};
|
|
72
|
+
|
|
73
|
+
/**
 * FAILURE FIXTURE — Conin's REAL day-one gap: run_core_primitive is dispatched MCP-only, with no REST operation.
 *
 * @returns a deep copy of `coninDoc` with the `v1/primitives` path removed, so the `run_core_primitive`
 *   route's `operationRef` no longer resolves. `coninDoc` itself is not modified.
 */
export function coninDayOne(): OpenAPIv4Document {
  const doc = structuredClone(coninDoc);
  const paths = doc.paths as Record<string, unknown>;
  // Removing the path entry is what leaves the operationRef dangling.
  delete paths["v1/primitives"];
  return doc;
}
|
|
79
|
+
|
|
80
|
+
/**
 * FAILURE FIXTURE — a recursion cycle: agent `a` refers to `b` and `b` refers back to `a`.
 * Both declare `maxDepth: 1`, which does not make the cycle acceptable.
 *
 * @returns a freshly built document on every call (no paths, two mutually-referencing agents).
 */
export function cyclicDoc(): OpenAPIv4Document {
  const doc: OpenAPIv4Document = {
    openapi: "4.0.0-candidate",
    info: { title: "cyclic", version: "0" },
    paths: {},
    "x-suluk-agents": {
      a: {
        description: "agent a",
        maxDepth: 1,
        agents: { toB: { ref: "#/x-suluk-agents/b" } },
      },
      b: {
        description: "agent b",
        maxDepth: 1,
        agents: { toA: { ref: "#/x-suluk-agents/a" } },
      },
    },
  };
  return doc;
}
|
|
92
|
+
|
|
93
|
+
/**
 * FAILURE FIXTURE — sub-agents present but no maxDepth declared (must not install).
 *
 * @returns a deep copy of `coninDoc` whose `conin` agent has had its `maxDepth` property deleted.
 */
export function missingMaxDepthDoc(): OpenAPIv4Document {
  const doc = structuredClone(coninDoc);
  const orchestrator = doc["x-suluk-agents"]!.conin;
  delete orchestrator.maxDepth;
  return doc;
}
|
|
99
|
+
|
|
100
|
+
/**
 * FAILURE FIXTURE — a forbidden request-value selector smuggled in via a vendor field (the #20 tripwire, D1).
 *
 * @returns a deep copy of `coninDoc` whose `conin` agent carries an extra `x-suluk-route-when` member
 *   holding a `$request.body` expression.
 */
export function selectorDoc(): OpenAPIv4Document {
  const doc = structuredClone(coninDoc);
  const orchestrator = doc["x-suluk-agents"]!.conin;
  orchestrator["x-suluk-route-when"] = "{$request.body#/kind}";
  return doc;
}
|
|
106
|
+
|
|
107
|
+
/**
 * FAILURE FIXTURE — scope escalation: the retrieval child needs library:read but the orchestrator no longer
 * grants it.
 *
 * @returns a deep copy of `coninDoc` whose `conin` agent scope is reduced to `project:read` and
 *   `deliverable:write`.
 */
export function escalationDoc(): OpenAPIv4Document {
  const doc = structuredClone(coninDoc);
  const orchestrator = doc["x-suluk-agents"]!.conin;
  // library:read is left out on purpose, so the child would exceed what its caller grants.
  orchestrator.scope = ["project:read", "deliverable:write"];
  return doc;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { test, expect, describe } from "bun:test";
|
|
2
|
+
import { lintAgents, lintOk } from "../src/index";
|
|
3
|
+
import { coninDoc, coninDayOne, cyclicDoc, missingMaxDepthDoc, selectorDoc } from "./fixtures/conin";
|
|
4
|
+
|
|
5
|
+
/** Codes of the error-severity findings that `lintAgents` reports for `doc`, in reported order. */
const codes = (doc: Parameters<typeof lintAgents>[0]) =>
  lintAgents(doc).flatMap((finding) => (finding.severity === "error" ? [finding.code] : []));
|
|
6
|
+
|
|
7
|
+
// Lint suite for the agent layer: one passing baseline, then one case per named failure code.
describe("C027 agent lint", () => {
  /** Only the error-severity findings `lintAgents` reports for `doc`. */
  const errorsOf = (doc: Parameters<typeof lintAgents>[0]) =>
    lintAgents(doc).filter((finding) => finding.severity === "error");

  test("the valid Conin agent installs (no errors)", () => {
    const allFindings = lintAgents(coninDoc);
    const errorFindings = allFindings.filter((finding) => finding.severity === "error");
    expect(errorFindings).toEqual([]);
    // lintOk is given every finding here, not just the errors.
    expect(lintOk(allFindings)).toBe(true);
  });

  test("NAMED failure: Conin's MCP-only run_core_primitive is a dangling operationRef", () => {
    const errors = errorsOf(coninDayOne());
    const dangling = errors.filter((finding) => finding.code === "dangling-operation-ref");
    expect(dangling.some((finding) => finding.at?.includes("run_core_primitive"))).toBe(true);
    expect(lintOk(errors)).toBe(false);
  });

  test("a recursion cycle is rejected", () => {
    const reported = codes(cyclicDoc());
    expect(reported).toContain("agent-cycle");
  });

  test("sub-agents without a declared maxDepth do not install", () => {
    const reported = codes(missingMaxDepthDoc());
    expect(reported).toContain("missing-max-depth");
  });

  test("D1: a request-value selector smuggled via a vendor field is forbidden", () => {
    const errors = errorsOf(selectorDoc());
    const flagged = errors.some((finding) => finding.code === "request-value-selector");
    expect(flagged).toBe(true);
  });

  test("a route carrying a model is an error (a route is deterministic)", () => {
    const doc = structuredClone(coninDoc);
    // The cast lets the test attach a member the route type does not declare.
    const route = doc["x-suluk-agents"]!.conin.routes!.generate_deliverable as unknown as Record<string, unknown>;
    route.model = ["x"];
    expect(codes(doc)).toContain("route-has-model");
  });

  test("C029: thinking present without maxRounds is rejected", () => {
    const doc = structuredClone(coninDoc);
    const orchestrator = doc["x-suluk-agents"]!.conin;
    orchestrator.thinking = {} as { maxRounds: number };
    expect(codes(doc)).toContain("missing-max-rounds");
  });

  test("C029: maxRounds < 1 is rejected", () => {
    const doc = structuredClone(coninDoc);
    const orchestrator = doc["x-suluk-agents"]!.conin;
    orchestrator.thinking = { maxRounds: 0 };
    expect(codes(doc)).toContain("invalid-max-rounds");
  });

  test("C029: a stopCondition-shaped member is forbidden (declare the bound, not the process)", () => {
    const doc = structuredClone(coninDoc);
    const orchestrator = doc["x-suluk-agents"]!.conin;
    orchestrator.thinking = { maxRounds: 3, stopCondition: "final-answer" } as unknown as { maxRounds: number };
    expect(codes(doc)).toContain("thinking-process-declared");
  });

  test("C029: a valid thinking bound lints clean", () => {
    const doc = structuredClone(coninDoc);
    const orchestrator = doc["x-suluk-agents"]!.conin;
    orchestrator.thinking = { maxRounds: 6, budget: { tokens: 40000, basis: "estimate" } };
    expect(errorsOf(doc)).toEqual([]);
  });
});
|