@danielblomma/cortex-mcp 2.0.5 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cortex.mjs +24 -0
- package/package.json +1 -1
- package/scaffold/mcp/package-lock.json +63 -4
- package/scaffold/mcp/package.json +4 -1
- package/scaffold/mcp/src/cli/stage.ts +325 -0
- package/scaffold/mcp/src/core/workflow/artifact-io.ts +156 -0
- package/scaffold/mcp/src/core/workflow/capabilities.ts +100 -0
- package/scaffold/mcp/src/core/workflow/default-workflows.ts +83 -0
- package/scaffold/mcp/src/core/workflow/enforcement.ts +206 -0
- package/scaffold/mcp/src/core/workflow/envelope.ts +220 -0
- package/scaffold/mcp/src/core/workflow/index.ts +8 -0
- package/scaffold/mcp/src/core/workflow/mcp-tools.ts +208 -0
- package/scaffold/mcp/src/core/workflow/run-lifecycle.ts +165 -0
- package/scaffold/mcp/src/core/workflow/schemas.ts +125 -0
- package/scaffold/mcp/src/hooks/pre-tool-use.ts +30 -0
- package/scaffold/mcp/src/server.ts +75 -0
- package/scaffold/mcp/tests/workflow-cli.test.mjs +293 -0
- package/scaffold/mcp/tests/workflow-enforcement.test.mjs +370 -0
- package/scaffold/mcp/tests/workflow-envelope.test.mjs +247 -0
- package/scaffold/mcp/tests/workflow-mcp-tools.test.mjs +293 -0
- package/scaffold/mcp/tests/workflow.test.mjs +283 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import os from "node:os";
|
|
5
|
+
import path from "node:path";
|
|
6
|
+
|
|
7
|
+
import { runStageCommand } from "../dist/cli/stage.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Create a fresh temporary directory and register it as the project root.
 *
 * The directory is created under the OS temp dir with the
 * "cortex-stage-cli-" prefix, and CORTEX_PROJECT_ROOT is set to its path.
 *
 * @returns {string} Path of the newly created directory.
 */
function makeWorkspace() {
  const prefix = path.join(os.tmpdir(), "cortex-stage-cli-");
  const workspace = fs.mkdtempSync(prefix);
  process.env.CORTEX_PROJECT_ROOT = workspace;
  return workspace;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Run `run` while `process.stdout.write` is swapped for an in-memory collector.
 *
 * Every chunk written to stdout during the call is stringified and appended
 * to a buffer instead of being printed. The original writer is restored in a
 * `finally` block, so it is put back whether `run` resolves, rejects, or
 * throws synchronously.
 *
 * @param {() => unknown} run - Callback to execute; may return a promise or a plain value.
 * @returns {Promise<{ value: unknown, captured: string }>} The awaited result
 *   of `run` together with everything written to stdout while it ran.
 * @throws Whatever `run` throws or rejects with (surfaced as a rejection).
 */
async function captureStdout(run) {
  const original = process.stdout.write;
  let captured = "";
  process.stdout.write = (chunk) => {
    captured += String(chunk);
    // Report the chunk as flushed so writers do not wait for a "drain" event.
    return true;
  };
  try {
    const value = await run();
    return { value, captured };
  } finally {
    process.stdout.write = original;
  }
}
|
|
28
|
+
|
|
29
|
+
// After every test, remove the CORTEX_PROJECT_ROOT override that
// makeWorkspace() sets on process.env.
test.afterEach(function resetProjectRoot() {
  delete process.env.CORTEX_PROJECT_ROOT;
});
|
|
32
|
+
|
|
33
|
+
test("stage start: creates run + returns first envelope as JSON on stdout", async () => {
  const cwd = makeWorkspace();

  // Start a run on the "secure-build" workflow and capture what is printed.
  const startArgs = [
    "start",
    "--task-id",
    "task-1",
    "--description",
    "Add login flow",
    "--workflow",
    "secure-build",
  ];
  const { captured } = await captureStdout(() => runStageCommand(startArgs));

  // stdout must parse as JSON carrying the run state and the first envelope.
  const { state, envelope } = JSON.parse(captured);
  assert.equal(state.task_id, "task-1");
  assert.equal(state.current_stage, "plan");
  assert.equal(envelope.expectedArtifact, "plan.md");

  // The run state must also have been written under .agents/<task-id>/.
  const stateFile = path.join(cwd, ".agents", "task-1", "state.json");
  assert.ok(fs.existsSync(stateFile));
});
|
|
52
|
+
|
|
53
|
+
test("stage start: rejects unknown workflow", async () => {
  makeWorkspace();

  // "no-such" is passed as the workflow id; the command is expected to reject.
  const attempt = runStageCommand([
    "start",
    "--task-id",
    "task-1",
    "--description",
    "x",
    "--workflow",
    "no-such",
  ]);

  await assert.rejects(attempt, /Unknown workflow_id/);
});
|
|
68
|
+
|
|
69
|
+
test("stage start: rejects missing required flags", async () => {
  makeWorkspace();

  // --description is deliberately left out of the argument list.
  const attempt = runStageCommand(["start", "--task-id", "task-1"]);

  await assert.rejects(attempt, /Missing required flag: --description/);
});
|
|
76
|
+
|
|
77
|
+
test("stage status: prints state when run exists", async () => {
  makeWorkspace();

  // Create the run first; its output is not captured.
  const startArgs = ["start", "--task-id", "task-1", "--description", "x"];
  await runStageCommand(startArgs);

  const statusArgs = ["status", "--task-id", "task-1"];
  const { captured } = await captureStdout(() => runStageCommand(statusArgs));

  const { state } = JSON.parse(captured);
  assert.equal(state.task_id, "task-1");
  assert.equal(state.outcome, "in_progress");
});
|
|
93
|
+
|
|
94
|
+
test("stage status: returns null state when no run exists", async () => {
  makeWorkspace();

  // No run was started for "ghost" in this workspace.
  const statusArgs = ["status", "--task-id", "ghost"];
  const { captured } = await captureStdout(() => runStageCommand(statusArgs));

  const { state } = JSON.parse(captured);
  assert.equal(state, null);
});
|
|
102
|
+
|
|
103
|
+
test("stage envelope: returns current envelope without mutating state", async () => {
  const cwd = makeWorkspace();
  await runStageCommand([
    "start",
    "--task-id",
    "task-1",
    "--description",
    "x",
  ]);

  // Snapshot the persisted run state so the "without mutating state" part of
  // the title is actually checked, not just the envelope contents.
  const statePath = path.join(cwd, ".agents", "task-1", "state.json");
  const stateBefore = JSON.parse(fs.readFileSync(statePath, "utf8"));

  const { captured } = await captureStdout(() =>
    runStageCommand(["envelope", "--task-id", "task-1"]),
  );
  const parsed = JSON.parse(captured);
  assert.equal(parsed.envelope.expectedArtifact, "plan.md");

  // Compare parsed content rather than raw bytes so a pure re-serialization
  // of identical state would not count as a mutation.
  const stateAfter = JSON.parse(fs.readFileSync(statePath, "utf8"));
  assert.deepEqual(stateAfter, stateBefore, "envelope must not change state.json");
});
|
|
118
|
+
|
|
119
|
+
test("stage advance: writes artifact + advances run via body file", async () => {
  const cwd = makeWorkspace();
  await runStageCommand(["start", "--task-id", "task-1", "--description", "x"]);

  // Write the artifact body to a file that is handed over via --body-file.
  const bodyPath = path.join(cwd, "plan-body.md");
  fs.writeFileSync(bodyPath, "# Plan\n\n1. Step\n2. Step", "utf8");

  // Write the frontmatter as JSON, handed over via --frontmatter-file.
  const fmPath = path.join(cwd, "plan-fm.json");
  const frontmatter = { files_targeted: ["src/login.ts"], constraints: [] };
  fs.writeFileSync(fmPath, JSON.stringify(frontmatter), "utf8");

  const advanceArgs = [
    "advance",
    "--task-id",
    "task-1",
    "--stage",
    "plan",
    "--body-file",
    bodyPath,
    "--frontmatter-file",
    fmPath,
  ];
  const { captured } = await captureStdout(() => runStageCommand(advanceArgs));

  const { state, next_envelope: nextEnvelope } = JSON.parse(captured);
  assert.equal(state.current_stage, "plan-review");
  assert.ok(nextEnvelope, "next_envelope present while run is in_progress");
  assert.equal(nextEnvelope.expectedArtifact, "plan-review.md");

  // The plan artifact must exist in the run directory afterwards.
  const artifactPath = path.join(cwd, ".agents", "task-1", "plan.md");
  assert.ok(fs.existsSync(artifactPath));
});
|
|
157
|
+
|
|
158
|
+
test("stage advance: blocked status halts the run and returns null next envelope", async () => {
  const cwd = makeWorkspace();
  await runStageCommand(["start", "--task-id", "task-1", "--description", "x"]);

  const bodyPath = path.join(cwd, "blocked.md");
  fs.writeFileSync(bodyPath, "# Plan blocked\n\nMissing context.", "utf8");

  // Advance the plan stage with an explicit "blocked" status.
  const advanceArgs = [
    "advance",
    "--task-id",
    "task-1",
    "--stage",
    "plan",
    "--body-file",
    bodyPath,
    "--status",
    "blocked",
  ];
  const { captured } = await captureStdout(() => runStageCommand(advanceArgs));

  const { state, next_envelope: nextEnvelope } = JSON.parse(captured);
  assert.equal(state.outcome, "blocked");
  assert.equal(nextEnvelope, null);
});
|
|
188
|
+
|
|
189
|
+
test("stage advance: rejects malformed frontmatter file", async () => {
  const cwd = makeWorkspace();
  await runStageCommand(["start", "--task-id", "task-1", "--description", "x"]);

  const bodyPath = path.join(cwd, "body.md");
  fs.writeFileSync(bodyPath, "# Body", "utf8");

  // The frontmatter file holds valid JSON, but an array rather than an object.
  const badFm = path.join(cwd, "bad-fm.json");
  fs.writeFileSync(badFm, "[1, 2, 3]", "utf8");

  const attempt = runStageCommand([
    "advance",
    "--task-id",
    "task-1",
    "--stage",
    "plan",
    "--body-file",
    bodyPath,
    "--frontmatter-file",
    badFm,
  ]);

  await assert.rejects(attempt, /Expected JSON object/);
});
|
|
219
|
+
|
|
220
|
+
test("stage run: rejects when no command after --", async () => {
  makeWorkspace();
  await runStageCommand(["start", "--task-id", "task-1", "--description", "x"]);

  // "run" is invoked with a task id but no "--" separator or command.
  const attempt = runStageCommand(["run", "--task-id", "task-1"]);

  await assert.rejects(attempt, /requires a command after --/);
});
|
|
234
|
+
|
|
235
|
+
test("stage run: rejects when run is not in progress", async () => {
  const cwd = makeWorkspace();
  await runStageCommand(["start", "--task-id", "task-1", "--description", "x"]);

  // Overwrite the persisted state so the run looks finished.
  const statePath = path.join(cwd, ".agents", "task-1", "state.json");
  const persisted = JSON.parse(fs.readFileSync(statePath, "utf8"));
  const finished = {
    ...persisted,
    outcome: "complete",
    current_stage: null,
    completed_at: new Date().toISOString(),
  };
  fs.writeFileSync(statePath, JSON.stringify(finished, null, 2));

  const attempt = runStageCommand(["run", "--task-id", "task-1", "--", "echo", "hello"]);

  await assert.rejects(attempt, /not in progress/);
});
|
|
257
|
+
|
|
258
|
+
test("stage run: spawns subprocess with CORTEX_ACTIVE_TASK_ID set", async () => {
  const cwd = makeWorkspace();
  await runStageCommand([
    "start",
    "--task-id",
    "task-1",
    "--description",
    "x",
  ]);

  const outPath = path.join(cwd, "env-dump.txt");
  // Have the child write CORTEX_ACTIVE_TASK_ID (or "<unset>") into a file we
  // can inspect. The child is launched via process.execPath rather than a
  // bare "node", so it is the same binary that runs this test and does not
  // depend on what PATH resolves "node" to.
  await runStageCommand([
    "run",
    "--task-id",
    "task-1",
    "--",
    process.execPath,
    "-e",
    `require("fs").writeFileSync(${JSON.stringify(outPath)}, process.env.CORTEX_ACTIVE_TASK_ID || "<unset>")`,
  ]);

  const written = fs.readFileSync(outPath, "utf8");
  assert.equal(written, "task-1");
});
|
|
283
|
+
|
|
284
|
+
test("stage help: prints help text and returns without throwing", async () => {
  const { captured } = await captureStdout(() => runStageCommand(["help"]));

  // The help output must contain each of these, checked in this order.
  const expectedFragments = [/Usage:/, /cortex stage start/, /cortex stage run/];
  for (const fragment of expectedFragments) {
    assert.match(captured, fragment);
  }
});
|
|
290
|
+
|
|
291
|
+
test("stage <unknown>: throws with help text", async () => {
  // "frobnicate" is used as a subcommand name that is expected to be rejected.
  const attempt = runStageCommand(["frobnicate"]);
  await assert.rejects(attempt, /Unknown stage subcommand/);
});
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import fs from "node:fs";
|
|
4
|
+
import os from "node:os";
|
|
5
|
+
import path from "node:path";
|
|
6
|
+
|
|
7
|
+
import { evaluateToolCall } from "../dist/core/workflow/enforcement.js";
|
|
8
|
+
import { createRun } from "../dist/core/workflow/run-lifecycle.js";
|
|
9
|
+
import { capabilityDefinitionSchema, DEFAULT_CAPABILITIES } from "../dist/core/workflow/capabilities.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Create a fresh temporary directory under the OS temp dir, using the
 * "cortex-enforcement-" prefix.
 *
 * @returns {string} Path of the newly created directory.
 */
function makeWorkspace() {
  const prefix = path.join(os.tmpdir(), "cortex-enforcement-");
  return fs.mkdtempSync(prefix);
}
|
|
14
|
+
|
|
15
|
+
/**
 * Build a version-1 workflow definition consisting of exactly one stage.
 * The stage gets empty `reads` and `required_fields` lists.
 *
 * @param {string} id - Workflow id.
 * @param {string} description - Workflow description.
 * @param {{ name: string, artifact: string, capability: string, description: string }} stage
 *   Fields of the single stage.
 * @returns {object} The workflow definition object.
 */
function singleStageWorkflow(id, description, stage) {
  return {
    id,
    description,
    version: 1,
    stages: [
      {
        name: stage.name,
        artifact: stage.artifact,
        reads: [],
        required_fields: [],
        capability: stage.capability,
        description: stage.description,
      },
    ],
  };
}

// Workflow whose only stage uses the "planner" capability.
const WORKFLOW_PLANNER = singleStageWorkflow("planner-only", "Single planner stage", {
  name: "plan",
  artifact: "plan.md",
  capability: "planner",
  description: "Plan",
});

// Workflow whose only stage uses the "builder" capability.
const WORKFLOW_BUILDER = singleStageWorkflow("builder-only", "Single builder stage", {
  name: "build",
  artifact: "changes.md",
  capability: "builder",
  description: "Build",
});

// Lookup table keyed by workflow id, passed to evaluateToolCall in the tests.
const WORKFLOWS = {
  [WORKFLOW_PLANNER.id]: WORKFLOW_PLANNER,
  [WORKFLOW_BUILDER.id]: WORKFLOW_BUILDER,
};
|
|
51
|
+
|
|
52
|
+
/**
 * Create a run of WORKFLOW_PLANNER in the given workspace.
 *
 * @param {string} cwd - Workspace directory handed to createRun.
 * @param {string} [taskId="task-1"] - Task id for the run.
 * @returns Whatever createRun returns.
 */
function startPlannerRun(cwd, taskId = "task-1") {
  const runOptions = {
    cwd,
    taskId,
    workflow: WORKFLOW_PLANNER,
    taskDescription: "x",
  };
  return createRun(runOptions);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Create a run of WORKFLOW_BUILDER in the given workspace.
 *
 * @param {string} cwd - Workspace directory handed to createRun.
 * @param {string} [taskId="task-1"] - Task id for the run.
 * @returns Whatever createRun returns.
 */
function startBuilderRun(cwd, taskId = "task-1") {
  const runOptions = {
    cwd,
    taskId,
    workflow: WORKFLOW_BUILDER,
    taskDescription: "x",
  };
  return createRun(runOptions);
}
|
|
69
|
+
|
|
70
|
+
test("capabilities: default registry validates against schema", () => {
  // parse() throws on an invalid definition, which fails the test.
  Object.values(DEFAULT_CAPABILITIES).forEach((definition) => {
    capabilityDefinitionSchema.parse(definition);
  });
});
|
|
75
|
+
|
|
76
|
+
test("evaluator: allows everything when no run state exists", () => {
  const cwd = makeWorkspace();

  // No run is created in this workspace before evaluating the call.
  const call = { toolName: "Edit", toolInput: { file_path: "src/foo.ts" } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "ghost",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, true);
});
|
|
86
|
+
|
|
87
|
+
test("planner: blocks Edit (read-only capability)", () => {
  const cwd = makeWorkspace();
  startPlannerRun(cwd);

  const call = { toolName: "Edit", toolInput: { file_path: "src/foo.ts" } };
  const { allowed, reason } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  // Expect a denial whose reason mentions "read-only".
  assert.equal(allowed, false);
  assert.match(reason, /read-only/);
});
|
|
99
|
+
|
|
100
|
+
test("planner: blocks Bash (could mutate filesystem under read-only)", () => {
  const cwd = makeWorkspace();
  startPlannerRun(cwd);

  // The command string is only handed to the evaluator, not executed here.
  const call = { toolName: "Bash", toolInput: { command: "rm -rf /" } };
  const { allowed, reason } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, false);
  assert.match(reason, /Bash/);
});
|
|
112
|
+
|
|
113
|
+
test("planner: allows Read on any path", () => {
  const cwd = makeWorkspace();
  startPlannerRun(cwd);

  const call = { toolName: "Read", toolInput: { file_path: "src/anything.ts" } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, true);
});
|
|
124
|
+
|
|
125
|
+
test("planner: allows Grep without explicit path", () => {
  const cwd = makeWorkspace();
  startPlannerRun(cwd);

  // The tool input carries only a pattern, no path field.
  const call = { toolName: "Grep", toolInput: { pattern: "TODO" } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, true);
});
|
|
136
|
+
|
|
137
|
+
test("builder: allows Edit on src/", () => {
  const cwd = makeWorkspace();
  startBuilderRun(cwd);

  const call = { toolName: "Edit", toolInput: { file_path: "src/foo.ts" } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, true);
});
|
|
148
|
+
|
|
149
|
+
test("builder: allows Edit on tests/", () => {
  const cwd = makeWorkspace();
  startBuilderRun(cwd);

  const call = { toolName: "Edit", toolInput: { file_path: "tests/foo.test.ts" } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, true);
});
|
|
160
|
+
|
|
161
|
+
test("builder: blocks Edit on package.json (outside write_globs)", () => {
  const cwd = makeWorkspace();
  startBuilderRun(cwd);

  const call = { toolName: "Edit", toolInput: { file_path: "package.json" } };
  const { allowed, reason } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  // Expect a denial whose reason names the builder capability.
  assert.equal(allowed, false);
  assert.match(reason, /outside capability builder/);
});
|
|
173
|
+
|
|
174
|
+
test("builder: blocks Edit on .github/workflows (CI file)", () => {
  const cwd = makeWorkspace();
  startBuilderRun(cwd);

  const call = {
    toolName: "Edit",
    toolInput: { file_path: ".github/workflows/release.yml" },
  };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, false);
});
|
|
188
|
+
|
|
189
|
+
test("builder: handles absolute path inside cwd", () => {
  const cwd = makeWorkspace();
  startBuilderRun(cwd);

  // Absolute form of <cwd>/src/foo.ts.
  const abs = path.join(cwd, "src", "foo.ts");
  const call = { toolName: "Edit", toolInput: { file_path: abs } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, true);
});
|
|
201
|
+
|
|
202
|
+
test("builder: blocks absolute path outside cwd", () => {
  const cwd = makeWorkspace();
  startBuilderRun(cwd);

  // The target path is absolute and does not lie under the workspace.
  const call = { toolName: "Edit", toolInput: { file_path: "/etc/passwd" } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, false);
});
|
|
213
|
+
|
|
214
|
+
test("builder: blocks Edit without file_path", () => {
  const cwd = makeWorkspace();
  startBuilderRun(cwd);

  // The tool input is an empty object: no file_path at all.
  const call = { toolName: "Edit", toolInput: {} };
  const { allowed, reason } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: WORKFLOWS,
  });

  assert.equal(allowed, false);
  assert.match(reason, /did not include a file_path/);
});
|
|
226
|
+
|
|
227
|
+
test("evaluator: tools_allowed list restricts tools by name", () => {
  const cwd = makeWorkspace();

  // Extra capability whose tools_allowed list names only Read and Grep.
  const searchOnlyCapability = {
    name: "search-only",
    description: "Only Read + Grep allowed",
    read_globs: ["**"],
    write_globs: [],
    tools_allowed: ["Read", "Grep"],
  };
  const customCapabilities = {
    ...DEFAULT_CAPABILITIES,
    "search-only": searchOnlyCapability,
  };

  // Single-stage workflow bound to that capability.
  const scanStage = {
    name: "scan",
    artifact: "scan.md",
    reads: [],
    required_fields: [],
    capability: "search-only",
    description: "Scan",
  };
  const customWorkflow = {
    id: "search-only",
    description: "Search only",
    version: 1,
    stages: [scanStage],
  };
  createRun({
    cwd,
    taskId: "task-1",
    workflow: customWorkflow,
    taskDescription: "x",
  });

  // Evaluate a pattern-based tool call against the custom workflow/capabilities.
  const evaluate = (toolName, pattern) =>
    evaluateToolCall({
      cwd,
      taskId: "task-1",
      call: { toolName, toolInput: { pattern } },
      workflows: { "search-only": customWorkflow },
      capabilities: customCapabilities,
    });

  const grep = evaluate("Grep", "x");
  assert.equal(grep.allowed, true);

  const glob = evaluate("Glob", "*.ts");
  assert.equal(glob.allowed, false);
  assert.match(glob.reason, /does not allow tool Glob/);
});
|
|
280
|
+
|
|
281
|
+
test("evaluator: human capability blocks all tool calls", () => {
  const cwd = makeWorkspace();

  // Single-stage workflow whose stage uses the "human" capability.
  const approvalStage = {
    name: "approval",
    artifact: "approval.md",
    reads: [],
    required_fields: [],
    capability: "human",
    description: "Approve",
  };
  const humanWorkflow = {
    id: "human-only",
    description: "Human-only stage",
    version: 1,
    stages: [approvalStage],
  };
  createRun({
    cwd,
    taskId: "task-1",
    workflow: humanWorkflow,
    taskDescription: "x",
  });

  // A Read call is used as the probe; it is expected to be denied.
  const call = { toolName: "Read", toolInput: { file_path: "src/foo.ts" } };
  const read = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: { "human-only": humanWorkflow },
  });

  assert.equal(read.allowed, false);
});
|
|
313
|
+
|
|
314
|
+
test("evaluator: stage without capability is unrestricted", () => {
  const cwd = makeWorkspace();

  // The stage definition deliberately has no `capability` field.
  const freeStage = {
    name: "free-stage",
    artifact: "out.md",
    reads: [],
    required_fields: [],
    description: "No restriction",
  };
  const noCapabilityWorkflow = {
    id: "free",
    description: "No capability",
    version: 1,
    stages: [freeStage],
  };
  createRun({
    cwd,
    taskId: "task-1",
    workflow: noCapabilityWorkflow,
    taskDescription: "x",
  });

  const call = { toolName: "Bash", toolInput: { command: "ls" } };
  const { allowed } = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call,
    workflows: { free: noCapabilityWorkflow },
  });

  assert.equal(allowed, true);
});
|
|
345
|
+
|
|
346
|
+
test("evaluator: completed run does not gate any tool calls", () => {
  const cwd = makeWorkspace();
  // Only the state file written by createRun is needed; its return value is unused.
  createRun({
    cwd,
    taskId: "task-1",
    workflow: WORKFLOW_PLANNER,
    taskDescription: "x",
  });

  // Manually rewrite the persisted state to "complete" without going through
  // advanceStage — simulates a run that finished before the hook fires.
  const statePath = path.join(cwd, ".agents", "task-1", "state.json");
  const raw = JSON.parse(fs.readFileSync(statePath, "utf8"));
  raw.outcome = "complete";
  raw.current_stage = null;
  raw.completed_at = new Date().toISOString();
  fs.writeFileSync(statePath, JSON.stringify(raw, null, 2));

  // An Edit call is denied for an active planner run elsewhere in this file;
  // here it must be allowed because the run is marked complete.
  const result = evaluateToolCall({
    cwd,
    taskId: "task-1",
    call: { toolName: "Edit", toolInput: { file_path: "src/foo.ts" } },
    workflows: WORKFLOWS,
  });
  assert.equal(result.allowed, true);
});
|