@dotdrelle/wiki-manager 0.7.3 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -0
- package/README.md +50 -1
- package/docker-compose.yml +1 -23
- package/mcp.endpoints.example.json +13 -0
- package/package.json +2 -2
- package/src/agent/graph.js +101 -15
- package/src/agent/graph.test.js +145 -0
- package/src/cli/wiki-manager.js +306 -53
- package/src/commands/slash.js +4 -24
- package/src/core/agentEvents.js +169 -4
- package/src/core/agentEvents.test.js +176 -4
- package/src/core/agentLoop.js +3 -0
- package/src/core/compose.js +1 -2
- package/src/core/dockerCompose.test.js +5 -5
- package/src/core/jobQueue.js +29 -12
- package/src/core/mcp.js +120 -10
- package/src/core/mcp.test.js +121 -1
- package/src/core/plan.js +33 -0
- package/src/core/queueStore.test.js +1 -0
- package/src/core/sessionConfig.js +24 -0
- package/src/core/wikiWorkspace.test.js +24 -0
- package/src/runtime/approvals.js +113 -0
- package/src/runtime/auth.test.js +8 -0
- package/src/runtime/client.js +52 -6
- package/src/runtime/lifecycle.js +27 -3
- package/src/runtime/queueStore.js +3 -3
- package/src/runtime/runner.js +340 -0
- package/src/runtime/runner.test.js +270 -0
- package/src/runtime/server.js +252 -33
- package/src/runtime/server.test.js +577 -0
- package/src/runtime/store.js +181 -39
- package/src/runtime/store.test.js +363 -4
- package/src/runtime/supervisor.js +6 -0
- package/src/runtime/supervisor.test.js +141 -0
- package/src/shell/RightPane.tsx +1 -1
- package/src/shell/repl.js +22 -6
- package/src/shell/useAgent.ts +1 -1
- package/src/shell/useSession.ts +10 -5
- package/wiki-workspace +198 -4
package/.env.example
CHANGED
|
@@ -51,6 +51,26 @@ MAILERSEND_FROM_NAME=Donna
|
|
|
51
51
|
# Used by mcp.endpoints.json when the Exa MCP endpoint is enabled.
|
|
52
52
|
EXA_MCP_API_KEY=
|
|
53
53
|
|
|
54
|
+
# ── MCP retry policy (optional) ────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
# Tool calls are retried on transient HTTP/MCP errors before the run fails.
|
|
57
|
+
# WIKI_MANAGER_MCP_RETRY_MAX_ATTEMPTS=2
|
|
58
|
+
# WIKI_MANAGER_MCP_RETRY_BACKOFF_MS=500
|
|
59
|
+
|
|
60
|
+
# ── Runtime evaluator (optional) ───────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
# After a clean agentic runtime run, a second LLM pass checks whether the original
|
|
63
|
+
# task was accomplished. Set to 0/false/off/no to disable globally.
|
|
64
|
+
# WIKI_MANAGER_EVALUATOR=1
|
|
65
|
+
|
|
66
|
+
# If evaluator rejects a run or an activity ends in error, the runtime can ask
|
|
67
|
+
# the LLM for a partial recovery plan and retry remaining work.
|
|
68
|
+
# WIKI_MANAGER_REPLANNER_MAX_REPLANS=2
|
|
69
|
+
|
|
70
|
+
# Runtime approvals can pause runs or protected tools until /approve is called.
|
|
71
|
+
# WIKI_MANAGER_APPROVAL_TIMEOUT_MS=600000
|
|
72
|
+
# WIKI_MANAGER_REQUIRE_APPROVAL_TOOLS=production.production_start_job
|
|
73
|
+
|
|
54
74
|
# ── Agent ports (optional, change only if defaults conflict) ───────────────────
|
|
55
75
|
|
|
56
76
|
# CME_MCP_PORT=3336
|
package/README.md
CHANGED
|
@@ -414,7 +414,12 @@ process environment (including the `.env` loaded at startup):
|
|
|
414
414
|
"mcpServers": {
|
|
415
415
|
"cme": {
|
|
416
416
|
"url": "http://host.docker.internal:${CME_MCP_PORT:-3336}/mcp/",
|
|
417
|
-
"headers": { "Authorization": "Bearer ${CME_MCP_AUTH_TOKEN}" }
|
|
417
|
+
"headers": { "Authorization": "Bearer ${CME_MCP_AUTH_TOKEN}" },
|
|
418
|
+
"requireApproval": ["cme_export_run"],
|
|
419
|
+
"retry": { "maxAttempts": 2, "backoffMs": 500 },
|
|
420
|
+
"toolRetries": {
|
|
421
|
+
"cme_export_run": { "maxAttempts": 3, "backoffMs": 1000 }
|
|
422
|
+
}
|
|
418
423
|
},
|
|
419
424
|
"documents": {
|
|
420
425
|
"url": "http://host.docker.internal:${DOCUMENTS_MCP_PORT:-3337}/mcp/",
|
|
@@ -427,6 +432,49 @@ process environment (including the `.env` loaded at startup):
|
|
|
427
432
|
Copy `mcp.endpoints.example.json` to `mcp.endpoints.json` and set the matching
|
|
428
433
|
token variables in `.env`.
|
|
429
434
|
|
|
435
|
+
MCP `tools/call` requests retry transient HTTP/MCP failures before the run fails.
|
|
436
|
+
Set global defaults with `WIKI_MANAGER_MCP_RETRY_MAX_ATTEMPTS` and
|
|
437
|
+
`WIKI_MANAGER_MCP_RETRY_BACKOFF_MS`, or override them per endpoint with `retry`
|
|
438
|
+
and per tool with `toolRetries`.
|
|
439
|
+
|
|
440
|
+
After a clean runtime run, the manager runs a lightweight evaluator pass against
|
|
441
|
+
the original task, final plan, recent activities, and recent conversation. The
|
|
442
|
+
verdict is emitted as `run_evaluated` and appears in runtime state as
|
|
443
|
+
`evaluation`. Disable it globally with `WIKI_MANAGER_EVALUATOR=0`, or per run by
|
|
444
|
+
posting `/run` with `"evaluate": false`.
|
|
445
|
+
|
|
446
|
+
When evaluation fails, or when a watched activity ends in error, the runtime can
|
|
447
|
+
ask the LLM for a partial recovery plan and continue only the remaining steps.
|
|
448
|
+
Each recovery is emitted as `run_replanned` and appears in runtime state as
|
|
449
|
+
`replans`. Limit attempts with `WIKI_MANAGER_REPLANNER_MAX_REPLANS` or per run
|
|
450
|
+
with `"replans": 1` in the `/run` body.
|
|
451
|
+
|
|
452
|
+
Runtime approvals support two levels. For run-level approval, post `/run` with
|
|
453
|
+
`"requireApproval": true`; the runtime emits `run_pending_approval` before the
|
|
454
|
+
first action and waits for `POST /approve?runId=...`. For tool-level approval,
|
|
455
|
+
set `requireApproval` on an external endpoint, or set
|
|
456
|
+
`WIKI_MANAGER_REQUIRE_APPROVAL_TOOLS=production.production_start_job` for
|
|
457
|
+
workspace-native MCP tools. Pending tool approvals appear in the queue with
|
|
458
|
+
status `pending_approval` and can be approved with `POST /approve?itemId=...`
|
|
459
|
+
or the shell command `/approve item <id>`. The approval timeout defaults to 10
|
|
460
|
+
minutes and can be changed with `WIKI_MANAGER_APPROVAL_TIMEOUT_MS` or
|
|
461
|
+
`approvalTimeoutMs` in the `/run` body.
|
|
462
|
+
|
|
463
|
+
While a run is active, `GET`/`POST /control` still answers without waiting for
|
|
464
|
+
it to finish: `{"action":"status"}` returns the current run/plan/queue state,
|
|
465
|
+
`{"action":"explain"}` adds a one-line plain-language summary, and
|
|
466
|
+
`{"action":"enqueue","input":"..."}` accepts a new request without touching the
|
|
467
|
+
active plan. A queued request starts automatically as soon as the workspace
|
|
468
|
+
goes idle — either because the enqueue call itself found the workspace free,
|
|
469
|
+
or because the run in progress finished and drained the next queued item.
|
|
470
|
+
|
|
471
|
+
`GET /config/profiles` lists the `.wikirc` profiles for a workspace and
|
|
472
|
+
`POST /config/use {"profile":"..."}` switches the active one — the same
|
|
473
|
+
switch as the shell's `/config use`, rejected with 409 while a run is active.
|
|
474
|
+
The manager is the source of truth for which profile is active; `llm-wiki
|
|
475
|
+
serve`'s config-profile picker mirrors whatever the manager reports rather
|
|
476
|
+
than tracking its own state.
|
|
477
|
+
|
|
430
478
|
### Starting external agents
|
|
431
479
|
|
|
432
480
|
Start CME, documents, and mailer once for all workspaces:
|
|
@@ -569,6 +617,7 @@ Useful primitives:
|
|
|
569
617
|
/queue
|
|
570
618
|
/queue cancel <id>
|
|
571
619
|
/queue clear
|
|
620
|
+
/approve [run|item] <id>
|
|
572
621
|
/wiki
|
|
573
622
|
/wiki run <args...>
|
|
574
623
|
/skills
|
package/docker-compose.yml
CHANGED
|
@@ -97,25 +97,6 @@ services:
|
|
|
97
97
|
- host.docker.internal:host-gateway
|
|
98
98
|
restart: unless-stopped
|
|
99
99
|
|
|
100
|
-
# ── wiki-manager runtime ─────────────────────────────────────────────────
|
|
101
|
-
|
|
102
|
-
agent-runtime:
|
|
103
|
-
image: dotdrelle/llm-wiki-manager:latest
|
|
104
|
-
labels:
|
|
105
|
-
wiki-manager.description: "Agentic runtime — runs, plan, activities, queue."
|
|
106
|
-
command: runtime --host 0.0.0.0 --port 7788 --state-dir /state
|
|
107
|
-
volumes:
|
|
108
|
-
- ./.wiki-manager:/state
|
|
109
|
-
- ${WIKI_WORKSPACES_DIR:-./workspaces}:/workspaces
|
|
110
|
-
environment:
|
|
111
|
-
- WIKI_WORKSPACES_DIR=/workspaces
|
|
112
|
-
- WIKI_MANAGER_RUNTIME_TOKEN=${WIKI_MANAGER_RUNTIME_TOKEN:-}
|
|
113
|
-
ports:
|
|
114
|
-
- '127.0.0.1:7788:7788'
|
|
115
|
-
extra_hosts:
|
|
116
|
-
- host.docker.internal:host-gateway
|
|
117
|
-
restart: unless-stopped
|
|
118
|
-
|
|
119
100
|
# ── agent-wiki-production ─────────────────────────────────────────────────
|
|
120
101
|
|
|
121
102
|
production-mcp:
|
|
@@ -140,7 +121,7 @@ services:
|
|
|
140
121
|
x-wiki-manager:
|
|
141
122
|
service-aliases:
|
|
142
123
|
all:
|
|
143
|
-
targets: [serve, mcp-http, agent-runtime, production-mcp]
|
|
124
|
+
targets: [serve, mcp-http, production-mcp]
|
|
144
125
|
description: "Full workspace service set."
|
|
145
126
|
ui:
|
|
146
127
|
targets: [serve]
|
|
@@ -151,9 +132,6 @@ x-wiki-manager:
|
|
|
151
132
|
mcp:
|
|
152
133
|
targets: [mcp-http]
|
|
153
134
|
description: "Alias for mcp-http: wiki MCP server."
|
|
154
|
-
runtime:
|
|
155
|
-
targets: [agent-runtime]
|
|
156
|
-
description: "Alias for agent-runtime: agentic runtime."
|
|
157
135
|
production:
|
|
158
136
|
targets: [production-mcp]
|
|
159
137
|
description: "Alias for production-mcp: production jobs."
|
|
package/mcp.endpoints.example.json
CHANGED
|
@@ -4,6 +4,19 @@
|
|
|
4
4
|
"url": "http://host.docker.internal:${CME_MCP_PORT:-3336}/mcp/",
|
|
5
5
|
"headers": {
|
|
6
6
|
"Authorization": "Bearer ${CME_MCP_AUTH_TOKEN}"
|
|
7
|
+
},
|
|
8
|
+
"requireApproval": [
|
|
9
|
+
"cme_export_run"
|
|
10
|
+
],
|
|
11
|
+
"retry": {
|
|
12
|
+
"maxAttempts": 2,
|
|
13
|
+
"backoffMs": 500
|
|
14
|
+
},
|
|
15
|
+
"toolRetries": {
|
|
16
|
+
"cme_export_run": {
|
|
17
|
+
"maxAttempts": 3,
|
|
18
|
+
"backoffMs": 1000
|
|
19
|
+
}
|
|
7
20
|
}
|
|
8
21
|
},
|
|
9
22
|
"documents": {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dotdrelle/wiki-manager",
|
|
3
|
-
"version": "0.7.3",
|
|
3
|
+
"version": "0.9.3",
|
|
4
4
|
"description": "Agentic shell and orchestration cockpit for llm-wiki workspaces.",
|
|
5
5
|
"license": "PolyForm-Noncommercial-1.0.0",
|
|
6
6
|
"author": "dotrelle",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
},
|
|
12
12
|
"scripts": {
|
|
13
13
|
"start": "bun ./bin/wiki-manager.js",
|
|
14
|
-
"test": "node --test src/core/activity.test.js src/core/agentEvents.test.js src/core/agentLoop.test.js src/core/plan.test.js src/core/mcp.test.js src/core/documentIntake.test.js src/core/dockerCompose.test.js src/core/wikirc.test.js src/core/modelFetch.test.js src/core/startupCheck.test.js src/core/queueStore.test.js src/commands/slash.test.js src/shell/repl.test.js src/runtime/store.test.js src/runtime/server.test.js src/runtime/supervisor.test.js src/runtime/auth.test.js",
|
|
14
|
+
"test": "node --test src/agent/graph.test.js src/core/activity.test.js src/core/agentEvents.test.js src/core/agentLoop.test.js src/core/plan.test.js src/core/mcp.test.js src/core/documentIntake.test.js src/core/dockerCompose.test.js src/core/wikiWorkspace.test.js src/core/wikirc.test.js src/core/modelFetch.test.js src/core/startupCheck.test.js src/core/queueStore.test.js src/commands/slash.test.js src/shell/repl.test.js src/runtime/store.test.js src/runtime/server.test.js src/runtime/supervisor.test.js src/runtime/runner.test.js src/runtime/auth.test.js",
|
|
15
15
|
"check": "bun ./bin/wiki-manager.js --version && bun ./bin/wiki-manager.js --help && bun ./bin/wiki-manager.js --once \"verifie le mode agent\""
|
|
16
16
|
},
|
|
17
17
|
"engines": {
|
package/src/agent/graph.js
CHANGED
|
@@ -10,7 +10,7 @@ import { formatSkillsForAgent } from '../core/skills.js';
|
|
|
10
10
|
import { handleSlashCommand } from '../commands/slash.js';
|
|
11
11
|
import { extractActivity, formatActivitySummary, parseJsonText } from '../core/activity.js';
|
|
12
12
|
import { createAgentEvent, dispatchAgentEvent } from '../core/agentEvents.js';
|
|
13
|
-
import { enqueueProductionJob, formatQueue, productionLockBusy } from '../core/jobQueue.js';
|
|
13
|
+
import { enqueueProductionJob, ensureJobQueue, formatQueue, productionLockBusy } from '../core/jobQueue.js';
|
|
14
14
|
|
|
15
15
|
const MAX_TOOL_ITERATIONS = 80;
|
|
16
16
|
const MAX_SPINNER_ARG_LENGTH = 96;
|
|
@@ -265,6 +265,77 @@ function rememberProductionProgress(session, payload, label) {
|
|
|
265
265
|
};
|
|
266
266
|
}
|
|
267
267
|
|
|
268
|
+
function toolRequiresApproval(session, server, tool) {
|
|
269
|
+
const policy = session.mcp?.[server]?.requireApproval;
|
|
270
|
+
if (policy === true) return true;
|
|
271
|
+
if (typeof policy === 'string') return policy === tool || policy === '*';
|
|
272
|
+
if (Array.isArray(policy)) return policy.includes(tool) || policy.includes('*');
|
|
273
|
+
return false;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
function queueApprovalItem(session, { itemId, server, tool, args }) {
|
|
277
|
+
const queue = ensureJobQueue(session);
|
|
278
|
+
const existing = queue.find((item) => item.id === itemId);
|
|
279
|
+
if (existing) return existing;
|
|
280
|
+
const item = {
|
|
281
|
+
id: itemId,
|
|
282
|
+
workspace: session.workspace ?? null,
|
|
283
|
+
server,
|
|
284
|
+
tool,
|
|
285
|
+
args,
|
|
286
|
+
status: 'pending_approval',
|
|
287
|
+
reason: 'approval_required',
|
|
288
|
+
createdAt: new Date().toISOString(),
|
|
289
|
+
};
|
|
290
|
+
queue.push(item);
|
|
291
|
+
session.queueStore?.changed?.();
|
|
292
|
+
return item;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
function markApprovalQueueItem(session, itemId, status) {
|
|
296
|
+
const item = ensureJobQueue(session).find((entry) => entry.id === itemId);
|
|
297
|
+
if (!item) return;
|
|
298
|
+
item.status = status;
|
|
299
|
+
item.finishedAt = new Date().toISOString();
|
|
300
|
+
session.queueStore?.changed?.();
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
async function awaitRunApproval(session, { runId, tool }) {
|
|
304
|
+
if (!session._runApprovalRequired || session._runApprovalResolved || !session._requestApproval) return;
|
|
305
|
+
const plan = (session.headlessPlan ?? []).map((step) => step.description ?? step.label ?? `Step ${step.step}`);
|
|
306
|
+
await session._requestApproval({
|
|
307
|
+
scope: 'run',
|
|
308
|
+
runId,
|
|
309
|
+
reason: `Approve run plan before executing ${tool}.`,
|
|
310
|
+
plan,
|
|
311
|
+
tool,
|
|
312
|
+
timeoutMs: session._approvalTimeoutMs,
|
|
313
|
+
signal: session._abortSignal,
|
|
314
|
+
});
|
|
315
|
+
session._runApprovalResolved = true;
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
async function awaitToolApproval(session, { runId, server, tool, args, callId }) {
|
|
319
|
+
if (!toolRequiresApproval(session, server, tool) || !session._requestApproval) return;
|
|
320
|
+
const itemId = `approval-${callId ?? `${server}-${tool}`}`;
|
|
321
|
+
queueApprovalItem(session, { itemId, server, tool, args });
|
|
322
|
+
try {
|
|
323
|
+
await session._requestApproval({
|
|
324
|
+
scope: 'tool',
|
|
325
|
+
runId,
|
|
326
|
+
itemId,
|
|
327
|
+
reason: `Approve MCP tool ${server}.${tool}.`,
|
|
328
|
+
tool: `${server}.${tool}`,
|
|
329
|
+
timeoutMs: session._approvalTimeoutMs,
|
|
330
|
+
signal: session._abortSignal,
|
|
331
|
+
});
|
|
332
|
+
markApprovalQueueItem(session, itemId, 'approved');
|
|
333
|
+
} catch (err) {
|
|
334
|
+
markApprovalQueueItem(session, itemId, 'failed');
|
|
335
|
+
throw err;
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
|
|
268
339
|
function emitAgentEvent(session, type, origin, payload = {}) {
|
|
269
340
|
dispatchAgentEvent(session, createAgentEvent(type, { origin, payload }));
|
|
270
341
|
}
|
|
@@ -527,21 +598,33 @@ export function createAgentGraph(options = {}) {
|
|
|
527
598
|
if (server === 'production' && tool === 'production_start_job' && state.session.workspace && !args.callerLabel) {
|
|
528
599
|
args = { ...args, callerLabel: `${state.session.workspace}/wiki-manager` };
|
|
529
600
|
}
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
} else if (isInternalWikiTool) {
|
|
601
|
+
const runId = state.session._currentRunIdentity?.runId ?? null;
|
|
602
|
+
if (isInternalWikiTool) {
|
|
533
603
|
resultText = handleWikiTool(state.session, tool, args);
|
|
534
|
-
} else if (server === '
|
|
535
|
-
|
|
536
|
-
resultText =
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
604
|
+
} else if (server === 'shell' && tool === 'run_command') {
|
|
605
|
+
await awaitRunApproval(state.session, { runId, tool: toolName });
|
|
606
|
+
resultText = await runShellCommandTool(state.session, args.command);
|
|
607
|
+
} else if (server !== 'shell') {
|
|
608
|
+
await awaitRunApproval(state.session, { runId, tool: toolName });
|
|
609
|
+
await awaitToolApproval(state.session, {
|
|
610
|
+
runId,
|
|
611
|
+
server,
|
|
612
|
+
tool,
|
|
613
|
+
args,
|
|
614
|
+
callId: call.id,
|
|
615
|
+
});
|
|
616
|
+
if (server === 'production' && tool === 'production_start_job' && productionLockBusy(state.session)) {
|
|
617
|
+
const item = enqueueProductionJob(state.session, args, 'production lock busy');
|
|
618
|
+
resultText = buildQueuedResult(state.session, item);
|
|
619
|
+
if (minimalPlanActive) {
|
|
620
|
+
minimalPlanActive = false;
|
|
621
|
+
emitAgentEvent(state.session, 'plan_step_updated', 'tool', { step: 1, status: 'pending' });
|
|
622
|
+
}
|
|
623
|
+
} else {
|
|
624
|
+
args = withActiveWorkspaceForExternalTool(state.session, server, tool, args);
|
|
625
|
+
const result = await callMcpTool(state.session.mcp, server, tool, args, state.session._abortSignal);
|
|
626
|
+
resultText = formatMcpToolResult(result);
|
|
540
627
|
}
|
|
541
|
-
} else {
|
|
542
|
-
args = withActiveWorkspaceForExternalTool(state.session, server, tool, args);
|
|
543
|
-
const result = await callMcpTool(state.session.mcp, server, tool, args, state.session._abortSignal);
|
|
544
|
-
resultText = formatMcpToolResult(result);
|
|
545
628
|
}
|
|
546
629
|
if (server === 'production') {
|
|
547
630
|
let payload = parseJsonText(resultText);
|
|
@@ -574,7 +657,10 @@ export function createAgentGraph(options = {}) {
|
|
|
574
657
|
emitAgentEvent(state.session, 'plan_step_updated', 'tool', { step: 1, status: 'done' });
|
|
575
658
|
}
|
|
576
659
|
} catch (err) {
|
|
577
|
-
if (err.name === 'AbortError' && state.session._abortSignal?.aborted) throw err;
|
|
660
|
+
if (
|
|
661
|
+
(err.name === 'AbortError' && state.session._abortSignal?.aborted) ||
|
|
662
|
+
err.name === 'ApprovalError'
|
|
663
|
+
) throw err;
|
|
578
664
|
ok = false;
|
|
579
665
|
resultText = `Error [${server}.${tool}]: ${err instanceof Error ? err.message : String(err)}`;
|
|
580
666
|
if (minimalPlanActive && state.session.headlessPlan?.[0]?._activityKey === null) {
|
|
package/src/agent/graph.test.js
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import test from 'node:test';
|
|
3
|
+
import { createAgentGraph } from './graph.js';
|
|
4
|
+
|
|
5
|
+
function sessionBase(overrides = {}) {
|
|
6
|
+
return {
|
|
7
|
+
commands: ['status'],
|
|
8
|
+
workspace: 'docs',
|
|
9
|
+
workspaceEnv: {},
|
|
10
|
+
mcp: {
|
|
11
|
+
production: {
|
|
12
|
+
status: 'connected',
|
|
13
|
+
url: 'http://127.0.0.1:3000/mcp/',
|
|
14
|
+
tools: [{
|
|
15
|
+
name: 'production_start_job',
|
|
16
|
+
description: 'Start production job',
|
|
17
|
+
inputSchema: { type: 'object', properties: { type: { type: 'string' } } },
|
|
18
|
+
}],
|
|
19
|
+
},
|
|
20
|
+
},
|
|
21
|
+
...overrides,
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
function toolCallingLlm() {
|
|
26
|
+
let calls = 0;
|
|
27
|
+
return {
|
|
28
|
+
async completeWithTools() {
|
|
29
|
+
calls += 1;
|
|
30
|
+
if (calls === 1) {
|
|
31
|
+
return {
|
|
32
|
+
content: null,
|
|
33
|
+
message: { role: 'assistant', content: null },
|
|
34
|
+
tool_calls: [
|
|
35
|
+
{
|
|
36
|
+
id: 'plan-call',
|
|
37
|
+
type: 'function',
|
|
38
|
+
function: {
|
|
39
|
+
name: 'wiki__plan_set',
|
|
40
|
+
arguments: '{"steps":["Run production job"]}',
|
|
41
|
+
},
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
id: 'tool-call',
|
|
45
|
+
type: 'function',
|
|
46
|
+
function: {
|
|
47
|
+
name: 'production__production_start_job',
|
|
48
|
+
arguments: '{"type":"doctor"}',
|
|
49
|
+
},
|
|
50
|
+
},
|
|
51
|
+
],
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
return {
|
|
55
|
+
content: 'Done.',
|
|
56
|
+
message: { role: 'assistant', content: 'Done.' },
|
|
57
|
+
tool_calls: null,
|
|
58
|
+
};
|
|
59
|
+
},
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
test('agent graph waits for run-level approval before first MCP action', async () => {
|
|
64
|
+
const originalFetch = globalThis.fetch;
|
|
65
|
+
let fetchCalls = 0;
|
|
66
|
+
globalThis.fetch = async () => {
|
|
67
|
+
fetchCalls += 1;
|
|
68
|
+
return {
|
|
69
|
+
ok: true,
|
|
70
|
+
status: 200,
|
|
71
|
+
headers: { get: () => null },
|
|
72
|
+
text: async () => JSON.stringify({ result: { content: [{ type: 'text', text: '{"ok":true}' }] } }),
|
|
73
|
+
};
|
|
74
|
+
};
|
|
75
|
+
const approvals = [];
|
|
76
|
+
const session = sessionBase({
|
|
77
|
+
_runApprovalRequired: true,
|
|
78
|
+
_currentRunIdentity: { runId: 'run-approval', turnId: 'run-approval:turn-1', workspace: 'docs' },
|
|
79
|
+
_requestApproval: async (request) => {
|
|
80
|
+
approvals.push(request);
|
|
81
|
+
assert.equal(fetchCalls, 0);
|
|
82
|
+
return { approved: true };
|
|
83
|
+
},
|
|
84
|
+
llm: toolCallingLlm(),
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
try {
|
|
88
|
+
const agent = createAgentGraph();
|
|
89
|
+
const result = await agent.invoke({ input: 'Run doctor', session });
|
|
90
|
+
|
|
91
|
+
assert.equal(result.response, 'Done.');
|
|
92
|
+
assert.equal(fetchCalls, 1);
|
|
93
|
+
assert.equal(approvals.length, 1);
|
|
94
|
+
assert.equal(approvals[0].scope, 'run');
|
|
95
|
+
assert.deepEqual(approvals[0].plan, ['Run production job']);
|
|
96
|
+
assert.equal(session._runApprovalResolved, true);
|
|
97
|
+
} finally {
|
|
98
|
+
globalThis.fetch = originalFetch;
|
|
99
|
+
}
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
test('agent graph waits for tool-level approval configured on endpoint', async () => {
|
|
103
|
+
const originalFetch = globalThis.fetch;
|
|
104
|
+
globalThis.fetch = async () => ({
|
|
105
|
+
ok: true,
|
|
106
|
+
status: 200,
|
|
107
|
+
headers: { get: () => null },
|
|
108
|
+
text: async () => JSON.stringify({ result: { content: [{ type: 'text', text: '{"ok":true}' }] } }),
|
|
109
|
+
});
|
|
110
|
+
const approvals = [];
|
|
111
|
+
const session = sessionBase({
|
|
112
|
+
mcp: {
|
|
113
|
+
production: {
|
|
114
|
+
status: 'connected',
|
|
115
|
+
url: 'http://127.0.0.1:3000/mcp/',
|
|
116
|
+
requireApproval: ['production_start_job'],
|
|
117
|
+
tools: [{
|
|
118
|
+
name: 'production_start_job',
|
|
119
|
+
description: 'Start production job',
|
|
120
|
+
inputSchema: { type: 'object', properties: { type: { type: 'string' } } },
|
|
121
|
+
}],
|
|
122
|
+
},
|
|
123
|
+
},
|
|
124
|
+
_currentRunIdentity: { runId: 'run-tool-approval', turnId: 'run-tool-approval:turn-1', workspace: 'docs' },
|
|
125
|
+
_requestApproval: async (request) => {
|
|
126
|
+
approvals.push(request);
|
|
127
|
+
return { approved: true };
|
|
128
|
+
},
|
|
129
|
+
llm: toolCallingLlm(),
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
try {
|
|
133
|
+
const agent = createAgentGraph();
|
|
134
|
+
await agent.invoke({ input: 'Run doctor', session });
|
|
135
|
+
|
|
136
|
+
assert.equal(approvals.length, 1);
|
|
137
|
+
assert.equal(approvals[0].scope, 'tool');
|
|
138
|
+
assert.equal(approvals[0].tool, 'production.production_start_job');
|
|
139
|
+
assert.equal(session.jobQueue[0].status, 'approved');
|
|
140
|
+
assert.equal(session.jobQueue[0].reason, 'approval_required');
|
|
141
|
+
} finally {
|
|
142
|
+
globalThis.fetch = originalFetch;
|
|
143
|
+
}
|
|
144
|
+
});
|
|
145
|
+
|