@dotdrelle/wiki-manager 0.7.3 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -51,6 +51,26 @@ MAILERSEND_FROM_NAME=Donna
51
51
  # Used by mcp.endpoints.json when the Exa MCP endpoint is enabled.
52
52
  EXA_MCP_API_KEY=
53
53
 
54
+ # ── MCP retry policy (optional) ────────────────────────────────────────────────
55
+
56
+ # Tool calls are retried on transient HTTP/MCP errors before the run fails.
57
+ # WIKI_MANAGER_MCP_RETRY_MAX_ATTEMPTS=2
58
+ # WIKI_MANAGER_MCP_RETRY_BACKOFF_MS=500
59
+
60
+ # ── Runtime evaluator (optional) ───────────────────────────────────────────────
61
+
62
+ # After a clean agentic runtime run, a second LLM pass checks whether the original
63
+ # task was accomplished. Set to 0/false/off/no to disable globally.
64
+ # WIKI_MANAGER_EVALUATOR=1
65
+
66
+ # If the evaluator rejects a run or an activity ends in error, the runtime can ask
67
+ # the LLM for a partial recovery plan and retry remaining work.
68
+ # WIKI_MANAGER_REPLANNER_MAX_REPLANS=2
69
+
70
+ # Runtime approvals can pause runs or protected tools until /approve is called.
71
+ # WIKI_MANAGER_APPROVAL_TIMEOUT_MS=600000
72
+ # WIKI_MANAGER_REQUIRE_APPROVAL_TOOLS=production.production_start_job
73
+
54
74
  # ── Agent ports (optional, change only if defaults conflict) ───────────────────
55
75
 
56
76
  # CME_MCP_PORT=3336
package/README.md CHANGED
@@ -414,7 +414,12 @@ process environment (including the `.env` loaded at startup):
414
414
  "mcpServers": {
415
415
  "cme": {
416
416
  "url": "http://host.docker.internal:${CME_MCP_PORT:-3336}/mcp/",
417
- "headers": { "Authorization": "Bearer ${CME_MCP_AUTH_TOKEN}" }
417
+ "headers": { "Authorization": "Bearer ${CME_MCP_AUTH_TOKEN}" },
418
+ "requireApproval": ["cme_export_run"],
419
+ "retry": { "maxAttempts": 2, "backoffMs": 500 },
420
+ "toolRetries": {
421
+ "cme_export_run": { "maxAttempts": 3, "backoffMs": 1000 }
422
+ }
418
423
  },
419
424
  "documents": {
420
425
  "url": "http://host.docker.internal:${DOCUMENTS_MCP_PORT:-3337}/mcp/",
@@ -427,6 +432,34 @@ process environment (including the `.env` loaded at startup):
427
432
  Copy `mcp.endpoints.example.json` to `mcp.endpoints.json` and set the matching
428
433
  token variables in `.env`.
429
434
 
435
+ MCP `tools/call` requests retry transient HTTP/MCP failures before the run fails.
436
+ Set global defaults with `WIKI_MANAGER_MCP_RETRY_MAX_ATTEMPTS` and
437
+ `WIKI_MANAGER_MCP_RETRY_BACKOFF_MS`, or override them per endpoint with `retry`
438
+ and per tool with `toolRetries`.
439
+
440
+ After a clean runtime run, the manager runs a lightweight evaluator pass against
441
+ the original task, final plan, recent activities, and recent conversation. The
442
+ verdict is emitted as `run_evaluated` and appears in runtime state as
443
+ `evaluation`. Disable it globally with `WIKI_MANAGER_EVALUATOR=0`, or per run by
444
+ posting `/run` with `"evaluate": false`.
445
+
446
+ When evaluation fails, or when a watched activity ends in error, the runtime can
447
+ ask the LLM for a partial recovery plan and continue only the remaining steps.
448
+ Each recovery is emitted as `run_replanned` and appears in runtime state as
449
+ `replans`. Limit attempts with `WIKI_MANAGER_REPLANNER_MAX_REPLANS` or per run
450
+ with `"replans": 1` in the `/run` body.
451
+
452
+ Runtime approvals support two levels. For run-level approval, post `/run` with
453
+ `"requireApproval": true`; the runtime emits `run_pending_approval` before the
454
+ first action and waits for `POST /approve?runId=...`. For tool-level approval,
455
+ set `requireApproval` on an external endpoint, or set
456
+ `WIKI_MANAGER_REQUIRE_APPROVAL_TOOLS=production.production_start_job` for
457
+ workspace-native MCP tools. Pending tool approvals appear in the queue with
458
+ status `pending_approval` and can be approved with `POST /approve?itemId=...`
459
+ or the shell command `/approve item <id>`. The approval timeout defaults to 10
460
+ minutes and can be changed with `WIKI_MANAGER_APPROVAL_TIMEOUT_MS` or
461
+ `approvalTimeoutMs` in the `/run` body.
462
+
430
463
  ### Starting external agents
431
464
 
432
465
  Start CME, documents, and mailer once for all workspaces:
@@ -569,6 +602,7 @@ Useful primitives:
569
602
  /queue
570
603
  /queue cancel <id>
571
604
  /queue clear
605
+ /approve [run|item] <id>
572
606
  /wiki
573
607
  /wiki run <args...>
574
608
  /skills
@@ -97,25 +97,6 @@ services:
97
97
  - host.docker.internal:host-gateway
98
98
  restart: unless-stopped
99
99
 
100
- # ── wiki-manager runtime ─────────────────────────────────────────────────
101
-
102
- agent-runtime:
103
- image: dotdrelle/llm-wiki-manager:latest
104
- labels:
105
- wiki-manager.description: "Agentic runtime — runs, plan, activities, queue."
106
- command: runtime --host 0.0.0.0 --port 7788 --state-dir /state
107
- volumes:
108
- - ./.wiki-manager:/state
109
- - ${WIKI_WORKSPACES_DIR:-./workspaces}:/workspaces
110
- environment:
111
- - WIKI_WORKSPACES_DIR=/workspaces
112
- - WIKI_MANAGER_RUNTIME_TOKEN=${WIKI_MANAGER_RUNTIME_TOKEN:-}
113
- ports:
114
- - '127.0.0.1:7788:7788'
115
- extra_hosts:
116
- - host.docker.internal:host-gateway
117
- restart: unless-stopped
118
-
119
100
  # ── agent-wiki-production ─────────────────────────────────────────────────
120
101
 
121
102
  production-mcp:
@@ -140,7 +121,7 @@ services:
140
121
  x-wiki-manager:
141
122
  service-aliases:
142
123
  all:
143
- targets: [serve, mcp-http, agent-runtime, production-mcp]
124
+ targets: [serve, mcp-http, production-mcp]
144
125
  description: "Full workspace service set."
145
126
  ui:
146
127
  targets: [serve]
@@ -151,9 +132,6 @@ x-wiki-manager:
151
132
  mcp:
152
133
  targets: [mcp-http]
153
134
  description: "Alias for mcp-http: wiki MCP server."
154
- runtime:
155
- targets: [agent-runtime]
156
- description: "Alias for agent-runtime: agentic runtime."
157
135
  production:
158
136
  targets: [production-mcp]
159
137
  description: "Alias for production-mcp: production jobs."
@@ -4,6 +4,19 @@
4
4
  "url": "http://host.docker.internal:${CME_MCP_PORT:-3336}/mcp/",
5
5
  "headers": {
6
6
  "Authorization": "Bearer ${CME_MCP_AUTH_TOKEN}"
7
+ },
8
+ "requireApproval": [
9
+ "cme_export_run"
10
+ ],
11
+ "retry": {
12
+ "maxAttempts": 2,
13
+ "backoffMs": 500
14
+ },
15
+ "toolRetries": {
16
+ "cme_export_run": {
17
+ "maxAttempts": 3,
18
+ "backoffMs": 1000
19
+ }
7
20
  }
8
21
  },
9
22
  "documents": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dotdrelle/wiki-manager",
3
- "version": "0.7.3",
3
+ "version": "0.8.2",
4
4
  "description": "Agentic shell and orchestration cockpit for llm-wiki workspaces.",
5
5
  "license": "PolyForm-Noncommercial-1.0.0",
6
6
  "author": "dotrelle",
@@ -11,7 +11,7 @@
11
11
  },
12
12
  "scripts": {
13
13
  "start": "bun ./bin/wiki-manager.js",
14
- "test": "node --test src/core/activity.test.js src/core/agentEvents.test.js src/core/agentLoop.test.js src/core/plan.test.js src/core/mcp.test.js src/core/documentIntake.test.js src/core/dockerCompose.test.js src/core/wikirc.test.js src/core/modelFetch.test.js src/core/startupCheck.test.js src/core/queueStore.test.js src/commands/slash.test.js src/shell/repl.test.js src/runtime/store.test.js src/runtime/server.test.js src/runtime/supervisor.test.js src/runtime/auth.test.js",
14
+ "test": "node --test src/agent/graph.test.js src/core/activity.test.js src/core/agentEvents.test.js src/core/agentLoop.test.js src/core/plan.test.js src/core/mcp.test.js src/core/documentIntake.test.js src/core/dockerCompose.test.js src/core/wikiWorkspace.test.js src/core/wikirc.test.js src/core/modelFetch.test.js src/core/startupCheck.test.js src/core/queueStore.test.js src/commands/slash.test.js src/shell/repl.test.js src/runtime/store.test.js src/runtime/server.test.js src/runtime/supervisor.test.js src/runtime/runner.test.js src/runtime/auth.test.js",
15
15
  "check": "bun ./bin/wiki-manager.js --version && bun ./bin/wiki-manager.js --help && bun ./bin/wiki-manager.js --once \"verifie le mode agent\""
16
16
  },
17
17
  "engines": {
@@ -10,7 +10,7 @@ import { formatSkillsForAgent } from '../core/skills.js';
10
10
  import { handleSlashCommand } from '../commands/slash.js';
11
11
  import { extractActivity, formatActivitySummary, parseJsonText } from '../core/activity.js';
12
12
  import { createAgentEvent, dispatchAgentEvent } from '../core/agentEvents.js';
13
- import { enqueueProductionJob, formatQueue, productionLockBusy } from '../core/jobQueue.js';
13
+ import { enqueueProductionJob, ensureJobQueue, formatQueue, productionLockBusy } from '../core/jobQueue.js';
14
14
 
15
15
  const MAX_TOOL_ITERATIONS = 80;
16
16
  const MAX_SPINNER_ARG_LENGTH = 96;
@@ -265,6 +265,77 @@ function rememberProductionProgress(session, payload, label) {
265
265
  };
266
266
  }
267
267
 
/**
 * Tell whether a call to `tool` on MCP endpoint `server` is gated by approval.
 *
 * Reads `session.mcp[server].requireApproval`, which may be:
 * - `true`: every tool on the endpoint needs approval;
 * - a string: that exact tool name, or `'*'` for every tool;
 * - an array: any listed tool name, or `'*'` for every tool.
 * Any other value (including a missing endpoint or policy) means no approval.
 *
 * @param {object} session - Session object holding the `mcp` endpoint map.
 * @param {string} server - Endpoint key inside `session.mcp`.
 * @param {string} tool - Tool name to look up in the endpoint policy.
 * @returns {boolean} `true` when the call must wait for approval.
 */
function toolRequiresApproval(session, server, tool) {
  const rule = session.mcp?.[server]?.requireApproval;
  if (rule === true) {
    return true;
  }
  if (typeof rule === 'string') {
    return rule === '*' || rule === tool;
  }
  if (!Array.isArray(rule)) {
    return false;
  }
  // Either the tool itself or the wildcard entry is enough.
  return [tool, '*'].some((name) => rule.includes(name));
}
275
+
/**
 * Register a pending tool approval in the session job queue.
 *
 * The queue is obtained through `ensureJobQueue(session)`. A new entry is
 * pushed with status `pending_approval` and reason `approval_required`, and
 * `session.queueStore.changed()` is invoked when that hook exists.
 *
 * When an entry with the same id already exists:
 * - if it is still `pending_approval`, or it is not an approval entry at all,
 *   it is returned untouched;
 * - if it is an approval entry that already finished (for example `approved`
 *   or `failed`), it is re-armed. Item ids are not guaranteed to be unique
 *   across calls (the caller falls back to `approval-<server>-<tool>` when no
 *   call id is available), and returning the stale entry as-is would hide the
 *   new pending approval from the queue.
 *
 * @param {object} session - Session owning the job queue.
 * @param {object} request
 * @param {string} request.itemId - Queue id for the approval entry.
 * @param {string} request.server - MCP endpoint key.
 * @param {string} request.tool - Tool name on that endpoint.
 * @param {*} request.args - Arguments of the tool call awaiting approval.
 * @returns {object} The queue entry that represents this approval.
 */
function queueApprovalItem(session, { itemId, server, tool, args }) {
  const queue = ensureJobQueue(session);
  const existing = queue.find((item) => item.id === itemId);
  if (existing) {
    const isFinishedApproval =
      existing.reason === 'approval_required' && existing.status !== 'pending_approval';
    if (isFinishedApproval) {
      // Same id requested again after a previous decision: show it as pending again.
      existing.server = server;
      existing.tool = tool;
      existing.args = args;
      existing.status = 'pending_approval';
      existing.createdAt = new Date().toISOString();
      delete existing.finishedAt;
      session.queueStore?.changed?.();
    }
    return existing;
  }
  const item = {
    id: itemId,
    workspace: session.workspace ?? null,
    server,
    tool,
    args,
    status: 'pending_approval',
    reason: 'approval_required',
    createdAt: new Date().toISOString(),
  };
  queue.push(item);
  session.queueStore?.changed?.();
  return item;
}
294
+
/**
 * Record the outcome of an approval on its queue entry.
 *
 * Looks the entry up by id in the queue returned by `ensureJobQueue(session)`.
 * When found, its `status` is overwritten, `finishedAt` is stamped with the
 * current time in ISO format, and `session.queueStore.changed()` is invoked if
 * that hook exists. An unknown id is a silent no-op.
 *
 * @param {object} session - Session owning the job queue.
 * @param {string} itemId - Id of the queue entry to update.
 * @param {string} status - New status value to store on the entry.
 * @returns {void}
 */
function markApprovalQueueItem(session, itemId, status) {
  const match = ensureJobQueue(session).find(({ id }) => id === itemId);
  if (match) {
    match.status = status;
    match.finishedAt = new Date().toISOString();
    session.queueStore?.changed?.();
  }
}
302
+
/**
 * Block until the run as a whole has been approved, at most once per session.
 *
 * Does nothing unless `session._runApprovalRequired` is set, the approval has
 * not been resolved yet, and the session exposes a `_requestApproval` hook.
 * Otherwise it sends a `scope: 'run'` request carrying the current plan
 * (description, then label, then `Step <n>` for each `session.headlessPlan`
 * entry) and sets `session._runApprovalResolved` once the request is granted.
 *
 * Fails closed: if the hook resolves with `approved: false`, an error named
 * `ApprovalError` is thrown and the run stays unresolved. Rejections from the
 * hook propagate unchanged.
 * NOTE(review): the resolved value's shape is an assumption. The hook's
 * implementation is not visible here; the graph tests resolve it with
 * `{ approved: true }`. Confirm against the runtime.
 *
 * @param {object} session - Active agent session.
 * @param {object} context
 * @param {string|null} context.runId - Identifier of the current run.
 * @param {string} context.tool - Tool about to be executed (used in the reason).
 * @returns {Promise<void>}
 * @throws {Error} Error named `ApprovalError` when approval is explicitly denied.
 */
async function awaitRunApproval(session, { runId, tool }) {
  if (!session._runApprovalRequired || session._runApprovalResolved || !session._requestApproval) return;
  const plan = (session.headlessPlan ?? []).map((step) => step.description ?? step.label ?? `Step ${step.step}`);
  const decision = await session._requestApproval({
    scope: 'run',
    runId,
    reason: `Approve run plan before executing ${tool}.`,
    plan,
    tool,
    timeoutMs: session._approvalTimeoutMs,
    signal: session._abortSignal,
  });
  if (decision?.approved === false) {
    // An approval gate must never treat an explicit "no" as a "yes".
    const denial = new Error(`Run approval denied before executing ${tool}.`);
    denial.name = 'ApprovalError';
    throw denial;
  }
  session._runApprovalResolved = true;
}
317
+
/**
 * Block until a protected MCP tool call has been approved.
 *
 * Does nothing when `toolRequiresApproval` reports the tool as unprotected or
 * when the session has no `_requestApproval` hook. Otherwise the call is
 * registered in the job queue through `queueApprovalItem`, a `scope: 'tool'`
 * request is sent, and the queue entry is marked `approved` or `failed`
 * through `markApprovalQueueItem`.
 *
 * The queue id is `approval-<callId>`, falling back to
 * `approval-<server>-<tool>` when no call id is provided.
 *
 * Fails closed: if the hook resolves with `approved: false`, the entry is
 * marked `failed` and an error named `ApprovalError` is thrown. Rejections
 * from the hook also mark the entry `failed` and are rethrown unchanged.
 * NOTE(review): the resolved value's shape is an assumption. The hook's
 * implementation is not visible here; the graph tests resolve it with
 * `{ approved: true }`. Confirm against the runtime.
 *
 * @param {object} session - Active agent session.
 * @param {object} call
 * @param {string|null} call.runId - Identifier of the current run.
 * @param {string} call.server - MCP endpoint key.
 * @param {string} call.tool - Tool name on that endpoint.
 * @param {*} call.args - Arguments of the pending tool call.
 * @param {string} [call.callId] - Tool-call id used to build the queue id.
 * @returns {Promise<void>}
 * @throws {Error} Error named `ApprovalError` when approval is explicitly denied.
 */
async function awaitToolApproval(session, { runId, server, tool, args, callId }) {
  if (!toolRequiresApproval(session, server, tool) || !session._requestApproval) return;
  const itemId = `approval-${callId ?? `${server}-${tool}`}`;
  queueApprovalItem(session, { itemId, server, tool, args });
  try {
    const decision = await session._requestApproval({
      scope: 'tool',
      runId,
      itemId,
      reason: `Approve MCP tool ${server}.${tool}.`,
      tool: `${server}.${tool}`,
      timeoutMs: session._approvalTimeoutMs,
      signal: session._abortSignal,
    });
    if (decision?.approved === false) {
      // Thrown inside the try so the queue entry is marked failed below.
      const denial = new Error(`Approval denied for MCP tool ${server}.${tool}.`);
      denial.name = 'ApprovalError';
      throw denial;
    }
    markApprovalQueueItem(session, itemId, 'approved');
  } catch (err) {
    markApprovalQueueItem(session, itemId, 'failed');
    throw err;
  }
}
338
+
268
339
  function emitAgentEvent(session, type, origin, payload = {}) {
269
340
  dispatchAgentEvent(session, createAgentEvent(type, { origin, payload }));
270
341
  }
@@ -527,21 +598,33 @@ export function createAgentGraph(options = {}) {
527
598
  if (server === 'production' && tool === 'production_start_job' && state.session.workspace && !args.callerLabel) {
528
599
  args = { ...args, callerLabel: `${state.session.workspace}/wiki-manager` };
529
600
  }
530
- if (server === 'shell' && tool === 'run_command') {
531
- resultText = await runShellCommandTool(state.session, args.command);
532
- } else if (isInternalWikiTool) {
601
+ const runId = state.session._currentRunIdentity?.runId ?? null;
602
+ if (isInternalWikiTool) {
533
603
  resultText = handleWikiTool(state.session, tool, args);
534
- } else if (server === 'production' && tool === 'production_start_job' && productionLockBusy(state.session)) {
535
- const item = enqueueProductionJob(state.session, args, 'production lock busy');
536
- resultText = buildQueuedResult(state.session, item);
537
- if (minimalPlanActive) {
538
- minimalPlanActive = false;
539
- emitAgentEvent(state.session, 'plan_step_updated', 'tool', { step: 1, status: 'pending' });
604
+ } else if (server === 'shell' && tool === 'run_command') {
605
+ await awaitRunApproval(state.session, { runId, tool: toolName });
606
+ resultText = await runShellCommandTool(state.session, args.command);
607
+ } else if (server !== 'shell') {
608
+ await awaitRunApproval(state.session, { runId, tool: toolName });
609
+ await awaitToolApproval(state.session, {
610
+ runId,
611
+ server,
612
+ tool,
613
+ args,
614
+ callId: call.id,
615
+ });
616
+ if (server === 'production' && tool === 'production_start_job' && productionLockBusy(state.session)) {
617
+ const item = enqueueProductionJob(state.session, args, 'production lock busy');
618
+ resultText = buildQueuedResult(state.session, item);
619
+ if (minimalPlanActive) {
620
+ minimalPlanActive = false;
621
+ emitAgentEvent(state.session, 'plan_step_updated', 'tool', { step: 1, status: 'pending' });
622
+ }
623
+ } else {
624
+ args = withActiveWorkspaceForExternalTool(state.session, server, tool, args);
625
+ const result = await callMcpTool(state.session.mcp, server, tool, args, state.session._abortSignal);
626
+ resultText = formatMcpToolResult(result);
540
627
  }
541
- } else {
542
- args = withActiveWorkspaceForExternalTool(state.session, server, tool, args);
543
- const result = await callMcpTool(state.session.mcp, server, tool, args, state.session._abortSignal);
544
- resultText = formatMcpToolResult(result);
545
628
  }
546
629
  if (server === 'production') {
547
630
  let payload = parseJsonText(resultText);
@@ -574,7 +657,10 @@ export function createAgentGraph(options = {}) {
574
657
  emitAgentEvent(state.session, 'plan_step_updated', 'tool', { step: 1, status: 'done' });
575
658
  }
576
659
  } catch (err) {
577
- if (err.name === 'AbortError' && state.session._abortSignal?.aborted) throw err;
660
+ if (
661
+ (err.name === 'AbortError' && state.session._abortSignal?.aborted) ||
662
+ err.name === 'ApprovalError'
663
+ ) throw err;
578
664
  ok = false;
579
665
  resultText = `Error [${server}.${tool}]: ${err instanceof Error ? err.message : String(err)}`;
580
666
  if (minimalPlanActive && state.session.headlessPlan?.[0]?._activityKey === null) {
@@ -0,0 +1,145 @@
1
+ import assert from 'node:assert/strict';
2
+ import test from 'node:test';
3
+ import { createAgentGraph } from './graph.js';
4
+
/**
 * Build a minimal session fixture for the agent graph tests.
 *
 * The default session has one connected `production` MCP endpoint exposing a
 * single `production_start_job` tool. Keys in `overrides` replace the default
 * top-level keys wholesale (no deep merge).
 *
 * @param {object} [overrides] - Top-level session keys to add or replace.
 * @returns {object} A fresh session object.
 */
function sessionBase(overrides = {}) {
  const startJobTool = {
    name: 'production_start_job',
    description: 'Start production job',
    inputSchema: { type: 'object', properties: { type: { type: 'string' } } },
  };
  const defaults = {
    commands: ['status'],
    workspace: 'docs',
    workspaceEnv: {},
    mcp: {
      production: {
        status: 'connected',
        url: 'http://127.0.0.1:3000/mcp/',
        tools: [startJobTool],
      },
    },
  };
  return Object.assign(defaults, overrides);
}
24
+
/**
 * Build a scripted LLM double for the agent graph tests.
 *
 * The first `completeWithTools()` call answers with two tool calls: a
 * `wiki__plan_set` call declaring one step, then a
 * `production__production_start_job` call. Every later call answers with the
 * plain text `Done.` and no tool calls.
 *
 * @returns {{ completeWithTools: () => Promise<object> }} The LLM double.
 */
function toolCallingLlm() {
  const functionCall = (id, name, argumentsJson) => ({
    id,
    type: 'function',
    function: { name, arguments: argumentsJson },
  });
  let turn = 0;
  return {
    async completeWithTools() {
      turn += 1;
      if (turn !== 1) {
        return {
          content: 'Done.',
          message: { role: 'assistant', content: 'Done.' },
          tool_calls: null,
        };
      }
      return {
        content: null,
        message: { role: 'assistant', content: null },
        tool_calls: [
          functionCall('plan-call', 'wiki__plan_set', '{"steps":["Run production job"]}'),
          functionCall('tool-call', 'production__production_start_job', '{"type":"doctor"}'),
        ],
      };
    },
  };
}
62
+
// Run-level approval: with `_runApprovalRequired` set, the graph must ask for
// approval (scope `run`, carrying the plan) before the first MCP HTTP request.
test('agent graph waits for run-level approval before first MCP action', async () => {
  const realFetch = globalThis.fetch;
  const seenApprovals = [];
  let fetchCount = 0;

  // Count MCP HTTP requests and answer each with a successful tool result.
  globalThis.fetch = async () => {
    fetchCount += 1;
    const body = JSON.stringify({ result: { content: [{ type: 'text', text: '{"ok":true}' }] } });
    return {
      ok: true,
      status: 200,
      headers: { get: () => null },
      text: async () => body,
    };
  };

  const session = sessionBase({
    _runApprovalRequired: true,
    _currentRunIdentity: { runId: 'run-approval', turnId: 'run-approval:turn-1', workspace: 'docs' },
    _requestApproval: async (request) => {
      seenApprovals.push(request);
      // Approval must be requested before any MCP request goes out.
      assert.equal(fetchCount, 0);
      return { approved: true };
    },
    llm: toolCallingLlm(),
  });

  try {
    const graph = createAgentGraph();
    const outcome = await graph.invoke({ input: 'Run doctor', session });

    assert.equal(outcome.response, 'Done.');
    assert.equal(fetchCount, 1);
    assert.equal(seenApprovals.length, 1);

    const [approval] = seenApprovals;
    assert.equal(approval.scope, 'run');
    assert.deepEqual(approval.plan, ['Run production job']);
    assert.equal(session._runApprovalResolved, true);
  } finally {
    globalThis.fetch = realFetch;
  }
});
101
+
// Tool-level approval: `requireApproval` on the endpoint must trigger one
// `scope: 'tool'` request and leave an approved entry in the job queue.
test('agent graph waits for tool-level approval configured on endpoint', async () => {
  const realFetch = globalThis.fetch;

  // Every MCP HTTP request succeeds with a trivial tool result.
  globalThis.fetch = async () => {
    const body = JSON.stringify({ result: { content: [{ type: 'text', text: '{"ok":true}' }] } });
    return {
      ok: true,
      status: 200,
      headers: { get: () => null },
      text: async () => body,
    };
  };

  const seenApprovals = [];
  const protectedEndpoint = {
    status: 'connected',
    url: 'http://127.0.0.1:3000/mcp/',
    requireApproval: ['production_start_job'],
    tools: [{
      name: 'production_start_job',
      description: 'Start production job',
      inputSchema: { type: 'object', properties: { type: { type: 'string' } } },
    }],
  };
  const session = sessionBase({
    mcp: { production: protectedEndpoint },
    _currentRunIdentity: { runId: 'run-tool-approval', turnId: 'run-tool-approval:turn-1', workspace: 'docs' },
    _requestApproval: async (request) => {
      seenApprovals.push(request);
      return { approved: true };
    },
    llm: toolCallingLlm(),
  });

  try {
    const graph = createAgentGraph();
    await graph.invoke({ input: 'Run doctor', session });

    assert.equal(seenApprovals.length, 1);
    const [approval] = seenApprovals;
    assert.equal(approval.scope, 'tool');
    assert.equal(approval.tool, 'production.production_start_job');

    const [queued] = session.jobQueue;
    assert.equal(queued.status, 'approved');
    assert.equal(queued.reason, 'approval_required');
  } finally {
    globalThis.fetch = realFetch;
  }
});
145
+