@dotdrelle/wiki-manager 0.7.3 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -0
- package/README.md +50 -1
- package/docker-compose.yml +1 -23
- package/mcp.endpoints.example.json +13 -0
- package/package.json +2 -2
- package/src/agent/graph.js +101 -15
- package/src/agent/graph.test.js +145 -0
- package/src/cli/wiki-manager.js +306 -53
- package/src/commands/slash.js +4 -24
- package/src/core/agentEvents.js +169 -4
- package/src/core/agentEvents.test.js +176 -4
- package/src/core/agentLoop.js +3 -0
- package/src/core/compose.js +1 -2
- package/src/core/dockerCompose.test.js +5 -5
- package/src/core/jobQueue.js +29 -12
- package/src/core/mcp.js +120 -10
- package/src/core/mcp.test.js +121 -1
- package/src/core/plan.js +33 -0
- package/src/core/queueStore.test.js +1 -0
- package/src/core/sessionConfig.js +24 -0
- package/src/core/wikiWorkspace.test.js +24 -0
- package/src/runtime/approvals.js +113 -0
- package/src/runtime/auth.test.js +8 -0
- package/src/runtime/client.js +52 -6
- package/src/runtime/lifecycle.js +27 -3
- package/src/runtime/queueStore.js +3 -3
- package/src/runtime/runner.js +340 -0
- package/src/runtime/runner.test.js +270 -0
- package/src/runtime/server.js +252 -33
- package/src/runtime/server.test.js +577 -0
- package/src/runtime/store.js +181 -39
- package/src/runtime/store.test.js +363 -4
- package/src/runtime/supervisor.js +6 -0
- package/src/runtime/supervisor.test.js +141 -0
- package/src/shell/RightPane.tsx +1 -1
- package/src/shell/repl.js +22 -6
- package/src/shell/useAgent.ts +1 -1
- package/src/shell/useSession.ts +10 -5
- package/wiki-workspace +198 -4
package/src/runtime/lifecycle.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { spawn } from 'node:child_process';
|
|
1
|
+
import { execFile, spawn } from 'node:child_process';
|
|
2
2
|
import { dirname, resolve } from 'node:path';
|
|
3
3
|
import { fileURLToPath } from 'node:url';
|
|
4
4
|
import { checkRuntimeHealth, runtimeUrlFromEnv } from './client.js';
|
|
@@ -9,6 +9,29 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
9
9
|
const managerRoot = resolve(__dirname, '../..');
|
|
10
10
|
const binPath = resolve(managerRoot, 'bin/wiki-manager.js');
|
|
11
11
|
|
|
12
|
+
/**
 * Choose the executable used to launch the runtime child process.
 *
 * When the current process reports a Bun version, the current executable is
 * not reused: WIKI_MANAGER_NODE_BIN is returned if set, otherwise `node`.
 * In every other case the current process executable is returned.
 *
 * @returns {string} Path or command name of the executable to spawn.
 */
export function runtimeNodeExecutable() {
  if (!process.versions.bun) {
    return process.execPath;
  }
  return process.env.WIKI_MANAGER_NODE_BIN ?? 'node';
}
|
|
17
|
+
|
|
18
|
+
/**
 * Verify that `executable` is a Node.js binary of major version 22 or newer.
 *
 * The binary is run with `-p process.versions.node` and the printed version
 * is parsed for its major component.
 *
 * @param {string} [executable] Binary to probe; defaults to runtimeNodeExecutable().
 * @returns {Promise<{ executable: string, version: string }>} The probed binary and its version.
 * @throws {Error} When the binary cannot be executed (the underlying failure is
 *   attached as `cause`) or when it reports a major version below 22.
 */
export async function assertRuntimeNode(executable = runtimeNodeExecutable()) {
  const version = await new Promise((resolveVersion, reject) => {
    execFile(executable, ['-p', 'process.versions.node'], (err, stdout) => {
      if (err) {
        // Keep the spawn failure (ENOENT, EACCES, non-zero exit, ...) reachable for diagnostics.
        reject(new Error(
          `Runtime requires Node.js 22+; could not execute ${executable}. Set WIKI_MANAGER_NODE_BIN to a Node.js 22 binary.`,
          { cause: err },
        ));
        return;
      }
      resolveVersion(String(stdout).trim());
    });
  });
  const major = Number(String(version).split('.')[0]);
  if (!Number.isInteger(major) || major < 22) {
    throw new Error(`Runtime requires Node.js 22+ for node:sqlite; ${executable} is Node ${version}. Set WIKI_MANAGER_NODE_BIN to a Node.js 22 binary.`);
  }
  return { executable, version };
}
|
|
34
|
+
|
|
12
35
|
export async function ensureRuntime({
|
|
13
36
|
host = process.env.WIKI_MANAGER_RUNTIME_HOST ?? '0.0.0.0',
|
|
14
37
|
port = Number(process.env.WIKI_MANAGER_RUNTIME_PORT ?? 7788),
|
|
@@ -21,7 +44,8 @@ export async function ensureRuntime({
|
|
|
21
44
|
const existing = await runtimeHealthOrNull(url, auth.token);
|
|
22
45
|
if (existing) return { url, started: false, health: existing, token: auth.token, tokenPath: auth.tokenPath };
|
|
23
46
|
|
|
24
|
-
const
|
|
47
|
+
const runtimeNode = await assertRuntimeNode();
|
|
48
|
+
const child = spawn(runtimeNode.executable, [
|
|
25
49
|
binPath,
|
|
26
50
|
'runtime',
|
|
27
51
|
'--host',
|
|
@@ -45,7 +69,7 @@ export async function ensureRuntime({
|
|
|
45
69
|
let health = null;
|
|
46
70
|
while (Date.now() < deadline) {
|
|
47
71
|
health = await runtimeHealthOrNull(url, auth.token);
|
|
48
|
-
if (health) return { url, started: true, health, pid: child.pid, token: auth.token, tokenPath: auth.tokenPath };
|
|
72
|
+
if (health) return { url, started: true, health, pid: child.pid, token: auth.token, tokenPath: auth.tokenPath, node: runtimeNode };
|
|
49
73
|
await new Promise((resolveDelay) => setTimeout(resolveDelay, 150));
|
|
50
74
|
}
|
|
51
75
|
throw new Error(`Runtime did not become healthy at ${url}`);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { createQueueStore } from '../core/queueStore.js';
|
|
2
2
|
|
|
3
|
-
export function createSqliteQueueStore(store, session) {
|
|
4
|
-
session.jobQueue = store.listQueue();
|
|
5
|
-
return createQueueStore(session, { persist: () => store.saveQueue(session.jobQueue) });
|
|
3
|
+
/**
 * Build a queue store for `session` that is loaded from and persisted to `store`.
 *
 * The session's `jobQueue` is replaced with the queue listed by the store for
 * the given workspace, and every persist call saves the current
 * `session.jobQueue` back for that same workspace.
 *
 * @param {object} store Object exposing `listQueue` and `saveQueue`.
 * @param {object} session Session whose `jobQueue` is loaded and persisted.
 * @param {{ workspace?: * }} [options] Workspace scope; defaults to `session.workspace`, or null.
 * @returns {*} Whatever `createQueueStore` returns for the session.
 */
export function createSqliteQueueStore(store, session, { workspace = session.workspace ?? null } = {}) {
  const persist = () => store.saveQueue(session.jobQueue, { workspace });
  session.jobQueue = store.listQueue({ workspace });
  return createQueueStore(session, { persist });
}
|
package/src/runtime/runner.js
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import { createAgentEvent, dispatchAgentEvent } from '../core/agentEvents.js';
|
|
2
2
|
import { sessionActivities, terminalFailures } from '../core/activity.js';
|
|
3
3
|
import { runAgenticLoop, throwIfAborted } from '../core/agentLoop.js';
|
|
4
|
+
import { formatPlanStatus } from '../core/plan.js';
|
|
4
5
|
import { emitRuntimeLog, pollActivitiesOnce } from './supervisor.js';
|
|
5
6
|
|
|
7
|
+
// Replan budget that resolveMaxReplans() falls back to when
// WIKI_MANAGER_REPLANNER_MAX_REPLANS does not yield a usable number.
const DEFAULT_MAX_REPLANS = 2;
|
|
8
|
+
|
|
6
9
|
async function waitForRuntimeActivities(session, startedActivities, { timeoutMs, signal, pollBusy }) {
|
|
7
10
|
const deadline = Date.now() + timeoutMs;
|
|
8
11
|
const trackedKeys = new Set(startedActivities.map((activity) => activity.key));
|
|
@@ -71,3 +74,340 @@ export async function runRuntimeAgenticLoop(agent, session, initialInput, { sign
|
|
|
71
74
|
},
|
|
72
75
|
});
|
|
73
76
|
}
|
|
77
|
+
|
|
78
|
+
/**
 * Run the agentic loop, evaluate the outcome, and replan while budget remains.
 *
 * Each pass runs runRuntimeAgenticLoop with the current prompt. A failed loop
 * that exposes a terminal activity failure, or a rejected evaluation, asks the
 * replanner for the remaining steps; a successful replan consumes one unit of
 * budget and restarts the loop with a replanned prompt. Otherwise the run ends
 * with a `run_error` or `run_done` event.
 *
 * @param {object} agent Agent passed through to runRuntimeAgenticLoop.
 * @param {object} session Session that receives the dispatched events.
 * @param {string} input Original task, used for evaluation and replanning.
 * @param {object} [options]
 * @param {string|null} [options.initialInput] Prompt for the first pass; defaults to `input`.
 * @param {AbortSignal|null} [options.signal] Forwarded to the loop, evaluator and replanner.
 * @param {number} [options.timeoutMs] Forwarded to the loop.
 * @param {number} [options.maxTurns] Forwarded to the loop.
 * @param {string} [options.runId] Run identifier attached to every event.
 * @param {*} [options.pollBusy] Forwarded to the loop.
 * @param {boolean} [options.evaluate] Forwarded to evaluateRuntimeRun.
 * @param {number} [options.maxReplans] Replan budget; non-numeric values count as 0.
 * @returns {Promise<object>} `{ ok: true, evaluation }`, `{ ok: false, result }` for a
 *   failed loop, or `{ ok: false, evaluation, evaluationRejected: true }`.
 */
export async function runRuntimeAgenticWorkflow(agent, session, input, {
  initialInput = null,
  signal = null,
  timeoutMs,
  maxTurns,
  runId,
  pollBusy,
  evaluate = true,
  maxReplans = resolveMaxReplans(),
} = {}) {
  let remainingReplans = Math.max(0, Math.floor(Number(maxReplans) || 0));
  let prompt = initialInput ?? input;

  // Every event raised here originates from the runtime and carries the run id.
  const emit = (type, payload) => {
    dispatchAgentEvent(session, createAgentEvent(type, {
      origin: 'runtime',
      runId,
      payload,
    }));
  };

  // Ask for a new partial plan; only a successful replan consumes budget and swaps the prompt.
  const attemptReplan = async (trigger) => {
    const outcome = await replanRuntimeRun(session, input, trigger, {
      runId,
      signal,
      replansLeft: remainingReplans - 1,
    });
    if (!outcome.ok) {
      return false;
    }
    remainingReplans -= 1;
    prompt = buildReplannedRunPrompt(input, trigger, outcome.steps);
    return true;
  };

  for (;;) {
    const result = await runRuntimeAgenticLoop(agent, session, prompt, {
      signal,
      timeoutMs,
      maxTurns,
      runId,
      pollBusy,
    });

    if (!result.ok) {
      const trigger = replanTriggerFromLoopResult(result);
      if (trigger && remainingReplans > 0 && await attemptReplan(trigger)) {
        continue;
      }
      emit('run_error', {
        runId,
        message: runtimeLoopErrorMessage(result),
      });
      return { ok: false, result };
    }

    const evaluation = await evaluateRuntimeRun(session, input, { runId, signal, evaluate });
    if (evaluation) {
      emit('run_evaluated', {
        runId,
        ok: evaluation.ok,
        reason: evaluation.reason,
        suggestedAction: evaluation.suggestedAction ?? null,
      });

      if (!evaluation.ok) {
        if (remainingReplans > 0) {
          const trigger = {
            kind: 'evaluation',
            reason: evaluation.reason,
            suggestedAction: evaluation.suggestedAction ?? null,
          };
          if (await attemptReplan(trigger)) {
            continue;
          }
        }
        emit('run_error', {
          runId,
          message: `Runtime evaluator rejected the run: ${evaluation.reason}`,
          suggestedAction: evaluation.suggestedAction ?? null,
        });
        return { ok: false, evaluation, evaluationRejected: true };
      }
    }

    emit('run_done', { runId });
    return { ok: true, evaluation };
  }
}
|
|
175
|
+
|
|
176
|
+
/**
 * Evaluate a run and dispatch its closing events.
 *
 * When an evaluation is produced, `run_evaluated` is dispatched first. A
 * rejected evaluation is followed by `run_error`; otherwise `run_done` is
 * dispatched.
 *
 * @param {object} session Session that receives the dispatched events.
 * @param {string} input Original task handed to the evaluator.
 * @param {object} [options]
 * @param {string} [options.runId] Run identifier attached to every event.
 * @param {AbortSignal|null} [options.signal] Forwarded to the evaluator.
 * @param {boolean} [options.evaluate] Forwarded to evaluateRuntimeRun.
 * @returns {Promise<object>} `{ ok: true, evaluation }` or
 *   `{ ok: false, evaluation, evaluationRejected: true }`.
 */
export async function finishRuntimeRun(session, input, {
  runId,
  signal = null,
  evaluate = true,
} = {}) {
  const emit = (type, payload) => {
    dispatchAgentEvent(session, createAgentEvent(type, {
      origin: 'runtime',
      runId,
      payload,
    }));
  };

  const evaluation = await evaluateRuntimeRun(session, input, { runId, signal, evaluate });

  if (evaluation) {
    emit('run_evaluated', {
      runId,
      ok: evaluation.ok,
      reason: evaluation.reason,
      suggestedAction: evaluation.suggestedAction ?? null,
    });

    if (!evaluation.ok) {
      emit('run_error', {
        runId,
        message: `Runtime evaluator rejected the run: ${evaluation.reason}`,
        suggestedAction: evaluation.suggestedAction ?? null,
      });
      return { ok: false, evaluation, evaluationRejected: true };
    }
  }

  emit('run_done', { runId });
  return { ok: true, evaluation };
}
|
|
213
|
+
|
|
214
|
+
/**
 * Ask the session's LLM whether the original task was accomplished.
 *
 * Returns null when evaluation is disabled. When the session has no usable
 * `completeWithTools` client, or the call or response parsing throws, a
 * fallback evaluation describing the problem is returned instead of throwing.
 *
 * @param {object} session Session providing `llm` and the run context.
 * @param {string} input Original task.
 * @param {object} [options]
 * @param {string|null} [options.runId] Run identifier included in the prompt.
 * @param {AbortSignal|null} [options.signal] Forwarded to the LLM call.
 * @param {boolean} [options.evaluate] Pass false to skip evaluation.
 * @returns {Promise<{ ok: boolean, reason: string, suggestedAction: (string|null) }|null>}
 */
export async function evaluateRuntimeRun(session, input, {
  runId = null,
  signal = null,
  evaluate = true,
} = {}) {
  if (!shouldEvaluate(evaluate)) {
    return null;
  }

  const { llm } = session;
  if (!llm || typeof llm.completeWithTools !== 'function') {
    return fallbackEvaluation('Evaluator unavailable: no LLM completeWithTools client.');
  }

  try {
    emitRuntimeLog(session, 'runtime: evaluating completed run');
    const system = [
      'You are a strict evaluator for an agentic runtime run.',
      'Inspect whether the original task was accomplished using the final plan and recent conversation.',
      'Return only JSON with this exact shape: {"ok":boolean,"reason":"...","suggestedAction":string|null}.',
      'Use ok=false only when a concrete missing action, failed requirement, or wrong result is visible.',
    ].join('\n');
    const messages = [{ role: 'user', content: buildEvaluationPrompt(input, session, { runId }) }];
    const reply = await llm.completeWithTools({ system, tools: [], messages, signal });
    const parsed = parseJsonFenced(reply.content, 'evaluator response');
    return normalizeEvaluation(parsed);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return fallbackEvaluation(`Evaluator unavailable: ${detail}`);
  }
}
|
|
242
|
+
|
|
243
|
+
/**
 * Decide whether the evaluator should run.
 *
 * An explicit `false` disables it. Otherwise it is disabled only when
 * WIKI_MANAGER_EVALUATOR (trimmed, case-insensitive) is `0`, `false`, `off` or `no`.
 *
 * @param {*} value Caller-supplied switch.
 * @returns {boolean} True when evaluation should run.
 */
function shouldEvaluate(value) {
  if (value === false) {
    return false;
  }
  const setting = String(process.env.WIKI_MANAGER_EVALUATOR ?? '').trim().toLowerCase();
  const disabledValues = new Set(['0', 'false', 'off', 'no']);
  return !disabledValues.has(setting);
}
|
|
248
|
+
|
|
249
|
+
/**
 * Build the user message sent to the evaluator.
 *
 * The prompt lists the optional run id, the original task, the final plan,
 * the last 12 session activities and the recent conversation. Optional
 * sections that are empty are omitted; the blank separator lines between
 * sections are kept.
 *
 * @param {string} input Original task; an empty value is shown as `(unknown)`.
 * @param {object} session Session providing the plan, activities and conversation.
 * @param {{ runId?: (string|null) }} [options] Run identifier to include, if any.
 * @returns {string} Newline-joined prompt text.
 */
function buildEvaluationPrompt(input, session, { runId = null } = {}) {
  const recentConversation = formatRecentConversation(session);
  const activities = sessionActivities(session)
    .slice(-12)
    .map((activity) => `- ${activity.label ?? activity.id}: ${activity.status}${activity.error ? ` (${activity.error})` : ''}`)
    .join('\n');
  return [
    runId ? `Run id: ${runId}` : null,
    'Original task:',
    input || '(unknown)',
    '',
    session.headlessPlan ? `Final plan:\n${formatPlanStatus(session.headlessPlan)}` : 'Final plan: none',
    activities ? `Recent activities:\n${activities}` : null,
    recentConversation ? `Recent conversation:\n${recentConversation}` : null,
    '',
    'Return JSON only.',
    // Drop only the omitted sections (null); filtering on truthiness would also
    // strip the intentional '' separator lines.
  ].filter((line) => line !== null).join('\n');
}
|
|
267
|
+
|
|
268
|
+
/**
 * Parse JSON from an LLM reply that may wrap it in a Markdown code fence.
 *
 * If the text contains a ``` or ```json fence, the contents of the first
 * fence are parsed; otherwise the whole trimmed text is parsed.
 *
 * @param {*} content Raw reply content; null and undefined count as empty.
 * @param {string} [label] Name used in the "empty" error message.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the content is empty after trimming.
 * @throws {SyntaxError} When the selected text is not valid JSON.
 */
function parseJsonFenced(content, label = 'JSON response') {
  const text = String(content ?? '').trim();
  if (text.length === 0) {
    throw new Error(`empty ${label}`);
  }
  const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
  const payload = fenceMatch === null ? text : fenceMatch[1].trim();
  return JSON.parse(payload);
}
|
|
274
|
+
|
|
275
|
+
/**
 * Coerce a parsed evaluator reply into `{ ok, reason, suggestedAction }`.
 *
 * `ok` is true only for a literal `true`. A blank reason is replaced with a
 * default that matches the verdict. A present `suggestedAction` is converted
 * to a string; a missing one becomes null.
 *
 * @param {*} value Parsed evaluator reply (may be null or malformed).
 * @returns {{ ok: boolean, reason: string, suggestedAction: (string|null) }}
 */
function normalizeEvaluation(value) {
  const ok = value?.ok === true;
  const statedReason = String(value?.reason ?? '').trim();
  const defaultReason = ok ? 'Task completed.' : 'Evaluator rejected the run.';
  const rawAction = value?.suggestedAction;
  return {
    ok,
    reason: statedReason || defaultReason,
    suggestedAction: rawAction == null ? null : String(rawAction),
  };
}
|
|
282
|
+
|
|
283
|
+
/**
 * Build the evaluation used when the evaluator could not produce a verdict.
 * The result is always accepting (`ok: true`) and has no suggested action.
 *
 * @param {string} reason Explanation of why the evaluator was unavailable.
 * @returns {{ ok: true, reason: string, suggestedAction: null }}
 */
function fallbackEvaluation(reason) {
  const verdict = { ok: true, reason, suggestedAction: null };
  return verdict;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Ask the session's LLM for the steps still required after a failure.
 *
 * On success a `run_replanned` event and then a `plan_set` event (all steps
 * pending, numbered from 1) are dispatched. Any failure, including a missing
 * LLM client, an unparsable reply or an empty plan, is reported as
 * `{ ok: false, reason }` rather than thrown.
 *
 * @param {object} session Session providing `llm` and receiving the events.
 * @param {string} input Original task.
 * @param {{ kind: string, reason: string, suggestedAction?: (string|null) }} trigger Why a replan is needed.
 * @param {object} [options]
 * @param {string|null} [options.runId] Run identifier attached to the events.
 * @param {AbortSignal|null} [options.signal] Forwarded to the LLM call.
 * @param {number} [options.replansLeft] Budget value reported in `run_replanned`.
 * @returns {Promise<{ ok: true, steps: string[] }|{ ok: false, reason: string }>}
 */
export async function replanRuntimeRun(session, input, trigger, {
  runId = null,
  signal = null,
  replansLeft = 0,
} = {}) {
  const { llm } = session;
  if (!llm || typeof llm.completeWithTools !== 'function') {
    return { ok: false, reason: 'Replanner unavailable: no LLM completeWithTools client.' };
  }

  const emit = (type, payload) => {
    dispatchAgentEvent(session, createAgentEvent(type, {
      origin: 'runtime',
      runId,
      payload,
    }));
  };

  try {
    emitRuntimeLog(session, 'runtime: replanning remaining work');
    const system = [
      'You are a replanner for an agentic runtime run.',
      'Given the original objective, current plan, and failure reason, return only the remaining steps required.',
      'Do not include steps that are already done.',
      'Return only JSON with this exact shape: {"steps":["..."]}.',
    ].join('\n');
    const messages = [{ role: 'user', content: buildReplanPrompt(input, session, trigger) }];
    const reply = await llm.completeWithTools({ system, tools: [], messages, signal });

    const parsed = parseJsonFenced(reply.content, 'replan response');
    const steps = normalizeReplan(parsed.steps);
    if (steps.length === 0) {
      throw new Error('empty replan');
    }

    emit('run_replanned', {
      runId,
      reason: trigger.reason,
      plan: steps,
      replansLeft,
    });
    emit('plan_set', {
      steps: steps.map((description, index) => ({
        step: index + 1,
        description,
        status: 'pending',
      })),
    });
    return { ok: true, steps };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return { ok: false, reason };
  }
}
|
|
341
|
+
|
|
342
|
+
/**
 * Derive a replan trigger from a failed loop result.
 *
 * Only the first entry returned by `terminalFailures` for the completed
 * activities is used.
 *
 * @param {{ completed?: Array<object> }} result Loop result.
 * @returns {{ kind: 'activity_error', reason: string, suggestedAction: (string|null), activity: object }|null}
 *   The trigger, or null when there is no terminal failure.
 */
function replanTriggerFromLoopResult(result) {
  const firstFailure = terminalFailures(result.completed ?? [])[0];
  if (!firstFailure) {
    return null;
  }
  const subject = firstFailure.label ?? firstFailure.id ?? 'Activity';
  const detail = firstFailure.error ? `: ${firstFailure.error}` : '';
  return {
    kind: 'activity_error',
    reason: `${subject} ended with ${firstFailure.status}${detail}`,
    suggestedAction: firstFailure.error ?? null,
    activity: firstFailure,
  };
}
|
|
353
|
+
|
|
354
|
+
/**
 * Produce the `run_error` message for a failed loop result.
 *
 * A timeout is reported first, then the turn limit, then the reason of the
 * first terminal activity failure, and finally a generic message.
 *
 * @param {object} result Failed loop result.
 * @returns {string} Message describing the failure.
 */
function runtimeLoopErrorMessage(result) {
  if (result.timedOut) {
    return 'Runtime agentic loop timed out.';
  }
  if (result.maxTurns) {
    return 'Runtime agentic loop reached max turns.';
  }
  const activityReason = replanTriggerFromLoopResult(result)?.reason;
  return activityReason ?? 'Runtime agentic loop failed.';
}
|
|
360
|
+
|
|
361
|
+
/**
 * Build the user message sent to the replanner.
 *
 * The prompt lists the original task, the current plan, the failure source
 * and reason, an optional suggested action and the recent conversation.
 * Optional sections that are empty are omitted; the blank separator lines
 * between sections are kept.
 *
 * @param {string} input Original task; an empty value is shown as `(unknown)`.
 * @param {object} session Session providing the plan and conversation.
 * @param {{ kind: string, reason: string, suggestedAction?: (string|null) }} trigger Why a replan is needed.
 * @returns {string} Newline-joined prompt text.
 */
function buildReplanPrompt(input, session, trigger) {
  const recentConversation = formatRecentConversation(session);
  return [
    'Original task:',
    input || '(unknown)',
    '',
    session.headlessPlan ? `Current plan:\n${formatPlanStatus(session.headlessPlan)}` : 'Current plan: none',
    '',
    `Failure source: ${trigger.kind}`,
    `Failure reason: ${trigger.reason}`,
    trigger.suggestedAction ? `Suggested action: ${trigger.suggestedAction}` : null,
    recentConversation ? `Recent conversation:\n${recentConversation}` : null,
    '',
    'Return only the remaining steps still required. Exclude already completed steps.',
    // Drop only the omitted sections (null); filtering on truthiness would also
    // strip the intentional '' separator lines.
  ].filter((line) => line !== null).join('\n');
}
|
|
377
|
+
|
|
378
|
+
/**
 * Build the prompt that restarts the agent loop after a successful replan.
 *
 * @param {string} input Original task; an empty value is shown as `(unknown)`.
 * @param {{ reason: string }} trigger The trigger that caused the replan.
 * @param {string[]} steps Remaining steps, listed numbered from 1.
 * @returns {string} Newline-joined prompt text.
 */
function buildReplannedRunPrompt(input, trigger, steps) {
  const preamble = [
    'Continue a replanned runtime run.',
    '',
    'Original task:',
    input || '(unknown)',
    '',
    `Replan reason: ${trigger.reason}`,
    '',
    'New partial plan:',
  ];
  const numberedSteps = steps.map((step, index) => `${index + 1}. ${step}`);
  const closing = [
    '',
    'Execute only the first pending replanned step. Do not repeat completed work.',
  ];
  return preamble.concat(numberedSteps, closing).join('\n');
}
|
|
393
|
+
|
|
394
|
+
/**
 * Clean the `steps` value of a replanner reply.
 *
 * Non-array input yields an empty list. Each entry is stringified and
 * trimmed, blank entries are dropped, and at most 12 steps are kept.
 *
 * @param {*} steps Raw `steps` value from the parsed reply.
 * @returns {string[]} Up to 12 non-empty step descriptions.
 */
function normalizeReplan(steps) {
  if (!Array.isArray(steps)) {
    return [];
  }
  const cleaned = [];
  for (const step of steps) {
    const text = String(step ?? '').trim();
    if (text) {
      cleaned.push(text);
    }
  }
  return cleaned.slice(0, 12);
}
|
|
401
|
+
|
|
402
|
+
/**
 * Render the tail of the projected conversation as `role: content` lines.
 *
 * @param {object} session Session whose `agentProjection.conversation` is read.
 * @param {number} [n] Number of trailing messages to include.
 * @returns {string} Newline-joined lines; empty when there is no conversation.
 */
function formatRecentConversation(session, n = 12) {
  const history = session.agentProjection?.conversation ?? [];
  const recent = history.slice(-n);
  const lines = recent.map(({ role, content }) => `${role}: ${content}`);
  return lines.join('\n');
}
|
|
409
|
+
|
|
410
|
+
/**
 * Resolve the replan budget from a raw setting.
 *
 * A missing, blank or non-numeric setting yields DEFAULT_MAX_REPLANS. A blank
 * value is treated as unset because `Number('')` is 0, which would otherwise
 * silently disable replanning. Numeric values are floored and clamped to 0.
 *
 * @param {*} [value] Raw setting; defaults to WIKI_MANAGER_REPLANNER_MAX_REPLANS.
 * @returns {number} Non-negative number of replans allowed.
 */
function resolveMaxReplans(value = process.env.WIKI_MANAGER_REPLANNER_MAX_REPLANS) {
  if (value == null || String(value).trim() === '') {
    return DEFAULT_MAX_REPLANS;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? Math.max(0, Math.floor(parsed)) : DEFAULT_MAX_REPLANS;
}
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import test from 'node:test';
|
|
3
|
+
import { createAgentEvent, dispatchAgentEvent } from '../core/agentEvents.js';
|
|
4
|
+
import { finishRuntimeRun, runRuntimeAgenticWorkflow } from './runner.js';
|
|
5
|
+
|
|
6
|
+
// An accepting evaluation is recorded and reported before the run is marked done.
test('finishRuntimeRun emits evaluation before run_done', async () => {
  const seen = [];
  const plan = [
    { step: 1, description: 'Analyze', status: 'done' },
    { step: 2, description: 'Execute', status: 'done' },
  ];
  const llm = {
    // The evaluator must receive the evaluator system prompt, no tools, and the task prompt.
    async completeWithTools({ system, tools, messages }) {
      assert.match(system, /strict evaluator/);
      assert.deepEqual(tools, []);
      assert.match(messages[0].content, /Original task:/);
      return { content: '{"ok":true,"reason":"Task complete.","suggestedAction":null}' };
    },
  };
  const session = {
    activities: {},
    headlessPlan: plan,
    agentProjection: {
      conversation: [{ role: 'assistant', content: 'Done.' }],
    },
    llm,
    _onAgentEvent: (event) => seen.push(event),
  };

  const outcome = await finishRuntimeRun(session, 'Build workspace', { runId: 'run-1' });

  assert.equal(outcome.ok, true);
  const types = seen.map(({ type }) => type);
  assert.deepEqual(types, ['runtime_log', 'run_evaluated', 'run_done']);
  assert.equal(session.agentProjection.evaluation.ok, true);
  assert.equal(session.agentProjection.status, 'done');
});
|
|
35
|
+
|
|
36
|
+
// A rejecting evaluation ends the run with run_error and logs the evaluator's reason.
test('finishRuntimeRun turns negative evaluation into run_error', async () => {
  const seen = [];
  const rejection = '{"ok":false,"reason":"Export file missing.","suggestedAction":"Run export again."}';
  const session = {
    activities: {},
    headlessPlan: [{ step: 1, description: 'Export', status: 'done' }],
    agentProjection: {
      conversation: [{ role: 'assistant', content: 'Done.' }],
    },
    llm: {
      async completeWithTools() {
        return { content: rejection };
      },
    },
    _onAgentEvent: (event) => seen.push(event),
  };

  const outcome = await finishRuntimeRun(session, 'Export deliverable', { runId: 'run-2' });

  assert.equal(outcome.ok, false);
  assert.equal(outcome.evaluationRejected, true);
  const types = seen.map(({ type }) => type);
  assert.deepEqual(types, ['runtime_log', 'run_evaluated', 'run_error']);
  assert.equal(session.agentProjection.evaluation.ok, false);
  assert.equal(session.agentProjection.status, 'error');
  assert.match(session.agentProjection.logs.at(-1), /Export file missing/);
});
|
|
61
|
+
|
|
62
|
+
// An unparsable evaluator reply must not fail the run: the fallback evaluation accepts it.
test('finishRuntimeRun falls back open when evaluator response is invalid', async () => {
  const llm = {
    async completeWithTools() {
      return { content: 'not json' };
    },
  };
  const session = {
    activities: {},
    headlessPlan: null,
    agentProjection: { conversation: [] },
    llm,
  };

  const outcome = await finishRuntimeRun(session, 'Do work', { runId: 'run-3' });

  assert.equal(outcome.ok, true);
  const { evaluation, status } = session.agentProjection;
  assert.equal(evaluation.ok, true);
  assert.match(evaluation.reason, /Evaluator unavailable/);
  assert.equal(status, 'done');
});
|
|
81
|
+
|
|
82
|
+
// With evaluate: false the LLM is never consulted and the run still completes.
test('finishRuntimeRun can skip evaluation', async () => {
  let llmInvoked = false;
  const llm = {
    async completeWithTools() {
      llmInvoked = true;
      return { content: '{"ok":true,"reason":"ok"}' };
    },
  };
  const session = {
    activities: {},
    headlessPlan: null,
    agentProjection: { conversation: [] },
    llm,
  };

  const outcome = await finishRuntimeRun(session, 'Do work', { runId: 'run-4', evaluate: false });

  assert.equal(outcome.ok, true);
  assert.equal(outcome.evaluation, null);
  assert.equal(llmInvoked, false);
  assert.equal(session.agentProjection.evaluation, null);
  assert.equal(session.agentProjection.status, 'done');
});
|
|
104
|
+
|
|
105
|
+
// First evaluation rejects, the replanner supplies one step, and the second pass is accepted.
test('runRuntimeAgenticWorkflow replans after negative evaluation', async () => {
  const seen = [];
  const systemPrompts = [];
  const llm = {
    async completeWithTools({ system }) {
      systemPrompts.push(system);
      const isFirstCall = systemPrompts.length === 1;
      if (/strict evaluator/.test(system) && isFirstCall) {
        return { content: '{"ok":false,"reason":"Export missing.","suggestedAction":"Run export."}' };
      }
      if (/replanner/.test(system)) {
        return { content: '{"steps":["Run export"]}' };
      }
      return { content: '{"ok":true,"reason":"Export complete.","suggestedAction":null}' };
    },
  };
  const session = {
    activities: {},
    headlessPlan: null,
    llm,
    _onAgentEvent: (event) => seen.push(event),
  };

  let turnCount = 0;
  const agent = {
    async invoke({ session: turnSession }) {
      turnCount += 1;
      // Once the replanned plan is in place, complete its first step.
      const firstStep = turnSession.headlessPlan?.[0];
      if (firstStep?.status === 'pending') {
        const stepDone = createAgentEvent('plan_step_updated', {
          origin: 'tool',
          payload: { step: 1, status: 'done' },
        });
        dispatchAgentEvent(turnSession, stepDone);
      }
      return { response: turnCount === 1 ? 'Initial done.' : 'Export done.' };
    },
  };

  const outcome = await runRuntimeAgenticWorkflow(agent, session, 'Export deliverable', {
    runId: 'run-replan-eval',
    timeoutMs: 1000,
    maxTurns: 2,
    maxReplans: 1,
  });

  assert.equal(outcome.ok, true);
  assert.equal(turnCount, 2);
  assert.ok(seen.some(({ type }) => type === 'run_replanned'));
  assert.deepEqual(session.agentProjection.replans[0].plan, ['Run export']);
  assert.equal(session.agentProjection.status, 'done');
});
|
|
152
|
+
|
|
153
|
+
// A polled activity that ends in error triggers a replan; the retry pass then succeeds.
test('runRuntimeAgenticWorkflow replans after terminal activity error', async () => {
  const realFetch = globalThis.fetch;
  let pollCount = 0;

  // Status reported by the stubbed poll endpoint: the job has terminally failed.
  const failedActivity = {
    id: 'job-failed',
    source: 'production',
    label: 'Production build',
    status: 'error',
    terminal: true,
    error: 'build failed',
  };
  globalThis.fetch = async () => {
    pollCount += 1;
    return {
      ok: true,
      status: 200,
      headers: { get: () => null },
      text: async () => JSON.stringify({
        result: {
          content: [{ type: 'text', text: JSON.stringify({ _activity: failedActivity }) }],
        },
      }),
    };
  };

  const session = {
    mcp: {
      production: {
        status: 'connected',
        url: 'http://127.0.0.1:3000/mcp/',
        retry: { maxAttempts: 1, backoffMs: 0 },
      },
    },
    activities: {},
    headlessPlan: null,
    llm: {
      async completeWithTools({ system }) {
        if (/replanner/.test(system)) {
          return { content: '{"steps":["Retry build"]}' };
        }
        return { content: '{"ok":true,"reason":"Build complete.","suggestedAction":null}' };
      },
    },
  };

  let turnCount = 0;
  const agent = {
    async invoke({ session: turnSession }) {
      turnCount += 1;
      if (turnCount === 1) {
        // First turn starts a pollable job that is still running.
        const runningActivity = {
          id: 'job-failed',
          source: 'production',
          label: 'Production build',
          status: 'running',
          terminal: false,
          poll: { server: 'production', tool: 'production_job_status', args: { jobId: 'job-failed' }, intervalMs: 0 },
        };
        dispatchAgentEvent(turnSession, createAgentEvent('activity_upserted', {
          payload: { activity: runningActivity },
        }));
        return { response: 'Started build.' };
      }
      const firstStep = turnSession.headlessPlan?.[0];
      if (firstStep?.status === 'pending') {
        dispatchAgentEvent(turnSession, createAgentEvent('plan_step_updated', {
          origin: 'tool',
          payload: { step: 1, status: 'done' },
        }));
      }
      return { response: 'Retry done.' };
    },
  };

  try {
    const outcome = await runRuntimeAgenticWorkflow(agent, session, 'Build workspace', {
      runId: 'run-replan-activity',
      timeoutMs: 1000,
      maxTurns: 3,
      maxReplans: 1,
    });

    assert.equal(outcome.ok, true);
    assert.equal(pollCount, 1);
    assert.equal(turnCount, 2);
    const [firstReplan] = session.agentProjection.replans;
    assert.equal(firstReplan.reason, 'Production build ended with error: build failed');
    assert.deepEqual(firstReplan.plan, ['Retry build']);
    assert.equal(session.agentProjection.status, 'done');
  } finally {
    // Always restore the global, even when an assertion fails.
    globalThis.fetch = realFetch;
  }
});
|
|
242
|
+
|
|
243
|
+
// With a zero replan budget a rejected evaluation ends the run without replanning.
test('runRuntimeAgenticWorkflow stops after replan budget is exhausted', async () => {
  const llm = {
    async completeWithTools() {
      return { content: '{"ok":false,"reason":"Still missing.","suggestedAction":"Try again."}' };
    },
  };
  const session = {
    activities: {},
    headlessPlan: null,
    llm,
  };
  const agent = {
    async invoke() {
      return { response: 'Done.' };
    },
  };

  const outcome = await runRuntimeAgenticWorkflow(agent, session, 'Do task', {
    runId: 'run-replan-limit',
    timeoutMs: 1000,
    maxTurns: 1,
    maxReplans: 0,
  });

  assert.equal(outcome.ok, false);
  assert.equal(outcome.evaluationRejected, true);
  assert.equal(session.agentProjection.status, 'error');
  assert.equal(session.agentProjection.replans.length, 0);
});
|