@ch4p/cli 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-6WIHK7NM.js +767 -0
- package/dist/agent-ANIZYPPF.js +767 -0
- package/dist/agent-HSAJ5EBN.js +761 -0
- package/dist/audit-HLOQBMBT.js +12 -0
- package/dist/audit-UIGPH3FK.js +12 -0
- package/dist/canvas-3VTC4XPV.js +313 -0
- package/dist/canvas-4FMNW6FZ.js +313 -0
- package/dist/canvas-XQHVCY27.js +313 -0
- package/dist/chunk-3XAW4XHG.js +185 -0
- package/dist/chunk-4IRZQCRN.js +1832 -0
- package/dist/chunk-AORLXQHZ.js +304 -0
- package/dist/chunk-BMEBRUYL.js +6995 -0
- package/dist/chunk-IN2I6XRM.js +185 -0
- package/dist/chunk-TB4IZ7F7.js +301 -0
- package/dist/chunk-U7S375OS.js +1841 -0
- package/dist/dist-37TB6EWP.js +25 -0
- package/dist/dist-CIJPZC2B.js +25 -0
- package/dist/doctor-5M3ZB435.js +274 -0
- package/dist/doctor-IQ3MWQSN.js +274 -0
- package/dist/gateway-DV5OL45G.js +2164 -0
- package/dist/gateway-LUCG72YX.js +2129 -0
- package/dist/gateway-O3QNSZKF.js +2123 -0
- package/dist/gateway-OJW7RY3H.js +2094 -0
- package/dist/gateway-PBLJEK5I.js +2165 -0
- package/dist/gateway-PHPRQTZP.js +2165 -0
- package/dist/gateway-YKKJ4DZE.js +2115 -0
- package/dist/gateway-Z65DCM2Q.js +2097 -0
- package/dist/gateway-ZSXTAYPF.js +2157 -0
- package/dist/identity-RHQFPSDS.js +215 -0
- package/dist/identity-VGDDAKBY.js +215 -0
- package/dist/index.js +12 -12
- package/dist/install-6LV7B2SV.js +378 -0
- package/dist/install-NAUPXVCI.js +378 -0
- package/dist/message-TGAPVVI4.js +189 -0
- package/dist/message-YQGIARNE.js +189 -0
- package/dist/onboard-CN56V5P6.js +849 -0
- package/dist/onboard-LJFC6HXD.js +849 -0
- package/dist/pairing-ARWQYATE.js +147 -0
- package/dist/pairing-PXCJMCT2.js +147 -0
- package/dist/skills-4EELFYO2.js +138 -0
- package/dist/skills-KXRTDSF2.js +138 -0
- package/dist/status-2ZJPK3VL.js +94 -0
- package/dist/status-W2OXOSH4.js +94 -0
- package/package.json +24 -24
|
@@ -0,0 +1,1832 @@
|
|
|
1
|
+
import {
|
|
2
|
+
EngineError,
|
|
3
|
+
ToolError,
|
|
4
|
+
abortableSleep,
|
|
5
|
+
backoffDelay
|
|
6
|
+
} from "./chunk-YSCX2QQQ.js";
|
|
7
|
+
|
|
8
|
+
// ../../packages/agent/dist/index.js
|
|
9
|
+
import { Worker } from "worker_threads";
|
|
10
|
+
import { EventEmitter } from "events";
|
|
11
|
+
import { setMaxListeners } from "events";
|
|
12
|
+
import { homedir } from "os";
|
|
13
|
+
var SteeringQueue = class {
  /** Pending messages, kept ordered: highest priority first, oldest first within a priority. */
  queue = [];

  /**
   * Enqueue a message and re-establish the queue order.
   *
   * Ordering is re-computed on every push so that `peek()` and `drain()`
   * never have to sort. A missing `priority` counts as 0; ties are broken
   * by `timestamp` (earlier first).
   */
  push(msg) {
    const byUrgency = (left, right) => {
      const leftPriority = left.priority ?? 0;
      const rightPriority = right.priority ?? 0;
      return leftPriority !== rightPriority
        ? rightPriority - leftPriority
        : left.timestamp.getTime() - right.timestamp.getTime();
    };
    this.queue.push(msg);
    this.queue.sort(byUrgency);
  }

  /** Hand back every pending message (already ordered) and leave the queue empty. */
  drain() {
    const pending = this.queue;
    this.queue = [];
    return pending;
  }

  /** Look at the message that would come out first, without removing it. */
  peek() {
    const [head] = this.queue;
    return head;
  }

  /** True when at least one pending message has type "abort". */
  hasAbort() {
    for (const pending of this.queue) {
      if (pending.type === "abort") return true;
    }
    return false;
  }

  /** True when anything is waiting in the queue. */
  hasMessages() {
    return this.queue.length > 0;
  }

  /** Throw away everything that is pending. */
  clear() {
    this.queue = [];
  }

  /** How many messages are waiting. */
  get length() {
    return this.queue.length;
  }
};
|
|
58
|
+
// Built-in compaction presets, keyed by their own `name`. Each preset is a
// strategy config object that ContextManager accepts as its `strategy` option.
var NAMED_STRATEGIES = Object.fromEntries(
  [
    // Aggressive sliding window: lowest compaction target and keep ratio of the presets.
    {
      name: "sliding_window_3",
      type: "sliding",
      compactionTarget: 0.4,
      keepRatio: 0.2,
      preserveRecentToolPairs: 3,
      preserveTaskDescription: true,
      description: "Aggressive sliding window. Best for long multi-step tasks where older context is less relevant.",
    },
    // Conservative sliding window: retains more history for tasks that look back often.
    {
      name: "sliding_conservative",
      type: "sliding",
      compactionTarget: 0.7,
      keepRatio: 0.5,
      preserveRecentToolPairs: 5,
      preserveTaskDescription: true,
      description: "Conservative sliding window. Best for tasks that reference earlier context frequently.",
    },
    // Summarization tuned for coding sessions (relatively high keep ratio).
    {
      name: "summarize_coding",
      type: "summarize",
      compactionTarget: 0.6,
      keepRatio: 0.4,
      preserveRecentToolPairs: 4,
      preserveTaskDescription: true,
      description: "Summarize old context while keeping recent code-related tool calls. Best for coding tasks.",
    },
    // Plain oldest-first dropping with the task description pinned (no keepRatio).
    {
      name: "drop_oldest_pinned",
      type: "drop_oldest",
      compactionTarget: 0.5,
      preserveRecentToolPairs: 2,
      preserveTaskDescription: true,
      description: "Drop oldest messages but always preserve the original task and recent tool calls.",
    },
  ].map((preset) => [preset.name, preset]),
);
|
|
99
|
+
/**
 * Roughly estimate how many tokens a message occupies, using the
 * "four characters per token" rule of thumb.
 *
 * String content is measured directly. Block content (an iterable of
 * blocks) contributes the lengths of each block's `text`, `toolOutput`
 * and JSON-serialized `toolInput`.
 *
 * Messages with no content (null/undefined, or a non-iterable value)
 * count as 0 tokens instead of throwing, and non-string `text` /
 * `toolOutput` values are measured by their JSON form so the result
 * is never NaN (a NaN estimate would make every budget comparison false).
 *
 * @param {{content?: unknown}} msg Message to measure.
 * @returns {number} Non-negative integer token estimate.
 */
function estimateTokens(msg) {
  const CHARS_PER_TOKEN = 4;
  const { content } = msg;

  if (typeof content === "string") {
    return Math.ceil(content.length / CHARS_PER_TOKEN);
  }
  if (content == null || typeof content[Symbol.iterator] !== "function") {
    return 0;
  }

  // JSON.stringify returns undefined for unserializable values (e.g. functions).
  const jsonLength = (value) => JSON.stringify(value)?.length ?? 0;
  const textLength = (value) => {
    if (!value) return 0;
    return typeof value === "string" ? value.length : jsonLength(value);
  };

  let chars = 0;
  for (const block of content) {
    if (!block) continue;
    chars += textLength(block.text);
    chars += textLength(block.toolOutput);
    // toolInput is always measured in serialized form, strings included.
    if (block.toolInput) chars += jsonLength(block.toolInput);
  }
  return Math.ceil(chars / CHARS_PER_TOKEN);
}
|
|
111
|
+
/**
 * Decide whether a message carries the result of a tool call.
 *
 * A message qualifies when its role is "tool", or when it has a
 * `toolCallId` that is neither undefined nor the empty string.
 */
function isToolResultMessage(msg) {
  if (msg.role === "tool") {
    return true;
  }
  const { toolCallId } = msg;
  return toolCallId !== undefined && toolCallId !== "";
}
|
|
114
|
+
/**
 * Decide whether a message is an assistant turn that requested at least
 * one tool call (a non-empty `toolCalls` array).
 */
function hasToolCalls(msg) {
  if (msg.role !== "assistant") {
    return false;
  }
  const requested = msg.toolCalls;
  return Array.isArray(requested) && requested.length > 0;
}
|
|
117
|
+
/**
 * Holds the conversation for one agent session and keeps its estimated
 * token usage under a budget by compacting older messages.
 *
 * Three compaction strategies exist: "drop_oldest", "summarize" and
 * "sliding". The last two need a `summarizer` callback and fall back to
 * "drop_oldest" when none was supplied.
 */
var ContextManager = class {
  /** Conversation messages, oldest first (the system prompt is stored separately). */
  messages = [];
  /** System prompt message, or null when none has been set. */
  systemPrompt = null;
  /** Running token estimate for system prompt + messages. */
  tokenEstimate = 0;
  maxTokens;
  compactionThreshold;
  strategyType;
  namedStrategy;
  summarizer;

  /**
   * @param {object} [opts]
   * @param {number} [opts.maxTokens=100000] Token budget.
   * @param {number} [opts.compactionThreshold=0.85] Fraction of the budget that triggers compaction.
   * @param {Function} [opts.summarizer] Called with the messages to summarize; its awaited result is embedded in the summary message.
   * @param {string|object} [opts.strategy="sliding"] Strategy type name, or a strategy config object with a `type`.
   */
  constructor(opts = {}) {
    this.maxTokens = opts.maxTokens ?? 1e5;
    this.compactionThreshold = opts.compactionThreshold ?? 0.85;
    this.summarizer = opts.summarizer;
    if (typeof opts.strategy === "object" && opts.strategy !== null) {
      this.namedStrategy = opts.strategy;
      this.strategyType = opts.strategy.type;
    } else {
      this.namedStrategy = null;
      this.strategyType = opts.strategy ?? "sliding";
    }
  }

  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------

  /** Set or replace the system prompt (always returned first by getMessages). */
  setSystemPrompt(prompt) {
    const msg = { role: "system", content: prompt };
    if (this.systemPrompt) {
      this.tokenEstimate -= estimateTokens(this.systemPrompt);
    }
    this.systemPrompt = msg;
    this.tokenEstimate += estimateTokens(msg);
  }

  /** Append a message to the context. Triggers compaction if over threshold. */
  async addMessage(msg) {
    this.messages.push(msg);
    this.tokenEstimate += estimateTokens(msg);
    if (this.tokenEstimate > this.maxTokens * this.compactionThreshold) {
      await this.compact();
    }
  }

  /** Return a new array holding the system prompt (if any) followed by the conversation. */
  getMessages() {
    if (this.systemPrompt) {
      return [this.systemPrompt, ...this.messages];
    }
    return [...this.messages];
  }

  /** Return the current approximate token usage. */
  getTokenEstimate() {
    return this.tokenEstimate;
  }

  /** Return the configured maximum token budget. */
  getMaxTokens() {
    return this.maxTokens;
  }

  /** Return the active strategy name (the named strategy's name, else the type). */
  getStrategyName() {
    return this.namedStrategy?.name ?? this.strategyType;
  }

  /** Return the full named strategy config if one is active, otherwise null. */
  getNamedStrategy() {
    return this.namedStrategy;
  }

  /** Remove all conversation messages (keeps system prompt). */
  clear() {
    this.messages = [];
    this.tokenEstimate = this.systemPrompt ? estimateTokens(this.systemPrompt) : 0;
  }

  // -----------------------------------------------------------------------
  // Compaction
  // -----------------------------------------------------------------------

  /**
   * Compact the context according to the configured strategy type.
   *
   * Invoked automatically when `addMessage` pushes the estimate past the
   * compaction threshold, but can also be called manually. An unrecognized
   * strategy type is a no-op.
   */
  async compact() {
    switch (this.strategyType) {
      case "drop_oldest":
        this.compactDropOldest();
        break;
      case "summarize":
        await this.compactSummarize();
        break;
      case "sliding":
        await this.compactSliding();
        break;
    }
  }

  // -----------------------------------------------------------------------
  // Strategy 1: drop_oldest — remove oldest messages, preserving tool pairs
  // -----------------------------------------------------------------------

  /**
   * Drop messages oldest-first until the estimate reaches the compaction
   * target. Protected messages are kept, an assistant tool-call message is
   * dropped or kept together with its tool results, stray tool results are
   * kept, and the newest message is never dropped on its own.
   *
   * The conversation is walked by its ORIGINAL indices and rebuilt at the
   * end. Removing entries while walking would shift every later message
   * out from under the protected-index set.
   */
  compactDropOldest() {
    const target = this.maxTokens * (this.namedStrategy?.compactionTarget ?? 0.6);
    const protectedIndices = this.getProtectedIndices();
    const source = this.messages;
    const lastIdx = source.length - 1;
    const survivors = [];
    let idx = 0;

    while (this.tokenEstimate > target && idx < lastIdx) {
      const msg = source[idx];
      let spanEnd = idx;
      let droppable = !protectedIndices.has(idx);

      if (droppable && hasToolCalls(msg)) {
        // Treat the call and its results as one unit; any protected member pins the unit.
        spanEnd = this.findToolGroupEnd(idx);
        for (let i = idx; i <= spanEnd; i++) {
          if (protectedIndices.has(i)) {
            droppable = false;
            break;
          }
        }
      } else if (droppable && isToolResultMessage(msg)) {
        // A tool result that is not being removed with its call stays put.
        droppable = false;
      }

      for (let i = idx; i <= spanEnd; i++) {
        if (droppable) {
          this.tokenEstimate -= estimateTokens(source[i]);
        } else {
          survivors.push(source[i]);
        }
      }
      idx = spanEnd + 1;
    }

    if (survivors.length !== idx) {
      this.messages = survivors.concat(source.slice(idx));
    }
  }

  // -----------------------------------------------------------------------
  // Strategy 2: summarize — collapse old messages into one summary message
  // -----------------------------------------------------------------------

  /**
   * Replace the oldest part of the conversation with a single summary
   * message, keeping the most recent `keepRatio` share (at least two
   * messages). Falls back to drop_oldest without a summarizer.
   */
  async compactSummarize() {
    if (!this.summarizer) {
      this.compactDropOldest();
      return;
    }
    const keepRatio = this.namedStrategy?.keepRatio ?? 0.3;
    const keepCount = Math.max(2, Math.floor(this.messages.length * keepRatio));
    const splitIdx = this.alignToToolGroupStart(this.messages.length - keepCount);
    await this.summarizeBefore(splitIdx);
  }

  // -----------------------------------------------------------------------
  // Strategy 3: sliding — sliding window with summary prefix
  // -----------------------------------------------------------------------

  /**
   * Keep a window of the most recent messages that fits the compaction
   * target (stretched, when needed, to contain the configured number of
   * recent tool pairs) and replace everything before it with a summary.
   * Falls back to drop_oldest without a summarizer.
   */
  async compactSliding() {
    if (!this.summarizer) {
      this.compactDropOldest();
      return;
    }
    const compactionTarget = this.namedStrategy?.compactionTarget ?? 0.6;
    const targetTokens = this.maxTokens * compactionTarget;
    const preserveToolPairs = this.namedStrategy?.preserveRecentToolPairs ?? 3;

    let windowStart = this.findWindowStart(targetTokens, preserveToolPairs);
    if (windowStart <= 0 && preserveToolPairs > 0) {
      // The history holds fewer tool pairs than requested, so the pair
      // requirement swallowed the whole conversation. Fall back to a purely
      // budget-based window so compaction still makes progress.
      windowStart = this.findWindowStart(targetTokens, 0);
    }
    await this.summarizeBefore(this.alignToToolGroupStart(windowStart));
  }

  /**
   * Scan backwards from the newest message and return the index at which
   * the kept window starts. The scan stops once adding the next message
   * would exceed `targetTokens` AND at least `minToolPairs` tool pairs are
   * inside the window. The newest message is always part of the window.
   */
  findWindowStart(targetTokens, minToolPairs) {
    const msgs = this.messages;
    let windowTokens = 0;
    let windowStart = msgs.length;
    let toolPairsFound = 0;

    for (let i = msgs.length - 1; i >= 0; i--) {
      const msgTokens = estimateTokens(msgs[i]);
      const windowIsEmpty = windowStart === msgs.length;
      const overBudget = windowTokens + msgTokens > targetTokens;
      if (!windowIsEmpty && overBudget && toolPairsFound >= minToolPairs) break;

      if (isToolResultMessage(msgs[i]) && i > 0 && hasToolCalls(msgs[i - 1])) {
        // Take the result together with the call that produced it.
        windowTokens += msgTokens + estimateTokens(msgs[i - 1]);
        windowStart = i - 1;
        toolPairsFound++;
        i--;
      } else {
        windowTokens += msgTokens;
        windowStart = i;
        if (hasToolCalls(msgs[i])) {
          toolPairsFound++;
        }
      }
    }
    return windowStart;
  }

  /**
   * Move a split index back so the kept tail never begins with tool-result
   * messages whose preceding messages would be summarized away.
   */
  alignToToolGroupStart(idx) {
    let start = idx;
    while (start > 0 && start < this.messages.length && isToolResultMessage(this.messages[start])) {
      start--;
    }
    return start;
  }

  /**
   * Replace `messages[0 .. splitIdx)` with one "[Conversation summary]"
   * system message produced by the summarizer. When the named strategy asks
   * to preserve the task description, the first user message of the
   * summarized part is kept verbatim in front of the summary.
   * Does nothing when there is nothing before `splitIdx`.
   */
  async summarizeBefore(splitIdx) {
    if (splitIdx <= 0) return;
    const toSummarize = this.messages.slice(0, splitIdx);
    const toKeep = this.messages.slice(splitIdx);
    if (toSummarize.length === 0) return;

    const taskDescription = this.namedStrategy?.preserveTaskDescription
      ? toSummarize.find((m) => m.role === "user")
      : undefined;

    const summary = await this.summarizer(toSummarize);
    const summaryMsg = {
      role: "system",
      content: `[Conversation summary]\n${summary}`,
    };
    this.messages = taskDescription
      ? [taskDescription, summaryMsg, ...toKeep]
      : [summaryMsg, ...toKeep];
    this.recalculateTokens();
  }

  // -----------------------------------------------------------------------
  // AWM: Protected message index computation
  // -----------------------------------------------------------------------

  /**
   * Compute the set of message indices that drop_oldest must not remove:
   * the first user message (unless the named strategy sets
   * `preserveTaskDescription` to false), every message whose role is listed
   * in the strategy's `pinnedRoles`, and the most recent
   * `preserveRecentToolPairs` tool-call groups.
   *
   * The indices refer to `this.messages` as it is at call time.
   */
  getProtectedIndices() {
    const protected_ = new Set();
    if (this.namedStrategy?.preserveTaskDescription !== false) {
      const firstUserIdx = this.messages.findIndex((m) => m.role === "user");
      if (firstUserIdx >= 0) {
        protected_.add(firstUserIdx);
      }
    }
    const pinnedRoles = this.namedStrategy?.pinnedRoles;
    if (pinnedRoles) {
      this.messages.forEach((m, i) => {
        if (pinnedRoles.includes(m.role)) protected_.add(i);
      });
    }
    const preserveCount = this.namedStrategy?.preserveRecentToolPairs ?? 0;
    if (preserveCount > 0) {
      let pairsFound = 0;
      for (let i = this.messages.length - 1; i >= 0 && pairsFound < preserveCount; i--) {
        if (!hasToolCalls(this.messages[i])) continue;
        const groupEnd = this.findToolGroupEnd(i);
        for (let j = i; j <= groupEnd; j++) {
          protected_.add(j);
        }
        pairsFound++;
      }
    }
    return protected_;
  }

  // -----------------------------------------------------------------------
  // Internal helpers
  // -----------------------------------------------------------------------

  /**
   * Given the index of an assistant message with tool calls, find the index
   * of the last tool-result message in the run that immediately follows it.
   * Returns `assistantIdx` itself when no tool result follows.
   */
  findToolGroupEnd(assistantIdx) {
    let end = assistantIdx;
    for (let i = assistantIdx + 1; i < this.messages.length; i++) {
      if (!isToolResultMessage(this.messages[i])) break;
      end = i;
    }
    return end;
  }

  /** Recompute tokenEstimate from scratch. */
  recalculateTokens() {
    this.tokenEstimate = this.systemPrompt ? estimateTokens(this.systemPrompt) : 0;
    for (const msg of this.messages) {
      this.tokenEstimate += estimateTokens(msg);
    }
  }
};
|
|
405
|
+
// Source of the fallback worker, run with `new Worker(..., { eval: true })`
// when no `workerScript` option was given. It owns no tool registry, so
// every "execute" request is answered with a failed result.
var DEFAULT_WORKER_SCRIPT = `
const { parentPort } = require('node:worker_threads');

parentPort.on('message', async (msg) => {
  if (msg.type === 'execute') {
    try {
      // In production the worker would look up the tool in a registry.
      // Here we simply return an error indicating the tool is not loaded.
      parentPort.postMessage({
        type: 'result',
        result: {
          success: false,
          output: '',
          error: 'Worker has no tool registry \u2014 provide a workerScript.',
        },
      });
    } catch (err) {
      parentPort.postMessage({
        type: 'error',
        message: err instanceof Error ? err.message : String(err),
      });
    }
  }
});
`;
|
|
430
|
+
/**
 * A bounded pool of worker threads that run tool tasks.
 *
 * Tasks are dispatched to an idle worker, to a freshly spawned worker while
 * the pool is below `maxWorkers`, or queued otherwise. Every task promise is
 * settled exactly once: by a worker reply, a timeout, an abort, a worker
 * crash, or pool shutdown.
 *
 * Events emitted: "worker_stub" (default stub in use), "worker_error"
 * (worker raised an error), "worker_exit" (non-zero exit outside shutdown).
 */
var ToolWorkerPool = class extends EventEmitter {
  /** Managed workers: `{ worker, busy, taskCount, currentTimer?, failCurrentTask? }`. */
  workers = [];
  /** Tasks waiting for a free worker, oldest first. */
  taskQueue = [];
  shuttingDown = false;
  maxWorkers;
  taskTimeoutMs;
  workerScript;
  // Stats
  totalTasks = 0;
  completedTasks = 0;
  failedTasks = 0;
  totalDurationMs = 0;
  stubWarned = false;

  /**
   * @param {object} [opts]
   * @param {number} [opts.maxWorkers=4] Upper bound on concurrently live workers.
   * @param {number} [opts.taskTimeoutMs=60000] Per-task timeout in milliseconds.
   * @param {string|URL} [opts.workerScript] Script passed to `new Worker()`; the built-in stub is used when omitted.
   */
  constructor(opts = {}) {
    super();
    this.maxWorkers = opts.maxWorkers ?? 4;
    this.taskTimeoutMs = opts.taskTimeoutMs ?? 6e4;
    this.workerScript = opts.workerScript;
  }

  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------

  /**
   * Execute a tool task in a worker thread.
   *
   * @param {{tool: string, args: unknown, context: unknown}} task Posted to the worker as an "execute" message.
   * @param {AbortSignal} [signal] Aborting rejects the task, whether queued or running.
   * @param {Function} [onProgress] Called with each progress update the worker reports.
   * @returns {Promise<object>} Resolves with the worker's result (a worker-reported
   *   error resolves as `{ success: false, output: "", error }`). Rejects on
   *   shutdown, abort, timeout, or worker crash.
   */
  execute(task, signal, onProgress = () => {}) {
    if (this.shuttingDown) {
      return Promise.reject(new Error("Worker pool is shutting down"));
    }
    if (signal?.aborted) {
      return Promise.reject(new Error("Task aborted before execution"));
    }
    this.totalTasks++;
    return new Promise((resolve, reject) => {
      const queued = { task, signal, onProgress, resolve, reject };

      const idle = this.getIdleWorker();
      if (idle) {
        this.dispatch(idle, queued);
        return;
      }
      if (this.workers.length < this.maxWorkers) {
        this.dispatch(this.spawnWorker(), queued);
        return;
      }

      // Only a task that actually waits in the queue needs the queue-abort
      // listener; it is detached again as soon as the task leaves the queue.
      if (signal) {
        queued.onQueueAbort = () => {
          const idx = this.taskQueue.indexOf(queued);
          if (idx !== -1) {
            this.taskQueue.splice(idx, 1);
            this.failedTasks++;
            reject(new Error("Task aborted while queued"));
          }
        };
        signal.addEventListener("abort", queued.onQueueAbort, { once: true });
      }
      this.taskQueue.push(queued);
    });
  }

  /**
   * Shut the pool down: reject every queued and in-flight task, then
   * terminate all workers. In-flight work is NOT allowed to finish, so
   * rejecting it keeps callers from waiting on a promise that can no
   * longer settle.
   */
  async shutdown() {
    this.shuttingDown = true;

    const waiting = this.taskQueue;
    this.taskQueue = [];
    for (const queued of waiting) {
      this.detachQueueAbort(queued);
      this.failedTasks++;
      queued.reject(new Error("Worker pool shutting down"));
    }

    const terminations = this.workers.map((mw) => {
      if (mw.currentTimer) clearTimeout(mw.currentTimer);
      mw.failCurrentTask?.(new Error("Worker pool shutting down"));
      return mw.worker.terminate();
    });
    await Promise.allSettled(terminations);
    this.workers = [];
  }

  /** Check whether a real worker script is configured (not the default stub). */
  hasWorkerScript() {
    return this.workerScript !== void 0;
  }

  /** Return current pool statistics. */
  getStats() {
    const activeWorkers = this.workers.filter((w) => w.busy).length;
    return {
      totalTasks: this.totalTasks,
      completedTasks: this.completedTasks,
      failedTasks: this.failedTasks,
      activeTasks: activeWorkers,
      queuedTasks: this.taskQueue.length,
      avgDurationMs: this.completedTasks > 0 ? Math.round(this.totalDurationMs / this.completedTasks) : 0,
      workerCount: this.workers.length,
      idleWorkers: this.workers.length - activeWorkers,
    };
  }

  // -----------------------------------------------------------------------
  // Internal
  // -----------------------------------------------------------------------

  /** Create a worker, wire its lifecycle events, and register it with the pool. */
  spawnWorker() {
    let worker;
    if (this.workerScript) {
      worker = new Worker(this.workerScript);
    } else {
      if (!this.stubWarned) {
        this.stubWarned = true;
        this.emit("worker_stub", "Using default worker stub \u2014 heavyweight tools will fail. Build the worker script or provide workerScript option.");
      }
      worker = new Worker(DEFAULT_WORKER_SCRIPT, { eval: true });
    }
    const managed = {
      worker,
      busy: false,
      taskCount: 0,
    };
    worker.on("error", (err) => {
      this.emit("worker_error", err);
      this.handleWorkerCrash(managed, err);
    });
    worker.on("exit", (code) => {
      if (code !== 0 && !this.shuttingDown) {
        this.emit("worker_exit", code);
        // Also covers workers that die without an "error" event; a no-op
        // when the crash or a deliberate terminate() was already handled.
        this.handleWorkerCrash(managed, new Error(`Worker exited with code ${code}`));
      }
    });
    this.workers.push(managed);
    return managed;
  }

  /** Return the first worker that is not running a task, if any. */
  getIdleWorker() {
    return this.workers.find((w) => !w.busy);
  }

  /**
   * Run `queued` on `managed`. Installs the message handler, the timeout and
   * the abort listener, and guarantees the task promise settles only once
   * and that all three are torn down when it does.
   */
  dispatch(managed, queued) {
    const { task, signal, onProgress, resolve, reject } = queued;
    const startTime = Date.now();
    managed.busy = true;
    managed.taskCount++;
    let settled = false;

    // Tear down everything tied to this task; false if it was already settled.
    const release = () => {
      if (settled) return false;
      settled = true;
      if (managed.currentTimer) clearTimeout(managed.currentTimer);
      managed.currentTimer = void 0;
      managed.failCurrentTask = void 0;
      managed.busy = false;
      managed.worker.removeListener("message", onMessage);
      if (signal) signal.removeEventListener("abort", onAbort);
      return true;
    };

    const onMessage = (msg) => {
      if (msg.type === "progress" && msg.update) {
        onProgress(msg.update);
        return;
      }
      if (msg.type === "result" && msg.result) {
        if (!release()) return;
        this.completedTasks++;
        this.totalDurationMs += Date.now() - startTime;
        resolve(msg.result);
        this.dispatchNext();
        return;
      }
      if (msg.type === "error") {
        if (!release()) return;
        this.failedTasks++;
        resolve({
          success: false,
          output: "",
          error: msg.message ?? "Unknown worker error",
        });
        this.dispatchNext();
      }
    };

    // Timeout and abort: the worker may be stuck, so it is discarded.
    const killWorker = (error) => {
      if (!release()) return;
      this.failedTasks++;
      try {
        managed.worker.postMessage({ type: "abort" });
      } catch {
        // Best effort: the worker may already be gone.
      }
      managed.worker.terminate().catch(() => {});
      this.removeWorker(managed);
      reject(error);
      this.dispatchNext();
    };
    const onAbort = () => killWorker(new Error("Task aborted during execution"));

    // Lets crash handling and shutdown reject the in-flight task.
    managed.failCurrentTask = (error) => {
      if (!release()) return;
      this.failedTasks++;
      reject(error);
    };

    managed.currentTimer = setTimeout(() => {
      killWorker(new Error(`Tool "${task.tool}" timed out after ${this.taskTimeoutMs}ms`));
    }, this.taskTimeoutMs);
    if (signal) {
      signal.addEventListener("abort", onAbort, { once: true });
    }
    managed.worker.on("message", onMessage);

    try {
      managed.worker.postMessage({
        type: "execute",
        tool: task.tool,
        args: task.args,
        context: task.context,
      });
    } catch (err) {
      // e.g. arguments that cannot be cloned: fail the task, keep the worker usable.
      managed.failCurrentTask(err);
    }
  }

  /** Hand queued tasks to workers for as long as there is capacity. */
  dispatchNext() {
    if (this.shuttingDown) return;
    while (this.taskQueue.length > 0) {
      const next = this.taskQueue[0];
      if (next.signal?.aborted) {
        this.taskQueue.shift();
        this.detachQueueAbort(next);
        this.failedTasks++;
        next.reject(new Error("Task aborted while queued"));
        continue;
      }
      // Spawn only when there is a live task to run on the new worker.
      let target = this.getIdleWorker();
      if (!target && this.workers.length < this.maxWorkers) {
        target = this.spawnWorker();
      }
      if (!target) return;
      this.taskQueue.shift();
      this.detachQueueAbort(next);
      this.dispatch(target, next);
    }
  }

  /** Remove the "aborted while queued" listener of a task that left the queue. */
  detachQueueAbort(queued) {
    if (queued.signal && queued.onQueueAbort) {
      queued.signal.removeEventListener("abort", queued.onQueueAbort);
    }
    queued.onQueueAbort = void 0;
  }

  /**
   * React to a worker that died: reject the task it was running (if any),
   * drop the worker from the pool and move on to queued work.
   */
  handleWorkerCrash(managed, err) {
    if (managed.currentTimer) clearTimeout(managed.currentTimer);
    managed.currentTimer = void 0;
    if (managed.failCurrentTask) {
      const reason = err instanceof Error ? err.message : String(err);
      managed.failCurrentTask(new Error(`Worker crashed while running a tool: ${reason}`, { cause: err }));
    }
    this.removeWorker(managed);
    this.dispatchNext();
  }

  /** Remove a worker from the pool's bookkeeping (does not terminate it). */
  removeWorker(managed) {
    const idx = this.workers.indexOf(managed);
    if (idx !== -1) {
      this.workers.splice(idx, 1);
    }
  }
};
|
|
681
|
+
/**
 * A single agent session: its configuration, conversation context, steering
 * queue, lifecycle state and running statistics.
 *
 * Lifecycle states used here: "created", "active", "paused", "completed",
 * "failed".
 */
var Session = class {
  config;
  context;
  steering;
  metadata;
  state;
  /**
   * @param config Session configuration; `sessionId`, `channelId`, `userId`,
   *   `engineId` and (optionally) `systemPrompt` are read here.
   * @param opts Optional `sharedContext` to reuse an existing context, or
   *   `contextOpts` forwarded to a freshly created ContextManager.
   */
  constructor(config, opts = {}) {
    this.config = config;
    this.state = "created";
    this.context = opts.sharedContext ?? new ContextManager(opts.contextOpts);
    this.steering = new SteeringQueue();
    const startedAt = /* @__PURE__ */ new Date();
    this.metadata = {
      id: config.sessionId,
      channelId: config.channelId,
      userId: config.userId,
      engineId: config.engineId,
      startedAt,
      state: this.state,
      loopIterations: 0,
      toolInvocations: 0,
      llmCalls: 0,
      errors: []
    };
    if (config.systemPrompt) {
      this.context.setSystemPrompt(config.systemPrompt);
    }
  }
  // -----------------------------------------------------------------------
  // Accessors
  // -----------------------------------------------------------------------
  /** The session identifier taken from the configuration. */
  getId() {
    return this.config.sessionId;
  }
  /** The configuration object this session was created with. */
  getConfig() {
    return this.config;
  }
  /** The conversation context (shared or owned). */
  getContext() {
    return this.context;
  }
  /** The steering queue for this session. */
  getSteering() {
    return this.steering;
  }
  /** The current lifecycle state. */
  getState() {
    return this.state;
  }
  /** A shallow copy of the metadata with the current state filled in. */
  getMetadata() {
    const snapshot = { ...this.metadata };
    snapshot.state = this.state;
    return snapshot;
  }
  // -----------------------------------------------------------------------
  // Lifecycle transitions
  // -----------------------------------------------------------------------
  /** Move to "active"; allowed only from "created" or "paused". */
  activate() {
    const canActivate = this.state === "created" || this.state === "paused";
    if (!canActivate) {
      throw new Error(`Cannot activate session in state "${this.state}"`);
    }
    this.state = "active";
    this.metadata.state = "active";
  }
  /** Move to "paused"; allowed only from "active". */
  pause() {
    if (this.state !== "active") {
      throw new Error(`Cannot pause session in state "${this.state}"`);
    }
    this.state = "paused";
    this.metadata.state = "paused";
  }
  /** Move from "paused" back to "active". */
  resume() {
    if (this.state !== "paused") {
      throw new Error(`Cannot resume session in state "${this.state}"`);
    }
    this.state = "active";
    this.metadata.state = "active";
  }
  /** Finish successfully; allowed from "active" or "paused". Clears steering. */
  complete() {
    const canComplete = this.state === "active" || this.state === "paused";
    if (!canComplete) {
      throw new Error(`Cannot complete session in state "${this.state}"`);
    }
    this.state = "completed";
    this.metadata.state = "completed";
    this.metadata.endedAt = /* @__PURE__ */ new Date();
    this.steering.clear();
  }
  /** Record the error, mark the session "failed" and clear steering. */
  fail(error) {
    const { errors } = this.metadata;
    errors.push(error);
    // Keep only the 20 most recent errors.
    if (errors.length > 20) {
      errors.shift();
    }
    this.state = "failed";
    this.metadata.state = "failed";
    this.metadata.endedAt = /* @__PURE__ */ new Date();
    this.steering.clear();
  }
  // -----------------------------------------------------------------------
  // Stats tracking
  // -----------------------------------------------------------------------
  /** Count one more loop iteration. */
  recordIteration() {
    this.metadata.loopIterations += 1;
  }
  /** Count one more tool invocation. */
  recordToolInvocation() {
    this.metadata.toolInvocations += 1;
  }
  /** Count one more LLM call. */
  recordLLMCall() {
    this.metadata.llmCalls += 1;
  }
  /** Remember an error without changing state; the list is capped at 20 entries. */
  recordError(error) {
    const { errors } = this.metadata;
    errors.push(error);
    if (errors.length > 20) {
      errors.shift();
    }
  }
  // -----------------------------------------------------------------------
  // Cleanup
  // -----------------------------------------------------------------------
  /** Clear the context and the steering queue. */
  dispose() {
    this.context.clear();
    this.steering.clear();
  }
};
|
|
808
|
+
/**
 * Rewrite a working directory so the user's home directory is not spelled out.
 *
 * - the home directory itself becomes "."
 * - a path below home (joined with "/") becomes "./<rest>"
 * - anything else is returned unchanged
 *
 * @param cwd Working-directory path to rewrite.
 * @returns The rewritten path.
 */
function sanitizeWorkspacePath(cwd) {
  const home = homedir();
  if (cwd === home) {
    return ".";
  }
  const homePrefix = home + "/";
  return cwd.startsWith(homePrefix) ? "./" + cwd.slice(homePrefix.length) : cwd;
}
|
|
816
|
+
/** Upper bound on the number of tool results the agent loop keeps around. */
var MAX_TOOL_RESULTS = 30;
/** Upper bound on the length of a stored tool output / error string. */
var MAX_TOOL_OUTPUT_LEN = 65536;
/** Upper bound on the number of before/after state records kept. */
var MAX_STATE_RECORDS = 20;
/**
 * Fallback security policy used when no policy is supplied: every check
 * passes, nothing requires confirmation and output is returned unmodified.
 */
var PERMISSIVE_POLICY = {
  autonomyLevel: "full",
  validatePath(_path, _op) {
    return { allowed: true };
  },
  validateCommand(_cmd, _args) {
    return { allowed: true };
  },
  requiresConfirmation() {
    return false;
  },
  audit() {
    return [];
  },
  sanitizeOutput(text) {
    return { clean: text, redacted: false };
  },
  validateInput() {
    return { safe: true, threats: [] };
  }
};
|
|
828
|
+
/**
 * Reduce tool objects to plain definitions containing only `name`,
 * `description` and `parameters`.
 *
 * @param tools Array of tool objects.
 * @returns A new array with one definition per tool, in the same order.
 */
function toolDefinitionsFrom(tools) {
  return tools.map(({ name, description, parameters }) => ({
    name,
    description,
    parameters
  }));
}
|
|
835
|
+
/**
 * Runs the engine/tool loop for one session.
 *
 * Each iteration drains steering messages, asks the engine for a run, streams
 * its events to the caller, executes any requested tools and feeds their
 * (sanitized) output back into the session context. The loop ends when the
 * engine produces a final answer, a fatal error occurs, the iteration limit
 * is hit, or the run is aborted.
 */
var AgentLoop = class {
  session;
  engine;
  tools;
  toolDefs;
  observer;
  opts;
  abortController = null;
  currentHandle = null;
  workerPool;
  ownsWorkerPool;
  /** Accumulated state snapshots for verification (AWM). */
  stateRecords = [];
  /** Accumulated tool results for verification (AWM). */
  allToolResults = [];
  /** Cumulative token usage across all iterations. */
  cumulativeTokens = { inputTokens: 0, outputTokens: 0 };
  /**
   * @param session Session whose context, steering queue and stats are used.
   * @param engine Engine exposing `startRun(job, { signal })` and `id`.
   * @param tools Array of tool objects, indexed here by `name`.
   * @param observer Receives session, error, tool and security callbacks.
   * @param opts Optional settings; `maxIterations` defaults to 50,
   *   `maxRetries` to 3 and `enableStateSnapshots` to true. When no
   *   `workerPool` is given, a pool is created and shut down by this loop.
   */
  constructor(session, engine, tools, observer, opts = {}) {
    this.session = session;
    this.engine = engine;
    this.observer = observer;
    this.tools = new Map(tools.map((t) => [t.name, t]));
    this.toolDefs = toolDefinitionsFrom(tools);
    if (opts.workerPool) {
      this.workerPool = opts.workerPool;
      this.ownsWorkerPool = false;
    } else {
      this.workerPool = new ToolWorkerPool();
      this.ownsWorkerPool = true;
    }
    this.opts = {
      maxIterations: opts.maxIterations ?? 50,
      maxRetries: opts.maxRetries ?? 3,
      workerPool: this.workerPool,
      verifier: opts.verifier,
      enableStateSnapshots: opts.enableStateSnapshots ?? true,
      memoryBackend: opts.memoryBackend,
      securityPolicy: opts.securityPolicy,
      toolContextExtensions: opts.toolContextExtensions,
      onBeforeFirstRun: opts.onBeforeFirstRun,
      onAfterComplete: opts.onAfterComplete
    };
  }
  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------
  /** Return the session ID for this agent loop. */
  getSessionId() {
    return this.session.getId();
  }
  /**
   * Run the agent loop, returning an async iterable of AgentEvents.
   * The loop continues until the engine signals completion, the iteration
   * limit is reached, or the run is aborted.
   *
   * A fatal error (non-retryable, or `maxRetries` consecutive failures) marks
   * the session as failed; the session is only completed when the loop ended
   * without such an error. The session is failed *before* the error event is
   * yielded so that a consumer which stops iterating on the first error still
   * leaves the session in a terminal state.
   *
   * @param initialMessage User message that starts the run.
   */
  async *run(initialMessage) {
    this.abortController = new AbortController();
    const signal = this.abortController.signal;
    try {
      setMaxListeners(this.opts.maxIterations + 5, signal);
    } catch {
      // Best effort: a failure to raise the listener limit is ignored.
    }
    this.stateRecords = [];
    this.allToolResults = [];
    this.session.activate();
    this.observer.onSessionStart({
      sessionId: this.session.getId(),
      channelId: this.session.getConfig().channelId,
      userId: this.session.getConfig().userId,
      engineId: this.session.getConfig().engineId,
      startedAt: /* @__PURE__ */ new Date()
    });
    await this.session.getContext().addMessage({
      role: "user",
      content: initialMessage
    });
    if (this.opts.onBeforeFirstRun) {
      try {
        await this.opts.onBeforeFirstRun(this.session.getContext());
      } catch {
        // Hook failures are deliberately ignored so they cannot block the run.
      }
    }
    let iterations = 0;
    let consecutiveErrors = 0;
    let done = false;
    // Set when the loop stopped because of a fatal error (session already failed).
    let failed = false;
    let finalAnswer = "";
    try {
      while (!done && iterations < this.opts.maxIterations) {
        iterations++;
        this.session.recordIteration();
        const steeringResult = this.processSteering();
        if (steeringResult.abort) {
          yield { type: "aborted", reason: steeringResult.abortReason };
          return;
        }
        if (signal.aborted) {
          yield { type: "aborted", reason: "Signal aborted" };
          return;
        }
        const job = {
          sessionId: this.session.getId(),
          messages: this.session.getContext().getMessages(),
          tools: this.toolDefs.length > 0 ? this.toolDefs : void 0,
          systemPrompt: this.session.getConfig().systemPrompt,
          model: this.session.getConfig().model
        };
        let handle;
        try {
          handle = await this.engine.startRun(job, { signal });
          this.currentHandle = handle;
          this.session.recordLLMCall();
        } catch (err) {
          const error = err instanceof Error ? err : new Error(String(err));
          consecutiveErrors++;
          const nonRetryable = error instanceof EngineError && !error.retryable;
          const fatal = nonRetryable || consecutiveErrors >= this.opts.maxRetries;
          // fail() records the error itself, so only one of the two is called.
          if (fatal) {
            this.session.fail(error);
          } else {
            this.session.recordError(error);
          }
          this.observer.onError(error, { phase: "engine_start", iteration: iterations });
          if (fatal) {
            yield { type: "error", error };
            failed = true;
            done = true;
            break;
          }
          await abortableSleep(backoffDelay(consecutiveErrors), signal);
          continue;
        }
        consecutiveErrors = 0;
        let accumulatedText = "";
        const pendingToolCalls = [];
        let completionAnswer;
        let completionUsage;
        let engineErrored = false;
        let lastEngineError;
        try {
          for await (const event of handle.events) {
            if (signal.aborted) {
              await handle.cancel();
              yield { type: "aborted", reason: "Signal aborted" };
              return;
            }
            if (this.session.getSteering().hasAbort()) {
              await handle.cancel();
              const reason = this.drainAbortReason();
              yield { type: "aborted", reason };
              return;
            }
            yield* this.handleEngineEvent(
              event,
              accumulatedText,
              pendingToolCalls
            );
            if (event.type === "text_delta") {
              accumulatedText += event.delta;
            }
            if (event.type === "completed") {
              completionAnswer = event.answer;
              completionUsage = event.usage;
              if (completionUsage) {
                this.cumulativeTokens.inputTokens += completionUsage.inputTokens;
                this.cumulativeTokens.outputTokens += completionUsage.outputTokens;
              }
            }
            if (event.type === "error") {
              engineErrored = true;
              lastEngineError = event.error;
              break;
            }
          }
        } catch (err) {
          const error = err instanceof Error ? err : new Error(String(err));
          consecutiveErrors++;
          const nonRetryable = error instanceof EngineError && !error.retryable;
          const fatal = nonRetryable || consecutiveErrors >= this.opts.maxRetries;
          if (fatal) {
            this.session.fail(error);
          } else {
            this.session.recordError(error);
          }
          this.observer.onError(error, { phase: "engine_stream", iteration: iterations });
          if (fatal) {
            yield { type: "error", error };
            failed = true;
            done = true;
            break;
          }
          await abortableSleep(backoffDelay(consecutiveErrors), signal);
          continue;
        }
        if (engineErrored) {
          consecutiveErrors++;
          const nonRetryable = lastEngineError instanceof EngineError && !lastEngineError.retryable;
          if (nonRetryable || consecutiveErrors >= this.opts.maxRetries) {
            const error = lastEngineError ?? new EngineError("Engine returned error", this.engine.id);
            this.session.fail(error);
            yield { type: "error", error };
            failed = true;
            done = true;
            break;
          }
          await abortableSleep(backoffDelay(consecutiveErrors), signal);
          continue;
        }
        // Final answer with no outstanding tool calls: the run is finished.
        if (completionAnswer !== void 0 && pendingToolCalls.length === 0) {
          await this.session.getContext().addMessage({
            role: "assistant",
            content: completionAnswer
          });
          finalAnswer = completionAnswer;
          yield { type: "complete", answer: completionAnswer, usage: completionUsage };
          done = true;
          break;
        }
        if (pendingToolCalls.length > 0) {
          await this.session.getContext().addMessage({
            role: "assistant",
            content: accumulatedText || "",
            toolCalls: pendingToolCalls
          });
          for (const toolCall of pendingToolCalls) {
            const preToolSteering = this.processSteering();
            if (preToolSteering.abort) {
              yield { type: "aborted", reason: preToolSteering.abortReason };
              return;
            }
            if (signal.aborted) {
              yield { type: "aborted", reason: "Signal aborted" };
              return;
            }
            const validationResult = this.validateToolCall(toolCall);
            if (validationResult !== null) {
              yield {
                type: "tool_validation_error",
                tool: toolCall.name,
                errors: validationResult.errors ?? ["Validation failed"]
              };
              // Report the problem back through the context as a tool message.
              await this.session.getContext().addMessage({
                role: "tool",
                content: `[VALIDATION ERROR] Invalid arguments for tool "${toolCall.name}": ${validationResult.errors?.join(", ") ?? "validation failed"}. Please fix the arguments and try again.`,
                toolCallId: toolCall.id
              });
              this.session.recordToolInvocation();
              continue;
            }
            yield { type: "tool_start", tool: toolCall.name, args: toolCall.args };
            const result = await this.executeTool(toolCall, signal);
            // Store a length-capped copy; the event below carries the uncapped result.
            const cappedResult = { ...result };
            if (cappedResult.output && cappedResult.output.length > MAX_TOOL_OUTPUT_LEN) {
              cappedResult.output = cappedResult.output.slice(0, MAX_TOOL_OUTPUT_LEN) + "\n[truncated]";
            }
            if (cappedResult.error && cappedResult.error.length > MAX_TOOL_OUTPUT_LEN) {
              cappedResult.error = cappedResult.error.slice(0, MAX_TOOL_OUTPUT_LEN) + "\n[truncated]";
            }
            this.allToolResults.push(cappedResult);
            if (this.allToolResults.length > MAX_TOOL_RESULTS) {
              this.allToolResults.shift();
            }
            yield { type: "tool_end", tool: toolCall.name, result };
            const rawContent = result.output || result.error || "";
            const policy = this.opts.securityPolicy ?? PERMISSIVE_POLICY;
            const sanitized = policy.sanitizeOutput(rawContent);
            if (sanitized.redacted) {
              this.observer.onSecurityEvent({
                type: "secret_redacted",
                details: {
                  source: "tool_output",
                  tool: toolCall.name,
                  patterns: sanitized.redactedPatterns
                },
                timestamp: /* @__PURE__ */ new Date()
              });
            }
            await this.session.getContext().addMessage({
              role: "tool",
              content: sanitized.clean,
              toolCallId: toolCall.id
            });
            this.session.recordToolInvocation();
          }
          continue;
        }
        // No explicit completion and no tool calls: streamed text is the answer.
        if (accumulatedText) {
          await this.session.getContext().addMessage({
            role: "assistant",
            content: accumulatedText
          });
          finalAnswer = accumulatedText;
          yield { type: "complete", answer: accumulatedText, usage: completionUsage };
          done = true;
        }
      }
      if (failed) {
        // The session was already marked failed; it must not be completed.
        return;
      }
      if (!done) {
        const error = new Error(`Agent loop exceeded maximum iterations (${this.opts.maxIterations})`);
        this.session.fail(error);
        yield { type: "error", error };
        return;
      }
      if (this.opts.verifier && finalAnswer) {
        try {
          const verificationResult = await this.opts.verifier.verify({
            taskDescription: initialMessage,
            finalAnswer,
            messages: this.session.getContext().getMessages(),
            toolResults: this.allToolResults,
            stateSnapshots: this.stateRecords
          });
          yield { type: "verification", result: verificationResult };
          if (verificationResult.outcome === "partial" || verificationResult.outcome === "failure") {
            const suggestions = verificationResult.suggestions?.join("\n- ") ?? "No specific suggestions.";
            const feedback = `[VERIFICATION ${verificationResult.outcome.toUpperCase()}] ${verificationResult.reasoning}\nSuggestions:\n- ${suggestions}`;
            this.observer.onError(
              new Error(`Task verification: ${verificationResult.outcome}`),
              {
                phase: "verification",
                confidence: verificationResult.confidence,
                issues: verificationResult.issues?.length ?? 0
              }
            );
            await this.session.getContext().addMessage({
              role: "system",
              content: feedback
            });
          }
        } catch (err) {
          // A failing verifier is reported but does not fail the session.
          this.observer.onError(
            err instanceof Error ? err : new Error(String(err)),
            { phase: "verification" }
          );
        }
      }
      this.session.complete();
      if (this.opts.onAfterComplete && finalAnswer) {
        try {
          await this.opts.onAfterComplete(this.session.getContext(), finalAnswer);
        } catch {
          // Hook failures are deliberately ignored.
        }
      }
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      this.session.fail(error);
      yield { type: "error", error };
    } finally {
      this.currentHandle = null;
      this.observer.onSessionEnd(
        {
          sessionId: this.session.getId(),
          channelId: this.session.getConfig().channelId,
          userId: this.session.getConfig().userId,
          engineId: this.session.getConfig().engineId,
          startedAt: this.session.getMetadata().startedAt
        },
        {
          duration: Date.now() - this.session.getMetadata().startedAt.getTime(),
          toolInvocations: this.session.getMetadata().toolInvocations,
          llmCalls: this.session.getMetadata().llmCalls,
          tokensUsed: {
            inputTokens: this.cumulativeTokens.inputTokens,
            outputTokens: this.cumulativeTokens.outputTokens
          },
          errors: this.session.getMetadata().errors.length
        }
      );
      // Only shut down a pool this loop created itself.
      if (this.ownsWorkerPool) {
        await this.workerPool.shutdown();
      }
      await this.observer.flush?.();
    }
  }
  /** Abort the running loop with a reason. */
  abort(reason) {
    this.session.getSteering().push({
      type: "abort",
      content: reason,
      priority: 100,
      // highest
      timestamp: /* @__PURE__ */ new Date()
    });
    this.abortController?.abort(reason);
  }
  /** Push a live steering message into the session's queue. */
  steer(message) {
    this.session.getSteering().push(message);
  }
  /**
   * Forward a raw string to the engine's stdin.
   * Used to respond to permission prompts from SubprocessEngine (e.g. claude-cli).
   */
  steerEngine(message) {
    this.currentHandle?.steer(message);
  }
  /** Get accumulated state records for external inspection. */
  getStateRecords() {
    return this.stateRecords;
  }
  /** Get accumulated tool results for external inspection. */
  getToolResults() {
    return this.allToolResults;
  }
  // -----------------------------------------------------------------------
  // Engine event handling
  // -----------------------------------------------------------------------
  /**
   * Translate one engine event into zero or more agent events.
   *
   * `tool_start` events are not yielded; they are appended to
   * `pendingToolCalls` for the caller to execute. `completed` and `started`
   * produce nothing here.
   *
   * @param event Engine event to translate.
   * @param accumulatedText Text streamed so far, excluding this event.
   * @param pendingToolCalls Array mutated in place with requested tool calls.
   */
  *handleEngineEvent(event, accumulatedText, pendingToolCalls) {
    switch (event.type) {
      case "text_delta":
        yield {
          type: "text",
          delta: event.delta,
          partial: accumulatedText + event.delta
        };
        break;
      case "tool_start":
        pendingToolCalls.push({
          id: event.id,
          name: event.tool,
          args: event.args
        });
        break;
      case "tool_progress":
        yield { type: "tool_progress", tool: "", update: event.update };
        break;
      case "tool_end":
        yield { type: "tool_end", tool: "", result: event.result };
        break;
      case "error":
        yield { type: "error", error: event.error };
        break;
      case "completed":
        break;
      case "started":
        break;
    }
  }
  // -----------------------------------------------------------------------
  // AWM: Mandatory step-level tool call validation
  // -----------------------------------------------------------------------
  /**
   * Validate a tool call's arguments before execution.
   * Returns null if validation passes, or a ValidationResult with errors.
   *
   * This is a mandatory step — if a tool does not implement validate(),
   * we perform basic structural checks (args must be an object or undefined).
   */
  validateToolCall(toolCall) {
    const tool = this.tools.get(toolCall.name);
    if (!tool) {
      return { errors: [`Tool "${toolCall.name}" not found.`] };
    }
    if (tool.validate) {
      const result = tool.validate(toolCall.args);
      if (!result.valid) {
        return { errors: result.errors ?? ["Validation failed."] };
      }
      return null;
    }
    if (toolCall.args !== void 0 && toolCall.args !== null) {
      if (typeof toolCall.args !== "object" || Array.isArray(toolCall.args)) {
        return { errors: ["Arguments must be an object."] };
      }
    }
    return null;
  }
  // -----------------------------------------------------------------------
  // Tool execution (with AWM state snapshots)
  // -----------------------------------------------------------------------
  /**
   * Execute one tool call and return its result object.
   *
   * Never throws for tool failures: an unknown tool, an exception raised by
   * the tool, or a tool that returns a non-object all produce a
   * `{ success: false, output: "", error }` result. When state snapshots are
   * enabled and the tool provides `getStateSnapshot`, a before/after record
   * is appended to `stateRecords` (capped at MAX_STATE_RECORDS).
   *
   * @param toolCall `{ id, name, args }` as collected from the engine.
   * @param signal Abort signal handed to the tool / worker pool.
   */
  async executeTool(toolCall, signal) {
    const tool = this.tools.get(toolCall.name);
    if (!tool) {
      const error = new ToolError(`Unknown tool: ${toolCall.name}`, toolCall.name);
      this.observer.onError(error, { tool: toolCall.name });
      return {
        success: false,
        output: "",
        error: `Tool "${toolCall.name}" not found`
      };
    }
    const startTime = Date.now();
    const rawCwd = this.session.getConfig().cwd ?? process.cwd();
    const toolContext = {
      sessionId: this.session.getId(),
      cwd: sanitizeWorkspacePath(rawCwd),
      securityPolicy: this.opts.securityPolicy ?? PERMISSIVE_POLICY,
      abortSignal: signal,
      // Progress updates are currently discarded.
      onProgress: (_update) => {
      },
      // Inject memory backend so memory_store / memory_recall tools can access it.
      ...this.opts.memoryBackend ? { memoryBackend: this.opts.memoryBackend } : {},
      // Spread any domain-specific extensions (e.g. canvasState for canvas tool).
      ...this.opts.toolContextExtensions ?? {}
    };
    let beforeSnapshot;
    if (this.opts.enableStateSnapshots && tool.getStateSnapshot) {
      try {
        beforeSnapshot = await tool.getStateSnapshot(toolCall.args, toolContext);
      } catch {
        // Snapshots are best effort; the tool still runs without one.
      }
    }
    let result;
    try {
      if (tool.weight === "heavyweight" && this.opts.workerPool?.hasWorkerScript?.()) {
        result = await this.workerPool.execute(
          {
            tool: toolCall.name,
            args: toolCall.args,
            context: {
              sessionId: this.session.getId(),
              cwd: sanitizeWorkspacePath(rawCwd)
            }
          },
          signal,
          (_update) => {
          }
        );
      } else {
        result = await tool.execute(toolCall.args, toolContext);
      }
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      result = {
        success: false,
        output: "",
        error: error.message
      };
    }
    // A tool that resolves with nothing (or a primitive) would otherwise crash
    // the property accesses below and take the whole run down with it.
    if (result === null || typeof result !== "object") {
      result = {
        success: false,
        output: "",
        error: `Tool "${toolCall.name}" returned an invalid result`
      };
    }
    let afterSnapshot;
    if (this.opts.enableStateSnapshots && tool.getStateSnapshot) {
      try {
        afterSnapshot = await tool.getStateSnapshot(toolCall.args, toolContext);
        result.stateSnapshot = afterSnapshot;
      } catch {
        // Snapshots are best effort.
      }
    }
    if (beforeSnapshot || afterSnapshot) {
      this.stateRecords.push({
        tool: toolCall.name,
        args: toolCall.args,
        before: beforeSnapshot,
        after: afterSnapshot
      });
      if (this.stateRecords.length > MAX_STATE_RECORDS) {
        this.stateRecords.shift();
      }
    }
    const duration = Date.now() - startTime;
    this.observer.onToolInvocation({
      sessionId: this.session.getId(),
      tool: toolCall.name,
      args: toolCall.args,
      result,
      duration,
      error: result.success ? void 0 : new Error(result.error ?? "Tool failed")
    });
    return result;
  }
  // -----------------------------------------------------------------------
  // Steering
  // -----------------------------------------------------------------------
  /**
   * Drain the steering queue and process all messages. Returns an object
   * indicating whether an abort was requested.
   *
   * "inject" and "priority" messages are added to the context as user
   * messages without waiting for the add to finish; a failed add is recorded
   * on the session. "context_update" replaces the system prompt.
   */
  processSteering() {
    const steering = this.session.getSteering();
    if (!steering.hasMessages()) {
      return { abort: false };
    }
    const messages = steering.drain();
    let abort = false;
    let abortReason;
    for (const msg of messages) {
      switch (msg.type) {
        case "abort":
          abort = true;
          abortReason = msg.content ?? "Abort requested";
          break;
        case "inject": {
          if (msg.content) {
            const injectMsg = {
              role: "user",
              content: msg.content
            };
            this.session.getContext().addMessage(injectMsg).catch((err) => {
              this.session.recordError(
                err instanceof Error ? err : new Error(String(err))
              );
            });
          }
          break;
        }
        case "priority":
          if (msg.content) {
            const priorityMsg = {
              role: "user",
              content: `[PRIORITY] ${msg.content}`
            };
            this.session.getContext().addMessage(priorityMsg).catch((err) => {
              this.session.recordError(
                err instanceof Error ? err : new Error(String(err))
              );
            });
          }
          break;
        case "context_update":
          if (msg.content) {
            this.session.getContext().setSystemPrompt(msg.content);
          }
          break;
      }
    }
    return { abort, abortReason };
  }
  /**
   * Extract the abort reason from a pending abort message.
   * Drains the whole steering queue; non-abort messages are dropped.
   */
  drainAbortReason() {
    const messages = this.session.getSteering().drain();
    const abortMsg = messages.find((m) => m.type === "abort");
    return abortMsg?.content ?? "Abort requested";
  }
};
|
|
1445
|
+
/**
 * Build the built-in verification rules.
 *
 * Each rule is `{ id, description, check, severity? }` where `check(ctx)`
 * returns `null` when the rule passes or a human-readable message when it
 * does not. Rules without an explicit `severity` carry none on the object.
 *
 * The checks tolerate a missing or non-string `finalAnswer` /
 * `taskDescription`, missing `toolResults` / `stateSnapshots`, and snapshots
 * without a `state` object, instead of throwing.
 *
 * @param opts `{ minAnswerLength, maxToolErrorRatio }` thresholds.
 * @returns Array of rule objects.
 */
function builtinRules(opts) {
  // Normalize possibly-missing text so the rules below never dereference undefined.
  const textOf = (value) => typeof value === "string" ? value : "";
  return [
    {
      id: "non-empty-answer",
      description: "Final answer must not be empty",
      check: (ctx) => {
        const answer = textOf(ctx.finalAnswer);
        if (!answer || answer.trim().length < opts.minAnswerLength) {
          return `Final answer is empty or too short (minimum ${opts.minAnswerLength} characters)`;
        }
        return null;
      }
    },
    {
      id: "tool-success-ratio",
      description: "Tool error ratio must be below threshold",
      check: (ctx) => {
        const toolResults = ctx.toolResults ?? [];
        if (toolResults.length === 0) return null;
        const errors = toolResults.filter((r) => !r.success).length;
        const ratio = errors / toolResults.length;
        if (ratio > opts.maxToolErrorRatio) {
          return `${errors}/${toolResults.length} tool calls failed (${(ratio * 100).toFixed(0)}% error rate, max allowed ${(opts.maxToolErrorRatio * 100).toFixed(0)}%)`;
        }
        return null;
      }
    },
    {
      id: "no-error-only-answer",
      description: "Final answer should not consist solely of an error message",
      check: (ctx) => {
        const lower = textOf(ctx.finalAnswer).toLowerCase().trim();
        if (lower.startsWith("error:") || lower.startsWith("i encountered an error")) {
          return "Final answer appears to be an error message rather than a real response";
        }
        return null;
      },
      severity: "warning"
    },
    {
      id: "task-reference",
      description: "Final answer should reference the task",
      check: (ctx) => {
        // Only words longer than four characters count as key terms.
        const taskWords = textOf(ctx.taskDescription).toLowerCase().split(/\W+/).filter((w) => w.length > 4);
        if (taskWords.length === 0) return null;
        const answerLower = textOf(ctx.finalAnswer).toLowerCase();
        if (!taskWords.some((w) => answerLower.includes(w))) {
          return "Final answer does not appear to reference any key terms from the original task";
        }
        return null;
      },
      severity: "warning"
    },
    {
      id: "state-consistency",
      description: "State snapshots should show changes for write operations",
      check: (ctx) => {
        for (const record of ctx.stateSnapshots ?? []) {
          if (!record.before || !record.after) continue;
          const beforeState = record.before.state ?? {};
          const afterState = record.after.state ?? {};
          const beforeKeys = Object.keys(beforeState);
          const afterKeys = Object.keys(afterState);
          if (beforeKeys.length === 0 || afterKeys.length === 0) continue;
          const knownKeys = new Set(beforeKeys);
          const identical = beforeKeys.every(
            (k) => JSON.stringify(beforeState[k]) === JSON.stringify(afterState[k])
          );
          if (identical && afterKeys.every((k) => knownKeys.has(k))) {
            return `Tool "${record.tool}" produced identical before/after state \u2014 may not have executed correctly`;
          }
        }
        return null;
      },
      severity: "info"
    }
  ];
}
|
|
1521
|
+
/**
 * Rule-based verifier: runs a list of synchronous rules against a
 * verification context and turns the findings into a format check and an
 * overall verdict.
 *
 * A rule is an object `{ id, check(context), severity? }`. `check` returns a
 * message string when the rule is violated and `null` (or `undefined`) when it
 * is satisfied. A rule without `severity` is treated as `"error"`.
 */
var FormatVerifier = class {
  id = "format-verifier";
  name = "Format Verifier";
  rules;

  /**
   * @param {object} [opts]
   * @param {number} [opts.minAnswerLength=1] Forwarded to the built-in rules.
   * @param {number} [opts.maxToolErrorRatio=0.5] Forwarded to the built-in rules.
   * @param {boolean} [opts.skipBuiltinRules] When truthy, only custom rules run.
   * @param {Array<object>} [opts.customRules] Extra rules appended after the built-ins.
   */
  constructor(opts = {}) {
    const resolvedOpts = {
      minAnswerLength: opts.minAnswerLength ?? 1,
      maxToolErrorRatio: opts.maxToolErrorRatio ?? 0.5
    };
    this.rules = [
      ...(opts.skipBuiltinRules ? [] : builtinRules(resolvedOpts)),
      ...(opts.customRules ?? [])
    ];
  }

  /**
   * Runs every rule exactly once and collects the violations.
   *
   * @param {object} context Verification context handed to each rule.
   * @returns {Array<{severity: string, message: string}>} One entry per violated rule.
   */
  #collectIssues(context) {
    const issues = [];
    for (const rule of this.rules) {
      const result = rule.check(context);
      // `== null` also accepts `undefined`, so a rule that simply falls off
      // the end of its check is a pass rather than an "[id] undefined" issue.
      if (result == null) continue;
      issues.push({
        severity: rule.severity ?? "error",
        message: `[${rule.id}] ${result}`
      });
    }
    return issues;
  }

  /**
   * Builds the format-check result from collected issues; only
   * error-severity issues make the check fail.
   *
   * @param {Array<{severity: string, message: string}>} issues
   * @returns {{passed: boolean, errors: string[] | undefined}}
   */
  #toFormatCheck(issues) {
    const errors = issues.filter((issue) => issue.severity === "error").map((issue) => issue.message);
    return {
      passed: errors.length === 0,
      errors: errors.length > 0 ? errors : void 0
    };
  }

  /**
   * Checks the context against all rules, reporting error-severity violations.
   *
   * @param {object} context Verification context handed to each rule.
   * @returns {Promise<{passed: boolean, errors: string[] | undefined}>}
   */
  async checkFormat(context) {
    return this.#toFormatCheck(this.#collectIssues(context));
  }

  /**
   * Produces the full verdict for a context.
   *
   * Outcome is "failure" when any error fired, "partial" when only warnings
   * fired, and "success" otherwise. Confidence drops by 0.3 per error
   * (floor 0) or by 0.15 per warning (floor 0.5).
   *
   * @param {object} context Verification context handed to each rule.
   * @returns {Promise<object>} `{ outcome, confidence, reasoning, issues, suggestions, formatCheck }`.
   */
  async verify(context) {
    // Rules are evaluated a single time; the format check and the issue list
    // are both derived from that one pass.
    const issues = this.#collectIssues(context);
    const formatCheck = this.#toFormatCheck(issues);
    const errorCount = issues.filter((i) => i.severity === "error").length;
    const warningCount = issues.filter((i) => i.severity === "warning").length;

    let outcome;
    let confidence;
    if (errorCount > 0) {
      outcome = "failure";
      confidence = Math.max(0, 1 - errorCount * 0.3);
    } else if (warningCount > 0) {
      outcome = "partial";
      confidence = Math.max(0.5, 1 - warningCount * 0.15);
    } else {
      outcome = "success";
      confidence = 1;
    }

    const suggestions = issues
      .filter((i) => i.severity === "error" || i.severity === "warning")
      .map((i) => `Fix: ${i.message}`);

    // Warnings must be reflected here too: a "partial" outcome should never
    // be explained as "all checks passed".
    const isClean = errorCount === 0 && warningCount === 0;
    const reasoning = isClean
      ? `All ${this.rules.length} format checks passed.`
      : `${errorCount} error(s) and ${warningCount} warning(s) found across ${this.rules.length} checks.`;

    return {
      outcome,
      confidence,
      reasoning,
      issues: issues.length > 0 ? issues : void 0,
      suggestions: suggestions.length > 0 ? suggestions : void 0,
      formatCheck
    };
  }
};
|
|
1585
|
+
/**
 * System prompt sent to the judge model. It describes the inputs the judge
 * receives, the evaluation criteria, the exact JSON reply format, and the
 * score bands (0-30 failure, 31-70 partial, 71-100 success).
 */
var JUDGE_SYSTEM_PROMPT = [
  `You are a task verification judge. Your job is to assess whether an AI agent correctly completed a user's task.`,
  ``,
  `You will receive:`,
  `1. The original task the user requested`,
  `2. The agent's final answer`,
  `3. A summary of tool calls the agent made`,
  ``,
  `Evaluate the following criteria:`,
  `- **Completeness**: Did the agent fully address the task?`,
  `- **Correctness**: Is the agent's answer accurate and appropriate?`,
  `- **Quality**: Is the answer well-structured and useful?`,
  ``,
  `Respond in EXACTLY this JSON format (no markdown, no code fences):`,
  `{"score": <number 0-100>, "passed": <boolean>, "reasoning": "<brief explanation>", "issues": [{"severity": "<error|warning|info>", "message": "<issue description>"}]}`,
  ``,
  `Score guide: 0-30 = failure, 31-70 = partial, 71-100 = success.`,
  `Set "passed" to true only if score >= 71.`
].join("\n");
|
|
1602
|
+
/**
 * Builds the user message for the judge model from a verification context.
 *
 * Sections, in order: the original task, the agent's final answer, a numbered
 * summary of at most `maxToolResults` tool results (outputs longer than 200
 * characters are cut and suffixed with "..."), and up to five state
 * snapshots with their serialized before/after state cut to 100 characters.
 *
 * @param {object} context Reads `taskDescription`, `finalAnswer`,
 *   `toolResults` (array) and `stateSnapshots` (array).
 * @param {number} maxToolResults Maximum number of tool results to list.
 * @returns {string} The assembled prompt text.
 */
function buildJudgePrompt(context, maxToolResults) {
  // Serializes one side of a snapshot. A missing snapshot, a snapshot without
  // `state`, or a state JSON cannot represent must not abort prompt
  // construction, so each case degrades to a placeholder instead of throwing.
  const describeState = (snapshot) => {
    if (!snapshot) return "(none)";
    try {
      // JSON.stringify returns undefined (not a string) for an undefined state.
      return JSON.stringify(snapshot.state) ?? "(none)";
    } catch {
      return "(unserializable)";
    }
  };

  const parts = [];
  parts.push(`## Original Task
${context.taskDescription}`);
  parts.push(`
## Agent's Final Answer
${context.finalAnswer}`);

  if (context.toolResults.length > 0) {
    const results = context.toolResults.slice(0, maxToolResults);
    const toolSummary = results.map((r, i) => {
      const status = r.success ? "\u2713" : "\u2717";
      let output = "(no output)";
      if (r.output) {
        output = r.output.length > 200 ? r.output.slice(0, 200) + "..." : r.output;
      }
      const error = r.error ? ` | Error: ${r.error}` : "";
      return `${i + 1}. ${status} ${output}${error}`;
    });
    parts.push(`
## Tool Call Results (${context.toolResults.length} total)
${toolSummary.join("\n")}`);
    if (context.toolResults.length > maxToolResults) {
      parts.push(`
(${context.toolResults.length - maxToolResults} additional tool results omitted)`);
    }
  }

  if (context.stateSnapshots.length > 0) {
    const diffs = context.stateSnapshots
      .filter((s) => s.before || s.after)
      .slice(0, 5)
      .map((s) => {
        const before = describeState(s.before);
        const after = describeState(s.after);
        return `- ${s.tool}: before=${before.slice(0, 100)}, after=${after.slice(0, 100)}`;
      });
    if (diffs.length > 0) {
      parts.push(`
## State Changes
${diffs.join("\n")}`);
    }
  }

  return parts.join("\n");
}
|
|
1639
|
+
/**
 * Parses the judge model's reply into a normalized verdict.
 *
 * Tries, in order: the contents of a Markdown code fence (or the whole
 * trimmed text when there is no fence), then the span from the first "{" to
 * the last "}" so a JSON object wrapped in prose is still understood. If no
 * JSON object can be parsed, falls back to scraping a "score: N" number from
 * the raw text and uses 50 when none is found.
 *
 * @param {string} text Raw text returned by the judge model.
 * @returns {{score: number, passed: boolean, reasoning: string, issues?: Array<object>}}
 *   `score` is always within 0-100. When the reply has no boolean `passed`,
 *   it is derived from the normalized score (>= 71).
 */
function parseJudgeResponse(text) {
  const clampScore = (value) => Math.max(0, Math.min(100, value));

  const trimmed = text.trim();
  const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
  const candidates = [fenceMatch ? fenceMatch[1].trim() : trimmed];
  const firstBrace = trimmed.indexOf("{");
  const lastBrace = trimmed.lastIndexOf("}");
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    candidates.push(trimmed.slice(firstBrace, lastBrace + 1));
  }

  for (const candidate of candidates) {
    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not valid JSON; deliberately ignored so the next candidate (and
      // finally the regex fallback) gets a chance.
      continue;
    }
    // Only a JSON object can carry a verdict; null, numbers, strings and
    // arrays are treated like unparseable text.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      continue;
    }
    const score = clampScore(typeof parsed.score === "number" ? parsed.score : 0);
    return {
      score,
      // Derive from the normalized score so `passed` and `score` cannot disagree.
      passed: typeof parsed.passed === "boolean" ? parsed.passed : score >= 71,
      reasoning: typeof parsed.reasoning === "string" ? parsed.reasoning : "No reasoning provided.",
      issues: Array.isArray(parsed.issues) ? parsed.issues : void 0
    };
  }

  const scoreMatch = text.match(/score["\s:]+(\d+)/i);
  // Clamp here as well: a scraped "score: 250" must not leave the 0-100 range.
  const score = scoreMatch ? clampScore(Number.parseInt(scoreMatch[1], 10)) : 50;
  return {
    score,
    passed: score >= 71,
    reasoning: `Failed to parse structured judge response. Raw text: ${text.slice(0, 200)}`
  };
}
|
|
1663
|
+
/**
 * Verifier that combines the rule-based format check with a semantic
 * assessment obtained by asking a model (through `provider.complete`) to judge
 * the agent's answer.
 */
var LLMVerifier = class {
  id = "llm-verifier";
  name = "LLM Verifier";
  provider;
  model;
  formatVerifier;
  maxJudgeTokens;
  temperature;
  skipSemanticOnFormatFailure;
  maxToolResultsInPrompt;

  /**
   * @param {object} opts
   * @param {object} opts.provider Object exposing `complete(model, messages, options)`.
   * @param {*} opts.model Model identifier passed through to the provider.
   * @param {object} [opts.formatOpts] Options for the internal FormatVerifier.
   * @param {number} [opts.maxJudgeTokens=1024] Token limit for the judge reply.
   * @param {number} [opts.temperature=0] Sampling temperature for the judge.
   * @param {boolean} [opts.skipSemanticOnFormatFailure=true] Skip the judge call when the format check fails.
   * @param {number} [opts.maxToolResultsInPrompt=20] Tool results listed in the judge prompt.
   */
  constructor(opts) {
    this.provider = opts.provider;
    this.model = opts.model;
    this.formatVerifier = new FormatVerifier(opts.formatOpts);
    this.maxJudgeTokens = opts.maxJudgeTokens ?? 1024;
    this.temperature = opts.temperature ?? 0;
    this.skipSemanticOnFormatFailure = opts.skipSemanticOnFormatFailure ?? true;
    this.maxToolResultsInPrompt = opts.maxToolResultsInPrompt ?? 20;
  }

  /**
   * Delegates to the internal format verifier.
   *
   * @param {object} context Verification context.
   * @returns {Promise<object>} The format verifier's check result.
   */
  async checkFormat(context) {
    return this.formatVerifier.checkFormat(context);
  }

  /**
   * Asks the judge model to assess the context.
   *
   * @param {object} context Verification context.
   * @returns {Promise<{passed: boolean, score: number, reasoning: string}>}
   *   `score` is the judge's 0-100 score divided by 100.
   */
  async checkSemantic(context) {
    const completion = await this.provider.complete(
      this.model,
      [{ role: "user", content: buildJudgePrompt(context, this.maxToolResultsInPrompt) }],
      {
        systemPrompt: JUDGE_SYSTEM_PROMPT,
        maxTokens: this.maxJudgeTokens,
        temperature: this.temperature
      }
    );
    const { content } = completion.message;
    // Non-string content is parsed as an empty reply.
    const verdict = parseJudgeResponse(typeof content === "string" ? content : "");
    return {
      passed: verdict.passed,
      // Normalize to 0–1.
      score: verdict.score / 100,
      reasoning: verdict.reasoning
    };
  }

  /**
   * Runs the format check and, unless skipped, the semantic check, then
   * merges both into one verdict.
   *
   * @param {object} context Verification context.
   * @returns {Promise<object>} `{ outcome, confidence, reasoning, issues, suggestions, formatCheck, semanticCheck }`.
   */
  async verify(context) {
    const formatCheck = await this.checkFormat(context);

    let semanticCheck;
    const skipSemantic = this.skipSemanticOnFormatFailure && !formatCheck.passed;
    if (!skipSemantic) {
      try {
        semanticCheck = await this.checkSemantic(context);
      } catch (err) {
        // A failing judge call is reported as a failed semantic check with score 0.
        const detail = err instanceof Error ? err.message : String(err);
        semanticCheck = {
          passed: false,
          score: 0,
          reasoning: `Semantic check failed: ${detail}`
        };
      }
    }

    const semanticFailed = Boolean(semanticCheck && !semanticCheck.passed);

    const issues = [];
    if (!formatCheck.passed && formatCheck.errors) {
      issues.push(...Array.from(formatCheck.errors, (message) => ({ severity: "error", message })));
    }
    if (semanticFailed) {
      issues.push({
        severity: "error",
        message: `Semantic check: ${semanticCheck.reasoning}`
      });
    }

    let outcome;
    let confidence;
    if (!formatCheck.passed) {
      // A format failure decides the verdict regardless of the semantic score.
      outcome = "failure";
      confidence = 0.2;
    } else if (semanticCheck) {
      const { score } = semanticCheck;
      confidence = score;
      if (score >= 0.71) {
        outcome = "success";
      } else if (score >= 0.31) {
        outcome = "partial";
      } else {
        outcome = "failure";
      }
    } else {
      // Only reachable with a passing format check and no semantic check.
      outcome = "success";
      confidence = 0.7;
    }

    const suggestions = [];
    if (!formatCheck.passed) {
      suggestions.push("Fix format issues before attempting semantic verification.");
    }
    if (semanticFailed) {
      suggestions.push(`Semantic assessment: ${semanticCheck.reasoning}`);
    }

    const formatLabel = formatCheck.passed ? "PASS" : "FAIL";
    let reasoning;
    if (semanticCheck) {
      const semanticLabel = semanticCheck.passed ? "PASS" : "FAIL";
      reasoning = `Format: ${formatLabel} | Semantic: ${semanticLabel} (score: ${(semanticCheck.score * 100).toFixed(0)}%)`;
    } else {
      reasoning = `Format: ${formatLabel} | Semantic: SKIPPED`;
    }

    return {
      outcome,
      confidence,
      reasoning,
      issues: issues.length > 0 ? issues : void 0,
      suggestions: suggestions.length > 0 ? suggestions : void 0,
      formatCheck,
      semanticCheck
    };
  }
};
|
|
1768
|
+
/**
 * Creates a hook that looks up memories related to the latest user message
 * and adds them to the conversation as a system message.
 *
 * The hook does nothing when there is no user message, when the latest user
 * message is not a non-blank string, or when the backend returns no results.
 *
 * @param {object} backend Object exposing `recall(query, options)` that resolves to an array of results.
 * @param {object} [opts]
 * @param {number} [opts.maxResults=5] Passed to the backend as `limit`.
 * @param {number} [opts.minScore=0.1] Passed to the backend as `minScore`.
 * @param {object} [opts.recallOpts] Extra recall options; these override `limit` and `minScore`.
 * @param {string} [opts.namespace] When set, recall is sent `keyPrefix: "<namespace>:"`.
 * @returns {(ctx: object) => Promise<void>} Hook taking a context with `getMessages()` and `addMessage()`.
 */
function createAutoRecallHook(backend, opts) {
  const limit = opts?.maxResults ?? 5;
  const minScore = opts?.minScore ?? 0.1;

  return async (ctx) => {
    const history = ctx.getMessages();

    // Walk backwards to find the most recent user message.
    let latestUser;
    for (let idx = history.length - 1; idx >= 0; idx -= 1) {
      if (history[idx].role === "user") {
        latestUser = history[idx];
        break;
      }
    }
    if (!latestUser) return;
    if (typeof latestUser.content !== "string") return;

    const query = latestUser.content;
    if (query.trim() === "") return;

    const recallOptions = { limit, minScore, ...opts?.recallOpts };
    if (opts?.namespace) {
      recallOptions.keyPrefix = `${opts.namespace}:`;
    }

    const hits = await backend.recall(query, recallOptions);
    if (hits.length === 0) return;

    const numbered = hits
      .map((hit, idx) => `${idx + 1}. [${hit.matchType}, score=${hit.score.toFixed(2)}] ${hit.content}`)
      .join("\n");
    const content = "Relevant memories from previous conversations:\n" + numbered + "\n\nUse these memories to provide more personalized and context-aware responses.";

    await ctx.addMessage({ role: "system", content });
  };
}
|
|
1791
|
+
/**
 * Creates a hook that stores a plain-text summary of the conversation (the
 * user's string messages plus the final answer) in the memory backend.
 *
 * Nothing is stored when there are fewer than `minMessages` user messages or
 * when the trimmed answer is shorter than 10 characters.
 *
 * @param {object} backend Object exposing `store(key, value, metadata)`.
 * @param {object} [opts]
 * @param {number} [opts.minMessages=1] Minimum number of user messages required.
 * @param {number} [opts.maxSummaryLength=2000] Maximum length of the stored summary.
 * @param {string} [opts.namespace] When set, the key is prefixed with "<namespace>:".
 * @returns {(ctx: object, answer: string) => Promise<void>} Hook taking a context with `getMessages()` and the final answer.
 */
function createAutoSummarizeHook(backend, opts) {
  const minMessages = opts?.minMessages ?? 1;
  const maxLength = opts?.maxSummaryLength ?? 2e3;

  return async (ctx, answer) => {
    const messages = ctx.getMessages();
    const userMessages = messages.filter((m) => m.role === "user");
    if (userMessages.length < minMessages) return;
    if (!answer || answer.trim().length < 10) return;

    const userQueries = userMessages
      .map((m) => (typeof m.content === "string" ? m.content : ""))
      .filter(Boolean);

    let summary = [
      `User asked: ${userQueries.join(" \u2192 ")}`,
      `Assistant answered: ${answer}`
    ].join("\n");
    if (summary.length > maxLength) {
      // With fewer than three characters available there is no room for the
      // ellipsis; cut hard so the summary never exceeds the limit (a negative
      // slice end would otherwise count from the end of the string).
      summary = maxLength >= 3
        ? `${summary.slice(0, maxLength - 3)}...`
        : summary.slice(0, Math.max(0, maxLength));
    }

    const timestamp = (/* @__PURE__ */ new Date()).toISOString();
    const sanitizedQuery = userQueries[0]?.slice(0, 50).replace(/[^a-zA-Z0-9 ]/g, "");
    // `??` alone would keep an empty string, so a query made only of
    // punctuation or whitespace must be checked explicitly to get the fallback.
    const queryPrefix = sanitizedQuery?.trim() ? sanitizedQuery : "conversation";
    const nsPrefix = opts?.namespace ? `${opts.namespace}:` : "";
    const key = `${nsPrefix}conv:${timestamp}:${queryPrefix}`;

    await backend.store(key, summary, {
      type: "conversation_summary",
      timestamp,
      messageCount: messages.length,
      userMessageCount: userMessages.length
    });
  };
}
|
|
1820
|
+
|
|
1821
|
+
export {
|
|
1822
|
+
SteeringQueue,
|
|
1823
|
+
NAMED_STRATEGIES,
|
|
1824
|
+
ContextManager,
|
|
1825
|
+
ToolWorkerPool,
|
|
1826
|
+
Session,
|
|
1827
|
+
AgentLoop,
|
|
1828
|
+
FormatVerifier,
|
|
1829
|
+
LLMVerifier,
|
|
1830
|
+
createAutoRecallHook,
|
|
1831
|
+
createAutoSummarizeHook
|
|
1832
|
+
};
|