@huydao/karrot 0.1.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GUIDE.md +3 -3
- package/README.md +192 -38
- package/dist/assertions/assertion.d.ts +7 -2
- package/dist/assertions/assertion.js +142 -1
- package/dist/executors/adapters/ag-ui.d.ts +2 -2
- package/dist/executors/adapters/ag-ui.js +379 -186
- package/dist/executors/execute.js +5 -4
- package/dist/prompts/turn-eval-system-prompt.md +31 -1
- package/dist/prompts/turn-message-gen-system-prompt.md +4 -2
- package/dist/reports/report.d.ts +1 -1
- package/dist/reports/report.js +18 -0
- package/dist/scenarios/generated-message.js +2 -0
- package/dist/scenarios/scenario-loader.d.ts +1 -1
- package/dist/scenarios/scenario-loader.js +41 -3
- package/dist/scenarios/scenario.d.ts +11 -7
- package/dist/utils/config.d.ts +2 -4
- package/package.json +12 -5
|
@@ -9,29 +9,57 @@ exports.extractAppendedLog = extractAppendedLog;
|
|
|
9
9
|
exports.runAgUiMessage = runAgUiMessage;
|
|
10
10
|
const promises_1 = __importDefault(require("node:fs/promises"));
|
|
11
11
|
const node_path_1 = __importDefault(require("node:path"));
|
|
12
|
-
const
|
|
12
|
+
const stompjs_1 = require("@stomp/stompjs");
|
|
13
|
+
const uuid_1 = require("uuid");
|
|
14
|
+
const ws_1 = __importDefault(require("ws"));
|
|
13
15
|
const run_result_1 = require("../run-result");
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
const
|
|
17
|
-
|
|
18
|
-
const binaryRelativePath = packageJson.bin?.['ag-ui-wss'];
|
|
19
|
-
if (!binaryRelativePath) {
|
|
20
|
-
throw new Error(`Unable to resolve ag-ui-wss binary from ${packageJsonPath}.`);
|
|
21
|
-
}
|
|
22
|
-
return node_path_1.default.join(packageDirectory, binaryRelativePath);
|
|
16
|
+
Object.assign(globalThis, { WebSocket: ws_1.default });
|
|
17
|
+
function getEnv(name, env) {
|
|
18
|
+
const value = env[name];
|
|
19
|
+
return value && value.trim() ? value.trim() : undefined;
|
|
23
20
|
}
|
|
24
|
-
function
|
|
25
|
-
|
|
21
|
+
function parseHeaderMap(raw) {
|
|
22
|
+
if (!raw?.trim()) {
|
|
23
|
+
return {};
|
|
24
|
+
}
|
|
25
|
+
const result = {};
|
|
26
|
+
for (const pair of raw.split(',')) {
|
|
27
|
+
const separatorIndex = pair.indexOf(':');
|
|
28
|
+
if (separatorIndex <= 0) {
|
|
29
|
+
continue;
|
|
30
|
+
}
|
|
31
|
+
const key = pair.slice(0, separatorIndex).trim();
|
|
32
|
+
const value = pair.slice(separatorIndex + 1).trim();
|
|
33
|
+
if (key && value) {
|
|
34
|
+
result[key] = value;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
return result;
|
|
26
38
|
}
|
|
27
|
-
function
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
39
|
+
function parseAgUiEnv(env) {
|
|
40
|
+
const agentUrl = getEnv('AGENT_URL', env);
|
|
41
|
+
const wsUrl = getEnv('WS_URL', env);
|
|
42
|
+
const wsTopic = getEnv('WS_TOPIC', env);
|
|
43
|
+
if (!agentUrl) {
|
|
44
|
+
throw new Error('Missing AGENT_URL for AG-UI transport.');
|
|
31
45
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
46
|
+
if (!wsUrl) {
|
|
47
|
+
throw new Error('Missing WS_URL for AG-UI transport.');
|
|
48
|
+
}
|
|
49
|
+
if (!wsTopic) {
|
|
50
|
+
throw new Error('Missing WS_TOPIC for AG-UI transport.');
|
|
51
|
+
}
|
|
52
|
+
const idleTimeoutRaw = Number(getEnv('IDLE_TIMEOUT', env) ?? 120_000);
|
|
53
|
+
return {
|
|
54
|
+
agentUrl,
|
|
55
|
+
agentId: getEnv('AGENT_ID', env) ?? 'orchestratorAgent',
|
|
56
|
+
wsUrl,
|
|
57
|
+
wsTopic,
|
|
58
|
+
idleTimeoutMs: Number.isFinite(idleTimeoutRaw) && idleTimeoutRaw > 0 ? idleTimeoutRaw : 120_000,
|
|
59
|
+
agentHeaders: parseHeaderMap(getEnv('AGENT_HEADERS', env)),
|
|
60
|
+
wsStompHeaders: parseHeaderMap(getEnv('WS_STOMP_HEADERS', env)),
|
|
61
|
+
wsHeaders: parseHeaderMap(getEnv('WS_HEADERS', env)),
|
|
62
|
+
};
|
|
35
63
|
}
|
|
36
64
|
function parseExecutionTestResultId(output) {
|
|
37
65
|
const urlMatch = output.match(/\/test-results\/(\d+)/);
|
|
@@ -41,50 +69,6 @@ function parseExecutionTestResultId(output) {
|
|
|
41
69
|
const plainMatch = output.match(/test result(?: ID)?\s+(\d{3,})/i);
|
|
42
70
|
return plainMatch?.[1];
|
|
43
71
|
}
|
|
44
|
-
function parseTimingMetrics(output) {
|
|
45
|
-
const matches = [
|
|
46
|
-
...output.matchAll(/(?:(?:TTF-Tool:\s*([\d.]+)s)\s*\|\s*)?TTF-Text:\s*([\d.]+)s\s*\|\s*Total:\s*([\d.]+)s\s*\|\s*Protocol efficiency:\s*([\d.]+)KB\/([\d.]+)KB\s*\((\d+)%\)/g),
|
|
47
|
-
];
|
|
48
|
-
const match = matches.at(-1);
|
|
49
|
-
if (!match) {
|
|
50
|
-
return {};
|
|
51
|
-
}
|
|
52
|
-
return {
|
|
53
|
-
ttfToolSeconds: match[1] ? Number(match[1]) : undefined,
|
|
54
|
-
ttfTextSeconds: Number(match[2]),
|
|
55
|
-
totalSeconds: Number(match[3]),
|
|
56
|
-
protocolUsedKb: Number(match[4]),
|
|
57
|
-
protocolTotalKb: Number(match[5]),
|
|
58
|
-
efficiencyPercent: Number(match[6]),
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
function parseRunError(logContent) {
|
|
62
|
-
const lines = logContent
|
|
63
|
-
.split('\n')
|
|
64
|
-
.map((line) => line.trim())
|
|
65
|
-
.filter(Boolean);
|
|
66
|
-
for (let index = lines.length - 1; index >= 0; index -= 1) {
|
|
67
|
-
try {
|
|
68
|
-
const event = JSON.parse(lines[index]);
|
|
69
|
-
if (event.type === 'RUN_ERROR' && typeof event.error === 'string' && event.error.trim()) {
|
|
70
|
-
return event.error.trim();
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
catch {
|
|
74
|
-
// Ignore malformed lines in the JSONL stream.
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
return undefined;
|
|
78
|
-
}
|
|
79
|
-
function parseConsoleError(output) {
|
|
80
|
-
const matches = [...output.matchAll(/^Error:\s*(.+)$/gm)]
|
|
81
|
-
.map((match) => match[1].trim())
|
|
82
|
-
.filter((message) => message && message !== 'Agent run failed');
|
|
83
|
-
return matches.at(0);
|
|
84
|
-
}
|
|
85
|
-
function countToolCalls(logContent) {
|
|
86
|
-
return [...logContent.matchAll(/"type":"TOOL_CALL_START"/g)].length;
|
|
87
|
-
}
|
|
88
72
|
function extractToolCallNames(logContent) {
|
|
89
73
|
const toolCalls = [];
|
|
90
74
|
const lines = logContent
|
|
@@ -117,50 +101,49 @@ function extractAppendedLog(previousLogContent, latestLogContent) {
|
|
|
117
101
|
}
|
|
118
102
|
return latestLogContent;
|
|
119
103
|
}
|
|
120
|
-
function
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
: '__default__';
|
|
137
|
-
if (!fragmentsByMessage.has(messageId)) {
|
|
138
|
-
fragmentsByMessage.set(messageId, []);
|
|
139
|
-
orderedMessageIds.push(messageId);
|
|
140
|
-
}
|
|
141
|
-
if (typeof event.content === 'string' && event.content.trim()) {
|
|
142
|
-
latestFullContent = event.content.trim();
|
|
143
|
-
}
|
|
144
|
-
if (typeof event.text === 'string' && event.text.trim()) {
|
|
145
|
-
latestFullContent = event.text.trim();
|
|
146
|
-
}
|
|
147
|
-
if (typeof event.delta === 'string' && event.delta.length > 0) {
|
|
148
|
-
fragmentsByMessage.get(messageId)?.push(event.delta);
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
catch {
|
|
152
|
-
// Ignore malformed JSONL lines.
|
|
153
|
-
}
|
|
104
|
+
function getMessageId(event) {
|
|
105
|
+
return typeof event.messageId === 'string' && event.messageId.trim()
|
|
106
|
+
? event.messageId.trim()
|
|
107
|
+
: '__default__';
|
|
108
|
+
}
|
|
109
|
+
function recordAssistantText(state, event) {
|
|
110
|
+
const messageId = getMessageId(event);
|
|
111
|
+
if (!state.fragmentsByMessage.has(messageId)) {
|
|
112
|
+
state.fragmentsByMessage.set(messageId, []);
|
|
113
|
+
state.orderedMessageIds.push(messageId);
|
|
114
|
+
}
|
|
115
|
+
if (typeof event.content === 'string' && event.content.trim()) {
|
|
116
|
+
state.latestFullContent = event.content.trim();
|
|
117
|
+
}
|
|
118
|
+
if (typeof event.text === 'string' && event.text.trim()) {
|
|
119
|
+
state.latestFullContent = event.text.trim();
|
|
154
120
|
}
|
|
155
|
-
|
|
156
|
-
|
|
121
|
+
const fragment = typeof event.delta === 'string' && event.delta.length > 0
|
|
122
|
+
? event.delta
|
|
123
|
+
: typeof event.content === 'string' && event.content.length > 0
|
|
124
|
+
? event.content
|
|
125
|
+
: typeof event.text === 'string' && event.text.length > 0
|
|
126
|
+
? event.text
|
|
127
|
+
: undefined;
|
|
128
|
+
if (fragment) {
|
|
129
|
+
state.fragmentsByMessage.get(messageId)?.push(fragment);
|
|
157
130
|
}
|
|
158
|
-
|
|
131
|
+
}
|
|
132
|
+
function getAssistantOutput(state) {
|
|
133
|
+
if (state.latestFullContent?.trim()) {
|
|
134
|
+
return state.latestFullContent.trim();
|
|
135
|
+
}
|
|
136
|
+
const lastMessageId = state.orderedMessageIds.at(-1);
|
|
159
137
|
if (!lastMessageId) {
|
|
160
|
-
return
|
|
138
|
+
return '';
|
|
161
139
|
}
|
|
162
|
-
|
|
163
|
-
|
|
140
|
+
return state.fragmentsByMessage.get(lastMessageId)?.join('').trim() ?? '';
|
|
141
|
+
}
|
|
142
|
+
function countToolCalls(logContent) {
|
|
143
|
+
return [...logContent.matchAll(/"type":"TOOL_CALL_START"/g)].length;
|
|
144
|
+
}
|
|
145
|
+
async function appendEvent(logFilePath, event) {
|
|
146
|
+
await promises_1.default.appendFile(logFilePath, `${JSON.stringify(event)}\n`, 'utf8');
|
|
164
147
|
}
|
|
165
148
|
async function readJsonl(pathname) {
|
|
166
149
|
try {
|
|
@@ -170,104 +153,314 @@ async function readJsonl(pathname) {
|
|
|
170
153
|
return '';
|
|
171
154
|
}
|
|
172
155
|
}
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
const
|
|
176
|
-
|
|
177
|
-
? node_path_1.default.join(options.outputDirectory, `${expectedThreadId}.jsonl`)
|
|
156
|
+
function computeMetrics(state) {
|
|
157
|
+
const totalSeconds = Number(((Date.now() - state.startTimeMs) / 1000).toFixed(1));
|
|
158
|
+
const ttfTextSeconds = typeof state.firstTextTimeMs === 'number'
|
|
159
|
+
? Number(((state.firstTextTimeMs - state.startTimeMs) / 1000).toFixed(1))
|
|
178
160
|
: undefined;
|
|
179
|
-
const
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
161
|
+
const ttfToolSeconds = typeof state.firstToolTimeMs === 'number'
|
|
162
|
+
? Number(((state.firstToolTimeMs - state.startTimeMs) / 1000).toFixed(1))
|
|
163
|
+
: undefined;
|
|
164
|
+
const protocolUsedKb = Number((state.protocolUsedBytes / 1024).toFixed(1));
|
|
165
|
+
const protocolTotalKb = Number((state.protocolTotalBytes / 1024).toFixed(1));
|
|
166
|
+
const efficiencyPercent = state.protocolTotalBytes > 0
|
|
167
|
+
? Math.round((state.protocolUsedBytes / state.protocolTotalBytes) * 100)
|
|
168
|
+
: undefined;
|
|
169
|
+
return {
|
|
170
|
+
ttfToolSeconds,
|
|
171
|
+
ttfTextSeconds,
|
|
172
|
+
totalSeconds,
|
|
173
|
+
protocolUsedKb,
|
|
174
|
+
protocolTotalKb,
|
|
175
|
+
efficiencyPercent,
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
function writeMetricsToStdout(metrics) {
|
|
179
|
+
const parts = [
|
|
180
|
+
metrics.ttfToolSeconds != null ? `TTF-Tool: ${metrics.ttfToolSeconds.toFixed(1)}s` : undefined,
|
|
181
|
+
metrics.ttfTextSeconds != null ? `TTF-Text: ${metrics.ttfTextSeconds.toFixed(1)}s` : undefined,
|
|
182
|
+
metrics.totalSeconds != null ? `Total: ${metrics.totalSeconds.toFixed(1)}s` : undefined,
|
|
183
|
+
metrics.protocolUsedKb != null && metrics.protocolTotalKb != null && metrics.efficiencyPercent != null
|
|
184
|
+
? `Protocol efficiency: ${metrics.protocolUsedKb.toFixed(1)}KB/${metrics.protocolTotalKb.toFixed(1)}KB (${metrics.efficiencyPercent}%)`
|
|
185
|
+
: undefined,
|
|
186
|
+
].filter(Boolean);
|
|
187
|
+
if (parts.length > 0) {
|
|
188
|
+
process.stdout.write(`${parts.join(' | ')}\n`);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
function getEventIdentity(event) {
|
|
192
|
+
if (!event || typeof event !== 'object') {
|
|
193
|
+
return JSON.stringify(event);
|
|
194
|
+
}
|
|
195
|
+
const typedEvent = event;
|
|
196
|
+
return typedEvent.eventId ?? typedEvent.id ?? JSON.stringify(event);
|
|
197
|
+
}
|
|
198
|
+
async function connectAndRun(options) {
|
|
199
|
+
const state = {
|
|
200
|
+
seenEventIds: new Set(),
|
|
201
|
+
fragmentsByMessage: new Map(),
|
|
202
|
+
orderedMessageIds: [],
|
|
203
|
+
renderedContentByMessage: new Map(),
|
|
204
|
+
toolCalls: [],
|
|
205
|
+
protocolTotalBytes: 0,
|
|
206
|
+
protocolUsedBytes: 0,
|
|
207
|
+
startTimeMs: Date.now(),
|
|
208
|
+
runFinished: false,
|
|
209
|
+
};
|
|
210
|
+
await appendEvent(options.logFilePath, {
|
|
211
|
+
type: 'USER_MESSAGE',
|
|
212
|
+
timestamp: Date.now(),
|
|
213
|
+
content: options.message,
|
|
196
214
|
});
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
:
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
215
|
+
process.stderr.write(`Thread: ${options.threadId}\n`);
|
|
216
|
+
process.stderr.write('Connecting...\n');
|
|
217
|
+
return await new Promise((resolve, reject) => {
|
|
218
|
+
let settled = false;
|
|
219
|
+
let idleTimer;
|
|
220
|
+
let processTimer;
|
|
221
|
+
let responseDrainPromise;
|
|
222
|
+
let activeRunId;
|
|
223
|
+
const client = new stompjs_1.Client({
|
|
224
|
+
webSocketFactory: () => new ws_1.default(options.config.wsUrl, Object.keys(options.config.wsHeaders).length > 0
|
|
225
|
+
? { headers: options.config.wsHeaders }
|
|
226
|
+
: undefined),
|
|
227
|
+
connectHeaders: options.config.wsStompHeaders,
|
|
228
|
+
reconnectDelay: 0,
|
|
229
|
+
debug: () => { },
|
|
230
|
+
});
|
|
231
|
+
const cleanup = async () => {
|
|
232
|
+
if (idleTimer) {
|
|
233
|
+
clearTimeout(idleTimer);
|
|
234
|
+
}
|
|
235
|
+
if (processTimer) {
|
|
236
|
+
clearTimeout(processTimer);
|
|
214
237
|
}
|
|
215
|
-
if (
|
|
238
|
+
if (responseDrainPromise) {
|
|
239
|
+
await responseDrainPromise.catch(() => { });
|
|
240
|
+
}
|
|
241
|
+
if (client.active) {
|
|
242
|
+
await client.deactivate();
|
|
243
|
+
}
|
|
244
|
+
};
|
|
245
|
+
const finish = async (callback) => {
|
|
246
|
+
if (settled) {
|
|
216
247
|
return;
|
|
217
248
|
}
|
|
218
|
-
|
|
219
|
-
|
|
249
|
+
settled = true;
|
|
250
|
+
await cleanup();
|
|
251
|
+
callback();
|
|
252
|
+
};
|
|
253
|
+
const onIdleTimeout = async () => {
|
|
254
|
+
const assistantOutput = getAssistantOutput(state);
|
|
255
|
+
if (assistantOutput.trim()) {
|
|
256
|
+
await finish(() => resolve(state));
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
await finish(() => reject(new run_result_1.MessageRunError(`No events for ${Math.round(options.config.idleTimeoutMs / 1000)}s, idle timeout.`, {
|
|
260
|
+
threadId: options.threadId,
|
|
261
|
+
outputPath: options.logFilePath,
|
|
262
|
+
output: assistantOutput,
|
|
263
|
+
metrics: computeMetrics(state),
|
|
264
|
+
toolCallCount: state.toolCalls.length,
|
|
265
|
+
toolCalls: [...state.toolCalls],
|
|
266
|
+
})));
|
|
267
|
+
};
|
|
268
|
+
const resetIdleTimer = () => {
|
|
269
|
+
if (idleTimer) {
|
|
270
|
+
clearTimeout(idleTimer);
|
|
271
|
+
}
|
|
272
|
+
idleTimer = setTimeout(() => {
|
|
273
|
+
void onIdleTimeout();
|
|
274
|
+
}, options.config.idleTimeoutMs);
|
|
275
|
+
};
|
|
276
|
+
if (typeof options.processTimeoutMs === 'number') {
|
|
277
|
+
processTimer = setTimeout(() => {
|
|
278
|
+
void finish(() => reject(new run_result_1.MessageRunError(`AG-UI run exceeded ${options.processTimeoutMs}ms and was terminated.`, {
|
|
279
|
+
threadId: options.threadId,
|
|
280
|
+
outputPath: options.logFilePath,
|
|
281
|
+
output: getAssistantOutput(state),
|
|
282
|
+
metrics: computeMetrics(state),
|
|
283
|
+
toolCallCount: state.toolCalls.length,
|
|
284
|
+
toolCalls: [...state.toolCalls],
|
|
285
|
+
})));
|
|
286
|
+
}, options.processTimeoutMs);
|
|
287
|
+
}
|
|
288
|
+
client.onConnect = () => {
|
|
289
|
+
client.subscribe(options.config.wsTopic, (message) => {
|
|
290
|
+
resetIdleTimer();
|
|
291
|
+
state.protocolTotalBytes += Buffer.byteLength(message.body, 'utf8');
|
|
292
|
+
state.lastEventTimeMs = Date.now();
|
|
293
|
+
const payload = JSON.parse(message.body);
|
|
294
|
+
const additionalData = payload.additionalData;
|
|
295
|
+
if (!additionalData || (additionalData.conversationId && additionalData.conversationId !== options.threadId)) {
|
|
296
|
+
return;
|
|
297
|
+
}
|
|
298
|
+
const events = Array.isArray(additionalData.events) ? additionalData.events : [];
|
|
299
|
+
for (const rawEvent of events) {
|
|
300
|
+
const identity = getEventIdentity(rawEvent);
|
|
301
|
+
if (state.seenEventIds.has(identity)) {
|
|
302
|
+
continue;
|
|
303
|
+
}
|
|
304
|
+
state.seenEventIds.add(identity);
|
|
305
|
+
state.protocolUsedBytes += Buffer.byteLength(JSON.stringify(rawEvent), 'utf8');
|
|
306
|
+
void appendEvent(options.logFilePath, rawEvent);
|
|
307
|
+
if (!rawEvent || typeof rawEvent !== 'object') {
|
|
308
|
+
continue;
|
|
309
|
+
}
|
|
310
|
+
const event = rawEvent;
|
|
311
|
+
const eventRunId = typeof event.runId === 'string' ? event.runId : undefined;
|
|
312
|
+
if (!activeRunId) {
|
|
313
|
+
if (eventRunId === options.runId) {
|
|
314
|
+
activeRunId = eventRunId;
|
|
315
|
+
}
|
|
316
|
+
else if (event.type === 'RUN_STARTED' && eventRunId) {
|
|
317
|
+
activeRunId = eventRunId;
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
if (activeRunId && eventRunId && eventRunId !== activeRunId) {
|
|
321
|
+
continue;
|
|
322
|
+
}
|
|
323
|
+
if (!activeRunId && eventRunId && eventRunId !== options.runId && event.type !== 'RUN_STARTED') {
|
|
324
|
+
continue;
|
|
325
|
+
}
|
|
326
|
+
switch (event.type) {
|
|
327
|
+
case 'TEXT_MESSAGE_CONTENT': {
|
|
328
|
+
if (typeof state.firstTextTimeMs !== 'number') {
|
|
329
|
+
state.firstTextTimeMs = Date.now();
|
|
330
|
+
}
|
|
331
|
+
const messageId = getMessageId(event);
|
|
332
|
+
const renderedContent = typeof event.content === 'string' && event.content.length > 0
|
|
333
|
+
? event.content
|
|
334
|
+
: typeof event.text === 'string' && event.text.length > 0
|
|
335
|
+
? event.text
|
|
336
|
+
: typeof event.delta === 'string' && event.delta.length > 0
|
|
337
|
+
? event.delta
|
|
338
|
+
: undefined;
|
|
339
|
+
recordAssistantText(state, event);
|
|
340
|
+
if (renderedContent) {
|
|
341
|
+
const previousRenderedContent = state.renderedContentByMessage.get(messageId) ?? '';
|
|
342
|
+
const appendedContent = extractAppendedLog(previousRenderedContent, renderedContent);
|
|
343
|
+
state.renderedContentByMessage.set(messageId, renderedContent);
|
|
344
|
+
if (appendedContent) {
|
|
345
|
+
process.stdout.write(appendedContent);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
break;
|
|
349
|
+
}
|
|
350
|
+
case 'TOOL_CALL_START':
|
|
351
|
+
if (typeof state.firstToolTimeMs !== 'number') {
|
|
352
|
+
state.firstToolTimeMs = Date.now();
|
|
353
|
+
}
|
|
354
|
+
if (typeof event.toolCallName === 'string' && event.toolCallName.trim()) {
|
|
355
|
+
state.toolCalls.push(event.toolCallName.trim());
|
|
356
|
+
}
|
|
357
|
+
break;
|
|
358
|
+
case 'RUN_ERROR':
|
|
359
|
+
state.runError = typeof event.error === 'string' ? event.error.trim() : 'Agent run failed.';
|
|
360
|
+
void finish(() => reject(new run_result_1.MessageRunError(state.runError ?? 'Agent run failed.', {
|
|
361
|
+
threadId: options.threadId,
|
|
362
|
+
outputPath: options.logFilePath,
|
|
363
|
+
output: getAssistantOutput(state),
|
|
364
|
+
metrics: computeMetrics(state),
|
|
365
|
+
toolCallCount: state.toolCalls.length,
|
|
366
|
+
toolCalls: [...state.toolCalls],
|
|
367
|
+
})));
|
|
368
|
+
return;
|
|
369
|
+
case 'RUN_FINISHED':
|
|
370
|
+
state.runFinished = true;
|
|
371
|
+
process.stdout.write('\n');
|
|
372
|
+
void finish(() => resolve(state));
|
|
373
|
+
return;
|
|
374
|
+
default:
|
|
375
|
+
break;
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
}, options.config.wsStompHeaders);
|
|
379
|
+
process.stderr.write('Sending...\n');
|
|
380
|
+
responseDrainPromise = (async () => {
|
|
381
|
+
const response = await fetch(`${options.config.agentUrl}/${options.config.agentId}/run`, {
|
|
382
|
+
method: 'POST',
|
|
383
|
+
headers: {
|
|
384
|
+
'Content-Type': 'application/json',
|
|
385
|
+
Accept: 'text/event-stream',
|
|
386
|
+
...options.config.agentHeaders,
|
|
387
|
+
},
|
|
388
|
+
body: JSON.stringify({
|
|
389
|
+
threadId: options.threadId,
|
|
390
|
+
runId: options.runId,
|
|
391
|
+
messages: [{ id: Date.now().toString(), role: 'user', content: options.message }],
|
|
392
|
+
tools: [],
|
|
393
|
+
context: [],
|
|
394
|
+
forwardedProps: {},
|
|
395
|
+
state: {},
|
|
396
|
+
}),
|
|
397
|
+
});
|
|
398
|
+
if (!response.ok) {
|
|
399
|
+
const errorText = await response.text();
|
|
400
|
+
throw new Error(`HTTP ${response.status}: ${errorText}`);
|
|
401
|
+
}
|
|
402
|
+
await response.text().catch(() => '');
|
|
403
|
+
})();
|
|
404
|
+
responseDrainPromise.catch((error) => {
|
|
405
|
+
void finish(() => reject(new run_result_1.MessageRunError(error instanceof Error ? error.message : String(error), {
|
|
406
|
+
threadId: options.threadId,
|
|
407
|
+
outputPath: options.logFilePath,
|
|
408
|
+
output: getAssistantOutput(state),
|
|
409
|
+
metrics: computeMetrics(state),
|
|
410
|
+
toolCallCount: state.toolCalls.length,
|
|
411
|
+
toolCalls: [...state.toolCalls],
|
|
412
|
+
})));
|
|
413
|
+
});
|
|
414
|
+
};
|
|
415
|
+
client.onStompError = (frame) => {
|
|
416
|
+
void finish(() => reject(new Error(`STOMP error: ${frame.headers.message ?? 'unknown'}`)));
|
|
417
|
+
};
|
|
418
|
+
client.onWebSocketError = (event) => {
|
|
419
|
+
const detail = event instanceof Error
|
|
420
|
+
? event.message
|
|
421
|
+
: typeof event === 'object' && event !== null && 'message' in event
|
|
422
|
+
? String(event.message)
|
|
423
|
+
: JSON.stringify(event);
|
|
424
|
+
void finish(() => reject(new Error(`WebSocket error: ${detail}`)));
|
|
425
|
+
};
|
|
426
|
+
resetIdleTimer();
|
|
427
|
+
client.activate();
|
|
220
428
|
});
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
429
|
+
}
|
|
430
|
+
async function runAgUiMessage(options) {
|
|
431
|
+
await promises_1.default.mkdir(options.outputDirectory, { recursive: true });
|
|
432
|
+
const config = parseAgUiEnv(options.env);
|
|
433
|
+
const threadId = options.threadId ?? options.threadIdFallback ?? (0, uuid_1.v7)();
|
|
434
|
+
const runId = (0, uuid_1.v7)();
|
|
226
435
|
const logPath = node_path_1.default.join(options.outputDirectory, `${threadId}.jsonl`);
|
|
436
|
+
const previousLogContent = await readJsonl(logPath);
|
|
437
|
+
const state = await connectAndRun({
|
|
438
|
+
config,
|
|
439
|
+
logFilePath: logPath,
|
|
440
|
+
message: options.message,
|
|
441
|
+
threadId,
|
|
442
|
+
runId,
|
|
443
|
+
processTimeoutMs: options.processTimeoutMs,
|
|
444
|
+
});
|
|
445
|
+
const metrics = computeMetrics(state);
|
|
446
|
+
writeMetricsToStdout(metrics);
|
|
227
447
|
const logContent = await readJsonl(logPath);
|
|
228
448
|
const runLogContent = extractAppendedLog(previousLogContent, logContent);
|
|
229
|
-
const assistantOutput =
|
|
230
|
-
?? extractAssistantText(logContent)
|
|
231
|
-
?? extractStdoutAssistantText(stdoutOutput)
|
|
232
|
-
?? '';
|
|
233
|
-
const metrics = parseTimingMetrics(combinedConsoleOutput);
|
|
449
|
+
const assistantOutput = getAssistantOutput(state);
|
|
234
450
|
const toolCallCount = countToolCalls(runLogContent);
|
|
235
451
|
const toolCalls = extractToolCallNames(runLogContent);
|
|
236
|
-
const
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
:
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
const hasAssistantText = hasAssistantTextEvent || Boolean(extractStdoutAssistantText(stdoutOutput));
|
|
244
|
-
if (!hasRunError &&
|
|
245
|
-
hasAssistantText &&
|
|
246
|
-
combinedConsoleOutput.includes('idle timeout')) {
|
|
247
|
-
return {
|
|
248
|
-
output: assistantOutput,
|
|
249
|
-
threadId,
|
|
250
|
-
outputPath: logPath,
|
|
251
|
-
note: 'Assistant text received but the run did not emit RUN_FINISHED before idle timeout.',
|
|
252
|
-
toolCallCount,
|
|
253
|
-
toolCalls,
|
|
254
|
-
metrics,
|
|
255
|
-
};
|
|
256
|
-
}
|
|
257
|
-
throw new run_result_1.MessageRunError(parseRunError(runLogContent) ?? parseConsoleError(combinedConsoleOutput) ?? `ag-ui-wss exited with code ${exitCode}.`, {
|
|
258
|
-
threadId,
|
|
259
|
-
outputPath: logPath,
|
|
260
|
-
output: assistantOutput,
|
|
261
|
-
metrics,
|
|
262
|
-
toolCallCount,
|
|
263
|
-
toolCalls,
|
|
264
|
-
});
|
|
265
|
-
}
|
|
452
|
+
const note = !assistantOutput.trim()
|
|
453
|
+
? state.runFinished
|
|
454
|
+
? 'Run finished without any assistant text content.'
|
|
455
|
+
: 'Assistant text received but the run did not emit RUN_FINISHED before idle timeout.'
|
|
456
|
+
: !state.runFinished
|
|
457
|
+
? 'Assistant text received but the run did not emit RUN_FINISHED before idle timeout.'
|
|
458
|
+
: undefined;
|
|
266
459
|
return {
|
|
267
460
|
output: assistantOutput,
|
|
268
461
|
threadId,
|
|
269
462
|
outputPath: logPath,
|
|
270
|
-
note
|
|
463
|
+
note,
|
|
271
464
|
toolCallCount,
|
|
272
465
|
toolCalls,
|
|
273
466
|
metrics,
|
|
@@ -67,8 +67,8 @@ function normalizeConfig(config) {
|
|
|
67
67
|
if (!isAgUiWssConfig(config) && !isAgUiPostConfig(config)) {
|
|
68
68
|
throw new Error(`Unsupported transport type "${config.transport.type}".`);
|
|
69
69
|
}
|
|
70
|
-
if (
|
|
71
|
-
throw new Error('karrot config
|
|
70
|
+
if (config.context != null && (typeof config.context !== 'object' || Array.isArray(config.context))) {
|
|
71
|
+
throw new Error('karrot config context must be an object when provided.');
|
|
72
72
|
}
|
|
73
73
|
return config;
|
|
74
74
|
}
|
|
@@ -92,9 +92,10 @@ async function execute(configOrPath, options) {
|
|
|
92
92
|
scenarioFile: scenarioSelection.file,
|
|
93
93
|
defaultRelativePath: scenarioSelection.file,
|
|
94
94
|
});
|
|
95
|
+
const baseContext = resolvedConfig.context ?? {};
|
|
95
96
|
const context = {
|
|
96
|
-
...scenarioModule.buildScenarioContext(
|
|
97
|
-
...
|
|
97
|
+
...scenarioModule.buildScenarioContext(baseContext),
|
|
98
|
+
...baseContext,
|
|
98
99
|
};
|
|
99
100
|
const selectedScenarios = scenarioModule.scenarioSet.select(scenarioSelection.ids);
|
|
100
101
|
if (selectedScenarios.length === 0) {
|