@forwardimpact/libeval 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-trace.js +49 -0
- package/package.json +6 -3
- package/src/agent-runner.js +5 -1
- package/src/commands/facilitate.js +3 -2
- package/src/commands/run.js +4 -2
- package/src/commands/supervise.js +3 -2
- package/src/commands/trace.js +46 -14
- package/src/facilitator.js +78 -135
- package/src/index.js +1 -0
- package/src/message-bus.js +78 -13
- package/src/orchestration-toolkit.js +211 -63
- package/src/orchestrator-helpers.js +58 -0
- package/src/render/tool-hints.js +3 -3
- package/src/signature-filter.js +27 -0
- package/src/supervisor.js +110 -38
- package/src/tee-writer.js +21 -0
- package/src/trace-collector.js +52 -3
- package/src/trace-query.js +141 -28
package/src/supervisor.js
CHANGED
|
@@ -4,8 +4,11 @@
|
|
|
4
4
|
* introduces itself, and delegates work to the agent. The loop then alternates:
|
|
5
5
|
* agent → supervisor → agent.
|
|
6
6
|
*
|
|
7
|
-
* Signaling uses orchestration tools (
|
|
8
|
-
* MCP servers.
|
|
7
|
+
* Signaling uses orchestration tools (Ask / Answer / Announce / Redirect /
|
|
8
|
+
* Conclude) via in-process MCP servers. The Ask/Answer contract is enforced
|
|
9
|
+
* at turn boundaries: an unanswered Ask triggers one synthetic reminder and
|
|
10
|
+
* then a `protocol_violation` trace event plus a null-answer injection so the
|
|
11
|
+
* session advances without silent deadlock.
|
|
9
12
|
*
|
|
10
13
|
* Follows OO+DI: constructor injection, factory function, tests bypass factory.
|
|
11
14
|
*/
|
|
@@ -16,27 +19,30 @@ import { createAgentRunner } from "./agent-runner.js";
|
|
|
16
19
|
import { composeProfilePrompt } from "./profile-prompt.js";
|
|
17
20
|
import { TraceCollector } from "./trace-collector.js";
|
|
18
21
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
22
|
+
import { createMessageBus } from "./message-bus.js";
|
|
19
23
|
import {
|
|
20
24
|
createOrchestrationContext,
|
|
21
25
|
createSupervisorToolServer,
|
|
22
26
|
createSupervisedAgentToolServer,
|
|
27
|
+
checkPendingAsk,
|
|
23
28
|
} from "./orchestration-toolkit.js";
|
|
29
|
+
import { formatMessages } from "./orchestrator-helpers.js";
|
|
24
30
|
|
|
25
31
|
/** System prompt appended for the supervisor runner in supervise mode. */
|
|
26
32
|
export const SUPERVISOR_SYSTEM_PROMPT =
|
|
27
|
-
"You
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
+
"You supervise one agent. " +
|
|
34
|
+
"Ask sends a question to the agent; the reply arrives via Answer. " +
|
|
35
|
+
"Answer replies to an ask the agent addressed to you. " +
|
|
36
|
+
"Announce sends a message with no reply obligation. " +
|
|
37
|
+
"Redirect interrupts the agent with replacement instructions. " +
|
|
38
|
+
"Conclude ends the session with a summary.";
|
|
33
39
|
|
|
34
40
|
/** System prompt appended for the agent runner in supervise mode. */
|
|
35
41
|
export const AGENT_SYSTEM_PROMPT =
|
|
36
|
-
"A supervisor watches your work
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
42
|
+
"A supervisor watches your work. " +
|
|
43
|
+
"Answer replies to an ask addressed to you. " +
|
|
44
|
+
"Ask sends a question to the supervisor; the reply arrives via Answer. " +
|
|
45
|
+
"Announce sends a message with no reply expected.";
|
|
40
46
|
|
|
41
47
|
/**
|
|
42
48
|
* Maximum number of mid-turn interventions allowed within a single agent turn.
|
|
@@ -54,8 +60,18 @@ export class Supervisor {
|
|
|
54
60
|
* @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
|
|
55
61
|
* @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
|
|
56
62
|
* @param {object} [deps.ctx] - Orchestration context (injected by factory)
|
|
63
|
+
* @param {import("./message-bus.js").MessageBus} [deps.messageBus] - Two-participant message bus ("supervisor" / "agent")
|
|
64
|
+
* @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
|
|
57
65
|
*/
|
|
58
|
-
constructor({
|
|
66
|
+
constructor({
|
|
67
|
+
agentRunner,
|
|
68
|
+
supervisorRunner,
|
|
69
|
+
output,
|
|
70
|
+
maxTurns,
|
|
71
|
+
ctx,
|
|
72
|
+
messageBus,
|
|
73
|
+
taskAmend,
|
|
74
|
+
}) {
|
|
59
75
|
if (!agentRunner) throw new Error("agentRunner is required");
|
|
60
76
|
if (!supervisorRunner) throw new Error("supervisorRunner is required");
|
|
61
77
|
if (!output) throw new Error("output is required");
|
|
@@ -64,7 +80,11 @@ export class Supervisor {
|
|
|
64
80
|
this.output = output;
|
|
65
81
|
this.maxTurns = maxTurns ?? 100;
|
|
66
82
|
this.ctx = ctx ?? createOrchestrationContext();
|
|
83
|
+
this.messageBus =
|
|
84
|
+
messageBus ?? createMessageBus({ participants: ["supervisor", "agent"] });
|
|
85
|
+
if (!this.ctx.messageBus) this.ctx.messageBus = this.messageBus;
|
|
67
86
|
this.counter = new SequenceCounter();
|
|
87
|
+
this.taskAmend = taskAmend ?? null;
|
|
68
88
|
/** @type {"agent"|"supervisor"} */
|
|
69
89
|
this.currentSource = "agent";
|
|
70
90
|
/** @type {number} */
|
|
@@ -77,9 +97,10 @@ export class Supervisor {
|
|
|
77
97
|
* @returns {Promise<{success: boolean, turns: number}>}
|
|
78
98
|
*/
|
|
79
99
|
async run(task) {
|
|
100
|
+
const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
|
|
80
101
|
this.currentSource = "supervisor";
|
|
81
102
|
this.currentTurn = 0;
|
|
82
|
-
let supervisorResult = await this.supervisorRunner.run(
|
|
103
|
+
let supervisorResult = await this.supervisorRunner.run(initialTask);
|
|
83
104
|
|
|
84
105
|
if (supervisorResult.error) {
|
|
85
106
|
this.emitSummary({ success: false, turns: 0 });
|
|
@@ -95,8 +116,7 @@ export class Supervisor {
|
|
|
95
116
|
const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
|
|
96
117
|
for (let turn = 1; turn <= turnLimit; turn++) {
|
|
97
118
|
const relay =
|
|
98
|
-
pendingRelay ??
|
|
99
|
-
this.extractLastText(this.supervisorRunner, supervisorResult.text);
|
|
119
|
+
pendingRelay ?? this.#buildInitialRelay(supervisorResult.text);
|
|
100
120
|
|
|
101
121
|
const turnOutcome = await this.#runAgentTurn(turn, relay);
|
|
102
122
|
if (turnOutcome.exit) return turnOutcome.exit;
|
|
@@ -111,6 +131,22 @@ export class Supervisor {
|
|
|
111
131
|
return { success: false, turns: this.maxTurns };
|
|
112
132
|
}
|
|
113
133
|
|
|
134
|
+
#buildInitialRelay(fallbackText) {
|
|
135
|
+
const queued = this.messageBus.drain("agent");
|
|
136
|
+
if (queued.length > 0) return formatMessages(queued);
|
|
137
|
+
return this.extractLastText(this.supervisorRunner, fallbackText);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
#checkAsk(name) {
|
|
141
|
+
return checkPendingAsk({
|
|
142
|
+
ctx: this.ctx,
|
|
143
|
+
messageBus: this.messageBus,
|
|
144
|
+
addresseeName: name,
|
|
145
|
+
mode: "supervised",
|
|
146
|
+
emitViolation: (e) => this.emitOrchestratorEvent(e),
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
|
|
114
150
|
/**
|
|
115
151
|
* Drive the agent through one turn, allowing the supervisor to interrupt
|
|
116
152
|
* via the Redirect tool. Returns either an `exit` outcome (the loop should
|
|
@@ -122,6 +158,7 @@ export class Supervisor {
|
|
|
122
158
|
async #runAgentTurn(turn, initialRelay) {
|
|
123
159
|
let relay = initialRelay;
|
|
124
160
|
let interventions = 0;
|
|
161
|
+
let agentCalled = this.agentRunner.sessionId !== null;
|
|
125
162
|
|
|
126
163
|
this.agentRunner.onBatch = (batchLines, ctx) =>
|
|
127
164
|
this.#midTurnReview(turn, batchLines, ctx);
|
|
@@ -130,10 +167,10 @@ export class Supervisor {
|
|
|
130
167
|
while (true) {
|
|
131
168
|
this.currentSource = "agent";
|
|
132
169
|
this.currentTurn = turn;
|
|
133
|
-
const
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
170
|
+
const agentResult = agentCalled
|
|
171
|
+
? await this.agentRunner.resume(relay)
|
|
172
|
+
: await this.agentRunner.run(relay);
|
|
173
|
+
agentCalled = true;
|
|
137
174
|
|
|
138
175
|
if (agentResult.error && !agentResult.aborted) {
|
|
139
176
|
this.emitSummary({ success: false, turns: turn });
|
|
@@ -162,6 +199,14 @@ export class Supervisor {
|
|
|
162
199
|
continue;
|
|
163
200
|
}
|
|
164
201
|
|
|
202
|
+
if (this.#checkAsk("agent") === "recheck" && !this.ctx.concluded) {
|
|
203
|
+
const reminders = this.messageBus.drain("agent");
|
|
204
|
+
if (reminders.length > 0) {
|
|
205
|
+
relay = formatMessages(reminders);
|
|
206
|
+
continue;
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
165
210
|
return { exit: null };
|
|
166
211
|
}
|
|
167
212
|
} finally {
|
|
@@ -209,14 +254,20 @@ export class Supervisor {
|
|
|
209
254
|
* @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
|
|
210
255
|
*/
|
|
211
256
|
async #endOfTurnReview(turn) {
|
|
257
|
+
const queuedForSupervisor = this.messageBus.drain("supervisor");
|
|
212
258
|
const agentTranscript = this.extractTranscript(this.agentRunner);
|
|
213
259
|
this.currentSource = "supervisor";
|
|
214
260
|
this.currentTurn = turn;
|
|
215
261
|
this.ctx.redirect = null;
|
|
216
262
|
|
|
217
|
-
const
|
|
218
|
-
|
|
219
|
-
|
|
263
|
+
const reviewPrompt =
|
|
264
|
+
queuedForSupervisor.length > 0
|
|
265
|
+
? `The agent reported:\n\n${agentTranscript}\n\n` +
|
|
266
|
+
`Agent messages:\n${formatMessages(queuedForSupervisor)}\n\n` +
|
|
267
|
+
`Review and decide how to proceed.`
|
|
268
|
+
: `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
|
|
269
|
+
|
|
270
|
+
let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
|
|
220
271
|
|
|
221
272
|
if (supervisorResult.error) {
|
|
222
273
|
this.emitSummary({ success: false, turns: turn });
|
|
@@ -232,13 +283,34 @@ export class Supervisor {
|
|
|
232
283
|
return { exit: { success: true, turns: turn } };
|
|
233
284
|
}
|
|
234
285
|
|
|
286
|
+
if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
|
|
287
|
+
const reminders = this.messageBus.drain("supervisor");
|
|
288
|
+
if (reminders.length > 0) {
|
|
289
|
+
supervisorResult = await this.supervisorRunner.resume(
|
|
290
|
+
formatMessages(reminders),
|
|
291
|
+
);
|
|
292
|
+
if (this.ctx.concluded) {
|
|
293
|
+
this.emitSummary({
|
|
294
|
+
success: true,
|
|
295
|
+
turns: turn,
|
|
296
|
+
summary: this.ctx.summary,
|
|
297
|
+
});
|
|
298
|
+
return { exit: { success: true, turns: turn } };
|
|
299
|
+
}
|
|
300
|
+
this.#checkAsk("supervisor");
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
235
304
|
if (this.ctx.redirect) {
|
|
236
305
|
const redirect = this.ctx.redirect;
|
|
237
306
|
this.ctx.redirect = null;
|
|
238
307
|
return { exit: null, supervisorResult, relay: redirect.message };
|
|
239
308
|
}
|
|
240
309
|
|
|
241
|
-
|
|
310
|
+
const queuedForAgent = this.messageBus.drain("agent");
|
|
311
|
+
const relay =
|
|
312
|
+
queuedForAgent.length > 0 ? formatMessages(queuedForAgent) : undefined;
|
|
313
|
+
return { exit: null, supervisorResult, relay };
|
|
242
314
|
}
|
|
243
315
|
|
|
244
316
|
/**
|
|
@@ -360,6 +432,7 @@ const devNull = new Writable({
|
|
|
360
432
|
* @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
361
433
|
* @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
362
434
|
* @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
|
|
435
|
+
* @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
|
|
363
436
|
* @returns {Supervisor}
|
|
364
437
|
*/
|
|
365
438
|
export function createSupervisor({
|
|
@@ -375,6 +448,7 @@ export function createSupervisor({
|
|
|
375
448
|
supervisorProfile,
|
|
376
449
|
agentProfile,
|
|
377
450
|
profilesDir,
|
|
451
|
+
taskAmend,
|
|
378
452
|
}) {
|
|
379
453
|
const resolvedProfilesDir =
|
|
380
454
|
profilesDir ?? resolve(supervisorCwd, ".claude/agents");
|
|
@@ -388,23 +462,19 @@ export function createSupervisor({
|
|
|
388
462
|
: { type: "preset", preset: "claude_code", append: trailer };
|
|
389
463
|
};
|
|
390
464
|
let supervisor;
|
|
391
|
-
let supervisorRunner;
|
|
392
465
|
|
|
393
466
|
const ctx = createOrchestrationContext();
|
|
467
|
+
const messageBus = createMessageBus({
|
|
468
|
+
participants: ["supervisor", "agent"],
|
|
469
|
+
});
|
|
470
|
+
ctx.messageBus = messageBus;
|
|
471
|
+
ctx.participants = [
|
|
472
|
+
{ name: "supervisor", role: "supervisor" },
|
|
473
|
+
{ name: "agent", role: "agent" },
|
|
474
|
+
];
|
|
394
475
|
|
|
395
476
|
const supervisorServer = createSupervisorToolServer(ctx);
|
|
396
|
-
const agentServer = createSupervisedAgentToolServer(ctx
|
|
397
|
-
onAsk: async (question) => {
|
|
398
|
-
supervisor.currentSource = "supervisor";
|
|
399
|
-
supervisor.emitOrchestratorEvent({ type: "ask_received" });
|
|
400
|
-
await supervisorRunner.resume(
|
|
401
|
-
`The agent asks: "${question}"\n\nAnswer the question directly.`,
|
|
402
|
-
);
|
|
403
|
-
supervisor.currentSource = "agent";
|
|
404
|
-
supervisor.emitOrchestratorEvent({ type: "ask_answered" });
|
|
405
|
-
return supervisor.extractLastText(supervisorRunner, "No answer.");
|
|
406
|
-
},
|
|
407
|
-
});
|
|
477
|
+
const agentServer = createSupervisedAgentToolServer(ctx);
|
|
408
478
|
|
|
409
479
|
const onLine = (line) => supervisor.emitLine(line);
|
|
410
480
|
|
|
@@ -426,7 +496,7 @@ export function createSupervisor({
|
|
|
426
496
|
? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
|
|
427
497
|
: defaultDisallowed;
|
|
428
498
|
|
|
429
|
-
supervisorRunner = createAgentRunner({
|
|
499
|
+
const supervisorRunner = createAgentRunner({
|
|
430
500
|
cwd: supervisorCwd,
|
|
431
501
|
query,
|
|
432
502
|
output: devNull,
|
|
@@ -453,6 +523,8 @@ export function createSupervisor({
|
|
|
453
523
|
output,
|
|
454
524
|
maxTurns,
|
|
455
525
|
ctx,
|
|
526
|
+
messageBus,
|
|
527
|
+
taskAmend,
|
|
456
528
|
});
|
|
457
529
|
return supervisor;
|
|
458
530
|
}
|
package/src/tee-writer.js
CHANGED
|
@@ -163,6 +163,27 @@ export class TeeWriter extends Writable {
|
|
|
163
163
|
withPrefix,
|
|
164
164
|
}),
|
|
165
165
|
);
|
|
166
|
+
} else if (turn.role === "system") {
|
|
167
|
+
const label = turn.subtype ?? "system";
|
|
168
|
+
this.textStream.write(
|
|
169
|
+
renderTextLine({
|
|
170
|
+
source: turn.source,
|
|
171
|
+
text: `[${label}]`,
|
|
172
|
+
withPrefix,
|
|
173
|
+
}),
|
|
174
|
+
);
|
|
175
|
+
} else if (turn.role === "user") {
|
|
176
|
+
for (const block of turn.content) {
|
|
177
|
+
if (block.type === "text") {
|
|
178
|
+
this.textStream.write(
|
|
179
|
+
renderTextLine({
|
|
180
|
+
source: turn.source,
|
|
181
|
+
text: `[user] ${block.text}`,
|
|
182
|
+
withPrefix,
|
|
183
|
+
}),
|
|
184
|
+
);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
166
187
|
}
|
|
167
188
|
}
|
|
168
189
|
}
|
package/src/trace-collector.js
CHANGED
|
@@ -37,6 +37,8 @@ export class TraceCollector {
|
|
|
37
37
|
this.result = null;
|
|
38
38
|
/** @type {number} */
|
|
39
39
|
this.turnIndex = 0;
|
|
40
|
+
/** @type {object|null} */
|
|
41
|
+
this.initEvent = null;
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
/**
|
|
@@ -73,7 +75,7 @@ export class TraceCollector {
|
|
|
73
75
|
|
|
74
76
|
switch (event.type) {
|
|
75
77
|
case "system":
|
|
76
|
-
this.handleSystem(event);
|
|
78
|
+
this.handleSystem(event, source);
|
|
77
79
|
break;
|
|
78
80
|
case "assistant":
|
|
79
81
|
this.handleAssistant(event, source);
|
|
@@ -91,8 +93,11 @@ export class TraceCollector {
|
|
|
91
93
|
|
|
92
94
|
/**
|
|
93
95
|
* @param {object} event
|
|
96
|
+
* @param {string|null} source
|
|
94
97
|
*/
|
|
95
|
-
handleSystem(event) {
|
|
98
|
+
handleSystem(event, source) {
|
|
99
|
+
const { type: _type, ...payload } = event;
|
|
100
|
+
|
|
96
101
|
if (event.subtype === "init") {
|
|
97
102
|
this.metadata = {
|
|
98
103
|
timestamp: event.timestamp ?? this.now(),
|
|
@@ -102,7 +107,16 @@ export class TraceCollector {
|
|
|
102
107
|
tools: event.tools ?? [],
|
|
103
108
|
permissionMode: event.permissionMode ?? null,
|
|
104
109
|
};
|
|
110
|
+
this.initEvent = payload;
|
|
105
111
|
}
|
|
112
|
+
|
|
113
|
+
this.turns.push({
|
|
114
|
+
index: this.turnIndex++,
|
|
115
|
+
role: "system",
|
|
116
|
+
source,
|
|
117
|
+
subtype: event.subtype ?? null,
|
|
118
|
+
data: payload,
|
|
119
|
+
});
|
|
106
120
|
}
|
|
107
121
|
|
|
108
122
|
/**
|
|
@@ -158,6 +172,19 @@ export class TraceCollector {
|
|
|
158
172
|
const contentItems = message.content;
|
|
159
173
|
if (!Array.isArray(contentItems)) return;
|
|
160
174
|
|
|
175
|
+
const textBlocks = contentItems
|
|
176
|
+
.filter((item) => item.type === "text")
|
|
177
|
+
.map((item) => ({ type: "text", text: item.text }));
|
|
178
|
+
|
|
179
|
+
if (textBlocks.length > 0) {
|
|
180
|
+
this.turns.push({
|
|
181
|
+
index: this.turnIndex++,
|
|
182
|
+
role: "user",
|
|
183
|
+
source,
|
|
184
|
+
content: textBlocks,
|
|
185
|
+
});
|
|
186
|
+
}
|
|
187
|
+
|
|
161
188
|
for (const item of contentItems) {
|
|
162
189
|
if (item.type === "tool_result") {
|
|
163
190
|
this.turns.push({
|
|
@@ -204,7 +231,7 @@ export class TraceCollector {
|
|
|
204
231
|
*/
|
|
205
232
|
toJSON() {
|
|
206
233
|
return {
|
|
207
|
-
version: "1.
|
|
234
|
+
version: "1.1.0",
|
|
208
235
|
metadata: this.metadata ?? {
|
|
209
236
|
timestamp: this.now(),
|
|
210
237
|
sessionId: null,
|
|
@@ -213,6 +240,7 @@ export class TraceCollector {
|
|
|
213
240
|
tools: [],
|
|
214
241
|
permissionMode: null,
|
|
215
242
|
},
|
|
243
|
+
initEvent: this.initEvent ?? null,
|
|
216
244
|
turns: this.turns,
|
|
217
245
|
summary: this.result ?? {
|
|
218
246
|
result: "unknown",
|
|
@@ -271,6 +299,27 @@ export class TraceCollector {
|
|
|
271
299
|
withPrefix,
|
|
272
300
|
}),
|
|
273
301
|
);
|
|
302
|
+
} else if (turn.role === "system") {
|
|
303
|
+
const label = turn.subtype ?? "system";
|
|
304
|
+
out.push(
|
|
305
|
+
renderTextLine({
|
|
306
|
+
source: turn.source,
|
|
307
|
+
text: `[${label}]`,
|
|
308
|
+
withPrefix,
|
|
309
|
+
}),
|
|
310
|
+
);
|
|
311
|
+
} else if (turn.role === "user") {
|
|
312
|
+
for (const block of turn.content) {
|
|
313
|
+
if (block.type === "text") {
|
|
314
|
+
out.push(
|
|
315
|
+
renderTextLine({
|
|
316
|
+
source: turn.source,
|
|
317
|
+
text: `[user] ${block.text}`,
|
|
318
|
+
withPrefix,
|
|
319
|
+
}),
|
|
320
|
+
);
|
|
321
|
+
}
|
|
322
|
+
}
|
|
274
323
|
}
|
|
275
324
|
}
|
|
276
325
|
|
package/src/trace-query.js
CHANGED
|
@@ -17,18 +17,90 @@ export class TraceQuery {
|
|
|
17
17
|
}
|
|
18
18
|
|
|
19
19
|
/**
|
|
20
|
-
* High-level overview: metadata, summary, turn count,
|
|
20
|
+
* High-level overview: metadata, summary, turn count, tool frequency,
|
|
21
|
+
* and the first user message text (taskPrompt) when present.
|
|
21
22
|
* @returns {object}
|
|
22
23
|
*/
|
|
23
24
|
overview() {
|
|
25
|
+
const firstUser = this.turns.find((t) => t.role === "user");
|
|
26
|
+
const taskPrompt = firstUser
|
|
27
|
+
? firstUser.content
|
|
28
|
+
.filter((b) => b.type === "text")
|
|
29
|
+
.map((b) => b.text)
|
|
30
|
+
.join("\n")
|
|
31
|
+
: null;
|
|
24
32
|
return {
|
|
25
33
|
metadata: this.metadata,
|
|
26
34
|
summary: this.summary,
|
|
27
35
|
turnCount: this.turns.length,
|
|
28
36
|
tools: this.toolFrequency(),
|
|
37
|
+
taskPrompt,
|
|
29
38
|
};
|
|
30
39
|
}
|
|
31
40
|
|
|
41
|
+
/**
|
|
42
|
+
* Full system/init event — the single most diagnostic message for
|
|
43
|
+
* root-cause analysis. Returns null for traces collected before this
|
|
44
|
+
* field existed.
|
|
45
|
+
* @returns {object|null}
|
|
46
|
+
*/
|
|
47
|
+
init() {
|
|
48
|
+
return this.trace.initEvent ?? null;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Retrieve a single turn by its index.
|
|
53
|
+
* @param {number} index
|
|
54
|
+
* @returns {object|null}
|
|
55
|
+
*/
|
|
56
|
+
turn(index) {
|
|
57
|
+
return this.turns.find((t) => t.index === index) ?? null;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Filter turns by composable structural criteria. All criteria are
|
|
62
|
+
* combined as AND. `tool()` and `errors()` remain as convenience
|
|
63
|
+
* shortcuts for pre-existing workflows.
|
|
64
|
+
*
|
|
65
|
+
* `toolName` matches assistant turns only. Applying `toolName` without
|
|
66
|
+
* `role: "assistant"` still drops every non-assistant turn, because
|
|
67
|
+
* resolving tool_use → tool_result pairs requires the `tool()` method.
|
|
68
|
+
* `isError` matches tool_result turns only. Combining `toolName` with
|
|
69
|
+
* `isError` therefore always returns `[]` (no turn is both assistant
|
|
70
|
+
* and tool_result) — use `tool(name)` for "errors from Bash"–shaped
|
|
71
|
+
* queries.
|
|
72
|
+
*
|
|
73
|
+
* @param {object} [opts]
|
|
74
|
+
* @param {string} [opts.role] - Exact role match (system | user |
|
|
75
|
+
* assistant | tool_result).
|
|
76
|
+
* @param {string} [opts.toolName] - Matches assistant turns with a
|
|
77
|
+
* tool_use block of this name. Drops all non-assistant turns.
|
|
78
|
+
* @param {boolean} [opts.isError] - Matches tool_result turns by
|
|
79
|
+
* `isError` value. Drops all non-tool_result turns.
|
|
80
|
+
* @returns {object[]}
|
|
81
|
+
*/
|
|
82
|
+
filter(opts = {}) {
|
|
83
|
+
const { role, toolName, isError } = opts;
|
|
84
|
+
return this.turns.filter((turn) => {
|
|
85
|
+
if (role !== undefined && turn.role !== role) return false;
|
|
86
|
+
if (isError !== undefined) {
|
|
87
|
+
if (turn.role !== "tool_result") return false;
|
|
88
|
+
if (turn.isError !== isError) return false;
|
|
89
|
+
}
|
|
90
|
+
if (toolName !== undefined) {
|
|
91
|
+
if (turn.role === "assistant") {
|
|
92
|
+
const has = turn.content.some(
|
|
93
|
+
(b) => b.type === "tool_use" && b.name === toolName,
|
|
94
|
+
);
|
|
95
|
+
if (!has) return false;
|
|
96
|
+
} else {
|
|
97
|
+
return false;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return true;
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
|
|
32
104
|
/** @returns {number} */
|
|
33
105
|
count() {
|
|
34
106
|
return this.turns.length;
|
|
@@ -73,16 +145,18 @@ export class TraceQuery {
|
|
|
73
145
|
* @param {object} [opts]
|
|
74
146
|
* @param {number} [opts.context=0] - Number of surrounding turns to include
|
|
75
147
|
* @param {number} [opts.limit=50] - Max results
|
|
148
|
+
* @param {boolean} [opts.full=false] - Emit full content block text in
|
|
149
|
+
* match descriptions instead of the default narrow excerpt window.
|
|
76
150
|
* @returns {object[]} Array of {turn, matches, context?}
|
|
77
151
|
*/
|
|
78
152
|
search(pattern, opts = {}) {
|
|
79
|
-
const { context = 0, limit = 50 } = opts;
|
|
153
|
+
const { context = 0, limit = 50, full = false } = opts;
|
|
80
154
|
// eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
|
|
81
155
|
const re = new RegExp(pattern, "gi");
|
|
82
156
|
const hits = [];
|
|
83
157
|
|
|
84
158
|
for (const turn of this.turns) {
|
|
85
|
-
const matches = matchTurn(turn, re);
|
|
159
|
+
const matches = matchTurn(turn, re, full);
|
|
86
160
|
if (matches.length > 0) {
|
|
87
161
|
const entry = { turn, matches };
|
|
88
162
|
if (context > 0) {
|
|
@@ -273,40 +347,79 @@ export class TraceQuery {
|
|
|
273
347
|
* Search a single turn for regex matches. Returns array of match descriptions.
|
|
274
348
|
* @param {object} turn
|
|
275
349
|
* @param {RegExp} re
|
|
350
|
+
* @param {boolean} [full=false] - Emit full block text instead of an excerpt.
|
|
276
351
|
* @returns {string[]}
|
|
277
352
|
*/
|
|
278
|
-
function matchTurn(turn, re) {
|
|
353
|
+
function matchTurn(turn, re, full = false) {
  // Dispatch on role; roles without a dedicated matcher yield no matches.
  switch (turn.role) {
    case "assistant":
      return matchAssistantTurn(turn, re, full);
    case "tool_result":
      return matchToolResultTurn(turn, re, full);
    case "user":
      return matchUserTurn(turn, re, full);
    default:
      return [];
  }
}
|
|
359
|
+
|
|
360
|
+
function matchAssistantTurn(turn, re, full) {
|
|
279
361
|
const matches = [];
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
if (block.type === "tool_use") {
|
|
287
|
-
if (re.test(block.name)) {
|
|
288
|
-
re.lastIndex = 0;
|
|
289
|
-
matches.push(`tool_name: ${block.name}`);
|
|
290
|
-
}
|
|
291
|
-
const inputStr = JSON.stringify(block.input);
|
|
292
|
-
if (re.test(inputStr)) {
|
|
293
|
-
re.lastIndex = 0;
|
|
294
|
-
matches.push(
|
|
295
|
-
`tool_input(${block.name}): ${excerptAround(inputStr, re)}`,
|
|
296
|
-
);
|
|
297
|
-
}
|
|
298
|
-
}
|
|
362
|
+
for (const block of turn.content) {
|
|
363
|
+
if (block.type === "text") {
|
|
364
|
+
const desc = describeText(block.text, re, "text", full);
|
|
365
|
+
if (desc) matches.push(desc);
|
|
366
|
+
} else if (block.type === "tool_use") {
|
|
367
|
+
matches.push(...matchToolUseBlock(block, re, full));
|
|
299
368
|
}
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
369
|
+
}
|
|
370
|
+
return matches;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
function matchToolUseBlock(block, re, full) {
  const found = [];

  // The tool name is reported verbatim when it matches.
  const nameMatches = re.test(block.name);
  if (nameMatches) {
    re.lastIndex = 0;
    found.push(`tool_name: ${block.name}`);
  }

  // The input is searched in its JSON-serialised form.
  const serialisedInput = JSON.stringify(block.input);
  const label = `tool_input(${block.name})`;
  const inputDescription = describeText(serialisedInput, re, label, full);
  if (inputDescription) {
    found.push(inputDescription);
  }

  return found;
}
|
|
389
|
+
|
|
390
|
+
function matchToolResultTurn(turn, re, full) {
  // Missing content is searched as the empty string.
  const description = describeText(turn.content ?? "", re, "result", full);
  if (!description) return [];
  return [description];
}
|
|
395
|
+
|
|
396
|
+
function matchUserTurn(turn, re, full) {
  const found = [];
  const blocks = turn.content ?? [];
  for (const block of blocks) {
    // Only text blocks are searched; other block types are skipped.
    if (block.type !== "text") continue;
    const description = describeText(block.text, re, "user_text", full);
    if (!description) continue;
    found.push(description);
  }
  return found;
}
|
|
309
406
|
|
|
407
|
+
/**
 * Return a `<prefix>: <text-or-excerpt>` description when `text` matches
 * the regex, or null when it does not. Centralises the full-vs-excerpt
 * choice so each call site just supplies its prefix.
 *
 * The regex is shared across calls and is built with the global flag by
 * `search()`, so `lastIndex` is reset to 0 before testing and again before
 * returning. This keeps the result independent of whatever state an earlier
 * caller (or the excerpt helper) left on the regex.
 *
 * Nullish `text` never matches: without the guard, `RegExp#test` would
 * coerce it to the string "undefined"/"null" and could report a bogus hit.
 *
 * @param {string|null|undefined} text - Text to search.
 * @param {RegExp} re - Pattern to test; its `lastIndex` is left at 0.
 * @param {string} prefix - Label placed before the matched text.
 * @param {boolean} full - When true emit the whole text, otherwise an excerpt.
 * @returns {string|null}
 */
function describeText(text, re, prefix, full) {
  if (text == null) return null;

  // Start every test from the beginning of the text, regardless of prior use.
  re.lastIndex = 0;
  if (!re.test(text)) return null;

  // A successful global test advances lastIndex; rewind before excerpting.
  re.lastIndex = 0;
  const body = full ? text : excerptAround(text, re);

  // Leave the shared regex clean for the next caller.
  re.lastIndex = 0;
  return `${prefix}: ${body}`;
}
|
|
422
|
+
|
|
310
423
|
/**
|
|
311
424
|
* Extract a short excerpt around the first regex match in text.
|
|
312
425
|
* @param {string} text
|