@forwardimpact/libeval 0.1.18 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.18",
3
+ "version": "0.1.19",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -8,6 +8,11 @@
8
8
 
9
9
  const DEFAULT_ALLOWED_TOOLS = ["Bash", "Read", "Glob", "Grep", "Write", "Edit"];
10
10
 
11
+ // fit-eval and kata-action run headless in CI/CD with no human to answer
12
+ // permission prompts. The SDK is always launched in bypass mode — not
13
+ // overridable — so a future caller can't accidentally reduce permissions.
14
+ const PERMISSION_MODE = "bypassPermissions";
15
+
11
16
  function applyDefaults(deps) {
12
17
  return {
13
18
  cwd: deps.cwd,
@@ -16,7 +21,6 @@ function applyDefaults(deps) {
16
21
  model: deps.model ?? "opus",
17
22
  maxTurns: deps.maxTurns ?? 50,
18
23
  allowedTools: deps.allowedTools ?? DEFAULT_ALLOWED_TOOLS,
19
- permissionMode: deps.permissionMode ?? "bypassPermissions",
20
24
  onLine: deps.onLine ?? null,
21
25
  onBatch: deps.onBatch ?? null,
22
26
  batchSize: deps.batchSize ?? 3,
@@ -36,7 +40,6 @@ export class AgentRunner {
36
40
  * @param {string} [deps.model] - Claude model identifier
37
41
  * @param {number} [deps.maxTurns] - Maximum agentic turns
38
42
  * @param {string[]} [deps.allowedTools] - Tools the agent may use
39
- * @param {string} [deps.permissionMode] - SDK permission mode
40
43
  * @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced
41
44
  * @param {function} [deps.onBatch] - Async callback invoked with a batch of NDJSON lines at flush boundaries: every `batchSize` assistant text blocks, the terminal `result` message, and — on iterator crash/abort — once more in a final flush carrying any lines that never reached a boundary. Receives `(lines, { abort })` where calling `abort()` stops the in-flight SDK session via the AbortController. Optional; assignable at runtime so the Supervisor can swap it per turn.
42
45
  * @param {number} [deps.batchSize] - Assistant text-block messages to accumulate before firing onBatch. Tool-only assistant messages ride along without counting. Default 3: the supervisor reviews the agent every three text turns instead of every turn. The terminal `result` always flushes regardless of count.
@@ -72,7 +75,7 @@ export class AgentRunner {
72
75
  allowedTools: this.allowedTools,
73
76
  ...(this.maxTurns > 0 && { maxTurns: this.maxTurns }),
74
77
  model: this.model,
75
- permissionMode: this.permissionMode,
78
+ permissionMode: PERMISSION_MODE,
76
79
  allowDangerouslySkipPermissions: true,
77
80
  settingSources: this.settingSources,
78
81
  abortController,
@@ -102,7 +105,7 @@ export class AgentRunner {
102
105
  prompt,
103
106
  options: {
104
107
  resume: this.sessionId,
105
- permissionMode: this.permissionMode,
108
+ permissionMode: PERMISSION_MODE,
106
109
  allowDangerouslySkipPermissions: true,
107
110
  abortController,
108
111
  ...(this.mcpServers && { mcpServers: this.mcpServers }),
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Line renderer — composes prefix + color + body + reset into a single
3
+ * terminal line. Pure; no side effects.
4
+ *
5
+ * Every renderer returns a `\n`-terminated string:
6
+ * <source>: <ESC><color><body><RESET>\n
7
+ *
8
+ * The `<source>: ` prefix lives outside the color escape so grep and
9
+ * color-stripping terminals preserve the participant tag. Colons separate
10
+ * the source label and the kind label (`Bash:`, `Result:`, `Error:`) for a
11
+ * tighter line on narrow viewports without losing structure.
12
+ */
13
+
14
+ import { colorForSource, ERROR_COLOR, RESET } from "./palette.js";
15
+
16
/**
 * Build the `<source>: ` participant tag that precedes a rendered line.
 *
 * The tag is emitted only when the caller asked for prefixes AND a source
 * name is available; in every other case the result is the empty string.
 *
 * @param {string|null} source - Participant name, or null/empty when unknown
 * @param {boolean} withPrefix - Whether the caller wants source tags at all
 * @returns {string} `"<source>: "` or `""`
 */
function prefix(source, withPrefix) {
  const showTag = Boolean(withPrefix) && Boolean(source);
  return showTag ? `${source}: ` : "";
}
25
+
26
/**
 * Render an assistant text block as one newline-terminated output string.
 *
 * Layout: optional `<source>: ` tag, the source's color escape, the text,
 * the reset escape, then `\n`. The tag sits before the color escape.
 *
 * @param {{source: string|null, text: string, withPrefix: boolean}} args
 * @returns {string}
 */
export function renderTextLine({ source, text, withPrefix }) {
  const tint = colorForSource(source);
  const tag = prefix(source, withPrefix);
  return `${tag}${tint}${text}${RESET}\n`;
}
34
+
35
/**
 * Render a tool invocation as one newline-terminated output string.
 *
 * The body is `<toolName>: <hint>` when a hint is present and just
 * `<toolName>` otherwise; it is wrapped in the source's color and preceded
 * by the optional `<source>: ` tag.
 *
 * @param {{source: string|null, toolName: string, hint: string, withPrefix: boolean}} args
 * @returns {string}
 */
export function renderToolCallLine({ source, toolName, hint, withPrefix }) {
  const tint = colorForSource(source);
  let body = `${toolName}`;
  if (hint) {
    body = `${toolName}: ${hint}`;
  }
  const tag = prefix(source, withPrefix);
  return `${tag}${tint}${body}${RESET}\n`;
}
44
+
45
/**
 * Render a tool result preview as one newline-terminated output string.
 *
 * Failed results use the reserved `ERROR_COLOR` and the `Error` label;
 * successful ones use the source's own color and the `Result` label.
 *
 * @param {{source: string|null, preview: {text: string, isError: boolean}, withPrefix: boolean}} args
 * @returns {string}
 */
export function renderToolResultLine({ source, preview, withPrefix }) {
  let tint;
  let label;
  if (preview.isError) {
    tint = ERROR_COLOR;
    label = "Error";
  } else {
    tint = colorForSource(source);
    label = "Result";
  }
  const tag = prefix(source, withPrefix);
  return `${tag}${tint}${label}: ${preview.text}${RESET}\n`;
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Orchestrator filter — predicate for the orchestrator lifecycle events that
3
+ * should be suppressed from the human-readable log.
4
+ *
5
+ * NDJSON artifacts still carry every orchestrator event; this module only
6
+ * controls what the live `textStream` and offline `toText()` show.
7
+ */
8
+
9
/** Event `type` values that are hidden from human-readable text output. */
const SUPPRESSED = new Set([
  "session_start",
  "agent_start",
  "ask_received",
  "ask_answered",
  "redirect",
  "summary",
]);

/**
 * Decide whether an event should be hidden from text output.
 *
 * Anything that is not a non-null object is never suppressed; an object is
 * suppressed exactly when its `type` is in the `SUPPRESSED` set.
 *
 * @param {{type?: string}|null|undefined} event
 * @returns {boolean} true when the event's type is one we hide from text output
 */
export function isSuppressedOrchestratorEvent(event) {
  if (!event || typeof event !== "object") {
    return false;
  }
  return SUPPRESSED.has(event.type);
}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Palette — pure profile-name → ANSI SGR foreground color function.
3
+ *
4
+ * Assignment is a FNV-1a hash of the source name modulo the palette size, so
5
+ * the same name maps to the same color in every process. Red is reserved for
6
+ * tool-result errors and is never in the palette.
7
+ *
8
+ * Colors use the 24-bit truecolor SGR escape (`ESC[38;2;R;G;Bm`) rather than
9
+ * the 16-color table. GitHub Actions' log viewer and most modern terminals
10
+ * render truecolor as the exact hex requested, avoiding the washed-out
11
+ * mustard/olive tones GHA applies to `ESC[93m` etc. Eight slots cover the
12
+ * largest concurrent cast in any existing workflow (five domain agents plus
13
+ * the facilitator) with headroom.
14
+ */
15
+
16
/** Truecolor SGR foreground escapes available for source names. */
const PALETTE = [
  "\u001b[38;2;79;195;247m", // sky blue #4FC3F7
  "\u001b[38;2;129;199;132m", // bright green #81C784
  "\u001b[38;2;255;202;40m", // amber #FFCA28
  "\u001b[38;2;236;64;122m", // magenta #EC407A
  "\u001b[38;2;38;198;218m", // cyan #26C6DA
  "\u001b[38;2;186;104;200m", // lavender #BA68C8
  "\u001b[38;2;255;167;38m", // orange #FFA726
  "\u001b[38;2;66;165;245m", // blue #42A5F5
];

/** 24-bit SGR foreground code reserved for tool-result errors (#F14C4C). */
export const ERROR_COLOR = "\u001b[38;2;241;76;76m";

/** ANSI SGR reset sequence. */
export const RESET = "\u001b[0m";

/**
 * Map a source name to a stable ANSI foreground color.
 *
 * The name is folded with an FNV-1a style 32-bit hash over its UTF-16 code
 * units, followed by one extra round that mixes in the name's length; the
 * result modulo the palette size picks the slot. The mapping depends only on
 * the name. A missing or empty name yields `RESET`.
 *
 * @param {string|null|undefined} name
 * @returns {string} ANSI SGR escape, never equal to `ERROR_COLOR`
 */
export function colorForSource(name) {
  if (!name) {
    return RESET;
  }

  const FNV_PRIME = 0x01000193;
  let hash = 0x811c9dc5; // FNV-1a 32-bit offset basis

  // One FNV-1a round: xor the value in, multiply by the prime, keep 32 bits.
  const mix = (value) => {
    hash = Math.imul(hash ^ value, FNV_PRIME) >>> 0;
  };

  let index = 0;
  while (index < name.length) {
    mix(name.charCodeAt(index));
    index += 1;
  }
  // Extra round with the length so names differing mainly in length diverge.
  mix(name.length);

  return PALETTE[hash % PALETTE.length];
}

/**
 * Report how many colors the palette holds.
 * @returns {number}
 */
export function paletteSize() {
  return PALETTE.length;
}
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Tool hints — pure one-line formatters for tool-call arguments and
3
+ * tool-result previews.
4
+ *
5
+ * `hintForCall(name, input)` renders the human-meaningful field for each
6
+ * tool (file path, command, pattern, …) sanitized to strip JSON punctuation
7
+ * (`{`, `}`, `"`) and collapsed to a single line ≤ 80 chars.
8
+ *
9
+ * `previewForResult(content, isError)` collapses a tool result to a single
10
+ * line ≤ 80 chars and flags errors so the renderer can apply the reserved
11
+ * error color and the `Error:` label.
12
+ */
13
+
14
/** Upper bound, in UTF-16 code units, for any hint or preview string. */
const MAX_HINT_CHARS = 80;

/**
 * Reduce an arbitrary value to a short single-line string.
 *
 * Steps, in order: null/undefined become `""`; the value is stringified;
 * only the text before the first line break is kept; `{`, `}` and `"` are
 * removed; whitespace runs collapse to one space and the ends are trimmed;
 * anything longer than MAX_HINT_CHARS is cut and suffixed with `...` so the
 * total stays at MAX_HINT_CHARS.
 *
 * @param {unknown} raw
 * @returns {string} Always a string, never null/undefined
 */
function sanitize(raw) {
  if (raw == null) {
    return "";
  }
  const [head = ""] = String(raw).split(/\r?\n/);
  const cleaned = head
    .replace(/[{}"]/g, "")
    .replace(/\s+/g, " ")
    .trim();
  return cleaned.length > MAX_HINT_CHARS
    ? `${cleaned.slice(0, MAX_HINT_CHARS - 3)}...`
    : cleaned;
}
32
+
33
/**
 * Cap a string at MAX_HINT_CHARS.
 *
 * Strings within the limit are returned untouched; longer ones are cut to
 * `MAX_HINT_CHARS - 3` characters and given a `...` suffix. Used by handlers
 * that join several sanitized pieces and only then check the total length.
 *
 * @param {string} str
 * @returns {string}
 */
function truncate(str) {
  if (str.length > MAX_HINT_CHARS) {
    return `${str.slice(0, MAX_HINT_CHARS - 3)}...`;
  }
  return str;
}
45
+
46
/**
 * Per-tool hint handlers, keyed by exact tool name. Each handler receives
 * the tool's input object (callers pass `{}` rather than null) and returns
 * the hint string. Kept as a flat table so adding a new tool is one entry,
 * not a new branch in a growing switch.
 *
 * The table has a null prototype, so only the names listed here resolve to
 * a handler; inherited keys such as `constructor` or `toString` look up as
 * `undefined` instead of an `Object.prototype` member.
 */
const HINT_HANDLERS = Object.assign(Object.create(null), {
  Bash: (i) => sanitize(i.command),
  Read: (i) => sanitize(i.file_path),
  Write: (i) => sanitize(i.file_path),
  Edit: (i) => {
    const base = sanitize(i.file_path);
    // Use the shared truncation helper so an overflowing hint ends with an
    // ellipsis (as the Grep handler's does) instead of being cut mid-word.
    return i.replace_all ? truncate(`${base} (replace_all)`) : base;
  },
  Glob: (i) => sanitize(i.pattern),
  Grep: (i) => {
    const pattern = sanitize(i.pattern);
    return i.path ? truncate(`${pattern} in ${sanitize(i.path)}`) : pattern;
  },
  WebFetch: (i) => sanitize(i.url),
  WebSearch: (i) => sanitize(i.query),
  ToolSearch: (i) => sanitize(i.query),
  TodoWrite: (i) => {
    // Only the number of todos is shown; a non-array value counts as zero.
    const count = Array.isArray(i.todos) ? i.todos.length : 0;
    return `${count} todos`;
  },
  NotebookEdit: (i) => sanitize(i.notebook_path),
  Skill: (i) => sanitize(i.skill),
  // Agent/Task prefer the prompt and fall back to the description.
  Agent: (i) => sanitize(i.prompt ?? i.description),
  Task: (i) => sanitize(i.prompt ?? i.description),
});
78
+
79
/**
 * Strip the `mcp__<server>__` prefix from MCP-namespaced tool names so logs
 * show the bare method (e.g. `mcp__orchestration__Tell` → `Tell`).
 *
 * Pass-through cases (returned unchanged):
 *   - names that do not start with `mcp__`;
 *   - `mcp__` names with no second `__` separator (e.g. `mcp__x`);
 *   - `mcp__` names whose method part is empty (e.g. `mcp__server__`), so a
 *     malformed name never collapses to a blank tool label.
 *
 * Falsy input yields `""`. A truthy non-string is stringified rather than
 * throwing, so one bad trace record cannot break rendering.
 *
 * @param {string} name
 * @returns {string}
 */
export function simplifyToolName(name) {
  if (!name) return "";
  if (typeof name !== "string") return String(name);
  if (!name.startsWith("mcp__")) return name;

  // parts[0] is "mcp", parts[1] the server; everything after is the method,
  // which may itself contain "__" and is re-joined verbatim.
  const [, , ...methodParts] = name.split("__");
  const method = methodParts.join("__");
  return method === "" ? name : method;
}
93
+
94
/**
 * Hint for MCP-namespaced tool names (those starting with `mcp__`).
 *
 * Orchestration calls (`mcp__orchestration__…`) get `to <x>` and/or
 * `from <y>` decorators built from the truthy `to` / `from` fields of the
 * input, joined by a space and truncated. Any other `mcp__` name gets an
 * empty hint. Names without the `mcp__` prefix return null so the caller
 * can tell "not an MCP tool" apart from "MCP tool with nothing to show".
 *
 * @param {string} name
 * @param {object} input
 * @returns {string|null}
 */
function hintForMcp(name, input) {
  if (!name.startsWith("mcp__")) {
    return null;
  }
  if (!name.startsWith("mcp__orchestration__")) {
    return "";
  }
  const decorators = ["to", "from"]
    .filter((field) => input[field])
    .map((field) => `${field} ${sanitize(input[field])}`);
  return truncate(decorators.join(" "));
}
115
+
116
/**
 * Map a tool name and input to a one-line human hint.
 *
 * Resolution order:
 *   1. an entry in `HINT_HANDLERS` registered under exactly this name;
 *   2. the MCP path (`hintForMcp`) for `mcp__…` names;
 *   3. otherwise `""` — the caller still shows the tool name, just without
 *      extra detail.
 *
 * Handlers are resolved as OWN properties of the table only. Tool names
 * come from trace data, so a name such as `constructor`, `toString` or
 * `__proto__` must not resolve to an inherited `Object.prototype` member
 * (which would return a non-hint value or throw).
 *
 * @param {string} name - Tool name (e.g. "Bash", "Read", "mcp__orchestration__Tell")
 * @param {object|null|undefined} input - Raw tool input object from the trace
 * @returns {string} One-line hint, or "" when no rule matches
 */
export function hintForCall(name, input) {
  // Missing or non-string names have no hint; bailing out here also keeps
  // the string-only MCP check below from throwing.
  if (!name || typeof name !== "string") return "";
  const safeInput = input && typeof input === "object" ? input : {};

  const handler = Object.prototype.hasOwnProperty.call(HINT_HANDLERS, name)
    ? HINT_HANDLERS[name]
    : undefined;
  if (typeof handler === "function") return handler(safeInput);

  // hintForMcp returns null for non-MCP names; treat that as "no hint".
  return hintForMcp(name, safeInput) ?? "";
}
140
+
141
/**
 * Convert tool-result content to text without ever throwing.
 *
 * null/undefined become `""`, strings pass through, everything else is
 * JSON-encoded. `JSON.stringify` returns `undefined` for functions/symbols
 * and throws on circular structures or BigInt values; both cases fall back
 * to a plain string form so one odd result cannot break rendering.
 *
 * @param {unknown} content
 * @returns {string}
 */
function stringifyResultContent(content) {
  if (content === null || content === undefined) return "";
  if (typeof content === "string") return content;
  try {
    const json = JSON.stringify(content);
    return typeof json === "string" ? json : String(content);
  } catch {
    return Object.prototype.toString.call(content);
  }
}

/**
 * Return the first line containing non-whitespace characters, trimmed, or
 * `""` when every line is blank.
 *
 * @param {string} text
 * @returns {string}
 */
function firstNonBlankLine(text) {
  for (const line of text.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (trimmed.length > 0) return trimmed;
  }
  return "";
}

/**
 * Render a tool result as a single preview line plus an `isError` flag.
 * The flag lets the line-renderer pick the reserved error color without
 * re-inspecting the content.
 *
 * The preview is the first non-blank line of the content, truncated with
 * the shared `truncate` helper. When there is no such line the text is
 * `(no output)` for errors and `(ok)` for successes.
 *
 * @param {string|object|null|undefined} content - Tool result content
 * @param {boolean} isError - Whether the tool call failed
 * @returns {{text: string, isError: boolean}}
 */
export function previewForResult(content, isError) {
  const firstNonBlank = firstNonBlankLine(stringifyResultContent(content));

  if (isError) {
    return { text: truncate(firstNonBlank || "(no output)"), isError: true };
  }
  if (!firstNonBlank) return { text: "(ok)", isError: false };
  return { text: truncate(firstNonBlank), isError: false };
}
package/src/tee-writer.js CHANGED
@@ -7,11 +7,27 @@
7
7
  * parameter controls display formatting: multi-participant modes show
8
8
  * source labels on content lines.
9
9
  *
10
+ * Human text rendering is delegated to the pure modules under `./render/`
11
+ * so the live stream and the offline `TraceCollector.toText()` replay share
12
+ * one formatting path (spec 540). The NDJSON going to `fileStream` is
13
+ * untouched — only what reaches `textStream` changes.
14
+ *
10
15
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
11
16
  */
12
17
 
13
18
  import { Writable } from "node:stream";
14
19
  import { TraceCollector } from "./trace-collector.js";
20
+ import {
21
+ renderTextLine,
22
+ renderToolCallLine,
23
+ renderToolResultLine,
24
+ } from "./render/line-renderer.js";
25
+ import {
26
+ hintForCall,
27
+ previewForResult,
28
+ simplifyToolName,
29
+ } from "./render/tool-hints.js";
30
+ import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
15
31
 
16
32
  export class TeeWriter extends Writable {
17
33
  /**
@@ -29,8 +45,6 @@ export class TeeWriter extends Writable {
29
45
  this.mode = mode ?? "raw";
30
46
  this.collector = new TraceCollector();
31
47
  this.turnsEmitted = 0;
32
- this.lastSource = null;
33
- this.partial = "";
34
48
  }
35
49
 
36
50
  /**
@@ -39,7 +53,7 @@ export class TeeWriter extends Writable {
39
53
  * @param {function} callback
40
54
  */
41
55
  _write(chunk, encoding, callback) {
42
- const str = this.partial + chunk.toString();
56
+ const str = (this.partial ?? "") + chunk.toString();
43
57
  const lines = str.split("\n");
44
58
  this.partial = lines.pop() ?? "";
45
59
 
@@ -55,16 +69,25 @@ export class TeeWriter extends Writable {
55
69
  * @param {function} callback
56
70
  */
57
71
  _final(callback) {
58
- if (this.partial.trim()) {
72
+ if (this.partial && this.partial.trim()) {
59
73
  this.fileStream.write(this.partial + "\n");
60
74
  this.processLine(this.partial);
61
75
  }
62
76
 
63
- if (this.mode === "raw" && this.collector.result) {
77
+ // Emit the trailing `--- Result: ... ---` footer — the one summary line
78
+ // humans want (spec 540). This is the same tail TraceCollector.toText()
79
+ // appends, so the live stream and the offline replay stay in sync
80
+ // (spec 540 criterion #6). The superseded `--- Evaluation ... ---`
81
+ // footer is gone in every mode.
82
+ if (this.collector.result) {
64
83
  const text = this.collector.toText();
65
84
  const idx = text.lastIndexOf("\n---");
66
85
  if (idx !== -1) {
67
- this.textStream.write(text.slice(idx) + "\n");
86
+ // Slice past the leading `\n` — the previously-streamed body
87
+ // already ended with its own newline, so re-emitting `\n---` here
88
+ // would produce a blank line before the footer and desync from
89
+ // the offline replay (spec 540 #6).
90
+ this.textStream.write(text.slice(idx + 1) + "\n");
68
91
  }
69
92
  }
70
93
 
@@ -85,19 +108,15 @@ export class TeeWriter extends Writable {
85
108
 
86
109
  // Universal envelope: { source, seq, event }
87
110
  if (parsed.event) {
88
- // Orchestrator summary event
89
- if (parsed.source === "orchestrator" && parsed.event.type === "summary") {
90
- const status = parsed.event.success ? "completed" : "incomplete";
91
- this.textStream.write(
92
- `\n--- Evaluation ${status} after ${parsed.event.turns} turns ---\n`,
93
- );
111
+ // Orchestrator lifecycle events are suppressed from the text stream
112
+ // entirely (spec 540). They still reached fileStream above.
113
+ if (
114
+ parsed.source === "orchestrator" &&
115
+ isSuppressedOrchestratorEvent(parsed.event)
116
+ ) {
94
117
  return;
95
118
  }
96
-
97
- if (parsed.source && parsed.source !== this.lastSource) {
98
- this.lastSource = parsed.source;
99
- }
100
- this.collector.addLine(JSON.stringify(parsed.event));
119
+ this.collector.addLine(line);
101
120
  this.flushTurns();
102
121
  return;
103
122
  }
@@ -112,38 +131,43 @@ export class TeeWriter extends Writable {
112
131
  */
113
132
  flushTurns() {
114
133
  const turns = this.collector.turns;
115
- const prefix =
116
- this.mode === "supervised" && this.lastSource
117
- ? `[${this.lastSource}] `
118
- : "";
134
+ const withPrefix = this.mode !== "raw";
119
135
  while (this.turnsEmitted < turns.length) {
120
136
  const turn = turns[this.turnsEmitted++];
121
137
  if (turn.role === "assistant") {
122
138
  for (const block of turn.content) {
123
139
  if (block.type === "text") {
124
- this.textStream.write(`${prefix}${block.text}\n`);
140
+ this.textStream.write(
141
+ renderTextLine({
142
+ source: turn.source,
143
+ text: block.text,
144
+ withPrefix,
145
+ }),
146
+ );
125
147
  } else if (block.type === "tool_use") {
126
- const input = summarizeInput(block.input);
127
- this.textStream.write(`${prefix}> Tool: ${block.name} ${input}\n`);
148
+ this.textStream.write(
149
+ renderToolCallLine({
150
+ source: turn.source,
151
+ toolName: simplifyToolName(block.name),
152
+ hint: hintForCall(block.name, block.input),
153
+ withPrefix,
154
+ }),
155
+ );
128
156
  }
129
157
  }
158
+ } else if (turn.role === "tool_result") {
159
+ this.textStream.write(
160
+ renderToolResultLine({
161
+ source: turn.source,
162
+ preview: previewForResult(turn.content, turn.isError),
163
+ withPrefix,
164
+ }),
165
+ );
130
166
  }
131
167
  }
132
168
  }
133
169
  }
134
170
 
135
- /**
136
- * Summarize tool input for text display, truncated to keep logs readable.
137
- * @param {object} input - Tool input object
138
- * @returns {string} Truncated summary
139
- */
140
- function summarizeInput(input) {
141
- if (!input || typeof input !== "object") return "";
142
- const json = JSON.stringify(input);
143
- if (json.length <= 200) return json;
144
- return json.slice(0, 197) + "...";
145
- }
146
-
147
171
  /**
148
172
  * Factory function — wires a TeeWriter with the given streams.
149
173
  * @param {object} deps - Same as TeeWriter constructor
@@ -3,7 +3,24 @@
3
3
  *
4
4
  * Accepts one NDJSON line at a time via addLine(), then produces either a
5
5
  * structured JSON trace (toJSON) or human-readable text (toText).
6
+ *
7
+ * Human text rendering is delegated to the pure modules under `./render/`
8
+ * so the live `TeeWriter` stream and the offline `toText()` replay share
9
+ * one formatting path (spec 540).
6
10
  */
11
+
12
+ import {
13
+ renderTextLine,
14
+ renderToolCallLine,
15
+ renderToolResultLine,
16
+ } from "./render/line-renderer.js";
17
+ import {
18
+ hintForCall,
19
+ previewForResult,
20
+ simplifyToolName,
21
+ } from "./render/tool-hints.js";
22
+ import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
23
+
7
24
  export class TraceCollector {
8
25
  /**
9
26
  * @param {object} [deps]
@@ -38,22 +55,31 @@ export class TraceCollector {
38
55
  return;
39
56
  }
40
57
 
41
- // Unwrap combined supervised trace format {source, turn, event}.
42
- // The Supervisor emits this wrapper; when replayed through addLine the
43
- // inner event is the one we need.
58
+ // Unwrap combined supervised trace format {source, seq, event}. The
59
+ // Supervisor / Facilitator emits this wrapper; when replayed through
60
+ // addLine the inner event is the one we care about. Carry the envelope
61
+ // `source` onto each new turn so the renderer can color it correctly.
62
+ let source = null;
44
63
  if (event.event && !event.type && typeof event.source === "string") {
64
+ source = event.source;
45
65
  event = event.event;
46
66
  }
47
67
 
68
+ // Orchestrator lifecycle events carry no content and are suppressed
69
+ // from turns entirely — the NDJSON artifact keeps them separately.
70
+ if (source === "orchestrator" && isSuppressedOrchestratorEvent(event)) {
71
+ return;
72
+ }
73
+
48
74
  switch (event.type) {
49
75
  case "system":
50
76
  this.handleSystem(event);
51
77
  break;
52
78
  case "assistant":
53
- this.handleAssistant(event);
79
+ this.handleAssistant(event, source);
54
80
  break;
55
81
  case "user":
56
- this.handleUser(event);
82
+ this.handleUser(event, source);
57
83
  break;
58
84
  case "result":
59
85
  this.handleResult(event);
@@ -81,8 +107,9 @@ export class TraceCollector {
81
107
 
82
108
  /**
83
109
  * @param {object} event
110
+ * @param {string|null} source
84
111
  */
85
- handleAssistant(event) {
112
+ handleAssistant(event, source) {
86
113
  const message = event.message;
87
114
  if (!message) return;
88
115
 
@@ -114,6 +141,7 @@ export class TraceCollector {
114
141
  this.turns.push({
115
142
  index: this.turnIndex++,
116
143
  role: "assistant",
144
+ source,
117
145
  content,
118
146
  usage,
119
147
  });
@@ -121,8 +149,9 @@ export class TraceCollector {
121
149
 
122
150
  /**
123
151
  * @param {object} event
152
+ * @param {string|null} source
124
153
  */
125
- handleUser(event) {
154
+ handleUser(event, source) {
126
155
  const message = event.message;
127
156
  if (!message) return;
128
157
 
@@ -134,6 +163,7 @@ export class TraceCollector {
134
163
  this.turns.push({
135
164
  index: this.turnIndex++,
136
165
  role: "tool_result",
166
+ source,
137
167
  toolUseId: item.tool_use_id ?? null,
138
168
  content:
139
169
  typeof item.content === "string"
@@ -197,50 +227,73 @@ export class TraceCollector {
197
227
  }
198
228
 
199
229
  /**
200
- * Return human-readable text for workflow logs.
201
- * @returns {string} Formatted text output
230
+ * Render the accumulated turns as human-readable text the same path the
231
+ * live `TeeWriter` stream uses, so `fit-eval output --format=text` over a
232
+ * captured trace reproduces what the live workflow log showed.
233
+ *
234
+ * Source prefixes are emitted whenever at least one turn has a non-null
235
+ * source (supervised / facilitated traces). A pure `run` trace has no
236
+ * envelope, all turn sources are null, and the renderer drops the prefix.
237
+ *
238
+ * @returns {string} Formatted text output including ANSI escapes
202
239
  */
203
240
  toText() {
204
- const lines = [];
241
+ const withPrefix = this.turns.some((t) => t.source);
242
+ const out = [];
205
243
 
206
244
  for (const turn of this.turns) {
207
245
  if (turn.role === "assistant") {
208
246
  for (const block of turn.content) {
209
247
  if (block.type === "text") {
210
- lines.push(block.text);
248
+ out.push(
249
+ renderTextLine({
250
+ source: turn.source,
251
+ text: block.text,
252
+ withPrefix,
253
+ }),
254
+ );
211
255
  } else if (block.type === "tool_use") {
212
- const inputSummary = summarizeInput(block.input);
213
- lines.push(`> Tool: ${block.name} ${inputSummary}`);
256
+ out.push(
257
+ renderToolCallLine({
258
+ source: turn.source,
259
+ toolName: simplifyToolName(block.name),
260
+ hint: hintForCall(block.name, block.input),
261
+ withPrefix,
262
+ }),
263
+ );
214
264
  }
215
265
  }
266
+ } else if (turn.role === "tool_result") {
267
+ out.push(
268
+ renderToolResultLine({
269
+ source: turn.source,
270
+ preview: previewForResult(turn.content, turn.isError),
271
+ withPrefix,
272
+ }),
273
+ );
216
274
  }
217
275
  }
218
276
 
277
+ // Trailing result block — the one summary line humans want (spec 540).
278
+ let tail = "";
219
279
  if (this.result) {
220
280
  const duration = formatDuration(this.result.durationMs);
221
281
  const cost = Number(this.result.totalCostUsd).toFixed(4);
222
- lines.push("");
223
- lines.push(
224
- `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`,
225
- );
282
+ tail =
283
+ "\n" +
284
+ `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`;
226
285
  }
227
286
 
228
- return lines.join("\n");
287
+ // Each rendered line already ends with `\n`; concatenate, drop the
288
+ // trailing newline, then append the tail so the output shape stays
289
+ // compatible with existing consumers (no double-blank line before
290
+ // the result footer when there are turns, no leading blank when there
291
+ // are not).
292
+ const body = out.join("").replace(/\n$/, "");
293
+ return body + tail;
229
294
  }
230
295
  }
231
296
 
232
- /**
233
- * Summarize tool input for text display, truncated to keep logs readable.
234
- * @param {object} input - Tool input object
235
- * @returns {string} Truncated summary
236
- */
237
- function summarizeInput(input) {
238
- if (!input || typeof input !== "object") return "";
239
- const json = JSON.stringify(input);
240
- if (json.length <= 200) return json;
241
- return json.slice(0, 197) + "...";
242
- }
243
-
244
297
  /**
245
298
  * Format milliseconds into a human-readable duration.
246
299
  * @param {number} ms - Duration in milliseconds