@forwardimpact/libeval 0.1.17 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-trace.js +3 -1
- package/package.json +1 -1
- package/src/agent-runner.js +7 -7
- package/src/commands/run.js +8 -1
- package/src/commands/trace.js +12 -4
- package/src/facilitator.js +24 -13
- package/src/index.js +1 -0
- package/src/profile-prompt.js +41 -0
- package/src/render/line-renderer.js +54 -0
- package/src/render/orchestrator-filter.js +26 -0
- package/src/render/palette.js +63 -0
- package/src/render/tool-hints.js +185 -0
- package/src/supervisor.js +19 -14
- package/src/tee-writer.js +60 -36
- package/src/trace-collector.js +83 -30
- package/src/trace-github.js +17 -8
package/bin/fit-trace.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import { readFileSync } from "node:fs";
|
|
4
4
|
import { createCli } from "@forwardimpact/libcli";
|
|
5
|
+
import { createScriptConfig } from "@forwardimpact/libconfig";
|
|
5
6
|
import { createLogger } from "@forwardimpact/libtelemetry";
|
|
6
7
|
|
|
7
8
|
import {
|
|
@@ -188,7 +189,8 @@ async function main() {
|
|
|
188
189
|
process.exit(2);
|
|
189
190
|
}
|
|
190
191
|
|
|
191
|
-
await
|
|
192
|
+
const config = await createScriptConfig("eval");
|
|
193
|
+
await handler(values, args, { config });
|
|
192
194
|
}
|
|
193
195
|
|
|
194
196
|
main().catch((error) => {
|
package/package.json
CHANGED
package/src/agent-runner.js
CHANGED
|
@@ -8,6 +8,11 @@
|
|
|
8
8
|
|
|
9
9
|
const DEFAULT_ALLOWED_TOOLS = ["Bash", "Read", "Glob", "Grep", "Write", "Edit"];
|
|
10
10
|
|
|
11
|
+
// fit-eval and kata-action run headless in CI/CD with no human to answer
|
|
12
|
+
// permission prompts. The SDK is always launched in bypass mode — not
|
|
13
|
+
// overridable — so a future caller can't accidentally reduce permissions.
|
|
14
|
+
const PERMISSION_MODE = "bypassPermissions";
|
|
15
|
+
|
|
11
16
|
function applyDefaults(deps) {
|
|
12
17
|
return {
|
|
13
18
|
cwd: deps.cwd,
|
|
@@ -16,12 +21,10 @@ function applyDefaults(deps) {
|
|
|
16
21
|
model: deps.model ?? "opus",
|
|
17
22
|
maxTurns: deps.maxTurns ?? 50,
|
|
18
23
|
allowedTools: deps.allowedTools ?? DEFAULT_ALLOWED_TOOLS,
|
|
19
|
-
permissionMode: deps.permissionMode ?? "bypassPermissions",
|
|
20
24
|
onLine: deps.onLine ?? null,
|
|
21
25
|
onBatch: deps.onBatch ?? null,
|
|
22
26
|
batchSize: deps.batchSize ?? 3,
|
|
23
27
|
settingSources: deps.settingSources ?? [],
|
|
24
|
-
agentProfile: deps.agentProfile ?? null,
|
|
25
28
|
systemPrompt: deps.systemPrompt ?? null,
|
|
26
29
|
disallowedTools: deps.disallowedTools ?? [],
|
|
27
30
|
mcpServers: deps.mcpServers ?? null,
|
|
@@ -37,12 +40,10 @@ export class AgentRunner {
|
|
|
37
40
|
* @param {string} [deps.model] - Claude model identifier
|
|
38
41
|
* @param {number} [deps.maxTurns] - Maximum agentic turns
|
|
39
42
|
* @param {string[]} [deps.allowedTools] - Tools the agent may use
|
|
40
|
-
* @param {string} [deps.permissionMode] - SDK permission mode
|
|
41
43
|
* @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced
|
|
42
44
|
* @param {function} [deps.onBatch] - Async callback invoked with a batch of NDJSON lines at flush boundaries: every `batchSize` assistant text blocks, the terminal `result` message, and — on iterator crash/abort — once more in a final flush carrying any lines that never reached a boundary. Receives `(lines, { abort })` where calling `abort()` stops the in-flight SDK session via the AbortController. Optional; assignable at runtime so the Supervisor can swap it per turn.
|
|
43
45
|
* @param {number} [deps.batchSize] - Assistant text-block messages to accumulate before firing onBatch. Tool-only assistant messages ride along without counting. Default 3: the supervisor reviews the agent every three text turns instead of every turn. The terminal `result` always flushes regardless of count.
|
|
44
46
|
* @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
|
|
45
|
-
* @param {string} [deps.agentProfile] - Agent profile name to pass as --agent to the Claude CLI
|
|
46
47
|
* @param {string|object} [deps.systemPrompt] - SDK system prompt (string replaces default; {type:'preset', preset:'claude_code', append} appends)
|
|
47
48
|
* @param {string[]} [deps.disallowedTools] - Tools to explicitly remove from the model's context
|
|
48
49
|
* @param {Record<string, object>} [deps.mcpServers] - MCP server configs to pass to the SDK query
|
|
@@ -74,7 +75,7 @@ export class AgentRunner {
|
|
|
74
75
|
allowedTools: this.allowedTools,
|
|
75
76
|
...(this.maxTurns > 0 && { maxTurns: this.maxTurns }),
|
|
76
77
|
model: this.model,
|
|
77
|
-
permissionMode:
|
|
78
|
+
permissionMode: PERMISSION_MODE,
|
|
78
79
|
allowDangerouslySkipPermissions: true,
|
|
79
80
|
settingSources: this.settingSources,
|
|
80
81
|
abortController,
|
|
@@ -82,7 +83,6 @@ export class AgentRunner {
|
|
|
82
83
|
disallowedTools: this.disallowedTools,
|
|
83
84
|
}),
|
|
84
85
|
...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
|
|
85
|
-
...(this.agentProfile && { extraArgs: { agent: this.agentProfile } }),
|
|
86
86
|
...(this.mcpServers && { mcpServers: this.mcpServers }),
|
|
87
87
|
},
|
|
88
88
|
});
|
|
@@ -105,7 +105,7 @@ export class AgentRunner {
|
|
|
105
105
|
prompt,
|
|
106
106
|
options: {
|
|
107
107
|
resume: this.sessionId,
|
|
108
|
-
permissionMode:
|
|
108
|
+
permissionMode: PERMISSION_MODE,
|
|
109
109
|
allowDangerouslySkipPermissions: true,
|
|
110
110
|
abortController,
|
|
111
111
|
...(this.mcpServers && { mcpServers: this.mcpServers }),
|
package/src/commands/run.js
CHANGED
|
@@ -2,6 +2,7 @@ import { readFileSync, createWriteStream } from "node:fs";
|
|
|
2
2
|
import { Writable } from "node:stream";
|
|
3
3
|
import { resolve } from "node:path";
|
|
4
4
|
import { createAgentRunner } from "../agent-runner.js";
|
|
5
|
+
import { composeProfilePrompt } from "../profile-prompt.js";
|
|
5
6
|
import { createTeeWriter } from "../tee-writer.js";
|
|
6
7
|
import { SequenceCounter } from "../sequence-counter.js";
|
|
7
8
|
|
|
@@ -76,6 +77,12 @@ export async function runRunCommand(values, _args) {
|
|
|
76
77
|
);
|
|
77
78
|
};
|
|
78
79
|
|
|
80
|
+
const systemPrompt = agentProfile
|
|
81
|
+
? composeProfilePrompt(agentProfile, {
|
|
82
|
+
profilesDir: resolve(cwd, ".claude/agents"),
|
|
83
|
+
})
|
|
84
|
+
: undefined;
|
|
85
|
+
|
|
79
86
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
80
87
|
const runner = createAgentRunner({
|
|
81
88
|
cwd,
|
|
@@ -86,7 +93,7 @@ export async function runRunCommand(values, _args) {
|
|
|
86
93
|
allowedTools,
|
|
87
94
|
onLine,
|
|
88
95
|
settingSources: ["project"],
|
|
89
|
-
|
|
96
|
+
systemPrompt,
|
|
90
97
|
});
|
|
91
98
|
|
|
92
99
|
const result = await runner.run(taskContent);
|
package/src/commands/trace.js
CHANGED
|
@@ -10,9 +10,13 @@ import { createTraceGitHub } from "../trace-github.js";
|
|
|
10
10
|
* List recent workflow runs matching a pattern.
|
|
11
11
|
* @param {object} values - Parsed option values
|
|
12
12
|
* @param {string[]} args - [pattern?]
|
|
13
|
+
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
13
14
|
*/
|
|
14
|
-
export async function runRunsCommand(values, args) {
|
|
15
|
-
const gh = await createTraceGitHub({
|
|
15
|
+
export async function runRunsCommand(values, args, ctx) {
|
|
16
|
+
const gh = await createTraceGitHub({
|
|
17
|
+
token: ctx.config.ghToken(),
|
|
18
|
+
repo: values.repo,
|
|
19
|
+
});
|
|
16
20
|
const pattern = args[0] ?? "agent";
|
|
17
21
|
const lookback = values.lookback ?? "7d";
|
|
18
22
|
const runs = await gh.listRuns({ pattern, lookback });
|
|
@@ -23,9 +27,13 @@ export async function runRunsCommand(values, args) {
|
|
|
23
27
|
* Download a trace artifact and auto-convert to structured JSON.
|
|
24
28
|
* @param {object} values - Parsed option values
|
|
25
29
|
* @param {string[]} args - [run-id]
|
|
30
|
+
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
26
31
|
*/
|
|
27
|
-
export async function runDownloadCommand(values, args) {
|
|
28
|
-
const gh = await createTraceGitHub({
|
|
32
|
+
export async function runDownloadCommand(values, args, ctx) {
|
|
33
|
+
const gh = await createTraceGitHub({
|
|
34
|
+
token: ctx.config.ghToken(),
|
|
35
|
+
repo: values.repo,
|
|
36
|
+
});
|
|
29
37
|
const result = await gh.downloadTrace(args[0], {
|
|
30
38
|
dir: values.dir,
|
|
31
39
|
name: values.artifact,
|
package/src/facilitator.js
CHANGED
|
@@ -7,7 +7,9 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import { Writable } from "node:stream";
|
|
10
|
+
import { resolve } from "node:path";
|
|
10
11
|
import { createAgentRunner } from "./agent-runner.js";
|
|
12
|
+
import { composeProfilePrompt } from "./profile-prompt.js";
|
|
11
13
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
12
14
|
import { createMessageBus } from "./message-bus.js";
|
|
13
15
|
import {
|
|
@@ -415,7 +417,8 @@ const devNull = new Writable({
|
|
|
415
417
|
* @param {import("stream").Writable} deps.output
|
|
416
418
|
* @param {string} [deps.model]
|
|
417
419
|
* @param {number} [deps.maxTurns]
|
|
418
|
-
* @param {string} [deps.facilitatorProfile]
|
|
420
|
+
* @param {string} [deps.facilitatorProfile] - Facilitator profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
421
|
+
* @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<facilitatorCwd>/.claude/agents`. Resolved once from the facilitator's cwd so profiles travel with the project, not with per-agent sandboxes.
|
|
419
422
|
* @returns {Facilitator}
|
|
420
423
|
*/
|
|
421
424
|
export function createFacilitator({
|
|
@@ -426,7 +429,19 @@ export function createFacilitator({
|
|
|
426
429
|
model,
|
|
427
430
|
maxTurns,
|
|
428
431
|
facilitatorProfile,
|
|
432
|
+
profilesDir,
|
|
429
433
|
}) {
|
|
434
|
+
const resolvedProfilesDir =
|
|
435
|
+
profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
|
|
436
|
+
const systemPromptFor = (profile, trailer) => {
|
|
437
|
+
if (!trailer) throw new Error("trailer is required");
|
|
438
|
+
return profile
|
|
439
|
+
? composeProfilePrompt(profile, {
|
|
440
|
+
profilesDir: resolvedProfilesDir,
|
|
441
|
+
trailer,
|
|
442
|
+
})
|
|
443
|
+
: { type: "preset", preset: "claude_code", append: trailer };
|
|
444
|
+
};
|
|
430
445
|
const ctx = createOrchestrationContext();
|
|
431
446
|
const messageBus = createMessageBus({
|
|
432
447
|
participants: ["facilitator", ...agentConfigs.map((a) => a.name)],
|
|
@@ -471,12 +486,10 @@ export function createFacilitator({
|
|
|
471
486
|
onLine: (line) => facilitator.emitLine(config.name, line),
|
|
472
487
|
mcpServers: { orchestration: agentServer },
|
|
473
488
|
settingSources: ["project"],
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
append: FACILITATED_AGENT_SYSTEM_PROMPT,
|
|
479
|
-
},
|
|
489
|
+
systemPrompt: systemPromptFor(
|
|
490
|
+
config.agentProfile,
|
|
491
|
+
FACILITATED_AGENT_SYSTEM_PROMPT,
|
|
492
|
+
),
|
|
480
493
|
});
|
|
481
494
|
|
|
482
495
|
return { name: config.name, role: config.role, runner };
|
|
@@ -491,12 +504,10 @@ export function createFacilitator({
|
|
|
491
504
|
onLine: (line) => facilitator.emitLine("facilitator", line),
|
|
492
505
|
mcpServers: { orchestration: facilitatorServer },
|
|
493
506
|
settingSources: ["project"],
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
append: FACILITATOR_SYSTEM_PROMPT,
|
|
499
|
-
},
|
|
507
|
+
systemPrompt: systemPromptFor(
|
|
508
|
+
facilitatorProfile,
|
|
509
|
+
FACILITATOR_SYSTEM_PROMPT,
|
|
510
|
+
),
|
|
500
511
|
});
|
|
501
512
|
|
|
502
513
|
facilitator = new Facilitator({
|
package/src/index.js
CHANGED
|
package/src/profile-prompt.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compose an SDK `systemPrompt` value from a `.claude/agents/<name>.md` file.
|
|
3
|
+
*
|
|
4
|
+
* Reads the profile file synchronously (its only side effect), strips YAML frontmatter, and returns
|
|
5
|
+
* the SDK-shaped `{ type: "preset", preset: "claude_code", append }` object
|
|
6
|
+
* with the profile body — plus an optional mode-specific trailer — in the
|
|
7
|
+
* `append` slot. Callers in libeval pass the result straight into an
|
|
8
|
+
* `AgentRunner`'s `systemPrompt` input so the profile reaches the main-thread
|
|
9
|
+
* system prompt without going through the SDK's top-level `agent` option.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { readFileSync } from "node:fs";
|
|
13
|
+
import { join } from "node:path";
|
|
14
|
+
|
|
15
|
+
/**
 * Build the SDK `systemPrompt` object for a named agent profile.
 *
 * Reads `<profilesDir>/<name>.md` synchronously, removes any leading YAML
 * frontmatter, trims surrounding whitespace, and puts the result in the
 * `append` slot of the `claude_code` preset. When a non-empty `trailer` is
 * given it follows the profile body after one blank line.
 *
 * @param {string} name - Profile basename (no `.md` suffix)
 * @param {object} opts
 * @param {string} opts.profilesDir - Directory containing `<name>.md`
 * @param {string} [opts.trailer] - Optional text appended after the profile body
 * @returns {{type: "preset", preset: "claude_code", append: string}}
 * @throws {Error} Whatever `readFileSync` throws when the file cannot be read
 */
export function composeProfilePrompt(name, { profilesDir, trailer }) {
  const profileText = readFileSync(join(profilesDir, `${name}.md`), "utf8");
  const sections = [stripFrontmatter(profileText).trim()];
  if (trailer && trailer.length > 0) {
    sections.push(trailer);
  }
  return {
    type: "preset",
    preset: "claude_code",
    append: sections.join("\n\n"),
  };
}
|
|
29
|
+
|
|
30
|
+
/**
 * Strip a leading YAML frontmatter block from a markdown string.
 *
 * A frontmatter block is an opening `---` line at the very start of the
 * input, any number of content lines (including none), and a closing line
 * that is exactly `---`. Both `\n` and `\r\n` line endings are accepted, and
 * the closing fence may be the last line of the input without a trailing
 * newline. The input is returned unchanged when it does not start with a
 * fence or when the fence is never closed.
 *
 * @param {string} raw - Markdown source, possibly starting with frontmatter
 * @returns {string} Everything after the closing fence, or `raw` unchanged
 */
function stripFrontmatter(raw) {
  // The content group is optional so an empty block (`---\n---\n`) matches,
  // and lazy so the first closing fence wins. Without the `m` flag, `$`
  // matches only at end of input, so `--- text` or `----` never closes.
  const fence = /^---\r?\n(?:[\s\S]*?\r?\n)?---(?:\r?\n|$)/;
  const match = fence.exec(raw);
  return match ? raw.slice(match[0].length) : raw;
}
|
|
package/src/render/line-renderer.js
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Line renderer — composes prefix + color + body + reset into a single
|
|
3
|
+
* terminal line. Pure; no side effects.
|
|
4
|
+
*
|
|
5
|
+
* Every renderer returns a `\n`-terminated string:
|
|
6
|
+
* <source>: <ESC><color><body><RESET>\n
|
|
7
|
+
*
|
|
8
|
+
* The `<source>: ` prefix lives outside the color escape so grep and
|
|
9
|
+
* color-stripping terminals preserve the participant tag. Colons separate
|
|
10
|
+
* the source label and the kind label (`Bash:`, `Result:`, `Error:`) for a
|
|
11
|
+
* tighter line on narrow viewports without losing structure.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { colorForSource, ERROR_COLOR, RESET } from "./palette.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Build the participant tag that precedes a rendered line.
 *
 * @param {string|null} source - Participant name
 * @param {boolean} withPrefix - Whether the tag should be shown at all
 * @returns {string} `"<source>: "`, or `""` when the tag is disabled or the
 *   source is empty
 */
function prefix(source, withPrefix) {
  return withPrefix && source ? `${source}: ` : "";
}
|
|
25
|
+
|
|
26
|
+
/**
 * Render a plain text line: optional participant tag, then the text wrapped
 * in the source's color and a reset, terminated by `\n`.
 *
 * @param {{source: string|null, text: string, withPrefix: boolean}} args
 * @returns {string}
 */
export function renderTextLine({ source, text, withPrefix }) {
  const tag = prefix(source, withPrefix);
  const open = colorForSource(source);
  return `${tag}${open}${text}${RESET}\n`;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Render a tool-call line: optional participant tag, then the tool name
 * (followed by `: <hint>` when a hint is present) wrapped in the source's
 * color and a reset, terminated by `\n`.
 *
 * @param {{source: string|null, toolName: string, hint: string, withPrefix: boolean}} args
 * @returns {string}
 */
export function renderToolCallLine({ source, toolName, hint, withPrefix }) {
  const tag = prefix(source, withPrefix);
  const open = colorForSource(source);
  const detail = hint ? `: ${hint}` : "";
  return `${tag}${open}${toolName}${detail}${RESET}\n`;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Render a tool-result line. Error previews use `ERROR_COLOR` and the
 * `Error:` label; all others use the source's color and the `Result:` label.
 * The line is terminated by `\n`.
 *
 * @param {{source: string|null, preview: {text: string, isError: boolean}, withPrefix: boolean}} args
 * @returns {string}
 */
export function renderToolResultLine({ source, preview, withPrefix }) {
  const { isError, text } = preview;
  const [open, label] = isError
    ? [ERROR_COLOR, "Error"]
    : [colorForSource(source), "Result"];
  return `${prefix(source, withPrefix)}${open}${label}: ${text}${RESET}\n`;
}
|
|
package/src/render/orchestrator-filter.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Orchestrator filter — predicate for the orchestrator lifecycle events that
|
|
3
|
+
* should be suppressed from the human-readable log.
|
|
4
|
+
*
|
|
5
|
+
* NDJSON artifacts still carry every orchestrator event; this module only
|
|
6
|
+
* controls what the live `textStream` and offline `toText()` show.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Orchestrator event types that are hidden from text output.
const SUPPRESSED = new Set([
  "session_start",
  "agent_start",
  "ask_received",
  "ask_answered",
  "redirect",
  "summary",
]);

/**
 * Tell whether an orchestrator event is hidden from text output.
 *
 * @param {{type?: string}|null|undefined} event
 * @returns {boolean} true only when `event` is an object whose `type` is in
 *   the suppressed set; false for anything else, including null/undefined
 */
export function isSuppressedOrchestratorEvent(event) {
  if (!event || typeof event !== "object") {
    return false;
  }
  return SUPPRESSED.has(event.type);
}
|
|
package/src/render/palette.js
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Palette — pure profile-name → ANSI SGR foreground color function.
|
|
3
|
+
*
|
|
4
|
+
* Assignment is a FNV-1a hash of the source name modulo the palette size, so
|
|
5
|
+
* the same name maps to the same color in every process. Red is reserved for
|
|
6
|
+
* tool-result errors and is never in the palette.
|
|
7
|
+
*
|
|
8
|
+
* Colors use the 24-bit truecolor SGR escape (`ESC[38;2;R;G;Bm`) rather than
|
|
9
|
+
* the 16-color table. GitHub Actions' log viewer and most modern terminals
|
|
10
|
+
* render truecolor as the exact hex requested, avoiding the washed-out
|
|
11
|
+
* mustard/olive tones GHA applies to `ESC[93m` etc. Eight slots cover the
|
|
12
|
+
* largest concurrent cast in any existing workflow (five domain agents plus
|
|
13
|
+
* the facilitator) with headroom.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// Foreground colors as RGB triples, turned into 24-bit SGR escapes
// (`ESC[38;2;R;G;Bm`). Red is absent because it is held back for errors.
const PALETTE = [
  [79, 195, 247], // sky blue #4FC3F7
  [129, 199, 132], // bright green #81C784
  [255, 202, 40], // amber #FFCA28
  [236, 64, 122], // magenta #EC407A
  [38, 198, 218], // cyan #26C6DA
  [186, 104, 200], // lavender #BA68C8
  [255, 167, 38], // orange #FFA726
  [66, 165, 245], // blue #42A5F5
].map(([r, g, b]) => `\u001b[38;2;${r};${g};${b}m`);

/** 24-bit SGR foreground escape for tool-result errors (#F14C4C). */
export const ERROR_COLOR = "\u001b[38;2;241;76;76m";

/** SGR sequence that resets all attributes. */
export const RESET = "\u001b[0m";
|
|
32
|
+
|
|
33
|
+
/**
 * Pick a palette color for a source name.
 *
 * The name's UTF-16 code units are folded into a 32-bit FNV-1a-style hash
 * (XOR, then multiply by the FNV prime), the name length is folded in as one
 * more round, and the result indexes `PALETTE` modulo its length. The
 * function reads no state besides `PALETTE` and `RESET`, so equal names
 * always give the same color.
 *
 * @param {string|null|undefined} name
 * @returns {string} A `PALETTE` entry, or `RESET` when `name` is falsy
 */
export function colorForSource(name) {
  if (!name) return RESET;

  const FNV_PRIME = 0x01000193;
  let hash = 0x811c9dc5; // FNV 32-bit offset basis
  let index = 0;
  while (index < name.length) {
    // `>>> 0` keeps the running hash an unsigned 32-bit value.
    hash = Math.imul(hash ^ name.charCodeAt(index), FNV_PRIME) >>> 0;
    index += 1;
  }
  // Final round: mix the length in so it also affects the slot chosen.
  hash = Math.imul(hash ^ name.length, FNV_PRIME) >>> 0;

  return PALETTE[hash % PALETTE.length];
}
|
|
56
|
+
|
|
57
|
+
/**
 * Report how many colors the palette holds.
 * @returns {number} Number of entries in `PALETTE`
 */
export function paletteSize() {
  const { length } = PALETTE;
  return length;
}
|
|
package/src/render/tool-hints.js
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool hints — pure one-line formatters for tool-call arguments and
|
|
3
|
+
* tool-result previews.
|
|
4
|
+
*
|
|
5
|
+
* `hintForCall(name, input)` renders the human-meaningful field for each
|
|
6
|
+
* tool (file path, command, pattern, …) sanitized to strip JSON punctuation
|
|
7
|
+
* (`{`, `}`, `"`) and collapsed to a single line ≤ 80 chars.
|
|
8
|
+
*
|
|
9
|
+
* `previewForResult(content, isError)` collapses a tool result to a single
|
|
10
|
+
* line ≤ 80 chars and flags errors so the renderer can apply the reserved
|
|
11
|
+
* error color and the `Error:` label.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Upper bound, in characters, for any hint or preview string.
const MAX_HINT_CHARS = 80;

/**
 * Reduce an arbitrary value to a single display-safe line.
 *
 * The value is stringified, everything after the first line break is
 * dropped, `{`, `}` and `"` are removed, whitespace runs collapse to one
 * space, and the ends are trimmed. Results longer than MAX_HINT_CHARS are
 * cut to that length with the last three characters replaced by `...`.
 *
 * @param {unknown} raw
 * @returns {string} Always a string; `""` for null or undefined
 */
function sanitize(raw) {
  if (raw == null) return "";

  const [head = ""] = String(raw).split(/\r?\n/);
  const flat = head
    .replace(/[{}"]/g, "")
    .replace(/\s+/g, " ")
    .trim();

  return flat.length > MAX_HINT_CHARS
    ? flat.slice(0, MAX_HINT_CHARS - 3) + "..."
    : flat;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Cap a string at MAX_HINT_CHARS. Strings within the limit are returned
 * as-is; longer ones are cut so that, with a trailing `...`, they are
 * exactly MAX_HINT_CHARS long. No other cleanup is applied.
 *
 * @param {string} str
 * @returns {string}
 */
function truncate(str) {
  if (str.length <= MAX_HINT_CHARS) {
    return str;
  }
  return str.slice(0, MAX_HINT_CHARS - 3) + "...";
}
|
|
45
|
+
|
|
46
|
+
/**
 * Per-tool hint handlers, keyed by tool name. Each handler receives the tool
 * input object (never null) and returns the one-line hint for that call.
 * Kept as a flat table so supporting a new tool is one new entry.
 *
 * Every handler returns at most MAX_HINT_CHARS characters. `Edit` reserves
 * room for its ` (replace_all)` marker, so a long path is shortened with
 * `...` before the marker instead of the marker being cut off mid-word.
 */
const HINT_HANDLERS = {
  Bash: (i) => sanitize(i.command),
  Read: (i) => sanitize(i.file_path),
  Write: (i) => sanitize(i.file_path),
  Edit: (i) => {
    const base = sanitize(i.file_path);
    if (!i.replace_all) return base;
    const suffix = " (replace_all)";
    const room = MAX_HINT_CHARS - suffix.length;
    // Shorten the path, not the marker: the marker is the part that tells
    // the reader this edit touched every occurrence.
    const head = base.length <= room ? base : `${base.slice(0, room - 3)}...`;
    return `${head}${suffix}`;
  },
  Glob: (i) => sanitize(i.pattern),
  Grep: (i) => {
    const pattern = sanitize(i.pattern);
    return i.path ? truncate(`${pattern} in ${sanitize(i.path)}`) : pattern;
  },
  WebFetch: (i) => sanitize(i.url),
  WebSearch: (i) => sanitize(i.query),
  ToolSearch: (i) => sanitize(i.query),
  TodoWrite: (i) => {
    // Only the number of todos is shown, never their text.
    const count = Array.isArray(i.todos) ? i.todos.length : 0;
    return `${count} todos`;
  },
  NotebookEdit: (i) => sanitize(i.notebook_path),
  Skill: (i) => sanitize(i.skill),
  // Agent and Task prefer the prompt and fall back to the description.
  Agent: (i) => sanitize(i.prompt ?? i.description),
  Task: (i) => sanitize(i.prompt ?? i.description),
};
|
|
78
|
+
|
|
79
|
+
/**
 * Strip the `mcp__<server>__` prefix from an MCP-namespaced tool name so
 * logs show the bare method (e.g. `mcp__orchestration__Tell` → `Tell`).
 *
 * Names that do not start with `mcp__` pass through unchanged. So do
 * malformed MCP names where nothing would be left after the prefix
 * (`mcp__server`, `mcp__server__`), so a tool never renders with an empty
 * name. Falsy input yields `""`; other non-string input is stringified
 * first rather than throwing.
 *
 * @param {string} name
 * @returns {string}
 */
export function simplifyToolName(name) {
  if (!name) return "";
  const label = typeof name === "string" ? name : String(name);
  if (!label.startsWith("mcp__")) return label;
  // Segments are `mcp`, `<server>`, then the method, which may itself
  // contain `__`, so rejoin everything after the server.
  const method = label.split("__").slice(2).join("__");
  return method.length > 0 ? method : label;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Hint for MCP-namespaced tool names (those starting with `mcp__`).
 *
 * Orchestration calls (`mcp__orchestration__…`) get `to <x>` and/or
 * `from <y>` decorators built from the matching input fields; other MCP
 * tools get an empty hint. Returns null for names that are not
 * MCP-prefixed.
 *
 * @param {string} name
 * @param {object} input
 * @returns {string|null}
 */
function hintForMcp(name, input) {
  if (!name.startsWith("mcp__")) return null;
  if (!name.startsWith("mcp__orchestration__")) return "";

  const decorators = ["to", "from"]
    .filter((field) => input[field])
    .map((field) => `${field} ${sanitize(input[field])}`);
  return truncate(decorators.join(" "));
}
|
|
115
|
+
|
|
116
|
+
/**
 * Map a tool name and input to a one-line human hint.
 *
 * Names with their own entry in `HINT_HANDLERS` use that handler; names
 * starting with `mcp__` are delegated to `hintForMcp`; everything else gets
 * an empty hint, so the caller still shows the tool name without extra
 * detail. The handler lookup is restricted to own properties, so a tool
 * that happens to be called `constructor`, `toString` or `hasOwnProperty`
 * is treated as unknown instead of invoking an inherited
 * `Object.prototype` member.
 *
 * @param {string} name - Tool name (e.g. "Bash", "Read", "mcp__orchestration__Tell")
 * @param {object|null|undefined} input - Raw tool input object from the trace
 * @returns {string} One-line hint, or "" when no rule matches or `name` is
 *   not a non-empty string
 */
export function hintForCall(name, input) {
  if (typeof name !== "string" || name === "") return "";
  // Handlers dereference fields directly, so they always get an object.
  const safeInput = input && typeof input === "object" ? input : {};

  if (Object.prototype.hasOwnProperty.call(HINT_HANDLERS, name)) {
    return HINT_HANDLERS[name](safeInput);
  }

  // hintForMcp returns null for names that are not MCP-prefixed.
  return hintForMcp(name, safeInput) ?? "";
}
|
|
140
|
+
|
|
141
|
+
/**
 * Reduce a tool result to a one-line preview plus an `isError` flag.
 *
 * Strings are used as-is, null/undefined count as empty, and any other
 * value is passed through `JSON.stringify`. The preview is the first line
 * that is not blank, trimmed and capped at MAX_HINT_CHARS (with `...` on
 * overflow). When there is no such line the text is `(no output)` for
 * errors and `(ok)` otherwise.
 *
 * @param {string|object|null|undefined} content - Tool result content
 * @param {boolean} isError - Whether the tool call failed
 * @returns {{text: string, isError: boolean}}
 */
export function previewForResult(content, isError) {
  // Cap a line at MAX_HINT_CHARS, marking overflow with "...".
  const clip = (line) =>
    line.length > MAX_HINT_CHARS
      ? line.slice(0, MAX_HINT_CHARS - 3) + "..."
      : line;

  let raw;
  if (content === null || content === undefined) {
    raw = "";
  } else if (typeof content === "string") {
    raw = content;
  } else {
    raw = JSON.stringify(content);
  }

  const hit = raw.split(/\r?\n/).find((line) => line.trim().length > 0);
  const firstNonBlank = hit === undefined ? "" : hit.trim();

  if (isError) {
    return { text: clip(firstNonBlank || "(no output)"), isError: true };
  }
  return {
    text: firstNonBlank ? clip(firstNonBlank) : "(ok)",
    isError: false,
  };
}
|
package/src/supervisor.js
CHANGED
|
@@ -11,7 +11,9 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
import { Writable } from "node:stream";
|
|
14
|
+
import { resolve } from "node:path";
|
|
14
15
|
import { createAgentRunner } from "./agent-runner.js";
|
|
16
|
+
import { composeProfilePrompt } from "./profile-prompt.js";
|
|
15
17
|
import { TraceCollector } from "./trace-collector.js";
|
|
16
18
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
17
19
|
import {
|
|
@@ -355,8 +357,9 @@ const devNull = new Writable({
|
|
|
355
357
|
* @param {string[]} [deps.allowedTools]
|
|
356
358
|
* @param {string[]} [deps.supervisorAllowedTools]
|
|
357
359
|
* @param {string[]} [deps.supervisorDisallowedTools]
|
|
358
|
-
* @param {string} [deps.supervisorProfile]
|
|
359
|
-
* @param {string} [deps.agentProfile]
|
|
360
|
+
* @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
361
|
+
* @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
362
|
+
* @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
|
|
360
363
|
* @returns {Supervisor}
|
|
361
364
|
*/
|
|
362
365
|
export function createSupervisor({
|
|
@@ -371,7 +374,19 @@ export function createSupervisor({
|
|
|
371
374
|
supervisorAllowedTools,
|
|
372
375
|
supervisorProfile,
|
|
373
376
|
agentProfile,
|
|
377
|
+
profilesDir,
|
|
374
378
|
}) {
|
|
379
|
+
const resolvedProfilesDir =
|
|
380
|
+
profilesDir ?? resolve(supervisorCwd, ".claude/agents");
|
|
381
|
+
const systemPromptFor = (profile, trailer) => {
|
|
382
|
+
if (!trailer) throw new Error("trailer is required");
|
|
383
|
+
return profile
|
|
384
|
+
? composeProfilePrompt(profile, {
|
|
385
|
+
profilesDir: resolvedProfilesDir,
|
|
386
|
+
trailer,
|
|
387
|
+
})
|
|
388
|
+
: { type: "preset", preset: "claude_code", append: trailer };
|
|
389
|
+
};
|
|
375
390
|
let supervisor;
|
|
376
391
|
let supervisorRunner;
|
|
377
392
|
|
|
@@ -402,12 +417,7 @@ export function createSupervisor({
|
|
|
402
417
|
allowedTools,
|
|
403
418
|
onLine,
|
|
404
419
|
settingSources: ["project"],
|
|
405
|
-
agentProfile,
|
|
406
|
-
systemPrompt: {
|
|
407
|
-
type: "preset",
|
|
408
|
-
preset: "claude_code",
|
|
409
|
-
append: AGENT_SYSTEM_PROMPT,
|
|
410
|
-
},
|
|
420
|
+
systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
|
|
411
421
|
mcpServers: { orchestration: agentServer },
|
|
412
422
|
});
|
|
413
423
|
|
|
@@ -433,12 +443,7 @@ export function createSupervisor({
|
|
|
433
443
|
disallowedTools,
|
|
434
444
|
onLine,
|
|
435
445
|
settingSources: ["project"],
|
|
436
|
-
|
|
437
|
-
systemPrompt: {
|
|
438
|
-
type: "preset",
|
|
439
|
-
preset: "claude_code",
|
|
440
|
-
append: SUPERVISOR_SYSTEM_PROMPT,
|
|
441
|
-
},
|
|
446
|
+
systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
|
|
442
447
|
mcpServers: { orchestration: supervisorServer },
|
|
443
448
|
});
|
|
444
449
|
|
package/src/tee-writer.js
CHANGED
|
@@ -7,11 +7,27 @@
|
|
|
7
7
|
* parameter controls display formatting: multi-participant modes show
|
|
8
8
|
* source labels on content lines.
|
|
9
9
|
*
|
|
10
|
+
* Human text rendering is delegated to the pure modules under `./render/`
|
|
11
|
+
* so the live stream and the offline `TraceCollector.toText()` replay share
|
|
12
|
+
* one formatting path (spec 540). The NDJSON going to `fileStream` is
|
|
13
|
+
* untouched — only what reaches `textStream` changes.
|
|
14
|
+
*
|
|
10
15
|
* Follows OO+DI: constructor injection, factory function, tests bypass factory.
|
|
11
16
|
*/
|
|
12
17
|
|
|
13
18
|
import { Writable } from "node:stream";
|
|
14
19
|
import { TraceCollector } from "./trace-collector.js";
|
|
20
|
+
import {
|
|
21
|
+
renderTextLine,
|
|
22
|
+
renderToolCallLine,
|
|
23
|
+
renderToolResultLine,
|
|
24
|
+
} from "./render/line-renderer.js";
|
|
25
|
+
import {
|
|
26
|
+
hintForCall,
|
|
27
|
+
previewForResult,
|
|
28
|
+
simplifyToolName,
|
|
29
|
+
} from "./render/tool-hints.js";
|
|
30
|
+
import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
15
31
|
|
|
16
32
|
export class TeeWriter extends Writable {
|
|
17
33
|
/**
|
|
@@ -29,8 +45,6 @@ export class TeeWriter extends Writable {
|
|
|
29
45
|
this.mode = mode ?? "raw";
|
|
30
46
|
this.collector = new TraceCollector();
|
|
31
47
|
this.turnsEmitted = 0;
|
|
32
|
-
this.lastSource = null;
|
|
33
|
-
this.partial = "";
|
|
34
48
|
}
|
|
35
49
|
|
|
36
50
|
/**
|
|
@@ -39,7 +53,7 @@ export class TeeWriter extends Writable {
|
|
|
39
53
|
* @param {function} callback
|
|
40
54
|
*/
|
|
41
55
|
_write(chunk, encoding, callback) {
|
|
42
|
-
const str = this.partial + chunk.toString();
|
|
56
|
+
const str = (this.partial ?? "") + chunk.toString();
|
|
43
57
|
const lines = str.split("\n");
|
|
44
58
|
this.partial = lines.pop() ?? "";
|
|
45
59
|
|
|
@@ -55,16 +69,25 @@ export class TeeWriter extends Writable {
|
|
|
55
69
|
* @param {function} callback
|
|
56
70
|
*/
|
|
57
71
|
_final(callback) {
|
|
58
|
-
if (this.partial.trim()) {
|
|
72
|
+
if (this.partial && this.partial.trim()) {
|
|
59
73
|
this.fileStream.write(this.partial + "\n");
|
|
60
74
|
this.processLine(this.partial);
|
|
61
75
|
}
|
|
62
76
|
|
|
63
|
-
|
|
77
|
+
// Emit the trailing `--- Result: ... ---` footer — the one summary line
|
|
78
|
+
// humans want (spec 540). This is the same tail TraceCollector.toText()
|
|
79
|
+
// appends, so the live stream and the offline replay stay in sync
|
|
80
|
+
// (spec 540 criterion #6). The superseded `--- Evaluation ... ---`
|
|
81
|
+
// footer is gone in every mode.
|
|
82
|
+
if (this.collector.result) {
|
|
64
83
|
const text = this.collector.toText();
|
|
65
84
|
const idx = text.lastIndexOf("\n---");
|
|
66
85
|
if (idx !== -1) {
|
|
67
|
-
|
|
86
|
+
// Slice past the leading `\n` — the previously-streamed body
|
|
87
|
+
// already ended with its own newline, so re-emitting `\n---` here
|
|
88
|
+
// would produce a blank line before the footer and desync from
|
|
89
|
+
// the offline replay (spec 540 #6).
|
|
90
|
+
this.textStream.write(text.slice(idx + 1) + "\n");
|
|
68
91
|
}
|
|
69
92
|
}
|
|
70
93
|
|
|
@@ -85,19 +108,15 @@ export class TeeWriter extends Writable {
|
|
|
85
108
|
|
|
86
109
|
// Universal envelope: { source, seq, event }
|
|
87
110
|
if (parsed.event) {
|
|
88
|
-
// Orchestrator
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
111
|
+
// Orchestrator lifecycle events are suppressed from the text stream
|
|
112
|
+
// entirely (spec 540). They still reached fileStream above.
|
|
113
|
+
if (
|
|
114
|
+
parsed.source === "orchestrator" &&
|
|
115
|
+
isSuppressedOrchestratorEvent(parsed.event)
|
|
116
|
+
) {
|
|
94
117
|
return;
|
|
95
118
|
}
|
|
96
|
-
|
|
97
|
-
if (parsed.source && parsed.source !== this.lastSource) {
|
|
98
|
-
this.lastSource = parsed.source;
|
|
99
|
-
}
|
|
100
|
-
this.collector.addLine(JSON.stringify(parsed.event));
|
|
119
|
+
this.collector.addLine(line);
|
|
101
120
|
this.flushTurns();
|
|
102
121
|
return;
|
|
103
122
|
}
|
|
@@ -112,38 +131,43 @@ export class TeeWriter extends Writable {
|
|
|
112
131
|
*/
|
|
113
132
|
flushTurns() {
|
|
114
133
|
const turns = this.collector.turns;
|
|
115
|
-
const
|
|
116
|
-
this.mode === "supervised" && this.lastSource
|
|
117
|
-
? `[${this.lastSource}] `
|
|
118
|
-
: "";
|
|
134
|
+
const withPrefix = this.mode !== "raw";
|
|
119
135
|
while (this.turnsEmitted < turns.length) {
|
|
120
136
|
const turn = turns[this.turnsEmitted++];
|
|
121
137
|
if (turn.role === "assistant") {
|
|
122
138
|
for (const block of turn.content) {
|
|
123
139
|
if (block.type === "text") {
|
|
124
|
-
this.textStream.write(
|
|
140
|
+
this.textStream.write(
|
|
141
|
+
renderTextLine({
|
|
142
|
+
source: turn.source,
|
|
143
|
+
text: block.text,
|
|
144
|
+
withPrefix,
|
|
145
|
+
}),
|
|
146
|
+
);
|
|
125
147
|
} else if (block.type === "tool_use") {
|
|
126
|
-
|
|
127
|
-
|
|
148
|
+
this.textStream.write(
|
|
149
|
+
renderToolCallLine({
|
|
150
|
+
source: turn.source,
|
|
151
|
+
toolName: simplifyToolName(block.name),
|
|
152
|
+
hint: hintForCall(block.name, block.input),
|
|
153
|
+
withPrefix,
|
|
154
|
+
}),
|
|
155
|
+
);
|
|
128
156
|
}
|
|
129
157
|
}
|
|
158
|
+
} else if (turn.role === "tool_result") {
|
|
159
|
+
this.textStream.write(
|
|
160
|
+
renderToolResultLine({
|
|
161
|
+
source: turn.source,
|
|
162
|
+
preview: previewForResult(turn.content, turn.isError),
|
|
163
|
+
withPrefix,
|
|
164
|
+
}),
|
|
165
|
+
);
|
|
130
166
|
}
|
|
131
167
|
}
|
|
132
168
|
}
|
|
133
169
|
}
|
|
134
170
|
|
|
135
|
-
/**
|
|
136
|
-
* Summarize tool input for text display, truncated to keep logs readable.
|
|
137
|
-
* @param {object} input - Tool input object
|
|
138
|
-
* @returns {string} Truncated summary
|
|
139
|
-
*/
|
|
140
|
-
function summarizeInput(input) {
|
|
141
|
-
if (!input || typeof input !== "object") return "";
|
|
142
|
-
const json = JSON.stringify(input);
|
|
143
|
-
if (json.length <= 200) return json;
|
|
144
|
-
return json.slice(0, 197) + "...";
|
|
145
|
-
}
|
|
146
|
-
|
|
147
171
|
/**
|
|
148
172
|
* Factory function — wires a TeeWriter with the given streams.
|
|
149
173
|
* @param {object} deps - Same as TeeWriter constructor
|
package/src/trace-collector.js
CHANGED
|
@@ -3,7 +3,24 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Accepts one NDJSON line at a time via addLine(), then produces either a
|
|
5
5
|
* structured JSON trace (toJSON) or human-readable text (toText).
|
|
6
|
+
*
|
|
7
|
+
* Human text rendering is delegated to the pure modules under `./render/`
|
|
8
|
+
* so the live `TeeWriter` stream and the offline `toText()` replay share
|
|
9
|
+
* one formatting path (spec 540).
|
|
6
10
|
*/
|
|
11
|
+
|
|
12
|
+
import {
|
|
13
|
+
renderTextLine,
|
|
14
|
+
renderToolCallLine,
|
|
15
|
+
renderToolResultLine,
|
|
16
|
+
} from "./render/line-renderer.js";
|
|
17
|
+
import {
|
|
18
|
+
hintForCall,
|
|
19
|
+
previewForResult,
|
|
20
|
+
simplifyToolName,
|
|
21
|
+
} from "./render/tool-hints.js";
|
|
22
|
+
import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
23
|
+
|
|
7
24
|
export class TraceCollector {
|
|
8
25
|
/**
|
|
9
26
|
* @param {object} [deps]
|
|
@@ -38,22 +55,31 @@ export class TraceCollector {
|
|
|
38
55
|
return;
|
|
39
56
|
}
|
|
40
57
|
|
|
41
|
-
// Unwrap combined supervised trace format {source,
|
|
42
|
-
//
|
|
43
|
-
// inner event is the one we
|
|
58
|
+
// Unwrap combined supervised trace format {source, seq, event}. The
|
|
59
|
+
// Supervisor / Facilitator emits this wrapper; when replayed through
|
|
60
|
+
// addLine the inner event is the one we care about. Carry the envelope
|
|
61
|
+
// `source` onto each new turn so the renderer can color it correctly.
|
|
62
|
+
let source = null;
|
|
44
63
|
if (event.event && !event.type && typeof event.source === "string") {
|
|
64
|
+
source = event.source;
|
|
45
65
|
event = event.event;
|
|
46
66
|
}
|
|
47
67
|
|
|
68
|
+
// Orchestrator lifecycle events carry no content and are suppressed
|
|
69
|
+
// from turns entirely — the NDJSON artifact keeps them separately.
|
|
70
|
+
if (source === "orchestrator" && isSuppressedOrchestratorEvent(event)) {
|
|
71
|
+
return;
|
|
72
|
+
}
|
|
73
|
+
|
|
48
74
|
switch (event.type) {
|
|
49
75
|
case "system":
|
|
50
76
|
this.handleSystem(event);
|
|
51
77
|
break;
|
|
52
78
|
case "assistant":
|
|
53
|
-
this.handleAssistant(event);
|
|
79
|
+
this.handleAssistant(event, source);
|
|
54
80
|
break;
|
|
55
81
|
case "user":
|
|
56
|
-
this.handleUser(event);
|
|
82
|
+
this.handleUser(event, source);
|
|
57
83
|
break;
|
|
58
84
|
case "result":
|
|
59
85
|
this.handleResult(event);
|
|
@@ -81,8 +107,9 @@ export class TraceCollector {
|
|
|
81
107
|
|
|
82
108
|
/**
|
|
83
109
|
* @param {object} event
|
|
110
|
+
* @param {string|null} source
|
|
84
111
|
*/
|
|
85
|
-
handleAssistant(event) {
|
|
112
|
+
handleAssistant(event, source) {
|
|
86
113
|
const message = event.message;
|
|
87
114
|
if (!message) return;
|
|
88
115
|
|
|
@@ -114,6 +141,7 @@ export class TraceCollector {
|
|
|
114
141
|
this.turns.push({
|
|
115
142
|
index: this.turnIndex++,
|
|
116
143
|
role: "assistant",
|
|
144
|
+
source,
|
|
117
145
|
content,
|
|
118
146
|
usage,
|
|
119
147
|
});
|
|
@@ -121,8 +149,9 @@ export class TraceCollector {
|
|
|
121
149
|
|
|
122
150
|
/**
|
|
123
151
|
* @param {object} event
|
|
152
|
+
* @param {string|null} source
|
|
124
153
|
*/
|
|
125
|
-
handleUser(event) {
|
|
154
|
+
handleUser(event, source) {
|
|
126
155
|
const message = event.message;
|
|
127
156
|
if (!message) return;
|
|
128
157
|
|
|
@@ -134,6 +163,7 @@ export class TraceCollector {
|
|
|
134
163
|
this.turns.push({
|
|
135
164
|
index: this.turnIndex++,
|
|
136
165
|
role: "tool_result",
|
|
166
|
+
source,
|
|
137
167
|
toolUseId: item.tool_use_id ?? null,
|
|
138
168
|
content:
|
|
139
169
|
typeof item.content === "string"
|
|
@@ -197,50 +227,73 @@ export class TraceCollector {
|
|
|
197
227
|
}
|
|
198
228
|
|
|
199
229
|
/**
|
|
200
|
-
*
|
|
201
|
-
*
|
|
230
|
+
* Render the accumulated turns as human-readable text — the same path the
|
|
231
|
+
* live `TeeWriter` stream uses, so `fit-eval output --format=text` over a
|
|
232
|
+
* captured trace reproduces what the live workflow log showed.
|
|
233
|
+
*
|
|
234
|
+
* Source prefixes are emitted whenever at least one turn has a non-null
|
|
235
|
+
* source (supervised / facilitated traces). A pure `run` trace has no
|
|
236
|
+
* envelope, all turn sources are null, and the renderer drops the prefix.
|
|
237
|
+
*
|
|
238
|
+
* @returns {string} Formatted text output including ANSI escapes
|
|
202
239
|
*/
|
|
203
240
|
toText() {
|
|
204
|
-
const
|
|
241
|
+
const withPrefix = this.turns.some((t) => t.source);
|
|
242
|
+
const out = [];
|
|
205
243
|
|
|
206
244
|
for (const turn of this.turns) {
|
|
207
245
|
if (turn.role === "assistant") {
|
|
208
246
|
for (const block of turn.content) {
|
|
209
247
|
if (block.type === "text") {
|
|
210
|
-
|
|
248
|
+
out.push(
|
|
249
|
+
renderTextLine({
|
|
250
|
+
source: turn.source,
|
|
251
|
+
text: block.text,
|
|
252
|
+
withPrefix,
|
|
253
|
+
}),
|
|
254
|
+
);
|
|
211
255
|
} else if (block.type === "tool_use") {
|
|
212
|
-
|
|
213
|
-
|
|
256
|
+
out.push(
|
|
257
|
+
renderToolCallLine({
|
|
258
|
+
source: turn.source,
|
|
259
|
+
toolName: simplifyToolName(block.name),
|
|
260
|
+
hint: hintForCall(block.name, block.input),
|
|
261
|
+
withPrefix,
|
|
262
|
+
}),
|
|
263
|
+
);
|
|
214
264
|
}
|
|
215
265
|
}
|
|
266
|
+
} else if (turn.role === "tool_result") {
|
|
267
|
+
out.push(
|
|
268
|
+
renderToolResultLine({
|
|
269
|
+
source: turn.source,
|
|
270
|
+
preview: previewForResult(turn.content, turn.isError),
|
|
271
|
+
withPrefix,
|
|
272
|
+
}),
|
|
273
|
+
);
|
|
216
274
|
}
|
|
217
275
|
}
|
|
218
276
|
|
|
277
|
+
// Trailing result block — the one summary line humans want (spec 540).
|
|
278
|
+
let tail = "";
|
|
219
279
|
if (this.result) {
|
|
220
280
|
const duration = formatDuration(this.result.durationMs);
|
|
221
281
|
const cost = Number(this.result.totalCostUsd).toFixed(4);
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
`--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration}
|
|
225
|
-
);
|
|
282
|
+
tail =
|
|
283
|
+
"\n" +
|
|
284
|
+
`--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`;
|
|
226
285
|
}
|
|
227
286
|
|
|
228
|
-
|
|
287
|
+
// Each rendered line already ends with `\n`; concatenate, drop the
|
|
288
|
+
// trailing newline, then append the tail so the output shape stays
|
|
289
|
+
// compatible with existing consumers (no double-blank line before
|
|
290
|
+
// the result footer when there are turns, no leading blank when there
|
|
291
|
+
// are not).
|
|
292
|
+
const body = out.join("").replace(/\n$/, "");
|
|
293
|
+
return body + tail;
|
|
229
294
|
}
|
|
230
295
|
}
|
|
231
296
|
|
|
232
|
-
/**
|
|
233
|
-
* Summarize tool input for text display, truncated to keep logs readable.
|
|
234
|
-
* @param {object} input - Tool input object
|
|
235
|
-
* @returns {string} Truncated summary
|
|
236
|
-
*/
|
|
237
|
-
function summarizeInput(input) {
|
|
238
|
-
if (!input || typeof input !== "object") return "";
|
|
239
|
-
const json = JSON.stringify(input);
|
|
240
|
-
if (json.length <= 200) return json;
|
|
241
|
-
return json.slice(0, 197) + "...";
|
|
242
|
-
}
|
|
243
|
-
|
|
244
297
|
/**
|
|
245
298
|
* Format milliseconds into a human-readable duration.
|
|
246
299
|
* @param {number} ms - Duration in milliseconds
|
package/src/trace-github.js
CHANGED
|
@@ -186,21 +186,30 @@ export function parseGitRemote(remote) {
|
|
|
186
186
|
}
|
|
187
187
|
|
|
188
188
|
/**
|
|
189
|
-
* Create a TraceGitHub instance
|
|
190
|
-
*
|
|
189
|
+
* Create a TraceGitHub instance. The caller is responsible for resolving
|
|
190
|
+
* the GitHub token — typically via `Config.ghToken()` — so credential
|
|
191
|
+
* loading stays at the CLI entry point.
|
|
191
192
|
*
|
|
192
|
-
*
|
|
193
|
+
* Breaking change from the prior signature: `token` is now a required
|
|
194
|
+
* caller input. Construct a `Config` via `@forwardimpact/libconfig` and
|
|
195
|
+
* pass `config.ghToken()`.
|
|
196
|
+
*
|
|
197
|
+
* @param {object} opts
|
|
198
|
+
* @param {string} opts.token - GitHub token (e.g. from `Config.ghToken()`)
|
|
193
199
|
* @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
|
|
194
200
|
* @returns {Promise<TraceGitHub>}
|
|
195
201
|
*/
|
|
196
202
|
export async function createTraceGitHub(opts = {}) {
|
|
197
|
-
const {
|
|
198
|
-
|
|
199
|
-
|
|
203
|
+
const { token, repo: repoOverride } = opts;
|
|
204
|
+
if (!token) {
|
|
205
|
+
throw new Error(
|
|
206
|
+
"createTraceGitHub: token is required (pass Config.ghToken())",
|
|
207
|
+
);
|
|
208
|
+
}
|
|
200
209
|
|
|
201
210
|
let owner, repo;
|
|
202
|
-
if (
|
|
203
|
-
({ owner, repo } = parseGitRemote(
|
|
211
|
+
if (repoOverride) {
|
|
212
|
+
({ owner, repo } = parseGitRemote(repoOverride));
|
|
204
213
|
} else {
|
|
205
214
|
const { execSync } = await import("node:child_process");
|
|
206
215
|
const remote = execSync("git remote get-url origin", {
|