@forwardimpact/libeval 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-eval.js CHANGED
@@ -117,10 +117,13 @@ const definition = {
117
117
  type: "string",
118
118
  description: "Facilitator profile name",
119
119
  },
120
- agents: {
120
+ "agent-profiles": {
121
121
  type: "string",
122
- description:
123
- "Agent configs: name1:cwd=/tmp/a:role=explorer,name2:cwd=/tmp/b:role=tester",
122
+ description: "Comma-separated agent profile names",
123
+ },
124
+ "agent-cwd": {
125
+ type: "string",
126
+ description: "Agent working directory (default: .)",
124
127
  },
125
128
  },
126
129
  },
@@ -135,7 +138,7 @@ const definition = {
135
138
  "fit-eval output --format=text < trace.ndjson",
136
139
  "fit-eval run --task-file=task.md --model=opus",
137
140
  "fit-eval supervise --task-file=task.md --supervisor-cwd=.",
138
- 'fit-eval facilitate --task-file=task.md --agents "explorer:cwd=/tmp/a,tester:cwd=/tmp/b"',
141
+ 'fit-eval facilitate --task-file=task.md --agent-profiles "security-engineer,technical-writer"',
139
142
  ],
140
143
  };
141
144
 
@@ -0,0 +1,198 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync } from "node:fs";
4
+ import { createCli } from "@forwardimpact/libcli";
5
+ import { createLogger } from "@forwardimpact/libtelemetry";
6
+
7
+ import {
8
+ runRunsCommand,
9
+ runDownloadCommand,
10
+ runOverviewCommand,
11
+ runCountCommand,
12
+ runBatchCommand,
13
+ runHeadCommand,
14
+ runTailCommand,
15
+ runSearchCommand,
16
+ runToolsCommand,
17
+ runToolCommand,
18
+ runErrorsCommand,
19
+ runReasoningCommand,
20
+ runTimelineCommand,
21
+ runStatsCommand,
22
+ } from "../src/commands/trace.js";
23
+
24
// Package version, read from ../package.json relative to this script.
const packageJsonUrl = new URL("../package.json", import.meta.url);
const { version: VERSION } = JSON.parse(readFileSync(packageJsonUrl, "utf8"));

/** Builds a string-typed option entry with the given help text. */
const stringOption = (description) => ({ type: "string", description });

/** Builds the `repo` option used by the GitHub-backed commands. */
const repoOption = () =>
  stringOption("GitHub repo override (default: git remote)");

/** Builds a command entry whose only positional argument is `<file>`. */
const fileCommand = (name, description, options) => ({
  name,
  args: "<file>",
  description,
  ...(options && { options }),
});

/** CLI definition handed to createCli: commands, options and examples. */
const definition = {
  name: "fit-trace",
  version: VERSION,
  description: "Download, query, and search agent execution traces",
  commands: [
    {
      name: "runs",
      args: "[pattern]",
      description: "List recent workflow runs (default pattern: agent)",
      options: {
        lookback: stringOption("How far back to search (default: 7d)"),
        repo: repoOption(),
      },
    },
    {
      name: "download",
      args: "<run-id>",
      description: "Download trace artifact and convert to structured JSON",
      options: {
        dir: stringOption("Output directory"),
        artifact: stringOption("Artifact name override"),
        repo: repoOption(),
      },
    },
    fileCommand("overview", "Metadata, summary, turn count, tool frequency"),
    fileCommand("count", "Number of turns"),
    {
      name: "batch",
      args: "<file> <from> <to>",
      description: "Turns in range [from, to) (zero-indexed)",
    },
    {
      name: "head",
      args: "<file> [N]",
      description: "First N turns (default 10)",
    },
    {
      name: "tail",
      args: "<file> [N]",
      description: "Last N turns (default 10)",
    },
    {
      name: "search",
      args: "<file> <pattern>",
      description: "Search all content for regex pattern",
      options: {
        limit: stringOption("Max results (default: 50)"),
        context: stringOption("Surrounding turns per hit (default: 0)"),
      },
    },
    fileCommand("tools", "Tool usage frequency (descending)"),
    {
      name: "tool",
      args: "<file> <name>",
      description: "All turns involving a specific tool",
    },
    fileCommand("errors", "Tool results with isError=true"),
    fileCommand("reasoning", "Agent reasoning text only", {
      from: stringOption("Start at turn index"),
      to: stringOption("Stop before turn index"),
    }),
    fileCommand("timeline", "Compact one-line-per-turn overview"),
    fileCommand("stats", "Token usage and cost breakdown"),
  ],
  globalOptions: {
    help: { type: "boolean", short: "h", description: "Show this help" },
    version: { type: "boolean", description: "Show version" },
    json: { type: "boolean", description: "Output help as JSON" },
  },
  examples: [
    "fit-trace runs --lookback 7d",
    "fit-trace download 24497273755",
    "fit-trace overview structured.json",
    "fit-trace timeline structured.json",
    "fit-trace search structured.json 'error|fail' --context 1",
    "fit-trace tool structured.json Bash",
    "fit-trace batch structured.json 0 20",
  ],
};
151
+
152
const cli = createCli(definition);
const logger = createLogger("trace");

// Dispatch table: sub-command name -> handler imported from
// ../src/commands/trace.js. Each handler is invoked as handler(values, args).
const COMMANDS = Object.fromEntries([
  ["runs", runRunsCommand],
  ["download", runDownloadCommand],
  ["overview", runOverviewCommand],
  ["count", runCountCommand],
  ["batch", runBatchCommand],
  ["head", runHeadCommand],
  ["tail", runTailCommand],
  ["search", runSearchCommand],
  ["tools", runToolsCommand],
  ["tool", runToolCommand],
  ["errors", runErrorsCommand],
  ["reasoning", runReasoningCommand],
  ["timeline", runTimelineCommand],
  ["stats", runStatsCommand],
]);
171
+
172
/**
 * Entry point: parse argv, resolve the sub-command and run its handler.
 *
 * Exits with status 2 on usage errors (no command, unknown command). Any
 * error thrown by a handler propagates to the `.catch` below, which logs it
 * and exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const parsed = cli.parse(process.argv.slice(2));
  // NOTE(review): a falsy result presumably means the CLI helper already
  // handled the request itself (e.g. help/version output) — confirm in libcli.
  if (!parsed) process.exit(0);

  const { values, positionals } = parsed;

  if (positionals.length === 0) {
    cli.usageError("no command specified");
    process.exit(2);
  }

  const [command, ...args] = positionals;

  // Own-property check: a plain `COMMANDS[command]` lookup would resolve
  // inherited names such as "constructor" or "toString" to Object.prototype
  // members and invoke them as if they were handlers.
  if (!Object.hasOwn(COMMANDS, command)) {
    cli.usageError(`unknown command "${command}"`);
    process.exit(2);
  }

  await COMMANDS[command](values, args);
}

main().catch((error) => {
  logger.exception("main", error);
  cli.error(error.message);
  process.exit(1);
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.15",
3
+ "version": "0.1.16",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -8,10 +8,12 @@
8
8
  "main": "./src/index.js",
9
9
  "exports": {
10
10
  ".": "./src/index.js",
11
- "./bin/fit-eval.js": "./bin/fit-eval.js"
11
+ "./bin/fit-eval.js": "./bin/fit-eval.js",
12
+ "./bin/fit-trace.js": "./bin/fit-trace.js"
12
13
  },
13
14
  "bin": {
14
- "fit-eval": "./bin/fit-eval.js"
15
+ "fit-eval": "./bin/fit-eval.js",
16
+ "fit-trace": "./bin/fit-trace.js"
15
17
  },
16
18
  "files": [
17
19
  "src/**/*.js",
@@ -26,8 +28,9 @@
26
28
  "test": "bun run node --test test/*.test.js"
27
29
  },
28
30
  "dependencies": {
29
- "@anthropic-ai/claude-agent-sdk": "^0.2.98",
31
+ "@anthropic-ai/claude-agent-sdk": "^0.2.112",
30
32
  "@forwardimpact/libcli": "^0.1.0",
33
+ "@forwardimpact/libconfig": "^0.1.0",
31
34
  "@forwardimpact/libtelemetry": "^0.1.22",
32
35
  "zod": "^3.23.0"
33
36
  },
@@ -108,6 +108,7 @@ export class AgentRunner {
108
108
  permissionMode: this.permissionMode,
109
109
  allowDangerouslySkipPermissions: true,
110
110
  abortController,
111
+ ...(this.mcpServers && { mcpServers: this.mcpServers }),
111
112
  },
112
113
  });
113
114
  return await this.#consumeQuery(iterator);
@@ -1,30 +1,18 @@
1
- import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
2
- import { resolve, join } from "node:path";
3
- import { tmpdir } from "node:os";
1
+ import { readFileSync, createWriteStream } from "node:fs";
2
+ import { resolve } from "node:path";
4
3
  import { createFacilitator } from "../facilitator.js";
5
4
  import { createTeeWriter } from "../tee-writer.js";
6
5
 
7
6
/**
 * Parse comma-separated agent profile names into structured configs.
 *
 * Whitespace around each name is trimmed. Empty entries (for example from a
 * trailing or doubled comma) are skipped so they cannot produce an agent with
 * an empty name.
 *
 * @param {string} raw - Comma-separated profile names
 * @param {string} cwd - Shared working directory for all agents
 * @returns {Array<{name: string, role: string, cwd: string, agentProfile: string}>}
 * @throws {Error} When no profile name remains after trimming
 */
function parseAgentProfiles(raw, cwd) {
  const names = raw
    .split(",")
    .map((entry) => entry.trim())
    .filter((name) => name.length > 0);

  if (names.length === 0) {
    throw new Error("--agent-profiles must specify at least one agent");
  }

  // The profile name doubles as the agent's name and role.
  return names.map((name) => ({ name, role: name, cwd, agentProfile: name }));
}
30
18
 
@@ -45,12 +33,10 @@ function parseFacilitateOptions(values) {
45
33
  let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
46
34
  if (taskAmend) taskContent += `\n\n${taskAmend}`;
47
35
 
48
- const agentsRaw = values.agents;
49
- if (!agentsRaw) throw new Error("--agents is required");
50
-
51
- const agentConfigs = parseAgentConfigs(agentsRaw);
52
- if (agentConfigs.length < 1)
53
- throw new Error("--agents must specify at least one agent");
36
+ const profilesRaw = values["agent-profiles"];
37
+ if (!profilesRaw) throw new Error("--agent-profiles is required");
38
+ const agentCwd = resolve(values["agent-cwd"] ?? ".");
39
+ const agentConfigs = parseAgentProfiles(profilesRaw, agentCwd);
54
40
 
55
41
  const maxTurnsRaw = values["max-turns"] ?? "20";
56
42
 
@@ -0,0 +1,149 @@
1
+ import { readFileSync, writeFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { createTraceCollector } from "@forwardimpact/libeval";
4
+ import { createTraceQuery } from "../trace-query.js";
5
+ import { createTraceGitHub } from "../trace-github.js";
6
+
7
+ // --- GitHub commands ---
8
+
9
/**
 * Print recent workflow runs whose name matches a pattern, as JSON.
 *
 * Falls back to the pattern "agent" and a lookback of "7d" when not given.
 *
 * @param {object} values - Parsed option values (reads `repo`, `lookback`)
 * @param {string[]} args - [pattern?]
 */
export async function runRunsCommand(values, args) {
  const gh = await createTraceGitHub({ repo: values.repo });
  const query = {
    pattern: args[0] ?? "agent",
    lookback: values.lookback ?? "7d",
  };
  writeJSON(await gh.listRuns(query));
}
21
+
22
/**
 * Download a trace artifact and auto-convert to structured JSON.
 *
 * When the extracted artifact contains an `.ndjson` file, the first one found
 * is fed line by line through a trace collector and the result is written
 * next to it as `structured.json`. The download result is printed as JSON.
 *
 * @param {object} values - Parsed option values (reads `repo`, `dir`, `artifact`)
 * @param {string[]} args - [run-id]
 * @throws {Error} When no run-id argument is given
 */
export async function runDownloadCommand(values, args) {
  const [runId] = args;
  // Fail early: otherwise the literal "undefined" ends up in the API URL and
  // in the default output directory name.
  if (!runId) {
    throw new Error("download requires a <run-id> argument");
  }

  const gh = await createTraceGitHub({ repo: values.repo });
  const result = await gh.downloadTrace(runId, {
    dir: values.dir,
    name: values.artifact,
  });

  const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
  if (ndjsonFile) {
    const ndjsonPath = join(result.dir, ndjsonFile);
    const collector = createTraceCollector();
    for (const line of readFileSync(ndjsonPath, "utf8").split("\n")) {
      collector.addLine(line);
    }
    const structuredPath = join(result.dir, "structured.json");
    writeFileSync(structuredPath, JSON.stringify(collector.toJSON()) + "\n");
    // A previous download into the same directory leaves structured.json
    // behind, so it may already be in the listing — avoid a duplicate entry.
    if (!result.files.includes("structured.json")) {
      result.files.push("structured.json");
    }
  }

  writeJSON(result);
}
48
+
49
+ // --- Query commands ---
50
+
51
/**
 * Print the trace overview as JSON.
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file]
 */
export async function runOverviewCommand(values, args) {
  const [file] = args;
  const trace = loadTrace(file);
  writeJSON(trace.overview());
}
55
+
56
/**
 * Print the number of turns in the trace as a bare line (not JSON).
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file]
 */
export async function runCountCommand(values, args) {
  const [file] = args;
  const total = loadTrace(file).count();
  process.stdout.write(`${String(total)}\n`);
}
60
+
61
/**
 * Print the turns in the range given by `from` and `to` as JSON.
 * Both bounds are parsed as base-10 integers.
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file, from, to]
 */
export async function runBatchCommand(values, args) {
  const [file, fromRaw, toRaw] = args;
  const trace = loadTrace(file);
  const from = parseInt(fromRaw, 10);
  const to = parseInt(toRaw, 10);
  writeJSON(trace.batch(from, to));
}
67
+
68
/**
 * Print the first N turns as JSON (N defaults to 10 when omitted).
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file, N?]
 */
export async function runHeadCommand(values, args) {
  const [file, countRaw] = args;
  let n = 10;
  if (countRaw) {
    n = parseInt(countRaw, 10);
  }
  writeJSON(loadTrace(file).head(n));
}
73
+
74
/**
 * Print the last N turns as JSON (N defaults to 10 when omitted).
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file, N?]
 */
export async function runTailCommand(values, args) {
  const [file, countRaw] = args;
  let n = 10;
  if (countRaw) {
    n = parseInt(countRaw, 10);
  }
  writeJSON(loadTrace(file).tail(n));
}
79
+
80
/**
 * Search the trace for a pattern and print the hits as JSON.
 * `limit` defaults to 50 and `context` to 0 when the options are not given.
 * @param {object} values - Parsed option values (reads `limit`, `context`)
 * @param {string[]} args - [file, pattern]
 */
export async function runSearchCommand(values, args) {
  const [file, pattern] = args;
  const searchOptions = {
    limit: values.limit ? parseInt(values.limit, 10) : 50,
    context: values.context ? parseInt(values.context, 10) : 0,
  };
  writeJSON(loadTrace(file).search(pattern, searchOptions));
}
86
+
87
/**
 * Print tool usage frequency as JSON.
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file]
 */
export async function runToolsCommand(values, args) {
  const [file] = args;
  const trace = loadTrace(file);
  writeJSON(trace.toolFrequency());
}
91
+
92
/**
 * Print the result of the trace's `tool(name)` query as JSON.
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file, name]
 */
export async function runToolCommand(values, args) {
  const [file, toolName] = args;
  const trace = loadTrace(file);
  writeJSON(trace.tool(toolName));
}
96
+
97
/**
 * Print the result of the trace's `errors()` query as JSON.
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file]
 */
export async function runErrorsCommand(values, args) {
  const [file] = args;
  const trace = loadTrace(file);
  writeJSON(trace.errors());
}
101
+
102
/**
 * Print the result of the trace's `reasoning()` query as JSON.
 * `from` / `to` are passed as integers, or undefined when not given.
 * @param {object} values - Parsed option values (reads `from`, `to`)
 * @param {string[]} args - [file]
 */
export async function runReasoningCommand(values, args) {
  const [file] = args;
  const toIndex = (raw) => (raw ? parseInt(raw, 10) : undefined);
  const range = { from: toIndex(values.from), to: toIndex(values.to) };
  writeJSON(loadTrace(file).reasoning(range));
}
108
+
109
/**
 * Print the trace timeline as plain text, one entry per line (not JSON).
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file]
 */
export async function runTimelineCommand(values, args) {
  const [file] = args;
  const entries = loadTrace(file).timeline();
  process.stdout.write(`${entries.join("\n")}\n`);
}
114
+
115
/**
 * Print the result of the trace's `stats()` query as JSON.
 * @param {object} values - Parsed option values (unused)
 * @param {string[]} args - [file]
 */
export async function runStatsCommand(values, args) {
  const [file] = args;
  const trace = loadTrace(file);
  writeJSON(trace.stats());
}
119
+
120
+ // --- Shared helpers ---
121
+
122
/**
 * Load a trace file into a query object.
 *
 * The file is first tried as a single JSON document; if that document has a
 * truthy `turns` property it is used directly. Otherwise the content is
 * treated as NDJSON and fed line by line through a trace collector.
 *
 * @param {string} file - Path to the trace file
 * @returns {import("../trace-query.js").TraceQuery}
 */
function loadTrace(file) {
  const text = readFileSync(file, "utf8");

  try {
    const doc = JSON.parse(text);
    if (doc.turns) return createTraceQuery(doc);
  } catch {
    // Any failure in the structured-JSON attempt (typically a parse error on
    // NDJSON input) falls through to the line-by-line path below.
  }

  const collector = createTraceCollector();
  text.split("\n").forEach((line) => {
    collector.addLine(line);
  });
  return createTraceQuery(collector.toJSON());
}
145
+
146
/**
 * Write a value to stdout as 2-space-indented JSON followed by a newline.
 * @param {object} data
 */
function writeJSON(data) {
  const serialized = JSON.stringify(data, null, 2);
  process.stdout.write(`${serialized}\n`);
}
@@ -18,19 +18,27 @@ import {
18
18
 
19
19
/**
 * System prompt appended for the facilitator runner: describes each
 * coordination tool and tells the facilitator to stop and wait for
 * participant responses after sending messages.
 */
export const FACILITATOR_SYSTEM_PROMPT = [
  "You coordinate multiple agents working on a shared task.",
  "Tell sends a direct message to one participant.",
  "Share broadcasts a message to all participants.",
  "Redirect interrupts a participant and replaces their current instructions.",
  "RollCall lists available participants and their roles.",
  "Conclude ends the session with a summary.",
  "Participants communicate with you via Share and may Ask you questions.",
  "IMPORTANT: After sending messages via Tell or Share, stop making tool calls and produce a text response.",
  "The system will resume you with participant responses.",
  "Do not proceed to the next question or call Conclude until you have received responses from participants.",
].join(" ");
26
32
 
27
33
/**
 * System prompt appended for facilitated agent runners: describes the
 * communication tools available to an agent and states that facilitator
 * redirections are authoritative.
 */
export const FACILITATED_AGENT_SYSTEM_PROMPT =
  "You are one of several agents working on a shared task under a " +
  "facilitator's coordination. " +
  "Share broadcasts your message to all participants. " +
  "Tell sends a direct message to one participant. " +
  // The semicolon separates the two clauses; without it the sentence ran on.
  "Ask sends a question to the facilitator; you block until answered. " +
  "RollCall lists available participants and their roles. " +
  "The facilitator may Redirect you with new instructions " +
  "— treat redirections as authoritative.";
35
43
 
36
44
  function createAsyncQueue() {
@@ -109,28 +117,40 @@ export class Facilitator {
109
117
  async run(task) {
110
118
  this.emitOrchestratorEvent({ type: "session_start" });
111
119
 
120
+ // Launch agent loops first — they wait for messages via messageBus.
121
+ // This lets agents process Tell/Share messages that arrive during the
122
+ // facilitator's initial run, rather than after it completes.
123
+ const agentPromises = this.agents.map((a) => this.#runAgent(a));
124
+
112
125
  // Turn 0: facilitator receives the task
113
126
  this.facilitatorTurns++;
114
127
  await this.facilitatorRunner.run(task);
115
128
 
129
+ // Handle redirect after turn 0
130
+ await this.#processRedirect();
131
+
116
132
  if (this.ctx.concluded) {
133
+ // Facilitator concluded during its initial run. Let agents finish any
134
+ // in-progress work before returning — they may have received Tell/Share
135
+ // messages and started processing concurrently.
117
136
  this.concludeResolve();
118
- this.emitSummary({ success: true, turns: 0, summary: this.ctx.summary });
119
- return { success: true, turns: 0 };
137
+ await Promise.allSettled(agentPromises);
138
+ this.emitSummary({
139
+ success: true,
140
+ turns: this.facilitatorTurns,
141
+ summary: this.ctx.summary,
142
+ });
143
+ return { success: true, turns: this.facilitatorTurns };
120
144
  }
121
145
 
122
- // Handle redirect after turn 0
123
- await this.#processRedirect();
124
-
125
- // Abort agents promptly when Conclude is called
146
+ // Abort agents promptly when Conclude is called during the event loop
126
147
  this.concludePromise.then(() => {
127
148
  for (const agent of this.agents) {
128
149
  agent.runner.currentAbortController?.abort();
129
150
  }
130
151
  });
131
152
 
132
- // Launch all loops concurrently
133
- const agentPromises = this.agents.map((a) => this.#runAgent(a));
153
+ // Concurrent phase: facilitator event loop + already-running agent loops
134
154
  const facilitatorPromise = this.#facilitatorLoop();
135
155
 
136
156
  try {
package/src/index.js CHANGED
@@ -1,4 +1,10 @@
1
1
  export { TraceCollector, createTraceCollector } from "./trace-collector.js";
2
+ export { TraceQuery, createTraceQuery } from "./trace-query.js";
3
+ export {
4
+ TraceGitHub,
5
+ createTraceGitHub,
6
+ parseGitRemote,
7
+ } from "./trace-github.js";
2
8
  export { AgentRunner, createAgentRunner } from "./agent-runner.js";
3
9
  export {
4
10
  Supervisor,
@@ -154,13 +154,13 @@ export function createFacilitatorToolServer(ctx) {
154
154
  ),
155
155
  tool(
156
156
  "Share",
157
- "Broadcast a message to all participants.",
157
+ "Broadcast a message to all participants. After sending, stop making tool calls to receive responses.",
158
158
  { message: z.string() },
159
159
  createShareHandler(ctx, { from: "facilitator" }),
160
160
  ),
161
161
  tool(
162
162
  "Tell",
163
- "Send a direct message to one participant.",
163
+ "Send a direct message to one participant. After sending, stop making tool calls to receive their response.",
164
164
  { message: z.string(), to: z.string() },
165
165
  createTellHandler(ctx, { from: "facilitator" }),
166
166
  ),
@@ -0,0 +1,213 @@
1
+ import { createWriteStream } from "node:fs";
2
+ import { mkdir } from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { pipeline } from "node:stream/promises";
5
+ import { Readable } from "node:stream";
6
+
7
const API = "https://api.github.com";

/**
 * GitHub API client for trace-related operations: listing workflow runs
 * and downloading trace artifacts.
 */
export class TraceGitHub {
  /**
   * @param {object} deps
   * @param {string} deps.token - GitHub token
   * @param {string} deps.owner - Repository owner
   * @param {string} deps.repo - Repository name
   */
  constructor({ token, owner, repo }) {
    this.token = token;
    this.owner = owner;
    this.repo = repo;
  }

  /**
   * List recent workflow runs, optionally filtered by name pattern.
   *
   * @param {object} [opts]
   * @param {string} [opts.pattern] - Case-insensitive regular expression tested against the workflow name (default: "agent")
   * @param {number} [opts.limit=50] - Page size requested from the GitHub API (capped at 100)
   * @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
   * @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url}
   * @throws {SyntaxError} When `pattern` is not a valid regular expression
   * @throws {Error} When the GitHub API responds with a non-OK status
   */
  async listRuns(opts = {}) {
    const { pattern = "agent", limit = 50, lookback = "7d" } = opts;

    // Compile first so an invalid pattern fails before any network request.
    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
    const re = new RegExp(pattern, "i");

    const cutoff = parseLookback(lookback);

    const params = new URLSearchParams({
      per_page: String(Math.min(limit, 100)),
    });
    // An unparseable lookback yields no cutoff, so no date filter is sent.
    if (cutoff) {
      params.set("created", `>=${cutoff}`);
    }

    const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs?${params}`;
    const data = await this.#get(url);
    const runs = data.workflow_runs ?? [];

    return runs
      .filter((r) => re.test(r.name))
      .map((r) => ({
        workflow: r.name,
        runId: r.id,
        status: r.status,
        conclusion: r.conclusion,
        createdAt: r.created_at,
        branch: r.head_branch,
        url: r.html_url,
      }));
  }

  /**
   * Download a trace artifact from a workflow run and extract it.
   *
   * Tries artifact names in order: combined-trace, agent-trace.
   * The artifact zip is downloaded and extracted to the output directory
   * using the system `unzip` command.
   *
   * @param {number|string} runId - Numeric workflow run id
   * @param {object} [opts]
   * @param {string} [opts.dir] - Output directory (default: /tmp/trace-<runId>)
   * @param {string} [opts.name] - Specific artifact name to download
   * @returns {Promise<{dir: string, artifact: string, files: string[]}>}
   * @throws {Error} When the run id is not numeric, no matching artifact
   *   exists, or a request fails
   */
  async downloadTrace(runId, opts = {}) {
    // The id is interpolated into both a filesystem path and an API URL, so
    // reject anything that is not a plain integer before doing any I/O.
    const id = String(runId);
    if (!/^\d+$/.test(id)) {
      throw new Error(`Invalid run id: ${id}`);
    }

    const dir = opts.dir ?? `/tmp/trace-${id}`;
    await mkdir(dir, { recursive: true });

    // List artifacts for this run.
    const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${id}/artifacts`;
    const data = await this.#get(url);
    const artifacts = data.artifacts ?? [];

    // Find the trace artifact.
    const preferredNames = opts.name
      ? [opts.name]
      : ["combined-trace", "agent-trace"];
    let artifact = null;
    for (const name of preferredNames) {
      artifact = artifacts.find((a) => a.name === name);
      if (artifact) break;
    }

    if (!artifact) {
      const available = artifacts.map((a) => a.name).join(", ");
      throw new Error(
        `No trace artifact found for run ${id}. Available: ${available || "none"}`,
      );
    }

    // Download the zip.
    const zipPath = path.join(dir, `${artifact.name}.zip`);
    const downloadUrl = `${API}/repos/${this.owner}/${this.repo}/actions/artifacts/${artifact.id}/zip`;
    const response = await fetch(downloadUrl, {
      headers: this.#headers(),
      redirect: "follow",
    });
    if (!response.ok) {
      throw new Error(
        `Failed to download artifact: ${response.status} ${response.statusText}`,
      );
    }

    // Stream to disk then extract.
    await pipeline(Readable.fromWeb(response.body), createWriteStream(zipPath));

    // Pass paths as argv entries instead of building a shell string: inside
    // double quotes a shell still expands `$(...)` and backticks, so quoting
    // with JSON.stringify does not make user-supplied paths safe.
    const { execFileSync } = await import("node:child_process");
    execFileSync("unzip", ["-o", "-q", zipPath, "-d", dir]);

    // List extracted files.
    const { readdirSync } = await import("node:fs");
    const files = readdirSync(dir).filter((f) => !f.endsWith(".zip"));

    return { dir, artifact: artifact.name, files };
  }

  /**
   * GET a GitHub API URL and parse the JSON response.
   * @param {string} url
   * @returns {Promise<object>}
   * @throws {Error} When the response status is not OK
   */
  async #get(url) {
    const response = await fetch(url, { headers: this.#headers() });
    if (!response.ok) {
      throw new Error(`GitHub API: ${response.status} ${response.statusText}`);
    }
    return response.json();
  }

  /** @returns {Record<string, string>} Request headers, including the bearer token */
  #headers() {
    return {
      Authorization: `Bearer ${this.token}`,
      Accept: "application/vnd.github+json",
      "X-GitHub-Api-Version": "2022-11-28",
    };
  }
}
152
+
153
/**
 * Convert a lookback duration such as "7d", "24h" or "2w" into the ISO
 * timestamp that lies that far before the current time.
 *
 * @param {string} lookback - Digits followed by one unit letter: d, h or w
 * @returns {string|null} ISO date string, or null when the input does not
 *   match that format
 */
function parseLookback(lookback) {
  const MS_PER_UNIT = { h: 3600000, d: 86400000, w: 604800000 };

  const parts = lookback.match(/^(\d+)([dhw])$/);
  if (!parts) {
    return null;
  }

  const amount = parseInt(parts[1], 10);
  const elapsedMs = amount * MS_PER_UNIT[parts[2]];
  return new Date(Date.now() - elapsedMs).toISOString();
}
166
+
167
/**
 * Parse a GitHub repository URL or "owner/repo" string.
 *
 * Accepts SSH remotes (git@github.com:owner/repo.git), HTTPS URLs
 * (https://github.com/owner/repo) and the plain "owner/repo" form. A trailing
 * ".git" suffix and a trailing slash are not part of the returned repo name.
 *
 * @param {string} remote - Git remote URL or owner/repo string
 * @returns {{owner: string, repo: string}}
 * @throws {Error} When the input matches none of the supported forms
 */
export function parseGitRemote(remote) {
  const candidate = remote.trim();

  // One pattern covers both hosted forms: the separator after the host is
  // ":" for SSH remotes and "/" for HTTPS URLs.
  const hosted = candidate.match(
    /github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?\/?$/,
  );
  if (hosted) return { owner: hosted[1], repo: hosted[2] };

  // Plain owner/repo format (no github.com prefix).
  const simple = candidate.match(/^([^/:@]+)\/([^/]+?)(?:\.git)?$/);
  if (simple) return { owner: simple[1], repo: simple[2] };

  throw new Error(`Cannot parse GitHub remote: ${remote}`);
}
187
+
188
/**
 * Build a TraceGitHub client.
 *
 * The token comes from the libconfig script config named "eval". The
 * repository is taken from `opts.repo` when given; otherwise it is read from
 * the `origin` remote of the git repository in the current directory.
 *
 * @param {object} [opts]
 * @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
 * @returns {Promise<TraceGitHub>}
 */
export async function createTraceGitHub(opts = {}) {
  const { createScriptConfig } = await import("@forwardimpact/libconfig");
  const config = await createScriptConfig("eval");
  const token = config.ghToken();

  let remote = opts.repo;
  if (!remote) {
    // No override: ask git for the origin URL.
    const { execSync } = await import("node:child_process");
    remote = execSync("git remote get-url origin", {
      encoding: "utf8",
    }).trim();
  }

  const { owner, repo } = parseGitRemote(remote);
  return new TraceGitHub({ token, owner, repo });
}
@@ -0,0 +1,346 @@
1
/**
 * Query engine for structured trace documents produced by TraceCollector.
 *
 * Loads a structured JSON trace into memory and provides methods for
 * paging, searching, filtering, and summarizing turns — the operations
 * agents need to analyze large traces efficiently.
 */
export class TraceQuery {
  /**
   * @param {object} trace - Structured trace document (output of TraceCollector.toJSON())
   */
  constructor(trace) {
    this.trace = trace;
    this.metadata = trace.metadata ?? {};
    this.turns = trace.turns ?? [];
    this.summary = trace.summary ?? {};
  }

  /**
   * High-level overview: metadata, summary, turn count, and tool frequency.
   * @returns {object}
   */
  overview() {
    return {
      metadata: this.metadata,
      summary: this.summary,
      turnCount: this.turns.length,
      tools: this.toolFrequency(),
    };
  }

  /** @returns {number} Total number of turns in the trace. */
  count() {
    return this.turns.length;
  }

  /**
   * Return turns in range [from, to) (zero-indexed array positions).
   * @param {number} from
   * @param {number} to
   * @returns {object[]}
   */
  batch(from, to) {
    return this.turns.slice(from, to);
  }

  /**
   * First N turns.
   * @param {number} [n=10]
   * @returns {object[]}
   */
  head(n = 10) {
    return this.turns.slice(0, n);
  }

  /**
   * Last N turns. A non-positive (or NaN) count yields an empty array.
   * @param {number} [n=10]
   * @returns {object[]}
   */
  tail(n = 10) {
    // slice(-0) is slice(0), which would return *every* turn for n === 0,
    // so non-positive counts are handled explicitly.
    return n > 0 ? this.turns.slice(-n) : [];
  }

  /**
   * Search all turn content for a regex pattern. Each hit carries the
   * matching turn, a list of short match descriptions, and (optionally)
   * the neighbouring turns.
   *
   * Searches: assistant text blocks, tool_use names and stringified input,
   * and tool_result content.
   *
   * @param {string} pattern - Regex pattern (case-insensitive)
   * @param {object} [opts]
   * @param {number} [opts.context=0] - Number of surrounding turns to include
   * @param {number} [opts.limit=50] - Max results
   * @returns {object[]} Array of {turn, matches, context?}
   * @throws {SyntaxError} If `pattern` is not a valid regular expression
   */
  search(pattern, opts = {}) {
    const { context = 0, limit = 50 } = opts;
    // No "g" flag: a global regex keeps `lastIndex` between test()/exec()
    // calls, so reusing it across blocks and turns can start a later match
    // mid-string and silently miss hits. Only the first match per string is
    // ever needed here.
    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
    const re = new RegExp(pattern, "i");
    const hits = [];
    if (limit <= 0) return hits;

    for (const turn of this.turns) {
      const matches = matchTurn(turn, re);
      if (matches.length === 0) continue;

      const entry = { turn, matches };
      if (context > 0) {
        // Neighbours are selected by the turn's own `index` field, not by
        // array position.
        const idx = turn.index;
        entry.context = this.turns.filter(
          (t) =>
            t.index !== idx &&
            t.index >= idx - context &&
            t.index <= idx + context,
        );
      }
      hits.push(entry);
      if (hits.length >= limit) break;
    }
    return hits;
  }

  /**
   * Tool usage frequency, sorted descending.
   * @returns {Array<{tool: string, count: number}>}
   */
  toolFrequency() {
    // A Map (rather than a plain object) keeps tool names such as
    // "constructor" or "toString" from colliding with inherited properties.
    const counts = new Map();
    for (const turn of this.turns) {
      if (turn.role !== "assistant") continue;
      for (const block of contentBlocks(turn)) {
        if (block.type !== "tool_use") continue;
        const tool = String(block.name);
        counts.set(tool, (counts.get(tool) ?? 0) + 1);
      }
    }
    return [...counts]
      .map(([tool, count]) => ({ tool, count }))
      .sort((a, b) => b.count - a.count);
  }

  /**
   * Filter turns involving a specific tool (both the tool_use and its result).
   * A tool_result turn is included only when its toolUseId was seen on an
   * earlier matching tool_use block.
   * @param {string} name - Tool name
   * @returns {object[]}
   */
  tool(name) {
    const toolUseIds = new Set();
    const results = [];

    for (const turn of this.turns) {
      if (turn.role === "assistant") {
        const uses = contentBlocks(turn).filter(
          (b) => b.type === "tool_use" && b.name === name,
        );
        if (uses.length === 0) continue;
        results.push(turn);
        for (const b of uses) {
          if (b.toolUseId) toolUseIds.add(b.toolUseId);
        }
      } else if (
        turn.role === "tool_result" &&
        toolUseIds.has(turn.toolUseId)
      ) {
        results.push(turn);
      }
    }
    return results;
  }

  /**
   * All error turns (tool results with isError=true).
   * @returns {object[]}
   */
  errors() {
    return this.turns.filter(
      (t) => t.role === "tool_result" && t.isError === true,
    );
  }

  /**
   * Extract just the reasoning text from assistant turns. The text blocks
   * of one turn are joined with newlines; turns without text are omitted.
   * @param {object} [opts]
   * @param {number} [opts.from] - Start turn index
   * @param {number} [opts.to] - End turn index (exclusive)
   * @returns {Array<{index: number, text: string}>}
   */
  reasoning(opts = {}) {
    const { from, to } = opts;
    const results = [];
    for (const turn of this.turns) {
      if (turn.role !== "assistant") continue;
      if (from !== undefined && turn.index < from) continue;
      if (to !== undefined && turn.index >= to) continue;
      const texts = contentBlocks(turn)
        .filter((b) => b.type === "text")
        .map((b) => b.text);
      if (texts.length > 0) {
        results.push({ index: turn.index, text: texts.join("\n") });
      }
    }
    return results;
  }

  /**
   * Compact one-line-per-assistant-turn timeline showing tool names,
   * token usage, and a reasoning snippet. Thinking-only turns are skipped
   * entirely (their content is model-internal).
   * @returns {string[]}
   */
  timeline() {
    const lines = [];
    for (const turn of this.turns) {
      if (turn.role !== "assistant") continue;

      const blocks = contentBlocks(turn);
      const tools = blocks
        .filter((b) => b.type === "tool_use")
        .map((b) => b.name);
      const textBlocks = blocks
        .filter((b) => b.type === "text")
        .map((b) => b.text);
      const hasThinking = blocks.some((b) => b.type === "thinking");

      // Skip thinking-only turns (no user-visible content).
      if (hasThinking && tools.length === 0 && textBlocks.length === 0) {
        continue;
      }

      const snippet = textBlocks.join(" ").slice(0, 80).replace(/\n/g, " ");

      const input = turn.usage?.inputTokens ?? 0;
      const output = turn.usage?.outputTokens ?? 0;
      const cacheRead = turn.usage?.cacheReadInputTokens ?? 0;

      const toolStr = tools.length > 0 ? tools.join(", ") : "(text only)";
      // Input shown is fresh input plus cache reads.
      const tokenStr = `in:${fmtK(input + cacheRead)} out:${fmtK(output)}`;

      lines.push(
        `[${turn.index}] ${toolStr.padEnd(30)} ${tokenStr.padEnd(18)} ${snippet}`,
      );
    }
    return lines;
  }

  /**
   * Token usage per assistant turn, plus totals. Cost and duration in the
   * totals are taken from the trace summary, not recomputed. Assistant
   * turns without a `usage` object are ignored.
   * @returns {object}
   */
  stats() {
    let totalInput = 0;
    let totalOutput = 0;
    let totalCacheRead = 0;
    let totalCacheCreate = 0;
    const perTurn = [];

    for (const turn of this.turns) {
      if (turn.role !== "assistant" || !turn.usage) continue;
      const u = turn.usage;
      const inputTokens = u.inputTokens ?? 0;
      const outputTokens = u.outputTokens ?? 0;
      const cacheReadInputTokens = u.cacheReadInputTokens ?? 0;
      const cacheCreationInputTokens = u.cacheCreationInputTokens ?? 0;

      totalInput += inputTokens;
      totalOutput += outputTokens;
      totalCacheRead += cacheReadInputTokens;
      totalCacheCreate += cacheCreationInputTokens;

      perTurn.push({
        index: turn.index,
        inputTokens,
        outputTokens,
        cacheReadInputTokens,
        cacheCreationInputTokens,
      });
    }

    return {
      totals: {
        inputTokens: totalInput,
        outputTokens: totalOutput,
        cacheReadInputTokens: totalCacheRead,
        cacheCreationInputTokens: totalCacheCreate,
        totalCostUsd: this.summary.totalCostUsd ?? 0,
        durationMs: this.summary.durationMs ?? 0,
      },
      perTurn,
    };
  }
}

/**
 * Content blocks of a turn, or an empty list when the turn carries no
 * block array (so one malformed turn cannot crash a whole query).
 * @param {object} turn
 * @returns {object[]}
 */
function contentBlocks(turn) {
  return Array.isArray(turn.content) ? turn.content : [];
}
271
+
272
/**
 * Search a single turn for regex matches. Returns array of match descriptions.
 *
 * Assistant turns are checked block by block (text, tool name, stringified
 * tool input); tool_result turns are checked on their content. Turns with
 * any other role never match.
 *
 * The regex may carry the "g" flag: its `lastIndex` is reset around every
 * test so a previous match can never make a later test start mid-string.
 *
 * @param {object} turn
 * @param {RegExp} re
 * @returns {string[]}
 */
function matchTurn(turn, re) {
  // Stateless test: global/sticky regexes remember where the last match
  // ended, which would otherwise cause later matches to be missed.
  const found = (text) => {
    re.lastIndex = 0;
    const ok = re.test(text);
    re.lastIndex = 0;
    return ok;
  };

  const matches = [];
  if (turn.role === "assistant") {
    const blocks = Array.isArray(turn.content) ? turn.content : [];
    for (const block of blocks) {
      if (
        block.type === "text" &&
        typeof block.text === "string" &&
        found(block.text)
      ) {
        matches.push(`text: ${excerptAround(block.text, re)}`);
      }
      if (block.type === "tool_use") {
        if (typeof block.name === "string" && found(block.name)) {
          matches.push(`tool_name: ${block.name}`);
        }
        // JSON.stringify(undefined) is undefined, not a string; fall back to
        // "" so a missing input neither matches "undefined" nor crashes the
        // excerpt helper.
        const inputStr = JSON.stringify(block.input) ?? "";
        if (found(inputStr)) {
          matches.push(
            `tool_input(${block.name}): ${excerptAround(inputStr, re)}`,
          );
        }
      }
    }
  } else if (turn.role === "tool_result") {
    const raw = turn.content ?? "";
    // Structured (non-string) results are searched through their JSON form
    // rather than an unhelpful "[object Object]" coercion.
    const content = typeof raw === "string" ? raw : (JSON.stringify(raw) ?? "");
    if (found(content)) {
      matches.push(`result: ${excerptAround(content, re)}`);
    }
  }
  return matches;
}
309
+
310
/**
 * Extract a short excerpt around the first regex match in text: up to 40
 * characters on either side of the match, with "..." marking each side that
 * was cut. When nothing matches, the first 100 characters are returned.
 *
 * The regex's `lastIndex` is reset both before and after the lookup, so a
 * global ("g") regex can safely be reused by the caller afterwards.
 *
 * @param {string} text
 * @param {RegExp} re
 * @returns {string}
 */
function excerptAround(text, re) {
  // Tolerate non-string input instead of throwing on `.slice` / `.length`.
  const source = typeof text === "string" ? text : String(text ?? "");

  re.lastIndex = 0;
  const m = re.exec(source);
  // exec() on a global/sticky regex advances lastIndex to the end of the
  // match; leaving it there would make the caller's next test() start
  // mid-string and miss matches.
  re.lastIndex = 0;

  if (!m) return source.slice(0, 100);

  const start = Math.max(0, m.index - 40);
  const end = Math.min(source.length, m.index + m[0].length + 40);
  const prefix = start > 0 ? "..." : "";
  const suffix = end < source.length ? "..." : "";
  return `${prefix}${source.slice(start, end)}${suffix}`;
}
327
+
328
/**
 * Render a token count compactly: values below 1000 are printed as-is,
 * anything larger is shown in thousands with one decimal and a "K" suffix
 * (e.g. 1536 -> "1.5K").
 * @param {number} n
 * @returns {string}
 */
function fmtK(n) {
  return n < 1000 ? String(n) : (n / 1000).toFixed(1) + "K";
}
337
+
338
/**
 * Build a TraceQuery from a structured trace.
 *
 * A string argument is parsed as JSON first; any other value is handed to
 * the TraceQuery constructor unchanged.
 *
 * @param {string|object} json - JSON text of a structured trace, or an already-parsed trace
 * @returns {TraceQuery}
 */
export function createTraceQuery(json) {
  if (typeof json === "string") {
    return new TraceQuery(JSON.parse(json));
  }
  return new TraceQuery(json);
}