npm - @forwardimpact/libeval - Versions diffs - 0.1.15 → 0.1.16 - Mend

@forwardimpact/libeval 0.1.15 → 0.1.16

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

package/bin/fit-eval.js +7 -4
package/bin/fit-trace.js +198 -0
package/package.json +7 -4
package/src/agent-runner.js +1 -0
package/src/commands/facilitate.js +14 -28
package/src/commands/trace.js +149 -0
package/src/facilitator.js +37 -17
package/src/index.js +6 -0
package/src/orchestration-toolkit.js +2 -2
package/src/trace-github.js +213 -0
package/src/trace-query.js +346 -0

package/bin/fit-eval.js CHANGED Viewed

@@ -117,10 +117,13 @@ const definition = {
           type: "string",
           description: "Facilitator profile name",
         },
-        agents: {
+        "agent-profiles": {
           type: "string",
-          description:
-            "Agent configs: name1:cwd=/tmp/a:role=explorer,name2:cwd=/tmp/b:role=tester",
+          description: "Comma-separated agent profile names",
+        },
+        "agent-cwd": {
+          type: "string",
+          description: "Agent working directory (default: .)",
         },
       },
     },
@@ -135,7 +138,7 @@ const definition = {
     "fit-eval output --format=text < trace.ndjson",
     "fit-eval run --task-file=task.md --model=opus",
     "fit-eval supervise --task-file=task.md --supervisor-cwd=.",
-    'fit-eval facilitate --task-file=task.md --agents "explorer:cwd=/tmp/a,tester:cwd=/tmp/b"',
+    'fit-eval facilitate --task-file=task.md --agent-profiles "security-engineer,technical-writer"',
   ],
 };

package/bin/fit-trace.js ADDED Viewed

@@ -0,0 +1,198 @@
+#!/usr/bin/env node
+import { readFileSync } from "node:fs";
+import { createCli } from "@forwardimpact/libcli";
+import { createLogger } from "@forwardimpact/libtelemetry";
+import {
+  runRunsCommand,
+  runDownloadCommand,
+  runOverviewCommand,
+  runCountCommand,
+  runBatchCommand,
+  runHeadCommand,
+  runTailCommand,
+  runSearchCommand,
+  runToolsCommand,
+  runToolCommand,
+  runErrorsCommand,
+  runReasoningCommand,
+  runTimelineCommand,
+  runStatsCommand,
+} from "../src/commands/trace.js";
+const { version: VERSION } = JSON.parse(
+  readFileSync(new URL("../package.json", import.meta.url), "utf8"),
+);
+const definition = {
+  name: "fit-trace",
+  version: VERSION,
+  description: "Download, query, and search agent execution traces",
+  commands: [
+    {
+      name: "runs",
+      args: "[pattern]",
+      description: "List recent workflow runs (default pattern: agent)",
+      options: {
+        lookback: {
+          type: "string",
+          description: "How far back to search (default: 7d)",
+        },
+        repo: {
+          type: "string",
+          description: "GitHub repo override (default: git remote)",
+        },
+      },
+    },
+    {
+      name: "download",
+      args: "<run-id>",
+      description: "Download trace artifact and convert to structured JSON",
+      options: {
+        dir: { type: "string", description: "Output directory" },
+        artifact: { type: "string", description: "Artifact name override" },
+        repo: {
+          type: "string",
+          description: "GitHub repo override (default: git remote)",
+        },
+      },
+    },
+    {
+      name: "overview",
+      args: "<file>",
+      description: "Metadata, summary, turn count, tool frequency",
+    },
+    {
+      name: "count",
+      args: "<file>",
+      description: "Number of turns",
+    },
+    {
+      name: "batch",
+      args: "<file> <from> <to>",
+      description: "Turns in range [from, to) (zero-indexed)",
+    },
+    {
+      name: "head",
+      args: "<file> [N]",
+      description: "First N turns (default 10)",
+    },
+    {
+      name: "tail",
+      args: "<file> [N]",
+      description: "Last N turns (default 10)",
+    },
+    {
+      name: "search",
+      args: "<file> <pattern>",
+      description: "Search all content for regex pattern",
+      options: {
+        limit: {
+          type: "string",
+          description: "Max results (default: 50)",
+        },
+        context: {
+          type: "string",
+          description: "Surrounding turns per hit (default: 0)",
+        },
+      },
+    },
+    {
+      name: "tools",
+      args: "<file>",
+      description: "Tool usage frequency (descending)",
+    },
+    {
+      name: "tool",
+      args: "<file> <name>",
+      description: "All turns involving a specific tool",
+    },
+    {
+      name: "errors",
+      args: "<file>",
+      description: "Tool results with isError=true",
+    },
+    {
+      name: "reasoning",
+      args: "<file>",
+      description: "Agent reasoning text only",
+      options: {
+        from: { type: "string", description: "Start at turn index" },
+        to: { type: "string", description: "Stop before turn index" },
+      },
+    },
+    {
+      name: "timeline",
+      args: "<file>",
+      description: "Compact one-line-per-turn overview",
+    },
+    {
+      name: "stats",
+      args: "<file>",
+      description: "Token usage and cost breakdown",
+    },
+  ],
+  globalOptions: {
+    help: { type: "boolean", short: "h", description: "Show this help" },
+    version: { type: "boolean", description: "Show version" },
+    json: { type: "boolean", description: "Output help as JSON" },
+  },
+  examples: [
+    "fit-trace runs --lookback 7d",
+    "fit-trace download 24497273755",
+    "fit-trace overview structured.json",
+    "fit-trace timeline structured.json",
+    "fit-trace search structured.json 'error|fail' --context 1",
+    "fit-trace tool structured.json Bash",
+    "fit-trace batch structured.json 0 20",
+  ],
+};
+const cli = createCli(definition);
+const logger = createLogger("trace");
+const COMMANDS = {
+  runs: runRunsCommand,
+  download: runDownloadCommand,
+  overview: runOverviewCommand,
+  count: runCountCommand,
+  batch: runBatchCommand,
+  head: runHeadCommand,
+  tail: runTailCommand,
+  search: runSearchCommand,
+  tools: runToolsCommand,
+  tool: runToolCommand,
+  errors: runErrorsCommand,
+  reasoning: runReasoningCommand,
+  timeline: runTimelineCommand,
+  stats: runStatsCommand,
+};
+async function main() {
+  const parsed = cli.parse(process.argv.slice(2));
+  if (!parsed) process.exit(0);
+  const { values, positionals } = parsed;
+  if (positionals.length === 0) {
+    cli.usageError("no command specified");
+    process.exit(2);
+  }
+  const [command, ...args] = positionals;
+  const handler = COMMANDS[command];
+  if (!handler) {
+    cli.usageError(`unknown command "${command}"`);
+    process.exit(2);
+  }
+  await handler(values, args);
+}
+main().catch((error) => {
+  logger.exception("main", error);
+  cli.error(error.message);
+  process.exit(1);
+});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.15",
+  "version": "0.1.16",
   "description": "Process Claude Code stream-json output into structured traces",
   "license": "Apache-2.0",
   "author": "D. Olsson <hi@senzilla.io>",
@@ -8,10 +8,12 @@
   "main": "./src/index.js",
   "exports": {
     ".": "./src/index.js",
-    "./bin/fit-eval.js": "./bin/fit-eval.js"
+    "./bin/fit-eval.js": "./bin/fit-eval.js",
+    "./bin/fit-trace.js": "./bin/fit-trace.js"
   },
   "bin": {
-    "fit-eval": "./bin/fit-eval.js"
+    "fit-eval": "./bin/fit-eval.js",
+    "fit-trace": "./bin/fit-trace.js"
   },
   "files": [
     "src/**/*.js",
@@ -26,8 +28,9 @@
     "test": "bun run node --test test/*.test.js"
   },
   "dependencies": {
-    "@anthropic-ai/claude-agent-sdk": "^0.2.98",
+    "@anthropic-ai/claude-agent-sdk": "^0.2.112",
     "@forwardimpact/libcli": "^0.1.0",
+    "@forwardimpact/libconfig": "^0.1.0",
     "@forwardimpact/libtelemetry": "^0.1.22",
     "zod": "^3.23.0"
   },

package/src/agent-runner.js CHANGED Viewed

@@ -108,6 +108,7 @@ export class AgentRunner {
           permissionMode: this.permissionMode,
           allowDangerouslySkipPermissions: true,
           abortController,
+          ...(this.mcpServers && { mcpServers: this.mcpServers }),
         },
       });
       return await this.#consumeQuery(iterator);

package/src/commands/facilitate.js CHANGED Viewed

@@ -1,30 +1,18 @@
-import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
-import { resolve, join } from "node:path";
-import { tmpdir } from "node:os";
+import { readFileSync, createWriteStream } from "node:fs";
+import { resolve } from "node:path";
 import { createFacilitator } from "../facilitator.js";
 import { createTeeWriter } from "../tee-writer.js";
 /**
- * Parse agent config string into structured configs.
- * Format: "name1:key=val:key=val,name2:key=val"
- * @param {string} raw
- * @returns {Array<{name: string, role: string, cwd: string, maxTurns?: number}>}
+ * Parse comma-separated agent profile names into structured configs.
+ * @param {string} raw - Comma-separated profile names
+ * @param {string} cwd - Shared working directory for all agents
+ * @returns {Array<{name: string, role: string, cwd: string, agentProfile: string}>}
  */
-function parseAgentConfigs(raw) {
-  return raw.split(",").map((spec) => {
-    const parts = spec.split(":");
-    const name = parts[0];
-    const config = { name, role: name };
-    for (let i = 1; i < parts.length; i++) {
-      const [key, val] = parts[i].split("=");
-      if (key === "cwd") config.cwd = resolve(val);
-      else if (key === "role") config.role = val;
-      else if (key === "maxTurns") config.maxTurns = parseInt(val, 10);
-    }
-    if (!config.cwd) {
-      config.cwd = mkdtempSync(join(tmpdir(), `fit-eval-${name}-`));
-    }
-    return config;
+function parseAgentProfiles(raw, cwd) {
+  return raw.split(",").map((entry) => {
+    const name = entry.trim();
+    return { name, role: name, cwd, agentProfile: name };
   });
 }
@@ -45,12 +33,10 @@ function parseFacilitateOptions(values) {
   let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
   if (taskAmend) taskContent += `\n\n${taskAmend}`;
-  const agentsRaw = values.agents;
-  if (!agentsRaw) throw new Error("--agents is required");
-  const agentConfigs = parseAgentConfigs(agentsRaw);
-  if (agentConfigs.length < 1)
-    throw new Error("--agents must specify at least one agent");
+  const profilesRaw = values["agent-profiles"];
+  if (!profilesRaw) throw new Error("--agent-profiles is required");
+  const agentCwd = resolve(values["agent-cwd"] ?? ".");
+  const agentConfigs = parseAgentProfiles(profilesRaw, agentCwd);
   const maxTurnsRaw = values["max-turns"] ?? "20";

package/src/commands/trace.js ADDED Viewed

@@ -0,0 +1,149 @@
+import { readFileSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { createTraceCollector } from "@forwardimpact/libeval";
+import { createTraceQuery } from "../trace-query.js";
+import { createTraceGitHub } from "../trace-github.js";
+// --- GitHub commands ---
+/**
+ * List recent workflow runs matching a pattern.
+ * @param {object} values - Parsed option values
+ * @param {string[]} args - [pattern?]
+ */
+export async function runRunsCommand(values, args) {
+  const gh = await createTraceGitHub({ repo: values.repo });
+  const pattern = args[0] ?? "agent";
+  const lookback = values.lookback ?? "7d";
+  const runs = await gh.listRuns({ pattern, lookback });
+  writeJSON(runs);
+}
+/**
+ * Download a trace artifact and auto-convert to structured JSON.
+ * @param {object} values - Parsed option values
+ * @param {string[]} args - [run-id]
+ */
+export async function runDownloadCommand(values, args) {
+  const gh = await createTraceGitHub({ repo: values.repo });
+  const result = await gh.downloadTrace(args[0], {
+    dir: values.dir,
+    name: values.artifact,
+  });
+  const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
+  if (ndjsonFile) {
+    const ndjsonPath = join(result.dir, ndjsonFile);
+    const collector = createTraceCollector();
+    for (const line of readFileSync(ndjsonPath, "utf8").split("\n")) {
+      collector.addLine(line);
+    }
+    const structuredPath = join(result.dir, "structured.json");
+    writeFileSync(structuredPath, JSON.stringify(collector.toJSON()) + "\n");
+    result.files.push("structured.json");
+  }
+  writeJSON(result);
+}
+// --- Query commands ---
+/** @param {object} values @param {string[]} args - [file] */
+export async function runOverviewCommand(values, args) {
+  writeJSON(loadTrace(args[0]).overview());
+}
+/** @param {object} values @param {string[]} args - [file] */
+export async function runCountCommand(values, args) {
+  process.stdout.write(String(loadTrace(args[0]).count()) + "\n");
+}
+/** @param {object} values @param {string[]} args - [file, from, to] */
+export async function runBatchCommand(values, args) {
+  writeJSON(
+    loadTrace(args[0]).batch(parseInt(args[1], 10), parseInt(args[2], 10)),
+  );
+}
+/** @param {object} values @param {string[]} args - [file, N?] */
+export async function runHeadCommand(values, args) {
+  const n = args[1] ? parseInt(args[1], 10) : 10;
+  writeJSON(loadTrace(args[0]).head(n));
+}
+/** @param {object} values @param {string[]} args - [file, N?] */
+export async function runTailCommand(values, args) {
+  const n = args[1] ? parseInt(args[1], 10) : 10;
+  writeJSON(loadTrace(args[0]).tail(n));
+}
+/** @param {object} values @param {string[]} args - [file, pattern] */
+export async function runSearchCommand(values, args) {
+  const limit = values.limit ? parseInt(values.limit, 10) : 50;
+  const context = values.context ? parseInt(values.context, 10) : 0;
+  writeJSON(loadTrace(args[0]).search(args[1], { limit, context }));
+}
+/** @param {object} values @param {string[]} args - [file] */
+export async function runToolsCommand(values, args) {
+  writeJSON(loadTrace(args[0]).toolFrequency());
+}
+/** @param {object} values @param {string[]} args - [file, name] */
+export async function runToolCommand(values, args) {
+  writeJSON(loadTrace(args[0]).tool(args[1]));
+}
+/** @param {object} values @param {string[]} args - [file] */
+export async function runErrorsCommand(values, args) {
+  writeJSON(loadTrace(args[0]).errors());
+}
+/** @param {object} values @param {string[]} args - [file] */
+export async function runReasoningCommand(values, args) {
+  const from = values.from ? parseInt(values.from, 10) : undefined;
+  const to = values.to ? parseInt(values.to, 10) : undefined;
+  writeJSON(loadTrace(args[0]).reasoning({ from, to }));
+}
+/** @param {object} values @param {string[]} args - [file] */
+export async function runTimelineCommand(values, args) {
+  const lines = loadTrace(args[0]).timeline();
+  process.stdout.write(lines.join("\n") + "\n");
+}
+/** @param {object} values @param {string[]} args - [file] */
+export async function runStatsCommand(values, args) {
+  writeJSON(loadTrace(args[0]).stats());
+}
+// --- Shared helpers ---
+/**
+ * Load a trace file. Supports structured JSON and raw NDJSON.
+ * @param {string} file
+ * @returns {import("../trace-query.js").TraceQuery}
+ */
+function loadTrace(file) {
+  const content = readFileSync(file, "utf8");
+  try {
+    const parsed = JSON.parse(content);
+    if (parsed.turns) {
+      return createTraceQuery(parsed);
+    }
+  } catch {
+    // Not valid JSON — fall through to NDJSON.
+  }
+  const collector = createTraceCollector();
+  for (const line of content.split("\n")) {
+    collector.addLine(line);
+  }
+  return createTraceQuery(collector.toJSON());
+}
+/** @param {object} data */
+function writeJSON(data) {
+  process.stdout.write(JSON.stringify(data, null, 2) + "\n");
+}

package/src/facilitator.js CHANGED Viewed

@@ -18,19 +18,27 @@ import {
 /** System prompt appended for the facilitator runner. */
 export const FACILITATOR_SYSTEM_PROMPT =
-  "You coordinate multiple agents working on a shared task. Use Tell to " +
-  "assign work to individual agents. Use Share to broadcast to all. Use " +
-  "Redirect to interrupt and correct agents. Use RollCall to see who is " +
-  "available. Use Conclude with a summary when the task is done. Agents " +
-  "communicate with you via Share and may Ask you questions directly.";
+  "You coordinate multiple agents working on a shared task. " +
+  "Tell sends a direct message to one participant. " +
+  "Share broadcasts a message to all participants. " +
+  "Redirect interrupts a participant and replaces their current instructions. " +
+  "RollCall lists available participants and their roles. " +
+  "Conclude ends the session with a summary. " +
+  "Participants communicate with you via Share and may Ask you questions. " +
+  "IMPORTANT: After sending messages via Tell or Share, stop making tool " +
+  "calls and produce a text response. The system will resume you with " +
+  "participant responses. Do not proceed to the next question or call " +
+  "Conclude until you have received responses from participants.";
 /** System prompt appended for facilitated agent runners. */
 export const FACILITATED_AGENT_SYSTEM_PROMPT =
   "You are one of several agents working on a shared task under a " +
-  "facilitator's coordination. Use Share to broadcast findings. Use Tell " +
-  "to message a specific participant. Use Ask to ask the facilitator a " +
-  "question (you will block until answered). Use RollCall to see who " +
-  "else is working. The facilitator may Redirect you with new instructions " +
+  "facilitator's coordination. " +
+  "Share broadcasts your message to all participants. " +
+  "Tell sends a direct message to one participant. " +
+  "Ask sends a question to the facilitator — you block until answered. " +
+  "RollCall lists available participants and their roles. " +
+  "The facilitator may Redirect you with new instructions " +
   "— treat redirections as authoritative.";
 function createAsyncQueue() {
@@ -109,28 +117,40 @@ export class Facilitator {
   async run(task) {
     this.emitOrchestratorEvent({ type: "session_start" });
+    // Launch agent loops first — they wait for messages via messageBus.
+    // This lets agents process Tell/Share messages that arrive during the
+    // facilitator's initial run, rather than after it completes.
+    const agentPromises = this.agents.map((a) => this.#runAgent(a));
     // Turn 0: facilitator receives the task
     this.facilitatorTurns++;
     await this.facilitatorRunner.run(task);
+    // Handle redirect after turn 0
+    await this.#processRedirect();
     if (this.ctx.concluded) {
+      // Facilitator concluded during its initial run. Let agents finish any
+      // in-progress work before returning — they may have received Tell/Share
+      // messages and started processing concurrently.
       this.concludeResolve();
-      this.emitSummary({ success: true, turns: 0, summary: this.ctx.summary });
-      return { success: true, turns: 0 };
+      await Promise.allSettled(agentPromises);
+      this.emitSummary({
+        success: true,
+        turns: this.facilitatorTurns,
+        summary: this.ctx.summary,
+      });
+      return { success: true, turns: this.facilitatorTurns };
     }
-    // Handle redirect after turn 0
-    await this.#processRedirect();
-    // Abort agents promptly when Conclude is called
+    // Abort agents promptly when Conclude is called during the event loop
     this.concludePromise.then(() => {
       for (const agent of this.agents) {
         agent.runner.currentAbortController?.abort();
       }
     });
-    // Launch all loops concurrently
-    const agentPromises = this.agents.map((a) => this.#runAgent(a));
+    // Concurrent phase: facilitator event loop + already-running agent loops
     const facilitatorPromise = this.#facilitatorLoop();
     try {

package/src/index.js CHANGED Viewed

@@ -1,4 +1,10 @@
 export { TraceCollector, createTraceCollector } from "./trace-collector.js";
+export { TraceQuery, createTraceQuery } from "./trace-query.js";
+export {
+  TraceGitHub,
+  createTraceGitHub,
+  parseGitRemote,
+} from "./trace-github.js";
 export { AgentRunner, createAgentRunner } from "./agent-runner.js";
 export {
   Supervisor,

package/src/orchestration-toolkit.js CHANGED Viewed

@@ -154,13 +154,13 @@ export function createFacilitatorToolServer(ctx) {
       ),
       tool(
         "Share",
-        "Broadcast a message to all participants.",
+        "Broadcast a message to all participants. After sending, stop making tool calls to receive responses.",
         { message: z.string() },
         createShareHandler(ctx, { from: "facilitator" }),
       ),
       tool(
         "Tell",
-        "Send a direct message to one participant.",
+        "Send a direct message to one participant. After sending, stop making tool calls to receive their response.",
         { message: z.string(), to: z.string() },
         createTellHandler(ctx, { from: "facilitator" }),
       ),

package/src/trace-github.js ADDED Viewed

@@ -0,0 +1,213 @@
+import { createWriteStream } from "node:fs";
+import { mkdir } from "node:fs/promises";
+import path from "node:path";
+import { pipeline } from "node:stream/promises";
+import { Readable } from "node:stream";
+const API = "https://api.github.com";
+/**
+ * GitHub API client for trace-related operations: listing workflow runs
+ * and downloading trace artifacts.
+ */
+export class TraceGitHub {
+  /**
+   * @param {object} deps
+   * @param {string} deps.token - GitHub token
+   * @param {string} deps.owner - Repository owner
+   * @param {string} deps.repo  - Repository name
+   */
+  constructor({ token, owner, repo }) {
+    this.token = token;
+    this.owner = owner;
+    this.repo = repo;
+  }
+  /**
+   * List recent workflow runs, optionally filtered by name pattern.
+   *
+   * @param {object} [opts]
+   * @param {string} [opts.pattern] - Case-insensitive substring to match workflow name (default: "agent")
+   * @param {number} [opts.limit=50] - Max runs to return from GitHub API
+   * @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
+   * @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url}
+   */
+  async listRuns(opts = {}) {
+    const { pattern = "agent", limit = 50, lookback = "7d" } = opts;
+    const cutoff = parseLookback(lookback);
+    const params = new URLSearchParams({
+      per_page: String(Math.min(limit, 100)),
+    });
+    if (cutoff) {
+      params.set("created", `>=${cutoff}`);
+    }
+    const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs?${params}`;
+    const data = await this.#get(url);
+    const runs = data.workflow_runs ?? [];
+    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
+    const re = new RegExp(pattern, "i");
+    return runs
+      .filter((r) => re.test(r.name))
+      .map((r) => ({
+        workflow: r.name,
+        runId: r.id,
+        status: r.status,
+        conclusion: r.conclusion,
+        createdAt: r.created_at,
+        branch: r.head_branch,
+        url: r.html_url,
+      }));
+  }
+  /**
+   * Download a trace artifact from a workflow run and extract it.
+   *
+   * Tries artifact names in order: combined-trace, agent-trace.
+   * The artifact zip is downloaded and extracted to the output directory.
+   *
+   * @param {number|string} runId
+   * @param {object} [opts]
+   * @param {string} [opts.dir] - Output directory (default: /tmp/trace-<runId>)
+   * @param {string} [opts.name] - Specific artifact name to download
+   * @returns {Promise<{dir: string, artifact: string, files: string[]}>}
+   */
+  async downloadTrace(runId, opts = {}) {
+    const dir = opts.dir ?? `/tmp/trace-${runId}`;
+    await mkdir(dir, { recursive: true });
+    // List artifacts for this run.
+    const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
+    const data = await this.#get(url);
+    const artifacts = data.artifacts ?? [];
+    // Find the trace artifact.
+    const preferredNames = opts.name
+      ? [opts.name]
+      : ["combined-trace", "agent-trace"];
+    let artifact = null;
+    for (const name of preferredNames) {
+      artifact = artifacts.find((a) => a.name === name);
+      if (artifact) break;
+    }
+    if (!artifact) {
+      const available = artifacts.map((a) => a.name).join(", ");
+      throw new Error(
+        `No trace artifact found for run ${runId}. Available: ${available || "none"}`,
+      );
+    }
+    // Download the zip.
+    const zipPath = path.join(dir, `${artifact.name}.zip`);
+    const downloadUrl = `${API}/repos/${this.owner}/${this.repo}/actions/artifacts/${artifact.id}/zip`;
+    const response = await fetch(downloadUrl, {
+      headers: this.#headers(),
+      redirect: "follow",
+    });
+    if (!response.ok) {
+      throw new Error(
+        `Failed to download artifact: ${response.status} ${response.statusText}`,
+      );
+    }
+    // Stream to disk then extract.
+    await pipeline(Readable.fromWeb(response.body), createWriteStream(zipPath));
+    const { execSync } = await import("node:child_process");
+    execSync(
+      `unzip -o -q ${JSON.stringify(zipPath)} -d ${JSON.stringify(dir)}`,
+    );
+    // List extracted files.
+    const { readdirSync } = await import("node:fs");
+    const files = readdirSync(dir).filter((f) => !f.endsWith(".zip"));
+    return { dir, artifact: artifact.name, files };
+  }
+  /**
+   * @param {string} url
+   * @returns {Promise<object>}
+   */
+  async #get(url) {
+    const response = await fetch(url, { headers: this.#headers() });
+    if (!response.ok) {
+      throw new Error(`GitHub API: ${response.status} ${response.statusText}`);
+    }
+    return response.json();
+  }
+  /** @returns {Record<string, string>} */
+  #headers() {
+    return {
+      Authorization: `Bearer ${this.token}`,
+      Accept: "application/vnd.github+json",
+      "X-GitHub-Api-Version": "2022-11-28",
+    };
+  }
+}
+/**
+ * Parse a lookback duration string into an ISO date string.
+ * Supports: Nd (days), Nh (hours), Nw (weeks).
+ * @param {string} lookback
+ * @returns {string|null} ISO date string or null if unparseable
+ */
+function parseLookback(lookback) {
+  const match = lookback.match(/^(\d+)([dhw])$/);
+  if (!match) return null;
+  const [, val, unit] = match;
+  const ms = { d: 86400000, h: 3600000, w: 604800000 }[unit];
+  return new Date(Date.now() - parseInt(val, 10) * ms).toISOString();
+}
+/**
+ * Parse a GitHub repository URL or "owner/repo" string.
+ * @param {string} remote - Git remote URL or owner/repo string
+ * @returns {{owner: string, repo: string}}
+ */
+export function parseGitRemote(remote) {
+  // SSH: git@github.com:owner/repo.git
+  const ssh = remote.match(/github\.com[:/]([^/]+)\/(.+?)(?:\.git)?$/);
+  if (ssh) return { owner: ssh[1], repo: ssh[2] };
+  // HTTPS: https://github.com/owner/repo
+  const https = remote.match(/github\.com\/([^/]+)\/(.+?)(?:\.git)?$/);
+  if (https) return { owner: https[1], repo: https[2] };
+  // Plain owner/repo format (no github.com prefix).
+  const simple = remote.match(/^([^/:@]+)\/([^/]+)$/);
+  if (simple) return { owner: simple[1], repo: simple[2] };
+  throw new Error(`Cannot parse GitHub remote: ${remote}`);
+}
+/**
+ * Create a TraceGitHub instance using libconfig for the token and
+ * git remote for the repo.
+ *
+ * @param {object} [opts]
+ * @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
+ * @returns {Promise<TraceGitHub>}
+ */
+export async function createTraceGitHub(opts = {}) {
+  const { createScriptConfig } = await import("@forwardimpact/libconfig");
+  const config = await createScriptConfig("eval");
+  const token = config.ghToken();
+  let owner, repo;
+  if (opts.repo) {
+    ({ owner, repo } = parseGitRemote(opts.repo));
+  } else {
+    const { execSync } = await import("node:child_process");
+    const remote = execSync("git remote get-url origin", {
+      encoding: "utf8",
+    }).trim();
+    ({ owner, repo } = parseGitRemote(remote));
+  }
+  return new TraceGitHub({ token, owner, repo });
+}

package/src/trace-query.js ADDED Viewed

@@ -0,0 +1,346 @@
+/**
+ * Query engine for structured trace documents produced by TraceCollector.
+ *
+ * Loads a structured JSON trace into memory and provides methods for
+ * paging, searching, filtering, and summarizing turns — the operations
+ * agents need to analyze large traces efficiently.
+ */
+export class TraceQuery {
+  /**
+   * @param {object} trace - Structured trace document (output of TraceCollector.toJSON())
+   */
+  constructor(trace) {
+    this.trace = trace;
+    this.metadata = trace.metadata ?? {};
+    this.turns = trace.turns ?? [];
+    this.summary = trace.summary ?? {};
+  }
+  /**
+   * High-level overview: metadata, summary, turn count, and tool frequency.
+   * @returns {object}
+   */
+  overview() {
+    return {
+      metadata: this.metadata,
+      summary: this.summary,
+      turnCount: this.turns.length,
+      tools: this.toolFrequency(),
+    };
+  }
+  /** @returns {number} */
+  count() {
+    return this.turns.length;
+  }
+  /**
+   * Return turns in range [from, to) (zero-indexed).
+   * @param {number} from
+   * @param {number} to
+   * @returns {object[]}
+   */
+  batch(from, to) {
+    return this.turns.slice(from, to);
+  }
+  /**
+   * First N turns.
+   * @param {number} [n=10]
+   * @returns {object[]}
+   */
+  head(n = 10) {
+    return this.turns.slice(0, n);
+  }
+  /**
+   * Last N turns.
+   * @param {number} [n=10]
+   * @returns {object[]}
+   */
+  tail(n = 10) {
+    return this.turns.slice(-n);
+  }
+  /**
+   * Search all turn content for a regex pattern.  Returns matching turns
+   * with the matched text highlighted by context.
+   *
+   * Searches: assistant text blocks, tool_use names and stringified input,
+   * and tool_result content.
+   *
+   * @param {string} pattern - Regex pattern (case-insensitive)
+   * @param {object} [opts]
+   * @param {number} [opts.context=0] - Number of surrounding turns to include
+   * @param {number} [opts.limit=50] - Max results
+   * @returns {object[]} Array of {turn, matches, context?}
+   */
+  search(pattern, opts = {}) {
+    const { context = 0, limit = 50 } = opts;
+    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
+    const re = new RegExp(pattern, "gi");
+    const hits = [];
+    for (const turn of this.turns) {
+      const matches = matchTurn(turn, re);
+      if (matches.length > 0) {
+        const entry = { turn, matches };
+        if (context > 0) {
+          const idx = turn.index;
+          entry.context = this.turns.filter(
+            (t) =>
+              t.index !== idx &&
+              t.index >= idx - context &&
+              t.index <= idx + context,
+          );
+        }
+        hits.push(entry);
+        if (hits.length >= limit) break;
+      }
+    }
+    return hits;
+  }
+  /**
+   * Tool usage frequency, sorted descending.
+   * @returns {Array<{tool: string, count: number}>}
+   */
+  toolFrequency() {
+    const counts = {};
+    for (const turn of this.turns) {
+      if (turn.role !== "assistant") continue;
+      for (const block of turn.content) {
+        if (block.type === "tool_use") {
+          counts[block.name] = (counts[block.name] ?? 0) + 1;
+        }
+      }
+    }
+    return Object.entries(counts)
+      .map(([tool, count]) => ({ tool, count }))
+      .sort((a, b) => b.count - a.count);
+  }
+  /**
+   * Filter turns involving a specific tool (both the tool_use and its result).
+   * @param {string} name - Tool name
+   * @returns {object[]}
+   */
+  tool(name) {
+    const toolUseIds = new Set();
+    const results = [];
+    for (const turn of this.turns) {
+      if (turn.role === "assistant") {
+        const hasTool = turn.content.some(
+          (b) => b.type === "tool_use" && b.name === name,
+        );
+        if (hasTool) {
+          results.push(turn);
+          for (const b of turn.content) {
+            if (b.type === "tool_use" && b.name === name && b.toolUseId) {
+              toolUseIds.add(b.toolUseId);
+            }
+          }
+        }
+      } else if (
+        turn.role === "tool_result" &&
+        toolUseIds.has(turn.toolUseId)
+      ) {
+        results.push(turn);
+      }
+    }
+    return results;
+  }
+  /**
+   * All error turns (tool results with isError=true).
+   * @returns {object[]}
+   */
+  errors() {
+    return this.turns.filter(
+      (t) => t.role === "tool_result" && t.isError === true,
+    );
+  }
+  /**
+   * Extract just the reasoning text from assistant turns.
+   * @param {object} [opts]
+   * @param {number} [opts.from] - Start turn index
+   * @param {number} [opts.to] - End turn index (exclusive)
+   * @returns {Array<{index: number, text: string}>}
+   */
+  reasoning(opts = {}) {
+    const { from, to } = opts;
+    const results = [];
+    for (const turn of this.turns) {
+      if (turn.role !== "assistant") continue;
+      if (from !== undefined && turn.index < from) continue;
+      if (to !== undefined && turn.index >= to) continue;
+      const texts = turn.content
+        .filter((b) => b.type === "text")
+        .map((b) => b.text);
+      if (texts.length > 0) {
+        results.push({ index: turn.index, text: texts.join("\n") });
+      }
+    }
+    return results;
+  }
+  /**
+   * Compact one-line-per-assistant-turn timeline showing tool names,
+   * reasoning snippet, and token usage.  Thinking-only turns are marked
+   * as such and their content is omitted (it is model-internal).
+   * @returns {string[]}
+   */
+  timeline() {
+    const lines = [];
+    for (const turn of this.turns) {
+      if (turn.role !== "assistant") continue;
+      const tools = turn.content
+        .filter((b) => b.type === "tool_use")
+        .map((b) => b.name);
+      const textBlocks = turn.content
+        .filter((b) => b.type === "text")
+        .map((b) => b.text);
+      const hasThinking = turn.content.some((b) => b.type === "thinking");
+      // Skip thinking-only turns (no user-visible content).
+      if (hasThinking && tools.length === 0 && textBlocks.length === 0)
+        continue;
+      const snippet = textBlocks.join(" ").slice(0, 80).replace(/\n/g, " ");
+      const input = turn.usage?.inputTokens ?? 0;
+      const output = turn.usage?.outputTokens ?? 0;
+      const cacheRead = turn.usage?.cacheReadInputTokens ?? 0;
+      const toolStr = tools.length > 0 ? tools.join(", ") : "(text only)";
+      const tokenStr = `in:${fmtK(input + cacheRead)} out:${fmtK(output)}`;
+      lines.push(
+        `[${turn.index}] ${toolStr.padEnd(30)} ${tokenStr.padEnd(18)} ${snippet}`,
+      );
+    }
+    return lines;
+  }
+  /**
+   * Token usage and cost breakdown per assistant turn, plus totals.
+   * @returns {object}
+   */
+  stats() {
+    let totalInput = 0;
+    let totalOutput = 0;
+    let totalCacheRead = 0;
+    let totalCacheCreate = 0;
+    const perTurn = [];
+    for (const turn of this.turns) {
+      if (turn.role !== "assistant" || !turn.usage) continue;
+      const u = turn.usage;
+      totalInput += u.inputTokens ?? 0;
+      totalOutput += u.outputTokens ?? 0;
+      totalCacheRead += u.cacheReadInputTokens ?? 0;
+      totalCacheCreate += u.cacheCreationInputTokens ?? 0;
+      perTurn.push({
+        index: turn.index,
+        inputTokens: u.inputTokens ?? 0,
+        outputTokens: u.outputTokens ?? 0,
+        cacheReadInputTokens: u.cacheReadInputTokens ?? 0,
+        cacheCreationInputTokens: u.cacheCreationInputTokens ?? 0,
+      });
+    }
+    return {
+      totals: {
+        inputTokens: totalInput,
+        outputTokens: totalOutput,
+        cacheReadInputTokens: totalCacheRead,
+        cacheCreationInputTokens: totalCacheCreate,
+        totalCostUsd: this.summary.totalCostUsd ?? 0,
+        durationMs: this.summary.durationMs ?? 0,
+      },
+      perTurn,
+    };
+  }
+}
+/**
+ * Search a single turn for regex matches. Returns array of match descriptions.
+ * @param {object} turn
+ * @param {RegExp} re
+ * @returns {string[]}
+ */
+function matchTurn(turn, re) {
+  const matches = [];
+  if (turn.role === "assistant") {
+    for (const block of turn.content) {
+      if (block.type === "text" && re.test(block.text)) {
+        re.lastIndex = 0;
+        matches.push(`text: ${excerptAround(block.text, re)}`);
+      }
+      if (block.type === "tool_use") {
+        if (re.test(block.name)) {
+          re.lastIndex = 0;
+          matches.push(`tool_name: ${block.name}`);
+        }
+        const inputStr = JSON.stringify(block.input);
+        if (re.test(inputStr)) {
+          re.lastIndex = 0;
+          matches.push(
+            `tool_input(${block.name}): ${excerptAround(inputStr, re)}`,
+          );
+        }
+      }
+    }
+  } else if (turn.role === "tool_result") {
+    const content = turn.content ?? "";
+    if (re.test(content)) {
+      re.lastIndex = 0;
+      matches.push(`result: ${excerptAround(content, re)}`);
+    }
+  }
+  return matches;
+}
+/**
+ * Extract a short excerpt around the first regex match in text.
+ * @param {string} text
+ * @param {RegExp} re
+ * @returns {string}
+ */
+function excerptAround(text, re) {
+  re.lastIndex = 0;
+  const m = re.exec(text);
+  if (!m) return text.slice(0, 100);
+  const start = Math.max(0, m.index - 40);
+  const end = Math.min(text.length, m.index + m[0].length + 40);
+  let excerpt = text.slice(start, end);
+  if (start > 0) excerpt = "..." + excerpt;
+  if (end < text.length) excerpt = excerpt + "...";
+  return excerpt;
+}
+/**
+ * Format a token count as compact K notation.
+ * @param {number} n
+ * @returns {string}
+ */
+function fmtK(n) {
+  if (n < 1000) return String(n);
+  return (n / 1000).toFixed(1) + "K";
+}
+/**
+ * Load a structured trace from a JSON string.
+ * @param {string} json
+ * @returns {TraceQuery}
+ */
+export function createTraceQuery(json) {
+  const trace = typeof json === "string" ? JSON.parse(json) : json;
+  return new TraceQuery(trace);
+}