npm - @forwardimpact/libeval - Versions diffs - 0.1.14 → 0.1.16 - Mend

@forwardimpact/libeval 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/bin/fit-eval.js +38 -0
package/bin/fit-trace.js +198 -0
package/package.json +9 -5
package/src/agent-runner.js +30 -40
package/src/commands/facilitate.js +95 -0
package/src/commands/run.js +17 -1
package/src/commands/trace.js +149 -0
package/src/facilitator.js +512 -0
package/src/index.js +21 -2
package/src/message-bus.js +100 -0
package/src/orchestration-toolkit.js +209 -0
package/src/sequence-counter.js +17 -0
package/src/supervisor.js +128 -210
package/src/tee-writer.js +20 -26
package/src/trace-github.js +213 -0
package/src/trace-query.js +346 -0

package/src/trace-github.js ADDED Viewed

@@ -0,0 +1,213 @@
+import { createWriteStream } from "node:fs";
+import { mkdir } from "node:fs/promises";
+import path from "node:path";
+import { pipeline } from "node:stream/promises";
+import { Readable } from "node:stream";
+// Base URL prepended to every GitHub REST API path requested by this module.
+const API = "https://api.github.com";
+/**
+ * GitHub API client for trace-related operations: listing workflow runs
+ * and downloading trace artifacts.
+ *
+ * All requests are authenticated with the bearer token given to the
+ * constructor and target the single repository identified by owner/repo.
+ */
+export class TraceGitHub {
+  /**
+   * @param {object} deps
+   * @param {string} deps.token - GitHub token
+   * @param {string} deps.owner - Repository owner
+   * @param {string} deps.repo  - Repository name
+   */
+  constructor({ token, owner, repo }) {
+    this.token = token;
+    this.owner = owner;
+    this.repo = repo;
+  }
+  /**
+   * List recent workflow runs, optionally filtered by name pattern.
+   *
+   * Only a single page of results is requested; filtering by name happens
+   * client-side, so fewer than `limit` runs may be returned.
+   *
+   * @param {object} [opts]
+   * @param {string} [opts.pattern] - Case-insensitive regular expression matched against the workflow name (default: "agent")
+   * @param {number} [opts.limit=50] - Max runs to return from GitHub API (capped at 100 per page)
+   * @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
+   * @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url}
+   * @throws {Error} When the GitHub API responds with a non-2xx status
+   */
+  async listRuns(opts = {}) {
+    const { pattern = "agent", limit = 50, lookback = "7d" } = opts;
+    const cutoff = parseLookback(lookback);
+    const params = new URLSearchParams({
+      per_page: String(Math.min(limit, 100)),
+    });
+    // An unparseable lookback simply disables the date filter.
+    if (cutoff) {
+      params.set("created", `>=${cutoff}`);
+    }
+    const data = await this.#get(this.#repoUrl(`actions/runs?${params}`));
+    const runs = data.workflow_runs ?? [];
+    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
+    const re = new RegExp(pattern, "i");
+    return (
+      runs
+        // A missing name must not be coerced to the string "null" and matched.
+        .filter((r) => re.test(r.name ?? ""))
+        .map((r) => ({
+          workflow: r.name,
+          runId: r.id,
+          status: r.status,
+          conclusion: r.conclusion,
+          createdAt: r.created_at,
+          branch: r.head_branch,
+          url: r.html_url,
+        }))
+    );
+  }
+  /**
+   * Download a trace artifact from a workflow run and extract it.
+   *
+   * Tries artifact names in order: combined-trace, agent-trace.
+   * The artifact zip is downloaded and extracted to the output directory
+   * with the system `unzip` binary, which must be on PATH.
+   *
+   * @param {number|string} runId
+   * @param {object} [opts]
+   * @param {string} [opts.dir] - Output directory (default: /tmp/trace-<runId>)
+   * @param {string} [opts.name] - Specific artifact name to download
+   * @returns {Promise<{dir: string, artifact: string, files: string[]}>}
+   * @throws {Error} When no matching artifact exists, the download fails,
+   *   or `unzip` exits with a non-zero status
+   */
+  async downloadTrace(runId, opts = {}) {
+    const dir = opts.dir ?? `/tmp/trace-${runId}`;
+    await mkdir(dir, { recursive: true });
+    // List artifacts for this run.
+    const data = await this.#get(
+      this.#repoUrl(`actions/runs/${encodeURIComponent(runId)}/artifacts`),
+    );
+    const artifacts = data.artifacts ?? [];
+    // Find the trace artifact; an explicit name replaces the default candidates.
+    const preferredNames = opts.name
+      ? [opts.name]
+      : ["combined-trace", "agent-trace"];
+    let artifact = null;
+    for (const name of preferredNames) {
+      artifact = artifacts.find((a) => a.name === name);
+      if (artifact) break;
+    }
+    if (!artifact) {
+      const available = artifacts.map((a) => a.name).join(", ");
+      throw new Error(
+        `No trace artifact found for run ${runId}. Available: ${available || "none"}`,
+      );
+    }
+    // Download the zip.
+    const zipPath = path.join(dir, `${artifact.name}.zip`);
+    const response = await fetch(
+      this.#repoUrl(`actions/artifacts/${encodeURIComponent(artifact.id)}/zip`),
+      {
+        headers: this.#headers(),
+        redirect: "follow",
+      },
+    );
+    if (!response.ok) {
+      throw new Error(
+        `Failed to download artifact: ${response.status} ${response.statusText}`,
+      );
+    }
+    if (!response.body) {
+      throw new Error("Failed to download artifact: empty response body");
+    }
+    // Stream to disk then extract.
+    await pipeline(Readable.fromWeb(response.body), createWriteStream(zipPath));
+    // Pass paths as argv entries (no shell), so characters such as `$(...)`
+    // or backticks in the directory or artifact name are never interpreted.
+    const { execFileSync } = await import("node:child_process");
+    execFileSync("unzip", ["-o", "-q", zipPath, "-d", dir]);
+    // List extracted files (top level only), hiding the downloaded archive.
+    const { readdirSync } = await import("node:fs");
+    const files = readdirSync(dir).filter((f) => !f.endsWith(".zip"));
+    return { dir, artifact: artifact.name, files };
+  }
+  /**
+   * Build an absolute API URL under this repository.
+   * @param {string} suffix - Path (and optional query) after /repos/<owner>/<repo>/
+   * @returns {string}
+   */
+  #repoUrl(suffix) {
+    const owner = encodeURIComponent(this.owner);
+    const repo = encodeURIComponent(this.repo);
+    return `${API}/repos/${owner}/${repo}/${suffix}`;
+  }
+  /**
+   * Perform an authenticated GET and decode the JSON body.
+   * @param {string} url
+   * @returns {Promise<object>}
+   * @throws {Error} When the response status is not 2xx
+   */
+  async #get(url) {
+    const response = await fetch(url, { headers: this.#headers() });
+    if (!response.ok) {
+      throw new Error(`GitHub API: ${response.status} ${response.statusText}`);
+    }
+    return response.json();
+  }
+  /** @returns {Record<string, string>} Headers sent with every request */
+  #headers() {
+    return {
+      Authorization: `Bearer ${this.token}`,
+      Accept: "application/vnd.github+json",
+      "X-GitHub-Api-Version": "2022-11-28",
+    };
+  }
+}
+/**
+ * Parse a lookback duration string into an ISO date string.
+ * Supports: Nd (days), Nh (hours), Nw (weeks).
+ *
+ * The result is "now minus the duration", formatted with
+ * Date.prototype.toISOString().
+ *
+ * @param {string} lookback
+ * @returns {string|null} ISO date string or null if unparseable (wrong
+ *   format, non-string input, or a duration too large to represent as a Date)
+ */
+function parseLookback(lookback) {
+  // Honour the "null if unparseable" contract instead of throwing on
+  // null/undefined/number input.
+  if (typeof lookback !== "string") return null;
+  const match = lookback.match(/^(\d+)([dhw])$/);
+  if (!match) return null;
+  const [, val, unit] = match;
+  const ms = { d: 86400000, h: 3600000, w: 604800000 }[unit];
+  const cutoff = new Date(Date.now() - Number.parseInt(val, 10) * ms);
+  // A huge count yields an Invalid Date, on which toISOString() throws.
+  if (Number.isNaN(cutoff.getTime())) return null;
+  return cutoff.toISOString();
+}
+/**
+ * Parse a GitHub repository URL or "owner/repo" string.
+ *
+ * Accepted forms (a trailing ".git" and a trailing slash are ignored):
+ *   - SSH:   git@github.com:owner/repo.git
+ *   - HTTPS: https://github.com/owner/repo
+ *   - Plain: owner/repo
+ * For github.com URLs, any path after the repository segment is ignored.
+ *
+ * @param {string} remote - Git remote URL or owner/repo string
+ * @returns {{owner: string, repo: string}}
+ * @throws {Error} When the value matches none of the accepted forms
+ */
+export function parseGitRemote(remote) {
+  const text = typeof remote === "string" ? remote.trim() : "";
+  // One pattern covers both SSH ("github.com:") and HTTPS ("github.com/").
+  // The repo segment stops at the first "/", so a trailing slash or deeper
+  // path such as /tree/main never leaks into the repository name.
+  const hosted = text.match(
+    /github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?(?:\/.*)?$/,
+  );
+  if (hosted) return { owner: hosted[1], repo: hosted[2] };
+  // Plain owner/repo format (no github.com prefix).
+  const simple = text.match(/^([^/:@]+)\/([^/]+?)(?:\.git)?$/);
+  if (simple) return { owner: simple[1], repo: simple[2] };
+  throw new Error(`Cannot parse GitHub remote: ${remote}`);
+}
+/**
+ * Build a TraceGitHub client: the token is read from a libconfig script
+ * config named "eval", and the repository comes either from the caller or
+ * from the `origin` remote of the git checkout in the current directory.
+ *
+ * @param {object} [opts]
+ * @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
+ * @returns {Promise<TraceGitHub>}
+ */
+export async function createTraceGitHub(opts = {}) {
+  const libconfig = await import("@forwardimpact/libconfig");
+  const config = await libconfig.createScriptConfig("eval");
+  const token = config.ghToken();
+  let remote = opts.repo;
+  if (!remote) {
+    // No override supplied: ask git for the origin URL.
+    const childProcess = await import("node:child_process");
+    const stdout = childProcess.execSync("git remote get-url origin", {
+      encoding: "utf8",
+    });
+    remote = stdout.trim();
+  }
+  const { owner, repo } = parseGitRemote(remote);
+  return new TraceGitHub({ token, owner, repo });
+}

package/src/trace-query.js ADDED Viewed

@@ -0,0 +1,346 @@
+/**
+ * Query engine for structured trace documents produced by TraceCollector.
+ *
+ * Loads a structured JSON trace into memory and provides methods for
+ * paging, searching, filtering, and summarizing turns — the operations
+ * agents need to analyze large traces efficiently.
+ */
+export class TraceQuery {
+  /**
+   * @param {object} trace - Structured trace document (output of TraceCollector.toJSON())
+   */
+  constructor(trace) {
+    this.trace = trace;
+    this.metadata = trace.metadata ?? {};
+    this.turns = trace.turns ?? [];
+    this.summary = trace.summary ?? {};
+  }
+  /**
+   * High-level overview: metadata, summary, turn count, and tool frequency.
+   * @returns {object}
+   */
+  overview() {
+    return {
+      metadata: this.metadata,
+      summary: this.summary,
+      turnCount: this.turns.length,
+      tools: this.toolFrequency(),
+    };
+  }
+  /** @returns {number} Total number of turns in the trace */
+  count() {
+    return this.turns.length;
+  }
+  /**
+   * Return turns in range [from, to) (zero-indexed).
+   * @param {number} from
+   * @param {number} to
+   * @returns {object[]}
+   */
+  batch(from, to) {
+    return this.turns.slice(from, to);
+  }
+  /**
+   * First N turns.
+   * @param {number} [n=10]
+   * @returns {object[]}
+   */
+  head(n = 10) {
+    return this.turns.slice(0, n);
+  }
+  /**
+   * Last N turns.
+   * @param {number} [n=10]
+   * @returns {object[]} Empty when n is not a positive number
+   */
+  tail(n = 10) {
+    // slice(-0) is slice(0) and would return every turn, so guard n <= 0.
+    return n > 0 ? this.turns.slice(-n) : [];
+  }
+  /**
+   * Search all turn content for a regex pattern.  Returns matching turns
+   * with a short excerpt around each match.
+   *
+   * Searches: assistant text blocks, tool_use names and stringified input,
+   * and tool_result content.
+   *
+   * @param {string} pattern - Regex pattern (case-insensitive)
+   * @param {object} [opts]
+   * @param {number} [opts.context=0] - Number of surrounding turns to include
+   * @param {number} [opts.limit=50] - Max results
+   * @returns {object[]} Array of {turn, matches, context?}
+   */
+  search(pattern, opts = {}) {
+    const { context = 0, limit = 50 } = opts;
+    // Deliberately not global: a /g regex carries lastIndex between
+    // test()/exec() calls, which made later probes start mid-string and
+    // miss matches. Only the first match per field is needed.
+    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
+    const re = new RegExp(pattern, "i");
+    const hits = [];
+    for (const turn of this.turns) {
+      // Checked up front so limit <= 0 yields no results at all.
+      if (hits.length >= limit) break;
+      const matches = matchTurn(turn, re);
+      if (matches.length === 0) continue;
+      const entry = { turn, matches };
+      if (context > 0) {
+        // Neighbours are selected by each turn's own `index` field.
+        const idx = turn.index;
+        entry.context = this.turns.filter(
+          (t) =>
+            t.index !== idx &&
+            t.index >= idx - context &&
+            t.index <= idx + context,
+        );
+      }
+      hits.push(entry);
+    }
+    return hits;
+  }
+  /**
+   * Tool usage frequency, sorted descending.
+   * @returns {Array<{tool: string, count: number}>}
+   */
+  toolFrequency() {
+    // A Map avoids collisions with Object.prototype members for tools
+    // named e.g. "constructor" or "toString".
+    const counts = new Map();
+    for (const turn of this.turns) {
+      if (turn.role !== "assistant") continue;
+      for (const block of turn.content ?? []) {
+        if (block.type === "tool_use") {
+          counts.set(block.name, (counts.get(block.name) ?? 0) + 1);
+        }
+      }
+    }
+    return [...counts]
+      .map(([tool, count]) => ({ tool, count }))
+      .sort((a, b) => b.count - a.count);
+  }
+  /**
+   * Filter turns involving a specific tool (both the tool_use and its result).
+   * A result is included only when it follows the assistant turn that
+   * issued the matching tool_use id.
+   * @param {string} name - Tool name
+   * @returns {object[]}
+   */
+  tool(name) {
+    const toolUseIds = new Set();
+    const results = [];
+    for (const turn of this.turns) {
+      if (turn.role === "assistant") {
+        const uses = (turn.content ?? []).filter(
+          (b) => b.type === "tool_use" && b.name === name,
+        );
+        if (uses.length === 0) continue;
+        results.push(turn);
+        for (const b of uses) {
+          if (b.toolUseId) toolUseIds.add(b.toolUseId);
+        }
+      } else if (
+        turn.role === "tool_result" &&
+        toolUseIds.has(turn.toolUseId)
+      ) {
+        results.push(turn);
+      }
+    }
+    return results;
+  }
+  /**
+   * All error turns (tool results with isError=true).
+   * @returns {object[]}
+   */
+  errors() {
+    return this.turns.filter(
+      (t) => t.role === "tool_result" && t.isError === true,
+    );
+  }
+  /**
+   * Extract just the reasoning text from assistant turns.
+   * Text blocks of one turn are joined with newlines; turns without any
+   * text block are skipped.
+   * @param {object} [opts]
+   * @param {number} [opts.from] - Start turn index
+   * @param {number} [opts.to] - End turn index (exclusive)
+   * @returns {Array<{index: number, text: string}>}
+   */
+  reasoning(opts = {}) {
+    const { from, to } = opts;
+    const results = [];
+    for (const turn of this.turns) {
+      if (turn.role !== "assistant") continue;
+      if (from !== undefined && turn.index < from) continue;
+      if (to !== undefined && turn.index >= to) continue;
+      const texts = (turn.content ?? [])
+        .filter((b) => b.type === "text")
+        .map((b) => b.text);
+      if (texts.length > 0) {
+        results.push({ index: turn.index, text: texts.join("\n") });
+      }
+    }
+    return results;
+  }
+  /**
+   * Compact one-line-per-assistant-turn timeline showing tool names,
+   * reasoning snippet, and token usage.  Thinking-only turns are skipped
+   * entirely (their content is model-internal).
+   * @returns {string[]}
+   */
+  timeline() {
+    const lines = [];
+    for (const turn of this.turns) {
+      if (turn.role !== "assistant") continue;
+      const blocks = turn.content ?? [];
+      const tools = blocks
+        .filter((b) => b.type === "tool_use")
+        .map((b) => b.name);
+      const textBlocks = blocks
+        .filter((b) => b.type === "text")
+        .map((b) => b.text);
+      const hasThinking = blocks.some((b) => b.type === "thinking");
+      // Skip thinking-only turns (no user-visible content).
+      if (hasThinking && tools.length === 0 && textBlocks.length === 0) {
+        continue;
+      }
+      const snippet = textBlocks.join(" ").slice(0, 80).replace(/\n/g, " ");
+      const input = turn.usage?.inputTokens ?? 0;
+      const output = turn.usage?.outputTokens ?? 0;
+      const cacheRead = turn.usage?.cacheReadInputTokens ?? 0;
+      const toolStr = tools.length > 0 ? tools.join(", ") : "(text only)";
+      // The "in" figure combines fresh input tokens and cache reads.
+      const tokenStr = `in:${fmtK(input + cacheRead)} out:${fmtK(output)}`;
+      lines.push(
+        `[${turn.index}] ${toolStr.padEnd(30)} ${tokenStr.padEnd(18)} ${snippet}`,
+      );
+    }
+    return lines;
+  }
+  /**
+   * Token usage breakdown per assistant turn, plus totals.  Cost and
+   * duration in the totals are copied from the trace summary rather than
+   * computed here.
+   * @returns {object} {totals, perTurn}
+   */
+  stats() {
+    let totalInput = 0;
+    let totalOutput = 0;
+    let totalCacheRead = 0;
+    let totalCacheCreate = 0;
+    const perTurn = [];
+    for (const turn of this.turns) {
+      // Turns without a usage record contribute nothing and are not listed.
+      if (turn.role !== "assistant" || !turn.usage) continue;
+      const u = turn.usage;
+      totalInput += u.inputTokens ?? 0;
+      totalOutput += u.outputTokens ?? 0;
+      totalCacheRead += u.cacheReadInputTokens ?? 0;
+      totalCacheCreate += u.cacheCreationInputTokens ?? 0;
+      perTurn.push({
+        index: turn.index,
+        inputTokens: u.inputTokens ?? 0,
+        outputTokens: u.outputTokens ?? 0,
+        cacheReadInputTokens: u.cacheReadInputTokens ?? 0,
+        cacheCreationInputTokens: u.cacheCreationInputTokens ?? 0,
+      });
+    }
+    return {
+      totals: {
+        inputTokens: totalInput,
+        outputTokens: totalOutput,
+        cacheReadInputTokens: totalCacheRead,
+        cacheCreationInputTokens: totalCacheCreate,
+        totalCostUsd: this.summary.totalCostUsd ?? 0,
+        durationMs: this.summary.durationMs ?? 0,
+      },
+      perTurn,
+    };
+  }
+}
+/**
+ * Search a single turn for regex matches. Returns array of match descriptions.
+ *
+ * Assistant turns are probed per block (text, tool name, stringified tool
+ * input); tool_result turns are probed on their content.  Other roles
+ * never match.
+ *
+ * @param {object} turn
+ * @param {RegExp} re - May be global/sticky; it is rewound before every probe
+ *   and left with lastIndex 0 on return
+ * @returns {string[]}
+ */
+function matchTurn(turn, re) {
+  const matches = [];
+  // A /g or /y regex resumes from lastIndex, and computing an excerpt
+  // advances it again.  Rewinding before each probe guarantees the whole
+  // string is scanned, so short fields after a long match are not missed.
+  const found = (text) => {
+    re.lastIndex = 0;
+    const hit = re.test(text);
+    re.lastIndex = 0;
+    return hit;
+  };
+  if (turn.role === "assistant") {
+    for (const block of turn.content ?? []) {
+      if (block.type === "text" && found(block.text)) {
+        matches.push(`text: ${excerptAround(block.text, re)}`);
+      }
+      if (block.type === "tool_use") {
+        if (found(block.name)) {
+          matches.push(`tool_name: ${block.name}`);
+        }
+        // JSON.stringify(undefined) is undefined, not a string.
+        const inputStr = JSON.stringify(block.input) ?? "";
+        if (found(inputStr)) {
+          matches.push(
+            `tool_input(${block.name}): ${excerptAround(inputStr, re)}`,
+          );
+        }
+      }
+    }
+  } else if (turn.role === "tool_result") {
+    const raw = turn.content ?? "";
+    // Structured (non-string) content is searched in its JSON form rather
+    // than as "[object Object]".
+    const content = typeof raw === "string" ? raw : (JSON.stringify(raw) ?? "");
+    if (found(content)) {
+      matches.push(`result: ${excerptAround(content, re)}`);
+    }
+  }
+  re.lastIndex = 0;
+  return matches;
+}
+/**
+ * Extract a short excerpt around the first regex match in text.
+ *
+ * The excerpt spans up to 40 characters on each side of the match and is
+ * wrapped in "..." wherever the text was cut.  Without a match, the first
+ * 100 characters are returned.
+ *
+ * @param {string} text
+ * @param {RegExp} re - May be global/sticky; lastIndex is 0 on return
+ * @returns {string}
+ */
+function excerptAround(text, re) {
+  const source = String(text);
+  re.lastIndex = 0;
+  const m = re.exec(source);
+  // exec() on a /g or /y regex leaves lastIndex after the match; rewind it
+  // so the caller's next test()/exec() scans from the start again.
+  re.lastIndex = 0;
+  if (!m) return source.slice(0, 100);
+  const start = Math.max(0, m.index - 40);
+  const end = Math.min(source.length, m.index + m[0].length + 40);
+  const prefix = start > 0 ? "..." : "";
+  const suffix = end < source.length ? "..." : "";
+  return `${prefix}${source.slice(start, end)}${suffix}`;
+}
+/**
+ * Render a token count compactly: values below 1000 are printed as-is,
+ * larger values are divided by 1000 and shown with one decimal and a "K".
+ * @param {number} n
+ * @returns {string}
+ */
+function fmtK(n) {
+  const isSmall = n < 1000;
+  return isSmall ? String(n) : (n / 1000).toFixed(1) + "K";
+}
+/**
+ * Build a TraceQuery from a structured trace.
+ * @param {string|object} json - JSON text of the trace; any non-string
+ *   value is treated as an already-parsed trace and used as-is
+ * @returns {TraceQuery}
+ */
+export function createTraceQuery(json) {
+  if (typeof json !== "string") {
+    return new TraceQuery(json);
+  }
+  return new TraceQuery(JSON.parse(json));
+}