npm - @forwardimpact/libeval - Versions diffs - 0.1.64 → 0.1.65 - Mend

@forwardimpact/libeval 0.1.64 → 0.1.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/src/trace-query.js CHANGED Viewed

@@ -1,3 +1,13 @@
+import {
+  ZERO_USAGE,
+  bucketUsageByTool,
+  carriedPerTurn,
+  computeDivergence,
+  isPreChangeDoc,
+  perMessageUsage,
+  reconcileBucketsToTotals,
+} from "./trace-usage.js";
 /**
  * Query engine for structured trace documents produced by TraceCollector.
  *
@@ -367,149 +377,131 @@ export class TraceQuery {
       divergence: null,
     };
   }
-}
-/**
- * Zero-valued token usage, used as the carried-document fallback. All four
- * counters (input, output, cache-read, cache-creation) are 0.
- */
-const ZERO_USAGE = {
-  inputTokens: 0,
-  outputTokens: 0,
-  cacheReadInputTokens: 0,
-  cacheCreationInputTokens: 0,
-};
+  /**
+   * One record per `tool_use` block, each paired with its `tool_result`
+   * (joined by `toolUseId`) or `result: null` for orphaned calls.
+   *
+   * Records come out in the iteration order of the map returned by
+   * `collectToolUseBlocks`. When several `tool_result` turns carry the same
+   * `toolUseId`, the last one seen overwrites the earlier ones. A result with
+   * no `content` is reported as `content: null`, and one with no `isError` as
+   * `isError: false`. `tool_result` turns without a `toolUseId` are ignored.
+   * @returns {Array<{turnIndex: number, name: string, toolUseId: string, input: object, result: {content: *, isError: boolean}|null}>}
+   */
+  toolCalls() {
+    const blocks = collectToolUseBlocks(this.turns);
+    const results = new Map(); // toolUseId -> {content, isError}
+    for (const turn of this.turns) {
+      if (turn.role === "tool_result" && turn.toolUseId) {
+        results.set(turn.toolUseId, {
+          content: turn.content ?? null,
+          isError: turn.isError ?? false,
+        });
+      }
+    }
+    return [...blocks.entries()].map(([toolUseId, b]) => ({
+      turnIndex: b.turnIndex,
+      name: b.name,
+      toolUseId,
+      input: b.input,
+      result: results.get(toolUseId) ?? null,
+    }));
+  }
-/**
- * Per-stream-event breakdown for a pre-change document, labeled as carried —
- * old documents lack message identity, so rows stay keyed by turn index.
- * @param {object[]} turns
- * @returns {object[]}
- */
-function carriedPerTurn(turns) {
-  const perTurn = [];
-  for (const turn of turns) {
-    if (turn.role !== "assistant" || !turn.usage) continue;
-    perTurn.push({
-      index: turn.index,
-      inputTokens: turn.usage.inputTokens ?? 0,
-      outputTokens: turn.usage.outputTokens ?? 0,
-      cacheReadInputTokens: turn.usage.cacheReadInputTokens ?? 0,
-      cacheCreationInputTokens: turn.usage.cacheCreationInputTokens ?? 0,
-      population: "carried-document-per-turn",
-    });
+  /**
+   * One record per `Bash` `tool_use` block, carrying its command text.
+   * A block whose input has no `command` is reported with an empty string
+   * (and is still tested against the filter as an empty string).
+   * @param {string} [re] - Optional regex source tested against `input.command`.
+   *   Compiled with `new RegExp(re)` and no flags; only `undefined` disables
+   *   the filter.
+   * @returns {Array<{turnIndex: number, toolUseId: string, command: string}>}
+   * @throws {SyntaxError} When `re` is not a valid regular-expression source.
+   */
+  commands(re) {
+    const filter = re === undefined ? null : new RegExp(re);
+    const out = [];
+    for (const [toolUseId, b] of collectToolUseBlocks(this.turns, "Bash")) {
+      const command = b.input?.command ?? "";
+      if (filter && !filter.test(command)) continue;
+      out.push({ turnIndex: b.turnIndex, toolUseId, command });
+    }
+    return out;
   }
-  return perTurn;
-}
-/**
- * Whether a structured-document version predates per-message accounting
- * (1.2.0). A trace with no version (collected by this build from NDJSON) is
- * not pre-change. Compares numeric version parts so 1.10.0 reads as post-change.
- * Only the major and minor parts are inspected; a missing or non-numeric part
- * counts as 0.
- * @param {string|undefined|null} version
- * @returns {boolean}
- */
-function isPreChangeDoc(version) {
-  if (typeof version !== "string") return false;
-  const [major = 0, minor = 0] = version
-    .split(".")
-    .map((part) => parseInt(part, 10) || 0);
-  if (major !== 1) return major < 1; // 0.x is pre-change, 2.x and later is not
-  // Per-message accounting arrived in 1.2.0; any 1.2.x is post-change.
-  return minor < 2;
-}
+  /**
+   * Distinct `file_path` arguments across `Read`/`Edit`/`Write` tool calls,
+   * frequency-sorted (count desc, path asc tiebreak). The tiebreak compares
+   * paths with `localeCompare`. Counts come from `collectFilePaths`, so each
+   * qualifying tool call adds one to its path.
+   * @param {string} [prefix] - Optional `startsWith` filter; only `undefined`
+   *   disables it.
+   * @returns {Array<{path: string, count: number}>}
+   */
+  paths(prefix) {
+    return [...collectFilePaths(this.turns).entries()]
+      .filter(([path]) => prefix === undefined || path.startsWith(prefix))
+      .map(([path, count]) => ({ path, count }))
+      .sort((a, b) => b.count - a.count || a.path.localeCompare(b.path));
+  }
-/**
- * Account assistant usage once per API message. Turns are grouped by
- * `messageId` (a null id is its own singleton message); per message the
- * field-wise max across its snapshots is taken — order-insensitive, equal to
- * the single value when a message's duplicate snapshots are byte-identical
- * (zero residual against result-event sums), and a floor for output (the
- * largest streaming snapshot, never an overstatement).
- * @param {object[]} turns
- * @returns {{perMessage: object[], totals: object}}
- */
-function perMessageUsage(turns) {
-  const byMessage = new Map();
-  let singletonSeq = 0;
+  /**
+   * Side-by-side comparison of this trace against another peer `TraceQuery`.
+   * Identity (case name, participant) comes from the caller — the trace
+   * carries no filename. When `identities` or one of its members is omitted,
+   * that side falls back to the default identity of `sideSummary`.
+   *
+   * `toolDelta` and `pathDelta` hold one row per name seen on either side, with
+   * a missing side counted as 0 and `diff` computed as `b - a`. Rows are sorted
+   * by absolute `diff` descending, then by name via `localeCompare`.
+   * @param {TraceQuery} other
+   * @param {{aIdentity: {caseName: string, participant: string|null}, bIdentity: {caseName: string, participant: string|null}}} identities
+   * @returns {{a: object, b: object, toolDelta: Array, pathDelta: Array}}
+   */
+  compare(other, { aIdentity, bIdentity } = {}) {
+    const a = sideSummary(this, aIdentity);
+    const b = sideSummary(other, bIdentity);
+    const toolNames = [
+      ...new Set([...a.toolFreq.keys(), ...b.toolFreq.keys()]),
+    ];
+    const toolDelta = toolNames
+      .map((tool) => {
+        const av = a.toolFreq.get(tool) ?? 0;
+        const bv = b.toolFreq.get(tool) ?? 0;
+        return { tool, a: av, b: bv, diff: bv - av };
+      })
+      .sort(
+        (x, y) =>
+          Math.abs(y.diff) - Math.abs(x.diff) || x.tool.localeCompare(y.tool),
+      );
-  for (const turn of turns) {
-    if (turn.role !== "assistant" || !turn.usage) continue;
-    const key = turn.messageId ?? `__null__${singletonSeq++}`;
-    accumulateMessage(byMessage, key, turn);
-  }
+    const pathNames = [
+      ...new Set([...a.pathFreq.keys(), ...b.pathFreq.keys()]),
+    ];
+    const pathDelta = pathNames
+      .map((path) => {
+        const av = a.pathFreq.get(path) ?? 0;
+        const bv = b.pathFreq.get(path) ?? 0;
+        return { path, a: av, b: bv, diff: bv - av };
+      })
+      .sort(
+        (x, y) =>
+          Math.abs(y.diff) - Math.abs(x.diff) || x.path.localeCompare(y.path),
+      );
-  const totals = {
-    inputTokens: 0,
-    outputTokens: 0,
-    cacheReadInputTokens: 0,
-    cacheCreationInputTokens: 0,
-  };
-  const perMessage = [];
-  for (const row of byMessage.values()) {
-    totals.inputTokens += row.inputTokens;
-    totals.outputTokens += row.outputTokens;
-    totals.cacheReadInputTokens += row.cacheReadInputTokens;
-    totals.cacheCreationInputTokens += row.cacheCreationInputTokens;
-    perMessage.push({
-      ...row,
-      outputIsStreamingSnapshot: true,
-      population: "api-message",
-    });
+    return { a: a.surface, b: b.surface, toolDelta, pathDelta };
   }
-  return { perMessage, totals };
-}
-/**
- * Fold one assistant turn's usage into its message bucket by field-wise max.
- * @param {Map<string, object>} byMessage
- * @param {string} key
- * @param {object} turn
- */
-function accumulateMessage(byMessage, key, turn) {
-  const u = turn.usage;
-  const prev = byMessage.get(key);
-  if (!prev) {
-    byMessage.set(key, {
-      messageId: turn.messageId ?? null,
-      inputTokens: u.inputTokens ?? 0,
-      outputTokens: u.outputTokens ?? 0,
-      cacheReadInputTokens: u.cacheReadInputTokens ?? 0,
-      cacheCreationInputTokens: u.cacheCreationInputTokens ?? 0,
-    });
-    return;
+  /**
+   * Per-tool token attribution: each `tool_use` block gets an equal share of
+   * its host turn's usage; assistant turns with no `tool_use` block contribute
+   * full usage to the `(no-tool)` bucket. Per-bucket sums are scaled onto
+   * `stats().totals` — the authoritative population (result-event sums when the
+   * trace carries them, the per-message fallback otherwise) — so the buckets
+   * answer "of the reported total, what share did each tool drive" rather than
+   * a separate per-turn re-count that drifts from the headline figure. The
+   * largest bucket absorbs the rounding residual on each axis, so the input,
+   * output, and `costShare` columns each sum to the corresponding `totals`
+   * value (and `1.0`) exactly (criterion-6 invariant).
+   *
+   * NOTE(review): the bucketing and reconciliation rules above are implemented
+   * by `bucketUsageByTool` and `reconcileBucketsToTotals` (imported from
+   * `./trace-usage.js`), not by this method, which only wires them to
+   * `this.turns` and `this.stats().totals` — confirm against that module.
+   * @returns {{perTool: Array<{tool: string, turns: number, inputTokens: number, outputTokens: number, costShare: number}>, totals: object}}
+   */
+  statsByTool() {
+    const { buckets, bucketTurns } = bucketUsageByTool(this.turns);
+    const totals = this.stats().totals;
+    const perTool = reconcileBucketsToTotals(buckets, bucketTurns, totals);
+    return { perTool, totals };
   }
-  prev.inputTokens = Math.max(prev.inputTokens, u.inputTokens ?? 0);
-  prev.outputTokens = Math.max(prev.outputTokens, u.outputTokens ?? 0);
-  prev.cacheReadInputTokens = Math.max(
-    prev.cacheReadInputTokens,
-    u.cacheReadInputTokens ?? 0,
-  );
-  prev.cacheCreationInputTokens = Math.max(
-    prev.cacheCreationInputTokens,
-    u.cacheCreationInputTokens ?? 0,
-  );
-}
-/**
- * Compare per-message sums against the result-event sums on the fields the
- * spec guarantees parity for (input, cacheRead, cacheCreation — never output,
- * which always diverges by mechanism 2). Returns the first divergent field as
- * `{field, perMessageSum, resultEventSum}`, or null when all agree.
- * @param {object} perMessageTotals
- * @param {object} resultEventUsage
- * @returns {object|null}
- */
-function computeDivergence(perMessageTotals, resultEventUsage) {
-  for (const field of [
-    "inputTokens",
-    "cacheReadInputTokens",
-    "cacheCreationInputTokens",
-  ]) {
-    const perMessageSum = perMessageTotals[field] ?? 0;
-    const resultEventSum = resultEventUsage[field] ?? 0;
-    if (perMessageSum !== resultEventSum) {
-      return { field, perMessageSum, resultEventSum };
-    }
+  /**
+   * Totals-only view — `stats().totals` with no per-turn array. Calls
+   * `this.stats()` and keeps only its `totals` member.
+   * @returns {{totals: object}}
+   */
+  statsSummary() {
+    return { totals: this.stats().totals };
   }
-  return null;
 }
 /**
@@ -544,6 +536,31 @@ function matchesToolName(turn, toolName) {
   );
 }
+/**
+ * Collect every assistant `tool_use` block keyed by `toolUseId`, optionally
+ * filtered by tool name. The shared join-key source feeding `toolCalls()`,
+ * `commands()`, and `collectToolUseIds()`. Insertion order follows turn order.
+ *
+ * Non-assistant turns, blocks of another type, and blocks with a falsy
+ * `toolUseId` are skipped. If the same `toolUseId` occurs more than once, the
+ * later block replaces the stored value while the key keeps its original
+ * position in the map.
+ * NOTE(review): assumes every assistant turn has an iterable `content` —
+ * confirm against the collector that builds `turns`.
+ * @param {object[]} turns
+ * @param {string} [name] - Optional tool-name filter; only `undefined` disables it.
+ * @returns {Map<string, {turnIndex: number, name: string, input: object}>}
+ */
+function collectToolUseBlocks(turns, name) {
+  const blocks = new Map();
+  for (const turn of turns) {
+    if (turn.role !== "assistant") continue;
+    for (const b of turn.content) {
+      if (b.type !== "tool_use" || !b.toolUseId) continue;
+      if (name !== undefined && b.name !== name) continue;
+      blocks.set(b.toolUseId, {
+        turnIndex: turn.index,
+        name: b.name,
+        input: b.input,
+      });
+    }
+  }
+  return blocks;
+}
 /**
  * Collect all toolUseIds for a given tool name from assistant turns.
  * @param {object[]} turns
@@ -551,16 +568,68 @@ function matchesToolName(turn, toolName) {
  * @returns {Set<string>}
  */
 function collectToolUseIds(turns, name) {
-  const ids = new Set();
+  return new Set(collectToolUseBlocks(turns, name).keys()); // keep only the ids of the matching blocks
+}
+/** Tools (`Read`, `Edit`, `Write`) whose `tool_use` input is read for a `file_path` argument. */
+const PATH_TOOLS = new Set(["Read", "Edit", "Write"]);
+/**
+ * Frequency map of distinct `file_path` arguments across `Read`/`Edit`/`Write`
+ * tool calls, in first-seen insertion order. Each qualifying `tool_use` block
+ * adds one to its path; blocks whose `input.file_path` is not a string are
+ * skipped, as are non-assistant turns.
+ * @param {object[]} turns
+ * @returns {Map<string, number>}
+ */
+function collectFilePaths(turns) {
+  const counts = new Map();
   for (const turn of turns) {
     if (turn.role !== "assistant") continue;
-    for (const b of turn.content) {
-      if (b.type === "tool_use" && b.name === name && b.toolUseId) {
-        ids.add(b.toolUseId);
-      }
+    for (const block of turn.content) {
+      if (block.type !== "tool_use" || !PATH_TOOLS.has(block.name)) continue;
+      const p = block.input?.file_path;
+      if (typeof p !== "string") continue;
+      counts.set(p, (counts.get(p) ?? 0) + 1);
     }
   }
-  return ids;
+  return counts;
+}
+/**
+ * Build the per-side comparison surface plus the tool/path frequency maps
+ * the delta computation consumes. Empty traces emit a `(empty)` marker.
+ *
+ * The surface lists tool and path names sorted with the default `Array#sort`
+ * order, the turn count, the number of distinct paths, and the cost read from
+ * `query.stats().totals.totalCostUsd`. `metadata.marker` is set only when the
+ * trace has no turns. When `identity` is omitted the case name is `(unknown)`
+ * and the participant is `null`.
+ * @param {TraceQuery} query
+ * @param {{caseName: string, participant: string|null}} [identity]
+ * @returns {{surface: object, toolFreq: Map<string, number>, pathFreq: Map<string, number>}}
+ */
+function sideSummary(
+  query,
+  identity = { caseName: "(unknown)", participant: null },
+) {
+  const toolFreq = new Map(query.toolFrequency().map((t) => [t.tool, t.count]));
+  const pathFreq = collectFilePaths(query.turns);
+  const isEmpty = query.turns.length === 0;
+  const metadata = {
+    caseName: identity.caseName,
+    participant: identity.participant ?? null,
+  };
+  if (isEmpty) metadata.marker = "(empty)";
+  const tools = [...toolFreq.keys()].sort();
+  const paths = [...pathFreq.keys()].sort();
+  return {
+    surface: {
+      metadata,
+      turnCount: query.turns.length,
+      tools,
+      paths,
+      pathCount: paths.length,
+      cost: query.stats().totals.totalCostUsd,
+    },
+    toolFreq,
+    pathFreq,
+  };
 }
 /**

package/src/trace-render.js ADDED Viewed

@@ -0,0 +1,211 @@
+/**
+ * Text renderers for `fit-trace` query output.
+ *
+ * One named export per renderable verb. Each renderer accepts the query result
+ * plus `{multi, signatures}` and returns a string. `multi` controls
+ * source-attribution prefixing (`grep -H` convention); record-per-line
+ * renderers prepend `<basename>:`, block renderers emit `# <basename>` headers.
+ *
+ * Internal module — imported by `commands/trace.js` and tests by relative
+ * path, never re-exported from `src/index.js`.
+ */
+/**
+ * Collapse newlines/tabs in a value to a single-line, grep-friendly string.
+ * Strings are used as-is; any other value is serialised with `JSON.stringify`
+ * (`null` and `undefined` both become the text `null`). Each run of CR, LF, or
+ * tab characters becomes one space, and the result is trimmed.
+ * NOTE(review): `JSON.stringify` returns `undefined` for a function or symbol,
+ * which would make the `.replace` call throw — confirm callers never pass one.
+ * @param {*} value
+ * @returns {string}
+ */
+function oneLine(value) {
+  const str = typeof value === "string" ? value : JSON.stringify(value ?? null);
+  return str.replace(/[\r\n\t]+/g, " ").trim();
+}
+/**
+ * Group records by their `source` field (multi-file path), preserving order.
+ * Groups appear in first-seen order and keep their records in input order;
+ * records whose `source` is `null` or `undefined` share the empty-string key.
+ * @param {object[]} records
+ * @returns {Map<string, object[]>}
+ */
+function groupBySource(records) {
+  const groups = new Map();
+  for (const record of records) {
+    const key = record.source ?? "";
+    if (!groups.has(key)) groups.set(key, []);
+    groups.get(key).push(record);
+  }
+  return groups;
+}
+/**
+ * Render record-per-line output, prefixing each line with `<source>:` when
+ * multi-file. `lineOf` maps one record to its text line. A record with a
+ * falsy `source` is left unprefixed even when `multi` is set. Lines are joined
+ * with `\n` and no trailing newline is added.
+ * @param {object[]} records
+ * @param {(record: object) => string} lineOf
+ * @param {{multi: boolean}} opts
+ * @returns {string}
+ */
+function renderLines(records, lineOf, { multi }) {
+  return records
+    .map((r) => (multi && r.source ? `${r.source}:${lineOf(r)}` : lineOf(r)))
+    .join("\n");
+}
+/**
+ * Render a block per source. `blockOf` maps one record to a multi-line string;
+ * multi-file output separates groups with `# <source>` headers.
+ * Single-file output is just the blocks joined with `\n`. In multi-file mode
+ * records are grouped with `groupBySource`, so records without a `source` end
+ * up under a header with an empty name (`# `).
+ * @param {object[]} records
+ * @param {(record: object) => string} blockOf
+ * @param {{multi: boolean}} opts
+ * @returns {string}
+ */
+function renderBlocks(records, blockOf, { multi }) {
+  if (!multi) return records.map(blockOf).join("\n");
+  const out = [];
+  for (const [source, group] of groupBySource(records)) {
+    out.push(`# ${source}`);
+    out.push(...group.map(blockOf));
+  }
+  return out.join("\n");
+}
+/**
+ * `[turnIdx] <Tool> <toolUseId>` / `  in:` / `  out:` per block.
+ * The `in:` and `out:` values are flattened with `oneLine`. A record whose
+ * `result` is falsy prints `(no result)`; `result.isError` is not rendered.
+ * @param {object[]} records - Records with `turnIndex`, `name`, `toolUseId`,
+ *   `input` and `result` fields.
+ * @param {{multi?: boolean}} [opts]
+ * @returns {string}
+ */
+export function renderToolCalls(records, opts = {}) {
+  return renderBlocks(
+    records,
+    (r) => {
+      const head = `[${r.turnIndex}] ${r.name} ${r.toolUseId}`;
+      const input = `  in: ${oneLine(r.input)}`;
+      const out = `  out: ${
+        r.result ? oneLine(r.result.content) : "(no result)"
+      }`;
+      return [head, input, out].join("\n");
+    },
+    opts,
+  );
+}
+/**
+ * `[turnIdx] <command>` per line. Newlines and tabs inside a command are
+ * collapsed to single spaces by `oneLine` (they are not escaped), so every
+ * command occupies exactly one line.
+ * @param {object[]} records - Records with `turnIndex` and `command` fields.
+ * @param {{multi?: boolean}} [opts]
+ * @returns {string}
+ */
+export function renderCommands(records, opts = {}) {
+  return renderLines(
+    records,
+    (r) => `[${r.turnIndex}] ${oneLine(r.command)}`,
+    opts,
+  );
+}
+/**
+ * `<count>\t<path>` per line, in the order of `records` — the frequency
+ * ordering is whatever the caller supplies; this renderer does not sort.
+ * @param {Array<{path: string, count: number}>} records
+ * @param {{multi?: boolean}} [opts]
+ * @returns {string}
+ */
+export function renderPaths(records, opts = {}) {
+  return renderLines(records, (r) => `${r.count}\t${r.path}`, opts);
+}
+/**
+ * Metadata header, per-row metrics, then Tool and Path delta tables.
+ * The header has one `A:` and one `B:` line (`<caseName> / <participant>`,
+ * with `(none)` for a null participant and the marker appended when present).
+ * The metric rows cover turns, tools, paths and cost; the delta tables print
+ * their rows in the order given. Sections are separated by blank lines.
+ * @param {{a: object, b: object, toolDelta: Array, pathDelta: Array}} result
+ * @returns {string}
+ */
+export function renderCompare(result) {
+  const { a, b, toolDelta, pathDelta } = result;
+  const part = (p) => (p == null ? "(none)" : p);
+  const lines = [];
+  lines.push(
+    `A: ${a.metadata.caseName} / ${part(a.metadata.participant)}${
+      a.metadata.marker ? ` ${a.metadata.marker}` : ""
+    }`,
+  );
+  lines.push(
+    `B: ${b.metadata.caseName} / ${part(b.metadata.participant)}${
+      b.metadata.marker ? ` ${b.metadata.marker}` : ""
+    }`,
+  );
+  lines.push("");
+  lines.push(`turns    | ${a.turnCount} | ${b.turnCount}`);
+  lines.push(`tools    | ${a.tools.length} | ${b.tools.length}`);
+  lines.push(`paths    | ${a.pathCount} | ${b.pathCount}`);
+  lines.push(`cost     | ${a.cost} | ${b.cost}`);
+  lines.push("");
+  lines.push("Tool | A | B | Δ");
+  for (const d of toolDelta) {
+    lines.push(`${d.tool} | ${d.a} | ${d.b} | ${d.diff}`);
+  }
+  lines.push("");
+  lines.push("Path | A | B | Δ");
+  for (const d of pathDelta) {
+    lines.push(`${d.path} | ${d.a} | ${d.b} | ${d.diff}`);
+  }
+  return lines.join("\n");
+}
+/**
+ * `Tool | Turns | In | Out | Share` table, one row per `result.perTool` entry
+ * in the order given. This renderer does not sort, so a Share-descending
+ * table depends on the caller's ordering. Token counts are rounded with
+ * `Math.round` and the share is printed with four decimals.
+ * @param {{perTool: Array<{tool: string, turns: number, inputTokens: number, outputTokens: number, costShare: number}>}} result
+ * @returns {string}
+ */
+export function renderStatsByTool(result) {
+  const lines = ["Tool | Turns | In | Out | Share"];
+  for (const b of result.perTool) {
+    lines.push(
+      `${b.tool} | ${b.turns} | ${Math.round(b.inputTokens)} | ${Math.round(
+        b.outputTokens,
+      )} | ${b.costShare.toFixed(4)}`,
+    );
+  }
+  return lines.join("\n");
+}
+/**
+ * Totals block only: six `key: value` lines read from `result.totals`
+ * (the four token counters, `totalCostUsd`, `durationMs`). Values are
+ * interpolated as-is, so a missing field prints `undefined`.
+ * @param {{totals: object}} result
+ * @returns {string}
+ */
+export function renderStatsSummary(result) {
+  const t = result.totals;
+  return [
+    `inputTokens: ${t.inputTokens}`,
+    `outputTokens: ${t.outputTokens}`,
+    `cacheReadInputTokens: ${t.cacheReadInputTokens}`,
+    `cacheCreationInputTokens: ${t.cacheCreationInputTokens}`,
+    `totalCostUsd: ${t.totalCostUsd}`,
+    `durationMs: ${t.durationMs}`,
+  ].join("\n");
+}
+/**
+ * `[turnIdx] <excerpt>` per match, with a leading `<source>:` in multi-file
+ * mode (see `multiPrefix`). Each hit yields one line per entry of
+ * `hit.matches` (none when that field is missing), flattened with `oneLine`.
+ * The index is read from `hit.turn?.index`, so a hit without a turn prints
+ * `[undefined]`.
+ * @param {object[]} records
+ * @param {{multi?: boolean}} [opts]
+ * @returns {string}
+ */
+export function renderSearch(records, opts = {}) {
+  const lines = [];
+  for (const hit of records) {
+    const idx = hit.turn?.index;
+    const prefix = multiPrefix(hit, opts);
+    for (const match of hit.matches ?? []) {
+      lines.push(`${prefix}[${idx}] ${oneLine(match)}`);
+    }
+  }
+  return lines.join("\n");
+}
+/**
+ * Source prefix for a multi-file record (search/default), or "".
+ * Returns `<source>:` only when `multi` is truthy and the record has a truthy
+ * `source`.
+ * @param {object} record
+ * @param {{multi?: boolean}} opts
+ * @returns {string}
+ */
+function multiPrefix(record, { multi }) {
+  return multi && record.source ? `${record.source}:` : "";
+}
+/**
+ * Default renderer for every other renderable verb: one record per block,
+ * fields rendered as `key: value` lines (no JSON braces or quotes, so the
+ * default output is grep/awk-friendly and does not parse as JSON). Nested
+ * values are collapsed to a single grep-friendly line. Multi-file output
+ * separates source groups with `# <source>` headers (`renderBlocks`
+ * convention).
+ *
+ * A non-array `result` is treated as a single record. Grouping still uses each
+ * record's `source`; the field is only removed from the text of the block.
+ * @param {object[]|object} result
+ * @param {{multi: boolean}} opts
+ * @returns {string}
+ */
+export function renderDefault(result, opts = {}) {
+  const records = Array.isArray(result) ? result : [result];
+  return renderBlocks(records, (r) => recordBlock(stripSource(r)), opts);
+}
+/**
+ * Render one record as `key: value` lines. Scalars render verbatim; objects
+ * and arrays collapse to a single line via `oneLine`. A non-object record
+ * (string/number) renders as its own single line.
+ *
+ * "Verbatim" means `String(value)`: `null` and `undefined` print as those
+ * words, and newlines inside a string field are not collapsed. A `null` or
+ * array record is also passed to `oneLine` as a whole.
+ * @param {*} record
+ * @returns {string}
+ */
+function recordBlock(record) {
+  if (record == null || typeof record !== "object" || Array.isArray(record)) {
+    return oneLine(record);
+  }
+  return Object.entries(record)
+    .map(([key, value]) => {
+      const scalar = value == null || typeof value !== "object";
+      return `${key}: ${scalar ? String(value) : oneLine(value)}`;
+    })
+    .join("\n");
+}
+/**
+ * Drop the orchestrator-injected `source` field before textifying.
+ * Plain-object records are shallow-copied without `source`; `null`,
+ * non-objects and arrays are returned unchanged. The input is not mutated.
+ * @param {*} record
+ * @returns {*}
+ */
+function stripSource(record) {
+  if (record == null || typeof record !== "object" || Array.isArray(record)) {
+    return record;
+  }
+  const { source, ...rest } = record;
+  return rest;
+}