npm - @forwardimpact/libeval - Version diff - 0.1.23 → 0.1.25 - Mend

@forwardimpact/libeval 0.1.23 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/bin/fit-eval.js +102 -41
package/bin/fit-trace.js +48 -11
package/package.json +2 -2
package/src/agent-runner.js +1 -1
package/src/commands/facilitate.js +1 -1
package/src/commands/run.js +1 -1
package/src/commands/supervise.js +1 -1
package/src/commands/trace.js +109 -2

package/bin/fit-eval.js CHANGED Viewed

@@ -17,65 +17,77 @@ const { version: VERSION } = JSON.parse(
 const definition = {
   name: "fit-eval",
   version: VERSION,
-  description: "Process Claude Code stream-json output",
+  description:
+    "Run agents and capture NDJSON traces — for agent evaluations or multi-agent collaboration",
   commands: [
-    {
-      name: "output",
-      args: "",
-      description: "Process trace and output formatted result",
-    },
-    {
-      name: "tee",
-      args: "[output.ndjson]",
-      description: "Stream text to stdout, optionally save raw NDJSON",
-    },
     {
       name: "run",
       args: "",
-      description: "Run a single agent via the Claude Agent SDK",
+      description: "Run a single agent autonomously on a defined task",
       options: {
-        "task-file": { type: "string", description: "Path to task file" },
-        "task-text": { type: "string", description: "Inline task text" },
+        "task-file": {
+          type: "string",
+          description: "Path to a markdown task file",
+        },
+        "task-text": {
+          type: "string",
+          description: "Inline task text (alternative to --task-file)",
+        },
         "task-amend": {
           type: "string",
-          description: "Additional text appended to task",
+          description: "Additional text appended to the task",
         },
         model: { type: "string", description: "Claude model (default: opus)" },
         "max-turns": {
           type: "string",
-          description: "Max agentic turns (default: 50)",
+          description: "Max agentic turns (default: 50, 0 = unlimited)",
+        },
+        output: {
+          type: "string",
+          description: "Write the NDJSON trace to a file",
+        },
+        cwd: { type: "string", description: "Working directory for the agent" },
+        "agent-profile": {
+          type: "string",
+          description: "Agent profile name to load",
         },
-        output: { type: "string", description: "Write NDJSON trace to file" },
-        cwd: { type: "string", description: "Working directory" },
-        "agent-profile": { type: "string", description: "Agent profile name" },
         "allowed-tools": {
           type: "string",
-          description: "Comma-separated tool list",
+          description: "Comma-separated tool allowlist",
         },
       },
     },
     {
       name: "supervise",
       args: "",
-      description: "Run a supervised agent-supervisor relay loop",
+      description:
+        "Run a supervisor–agent relay — typical shape for agent-as-judge evaluations",
       options: {
-        "task-file": { type: "string", description: "Path to task file" },
-        "task-text": { type: "string", description: "Inline task text" },
+        "task-file": {
+          type: "string",
+          description: "Path to a markdown task file",
+        },
+        "task-text": {
+          type: "string",
+          description: "Inline task text (alternative to --task-file)",
+        },
         "task-amend": {
           type: "string",
-          description: "Additional text appended to task",
+          description: "Additional text appended to the task",
         },
         model: { type: "string", description: "Claude model (default: opus)" },
         "max-turns": {
           type: "string",
-          description: "Max agentic turns (default: 50)",
+          description: "Max agentic turns (default: 20, 0 = unlimited)",
+        },
+        output: {
+          type: "string",
+          description: "Write the NDJSON trace to a file",
         },
-        output: { type: "string", description: "Write NDJSON trace to file" },
-        cwd: { type: "string", description: "Working directory" },
         "agent-profile": { type: "string", description: "Agent profile name" },
         "allowed-tools": {
           type: "string",
-          description: "Comma-separated tool list",
+          description: "Agent tool allowlist",
         },
         "supervisor-cwd": {
           type: "string",
@@ -84,31 +96,41 @@ const definition = {
         "agent-cwd": { type: "string", description: "Agent working directory" },
         "supervisor-profile": {
           type: "string",
-          description: "Supervisor profile name",
+          description: "Supervisor (judge) profile name",
         },
         "supervisor-allowed-tools": {
           type: "string",
-          description: "Supervisor tool list",
+          description: "Supervisor tool allowlist",
         },
       },
     },
     {
       name: "facilitate",
       args: "",
-      description: "Run a facilitated multi-agent session",
+      description:
+        "Run a facilitator with N participants — typical shape for multi-agent collaboration",
       options: {
-        "task-file": { type: "string", description: "Path to task file" },
-        "task-text": { type: "string", description: "Inline task text" },
+        "task-file": {
+          type: "string",
+          description: "Path to a markdown task file",
+        },
+        "task-text": {
+          type: "string",
+          description: "Inline task text (alternative to --task-file)",
+        },
         "task-amend": {
           type: "string",
-          description: "Additional text appended to task",
+          description: "Additional text appended to the task",
         },
         model: { type: "string", description: "Claude model (default: opus)" },
         "max-turns": {
           type: "string",
-          description: "Max facilitator LLM turns (default: 20)",
+          description: "Max agentic turns (default: 20, 0 = unlimited)",
+        },
+        output: {
+          type: "string",
+          description: "Write the NDJSON trace to a file",
         },
-        output: { type: "string", description: "Write NDJSON trace to file" },
         "facilitator-cwd": {
           type: "string",
           description: "Facilitator working directory",
@@ -119,14 +141,27 @@ const definition = {
         },
         "agent-profiles": {
           type: "string",
-          description: "Comma-separated agent profile names",
+          description:
+            "Comma-separated list of participant profile names (required)",
         },
         "agent-cwd": {
           type: "string",
-          description: "Agent working directory (default: .)",
+          description: "Working directory shared by participants (default: .)",
         },
       },
     },
+    {
+      name: "output",
+      args: "",
+      description:
+        "Read NDJSON from stdin and emit a structured or readable form",
+    },
+    {
+      name: "tee",
+      args: "[output.ndjson]",
+      description:
+        "Stream readable text to stdout while saving raw NDJSON to a file",
+    },
   ],
   globalOptions: {
     format: { type: "string", description: "Output format (json|text)" },
@@ -135,10 +170,36 @@ const definition = {
     json: { type: "boolean", description: "Output help as JSON" },
   },
   examples: [
+    "fit-eval run --task-file=task.md --output=trace.ndjson",
+    "fit-eval supervise --task-file=task.md --supervisor-profile=judge --agent-profile=coder --output=trace.ndjson",
+    'fit-eval facilitate --task-file=task.md --facilitator-profile=lead --agent-profiles="security-engineer,technical-writer" --output=trace.ndjson',
     "fit-eval output --format=text < trace.ndjson",
-    "fit-eval run --task-file=task.md --model=opus",
-    "fit-eval supervise --task-file=task.md --supervisor-cwd=.",
-    'fit-eval facilitate --task-file=task.md --agent-profiles "security-engineer,technical-writer"',
+  ],
+  documentation: [
+    {
+      title: "Agent Evaluations",
+      url: "https://www.forwardimpact.team/docs/guides/agent-evaluations/index.md",
+      description:
+        "Author a judge profile, run an eval locally, wire it into CI, and inspect the resulting trace.",
+    },
+    {
+      title: "Agent Collaboration",
+      url: "https://www.forwardimpact.team/docs/guides/agent-collaboration/index.md",
+      description:
+        "Author a facilitator and participant profiles, run a multi-agent session, and read the message flow.",
+    },
+    {
+      title: "Trace Analysis",
+      url: "https://www.forwardimpact.team/docs/guides/trace-analysis/index.md",
+      description:
+        "Read the NDJSON traces produced by `fit-eval` with `fit-trace` — grounded-theory method and worked examples.",
+    },
+    {
+      title: "Agent Teams",
+      url: "https://www.forwardimpact.team/docs/guides/agent-teams/index.md",
+      description:
+        "How to author the agent, supervisor, and facilitator profiles consumed by --agent-profile, --supervisor-profile, --facilitator-profile, and --agent-profiles.",
+    },
   ],
 };

package/bin/fit-trace.js CHANGED Viewed

@@ -23,6 +23,7 @@ import {
   runInitCommand,
   runTurnCommand,
   runFilterCommand,
+  runSplitCommand,
 } from "../src/commands/trace.js";
 const { version: VERSION } = JSON.parse(
@@ -32,12 +33,14 @@ const { version: VERSION } = JSON.parse(
 const definition = {
   name: "fit-trace",
   version: VERSION,
-  description: "Download, query, and search agent execution traces",
+  description:
+    "Download, query, and analyze agent execution traces — read NDJSON output from fit-eval as qualitative research",
   commands: [
     {
       name: "runs",
       args: "[pattern]",
-      description: "List recent workflow runs (default pattern: agent)",
+      description:
+        "List recent GitHub Actions workflow runs (default pattern: agent)",
       options: {
         lookback: {
           type: "string",
@@ -155,7 +158,7 @@ const definition = {
     {
       name: "filter",
       args: "<file>",
-      description: "Filter turns by structural properties",
+      description: "Filter turns by role, tool, or error status",
       options: {
         role: {
           type: "string",
@@ -167,8 +170,23 @@ const definition = {
         },
         error: {
           type: "boolean",
-          description:
-            "Error tool_result turns only (flag-only; for non-errors use the API)",
+          description: "Error tool_result turns only",
+        },
+      },
+    },
+    {
+      name: "split",
+      args: "<file>",
+      description:
+        "Split a combined trace into per-source files (one per agent or supervisor)",
+      options: {
+        mode: {
+          type: "string",
+          description: "Execution mode: run (no-op), supervise, or facilitate",
+        },
+        "output-dir": {
+          type: "string",
+          description: "Output directory (default: same as input)",
         },
       },
     },
@@ -185,16 +203,34 @@ const definition = {
   examples: [
     "fit-trace runs --lookback 7d",
     "fit-trace download 24497273755",
+    "fit-trace split structured.json --mode=facilitate",
     "fit-trace overview structured.json",
     "fit-trace timeline structured.json",
+    "fit-trace stats structured.json",
+    "fit-trace tool structured.json Conclude",
     "fit-trace search structured.json 'error|fail' --context 1",
-    "fit-trace tool structured.json Bash",
-    "fit-trace batch structured.json 0 20",
-    "fit-trace init structured.json",
+    "fit-trace filter structured.json --tool Bash --error",
     "fit-trace turn structured.json 3",
-    "fit-trace filter structured.json --role system",
-    "fit-trace filter structured.json --tool Bash --role assistant",
-    "fit-trace search structured.json 'error' --full",
+  ],
+  documentation: [
+    {
+      title: "Trace Analysis",
+      url: "https://www.forwardimpact.team/docs/guides/trace-analysis/index.md",
+      description:
+        "The full method walkthrough with worked examples (an eval that failed, a multi-agent session that stalled).",
+    },
+    {
+      title: "Agent Evaluations",
+      url: "https://www.forwardimpact.team/docs/guides/agent-evaluations/index.md",
+      description:
+        "How `fit-eval supervise` produces the traces this skill analyzes.",
+    },
+    {
+      title: "Agent Collaboration",
+      url: "https://www.forwardimpact.team/docs/guides/agent-collaboration/index.md",
+      description:
+        "How `fit-eval facilitate` produces multi-agent traces; `split` is the bridge into per-source files.",
+    },
   ],
 };
@@ -219,6 +255,7 @@ const COMMANDS = {
   init: runInitCommand,
   turn: runTurnCommand,
   filter: runFilterCommand,
+  split: runSplitCommand,
 };
 async function main() {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.23",
+  "version": "0.1.25",
   "description": "Process Claude Code stream-json output into structured traces",
   "license": "Apache-2.0",
   "author": "D. Olsson <hi@senzilla.io>",
@@ -32,7 +32,7 @@
     "@forwardimpact/libcli": "^0.1.0",
     "@forwardimpact/libconfig": "^0.1.0",
     "@forwardimpact/libtelemetry": "^0.1.22",
-    "zod": "^4.3.6"
+    "zod": "^4.4.1"
   },
   "devDependencies": {
     "@forwardimpact/libharness": "^0.1.14"

package/src/agent-runner.js CHANGED Viewed

@@ -18,7 +18,7 @@ function applyDefaults(deps) {
     cwd: deps.cwd,
     query: deps.query,
     output: deps.output,
-    model: deps.model ?? "opus",
+    model: deps.model ?? "claude-opus-4-7[1m]",
     maxTurns: deps.maxTurns ?? 50,
     allowedTools: deps.allowedTools ?? DEFAULT_ALLOWED_TOOLS,
     onLine: deps.onLine ?? null,

package/src/commands/facilitate.js CHANGED Viewed

@@ -44,7 +44,7 @@ function parseFacilitateOptions(values) {
     taskAmend,
     agentConfigs,
     facilitatorCwd: resolve(values["facilitator-cwd"] ?? "."),
-    model: values.model ?? "opus",
+    model: values.model ?? "claude-opus-4-7[1m]",
     maxTurns: maxTurnsRaw === "0" ? 0 : parseInt(maxTurnsRaw, 10),
     outputPath: values.output,
     facilitatorProfile: values["facilitator-profile"] ?? undefined,

package/src/commands/run.js CHANGED Viewed

@@ -27,7 +27,7 @@ function parseRunOptions(values) {
     taskContent,
     taskAmend,
     cwd: resolve(values.cwd ?? "."),
-    model: values.model ?? "opus",
+    model: values.model ?? "claude-opus-4-7[1m]",
     maxTurns: maxTurnsRaw === "0" ? 0 : parseInt(maxTurnsRaw, 10),
     outputPath: values.output,
     agentProfile: values["agent-profile"] ?? undefined,

package/src/commands/supervise.js CHANGED Viewed

@@ -29,7 +29,7 @@ function parseSuperviseOptions(values) {
     agentCwd: resolve(
       values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
     ),
-    model: values.model ?? "opus",
+    model: values.model ?? "claude-opus-4-7[1m]",
     maxTurns: (() => {
       const raw = values["max-turns"] ?? "20";
       return raw === "0" ? 0 : parseInt(raw, 10);

package/src/commands/trace.js CHANGED Viewed

@@ -1,5 +1,5 @@
-import { readFileSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
+import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import { join, dirname } from "node:path";
 import { createTraceCollector } from "@forwardimpact/libeval";
 import { createTraceQuery } from "../trace-query.js";
 import { createTraceGitHub } from "../trace-github.js";
@@ -150,6 +150,113 @@ export async function runFilterCommand(values, args) {
   writeJSON(loadTrace(args[0]).filter(opts), values);
 }
+// --- Split command ---
+/** Valid agent source name pattern: lowercase letter, then lowercase alphanumeric or hyphen */
+const VALID_SOURCE_NAME = /^[a-z][a-z0-9-]*$/;
+/**
+ * Split a combined NDJSON trace into per-source files.
+ * @param {object} values - Parsed option values
+ * @param {string[]} args - [file]
+ */
+export async function runSplitCommand(values, args) {
+  const file = args[0];
+  if (!file) throw new Error("split: missing input file");
+  const mode = values.mode;
+  if (!mode) throw new Error("split: --mode is required");
+  if (mode === "run") {
+    process.stdout.write(
+      "run mode: trace is already in final form, no split needed\n",
+    );
+    return;
+  }
+  const outputDir = values["output-dir"] || dirname(file);
+  mkdirSync(outputDir, { recursive: true });
+  const buckets = parseBuckets(readFileSync(file, "utf8"));
+  if (mode === "supervise") {
+    writeBucket(buckets, "agent", outputDir);
+    writeBucket(buckets, "supervisor", outputDir);
+  } else if (mode === "facilitate") {
+    splitFacilitated(buckets, outputDir);
+  }
+}
+/**
+ * Parse NDJSON content into per-source buckets of unwrapped event lines.
+ * Skips empty lines, malformed JSON, non-envelope lines, and orchestrator events.
+ * @param {string} content - Raw NDJSON file content
+ * @returns {Map<string, string[]>} source name -> array of unwrapped JSON lines
+ */
+function parseBuckets(content) {
+  const buckets = new Map();
+  for (const raw of content.split("\n")) {
+    const trimmed = raw.trim();
+    if (!trimmed) continue;
+    let envelope;
+    try {
+      envelope = JSON.parse(trimmed);
+    } catch {
+      continue;
+    }
+    if (!envelope.event || typeof envelope.source !== "string") continue;
+    if (envelope.source === "orchestrator") continue;
+    if (!buckets.has(envelope.source)) {
+      buckets.set(envelope.source, []);
+    }
+    buckets.get(envelope.source).push(JSON.stringify(envelope.event));
+  }
+  return buckets;
+}
+/**
+ * Write facilitated mode split: facilitator, per-agent, and combined agent files.
+ * @param {Map<string, string[]>} buckets
+ * @param {string} outputDir
+ */
+function splitFacilitated(buckets, outputDir) {
+  writeBucket(buckets, "facilitator", outputDir);
+  const agentSources = [...buckets.keys()].filter(
+    (s) => s !== "facilitator" && VALID_SOURCE_NAME.test(s),
+  );
+  for (const name of agentSources) {
+    writeBucket(buckets, name, outputDir);
+  }
+  const combinedLines = agentSources.flatMap((n) => buckets.get(n) ?? []);
+  if (combinedLines.length > 0) {
+    writeFileSync(
+      join(outputDir, "trace-agent.ndjson"),
+      combinedLines.join("\n") + "\n",
+    );
+  }
+}
+/**
+ * Write a single source bucket to a trace-{name}.ndjson file.
+ * @param {Map<string, string[]>} buckets
+ * @param {string} name
+ * @param {string} outputDir
+ */
+function writeBucket(buckets, name, outputDir) {
+  const lines = buckets.get(name);
+  if (!lines || lines.length === 0) return;
+  const outPath = join(outputDir, `trace-${name}.ndjson`);
+  writeFileSync(outPath, lines.join("\n") + "\n");
+}
 // --- Shared helpers ---
 /**