npm - @forwardimpact/libeval - Versions diffs - 0.1.9 → 0.1.11 - Mend

@forwardimpact/libeval 0.1.9 → 0.1.11

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/bin/fit-eval.js +103 -72
package/package.json +4 -2
package/src/commands/output.js +7 -26
package/src/commands/run.js +16 -41
package/src/commands/supervise.js +19 -47
package/src/commands/tee.js +3 -2

package/bin/fit-eval.js CHANGED Viewed

@@ -1,10 +1,100 @@
 #!/usr/bin/env node
+import { readFileSync } from "node:fs";
+import { createCli } from "@forwardimpact/libcli";
+import { createLogger } from "@forwardimpact/libtelemetry";
 import { runOutputCommand } from "../src/commands/output.js";
 import { runTeeCommand } from "../src/commands/tee.js";
 import { runRunCommand } from "../src/commands/run.js";
 import { runSuperviseCommand } from "../src/commands/supervise.js";
+const { version: VERSION } = JSON.parse(
+  readFileSync(new URL("../package.json", import.meta.url), "utf8"),
+);
+const definition = {
+  name: "fit-eval",
+  version: VERSION,
+  description: "Process Claude Code stream-json output",
+  commands: [
+    {
+      name: "output",
+      args: "[--format=FORMAT]",
+      description: "Process trace and output formatted result",
+    },
+    {
+      name: "tee",
+      args: "[output.ndjson]",
+      description: "Stream text to stdout, optionally save raw NDJSON",
+    },
+    {
+      name: "run",
+      args: "[options]",
+      description: "Run a single agent via the Claude Agent SDK",
+    },
+    {
+      name: "supervise",
+      args: "[options]",
+      description: "Run a supervised agent-supervisor relay loop",
+    },
+  ],
+  options: {
+    format: { type: "string", description: "Output format (json|text)" },
+    help: { type: "boolean", short: "h", description: "Show this help" },
+    version: { type: "boolean", description: "Show version" },
+    json: { type: "boolean", description: "Output help as JSON" },
+    "task-file": { type: "string", description: "Path to task file" },
+    "task-text": { type: "string", description: "Inline task text" },
+    "task-amend": {
+      type: "string",
+      description: "Additional text appended to task",
+    },
+    model: {
+      type: "string",
+      description: "Claude model (default: opus)",
+    },
+    "max-turns": {
+      type: "string",
+      description: "Max agentic turns (default: 50)",
+    },
+    output: { type: "string", description: "Write NDJSON trace to file" },
+    cwd: { type: "string", description: "Working directory" },
+    "agent-profile": {
+      type: "string",
+      description: "Agent profile name",
+    },
+    "allowed-tools": {
+      type: "string",
+      description: "Comma-separated tool list",
+    },
+    "supervisor-cwd": {
+      type: "string",
+      description: "Supervisor working directory",
+    },
+    "agent-cwd": {
+      type: "string",
+      description: "Agent working directory",
+    },
+    "supervisor-profile": {
+      type: "string",
+      description: "Supervisor profile name",
+    },
+    "supervisor-allowed-tools": {
+      type: "string",
+      description: "Supervisor tool list",
+    },
+  },
+  examples: [
+    "fit-eval output --format=text < trace.ndjson",
+    "fit-eval run --task-file=task.md --model=opus",
+    "fit-eval supervise --task-file=task.md --supervisor-cwd=.",
+  ],
+};
+const cli = createCli(definition);
+const logger = createLogger("eval");
 const COMMANDS = {
   output: runOutputCommand,
   tee: runTeeCommand,
@@ -12,89 +102,30 @@ const COMMANDS = {
   supervise: runSuperviseCommand,
 };
-const HELP_TEXT = `
-Eval CLI — Process Claude Code stream-json output
-Usage:
-  fit-eval <command> [options]
-Commands:
-  output [--format=json|text]    Process trace and output formatted result
-  tee [output.ndjson]            Stream text to stdout, optionally save raw NDJSON
-  run [options]                  Run a single agent via the Claude Agent SDK
-  supervise [options]            Run a supervised agent ↔ supervisor relay loop
-Run options:
-  --task-file=PATH     Path to task file (mutually exclusive with --task-text)
-  --task-text=STRING   Inline task text (mutually exclusive with --task-file)
-  --cwd=DIR            Agent working directory (default: .)
-  --model=MODEL        Claude model to use (default: opus)
-  --max-turns=N        Maximum agentic turns (default: 50, 0 = unlimited)
-  --output=PATH        Write NDJSON trace to file (default: stdout)
-  --allowed-tools=LIST Comma-separated tools (default: Bash,Read,Glob,Grep,Write,Edit)
-  --agent-profile=NAME Agent profile name (passed as --agent to Claude CLI)
-Supervise options:
-  --task-file=PATH          Path to task file (mutually exclusive with --task-text)
-  --task-text=STRING        Inline task text (mutually exclusive with --task-file)
-  --supervisor-cwd=DIR      Supervisor working directory (default: .)
-  --agent-cwd=DIR           Agent working directory (default: temp directory)
-  --model=MODEL             Claude model to use (default: opus)
-  --max-turns=N             Maximum supervisor ↔ agent exchanges (default: 20, 0 = unlimited)
-  --output=PATH             Write NDJSON trace to file (default: stdout)
-  --allowed-tools=LIST      Comma-separated tools for agent (default: Bash,Read,Glob,Grep,Write,Edit)
-  --supervisor-allowed-tools=LIST
-                            Comma-separated tools for supervisor (default: Bash,Read,Glob,Grep,Write,Edit)
-  --supervisor-profile=NAME Supervisor agent profile name (passed as --agent to Claude CLI)
-  --agent-profile=NAME      Agent profile name (passed as --agent to Claude CLI)
-Options:
-  --help      Show this help message
-  --version   Show version number
-Examples:
-  fit-eval output --format=text < trace.ndjson
-  fit-eval output --format=json < trace.ndjson
-  fit-eval tee < trace.ndjson
-  fit-eval tee output.ndjson < trace.ndjson
-  fit-eval run --task-text="Perform a security audit of the repository." --model=opus
-  fit-eval run --task-file=scenarios/guide-setup/task.md --model=opus
-  fit-eval supervise --task-file=scenarios/guide-setup/task.md --supervisor-cwd=.
-`.trim();
 async function main() {
-  const args = process.argv.slice(2);
+  const parsed = cli.parse(process.argv.slice(2));
+  if (!parsed) process.exit(0);
-  if (args.includes("--help") || args.includes("-h") || args.length === 0) {
-    console.log(HELP_TEXT);
-    return;
-  }
+  const { values, positionals } = parsed;
-  if (args.includes("--version")) {
-    const { readFileSync } = await import("fs");
-    const { join, dirname } = await import("path");
-    const { fileURLToPath } = await import("url");
-    const __dirname = dirname(fileURLToPath(import.meta.url));
-    const pkg = JSON.parse(
-      readFileSync(join(__dirname, "..", "package.json"), "utf8"),
-    );
-    console.log(pkg.version);
-    return;
+  if (positionals.length === 0) {
+    cli.usageError("no command specified");
+    process.exit(2);
   }
-  const commandName = args[0];
-  const handler = COMMANDS[commandName];
+  const [command, ...args] = positionals;
+  const handler = COMMANDS[command];
   if (!handler) {
-    console.error(`Unknown command: ${commandName}\n`);
-    console.error(HELP_TEXT);
-    process.exit(1);
+    cli.usageError(`unknown command "${command}"`);
+    process.exit(2);
   }
-  await handler(args.slice(1));
+  await handler(values, args);
 }
 main().catch((error) => {
-  console.error(error.message);
+  logger.exception("main", error);
+  cli.error(error.message);
   process.exit(1);
 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.9",
+  "version": "0.1.11",
   "description": "Process Claude Code stream-json output into structured traces",
   "license": "Apache-2.0",
   "author": "D. Olsson <hi@senzilla.io>",
@@ -17,7 +17,9 @@
     "test": "bun run node --test test/*.test.js"
   },
   "dependencies": {
-    "@anthropic-ai/claude-agent-sdk": "^0.2.91"
+    "@anthropic-ai/claude-agent-sdk": "^0.2.98",
+    "@forwardimpact/libcli": "^0.1.0",
+    "@forwardimpact/libtelemetry": "^0.1.22"
   },
   "publishConfig": {
     "access": "public"

package/src/commands/output.js CHANGED Viewed

@@ -6,10 +6,14 @@ import { createTraceCollector } from "@forwardimpact/libeval";
  *
  * Usage: fit-eval output [--format=json|text] < trace.ndjson
  *
- * @param {string[]} args - Command arguments
+ * @param {object} values - Parsed option values from cli.parse()
+ * @param {string[]} args - Positional arguments
  */
-export async function runOutputCommand(args) {
-  const format = parseFormat(args);
+export async function runOutputCommand(values, _args) {
+  const format =
+    values.format === "text" || values.format === "json"
+      ? values.format
+      : "json";
   const collector = createTraceCollector();
   const chunks = [];
@@ -28,26 +32,3 @@ export async function runOutputCommand(args) {
     process.stdout.write(JSON.stringify(collector.toJSON()) + "\n");
   }
 }
-/**
- * Parse --format from args. Supports --format=value and --format value.
- * @param {string[]} args
- * @returns {"text"|"json"}
- */
-function parseFormat(args) {
-  for (let i = 0; i < args.length; i++) {
-    if (args[i].startsWith("--format=")) {
-      const value = args[i].slice("--format=".length);
-      if (value === "text" || value === "json") return value;
-      console.error(`Unknown format: ${value}. Using "json".`);
-      return "json";
-    }
-    if (args[i] === "--format" && i + 1 < args.length) {
-      const value = args[i + 1];
-      if (value === "text" || value === "json") return value;
-      console.error(`Unknown format: ${value}. Using "json".`);
-      return "json";
-    }
-  }
-  return "json";
-}

package/src/commands/run.js CHANGED Viewed

@@ -4,47 +4,32 @@ import { createAgentRunner } from "../agent-runner.js";
 import { createTeeWriter } from "../tee-writer.js";
 /**
- * Parse a --key=value or --key value flag from args.
- * @param {string[]} args
- * @param {string} name - Flag name without --
- * @returns {string|undefined}
- */
-function parseFlag(args, name) {
-  const prefix = `--${name}=`;
-  for (let i = 0; i < args.length; i++) {
-    if (args[i].startsWith(prefix)) return args[i].slice(prefix.length);
-    if (args[i] === `--${name}` && i + 1 < args.length) return args[i + 1];
-  }
-  return undefined;
-}
-/**
- * Parse and validate run command options from args.
- * @param {string[]} args
+ * Parse and validate run command options from parsed values.
+ * @param {object} values - Parsed option values from cli.parse()
  * @returns {{ taskContent: string, cwd: string, model: string, maxTurns: number, outputPath: string|undefined, agentProfile: string|undefined, allowedTools: string[] }}
  */
-function parseRunOptions(args) {
-  const taskFile = parseFlag(args, "task-file");
-  const taskText = parseFlag(args, "task-text");
+function parseRunOptions(values) {
+  const taskFile = values["task-file"];
+  const taskText = values["task-text"];
   if (taskFile && taskText)
     throw new Error("--task-file and --task-text are mutually exclusive");
   if (!taskFile && !taskText)
     throw new Error("--task-file or --task-text is required");
-  const maxTurnsRaw = parseFlag(args, "max-turns") ?? "50";
-  const taskAmend = parseFlag(args, "task-amend") ?? undefined;
+  const maxTurnsRaw = values["max-turns"] ?? "50";
+  const taskAmend = values["task-amend"] ?? undefined;
   let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
   if (taskAmend) taskContent += `\n\n${taskAmend}`;
   return {
     taskContent,
-    cwd: resolve(parseFlag(args, "cwd") ?? "."),
-    model: parseFlag(args, "model") ?? "opus",
+    cwd: resolve(values.cwd ?? "."),
+    model: values.model ?? "opus",
     maxTurns: maxTurnsRaw === "0" ? 0 : parseInt(maxTurnsRaw, 10),
-    outputPath: parseFlag(args, "output"),
-    agentProfile: parseFlag(args, "agent-profile") ?? undefined,
+    outputPath: values.output,
+    agentProfile: values["agent-profile"] ?? undefined,
     allowedTools: (
-      parseFlag(args, "allowed-tools") ??
+      values["allowed-tools"] ??
       "Bash,Read,Glob,Grep,Write,Edit,Agent,TodoWrite"
     ).split(","),
   };
@@ -55,20 +40,10 @@ function parseRunOptions(args) {
  *
  * Usage: fit-eval run [options]
  *
- * Options:
- *   --task-file=PATH     Path to task file (mutually exclusive with --task-text)
- *   --task-text=STRING   Inline task text (mutually exclusive with --task-file)
- *   --cwd=DIR            Agent working directory (default: .)
- *   --model=MODEL        Claude model to use (default: opus)
- *   --max-turns=N        Maximum agentic turns (default: 50, 0 = unlimited)
- *   --output=PATH        Write NDJSON trace to file (default: stdout)
- *   --allowed-tools=LIST Comma-separated tools (default: Bash,Read,Glob,Grep,Write,Edit)
- *   --agent-profile=NAME Agent profile name (passed as --agent to Claude CLI)
- *   --task-amend=TEXT     Additional text appended to the task prompt
- *
- * @param {string[]} args - Command arguments
+ * @param {object} values - Parsed option values from cli.parse()
+ * @param {string[]} args - Positional arguments
  */
-export async function runRunCommand(args) {
+export async function runRunCommand(values, _args) {
   const {
     taskContent,
     cwd,
@@ -77,7 +52,7 @@ export async function runRunCommand(args) {
     outputPath,
     agentProfile,
     allowedTools,
-  } = parseRunOptions(args);
+  } = parseRunOptions(values);
   // When --output is specified, stream text to stdout while writing NDJSON to file.
   // Otherwise, write NDJSON directly to stdout (backwards-compatible).

package/src/commands/supervise.js CHANGED Viewed

@@ -5,56 +5,40 @@ import { createSupervisor } from "../supervisor.js";
 import { createTeeWriter } from "../tee-writer.js";
 /**
- * Parse a --key=value or --key value flag from args.
- * @param {string[]} args
- * @param {string} name - Flag name without --
- * @returns {string|undefined}
- */
-function parseFlag(args, name) {
-  const prefix = `--${name}=`;
-  for (let i = 0; i < args.length; i++) {
-    if (args[i].startsWith(prefix)) return args[i].slice(prefix.length);
-    if (args[i] === `--${name}` && i + 1 < args.length) return args[i + 1];
-  }
-  return undefined;
-}
-/**
- * Parse all supervise flags from args into an options object.
- * @param {string[]} args
+ * Parse all supervise flags from parsed values into an options object.
+ * @param {object} values - Parsed option values from cli.parse()
  * @returns {object}
  */
-function parseSuperviseOptions(args) {
-  const taskFile = parseFlag(args, "task-file");
-  const taskText = parseFlag(args, "task-text");
+function parseSuperviseOptions(values) {
+  const taskFile = values["task-file"];
+  const taskText = values["task-text"];
   if (taskFile && taskText)
     throw new Error("--task-file and --task-text are mutually exclusive");
   if (!taskFile && !taskText)
     throw new Error("--task-file or --task-text is required");
-  const supervisorAllowedToolsRaw = parseFlag(args, "supervisor-allowed-tools");
+  const supervisorAllowedToolsRaw = values["supervisor-allowed-tools"];
-  const taskAmend = parseFlag(args, "task-amend") ?? undefined;
+  const taskAmend = values["task-amend"] ?? undefined;
   let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
   if (taskAmend) taskContent += `\n\n${taskAmend}`;
   return {
     taskContent,
-    supervisorCwd: resolve(parseFlag(args, "supervisor-cwd") ?? "."),
+    supervisorCwd: resolve(values["supervisor-cwd"] ?? "."),
     agentCwd: resolve(
-      parseFlag(args, "agent-cwd") ??
-        mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
+      values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
     ),
-    model: parseFlag(args, "model") ?? "opus",
+    model: values.model ?? "opus",
     maxTurns: (() => {
-      const raw = parseFlag(args, "max-turns") ?? "20";
+      const raw = values["max-turns"] ?? "20";
       return raw === "0" ? 0 : parseInt(raw, 10);
     })(),
-    outputPath: parseFlag(args, "output"),
-    supervisorProfile: parseFlag(args, "supervisor-profile") ?? undefined,
-    agentProfile: parseFlag(args, "agent-profile") ?? undefined,
+    outputPath: values.output,
+    supervisorProfile: values["supervisor-profile"] ?? undefined,
+    agentProfile: values["agent-profile"] ?? undefined,
     allowedTools: (
-      parseFlag(args, "allowed-tools") ??
+      values["allowed-tools"] ??
       "Bash,Read,Glob,Grep,Write,Edit,Agent,TodoWrite"
     ).split(","),
     supervisorAllowedTools: supervisorAllowedToolsRaw
@@ -68,23 +52,11 @@ function parseSuperviseOptions(args) {
  *
  * Usage: fit-eval supervise [options]
  *
- * Options:
- *   --task-file=PATH          Path to task file (mutually exclusive with --task-text)
- *   --task-text=STRING        Inline task text (mutually exclusive with --task-file)
- *   --supervisor-cwd=DIR      Supervisor working directory (default: .)
- *   --agent-cwd=DIR           Agent working directory (default: temp directory)
- *   --model=MODEL             Claude model to use (default: opus)
- *   --max-turns=N             Maximum supervisor / agent exchanges (default: 20, 0 = unlimited)
- *   --output=PATH             Write NDJSON trace to file (default: stdout)
- *   --allowed-tools=LIST      Comma-separated tools for the agent (default: Bash,Read,Glob,Grep,Write,Edit)
- *   --supervisor-profile=NAME Supervisor agent profile name (passed as --agent to Claude CLI)
- *   --agent-profile=NAME      Agent profile name (passed as --agent to Claude CLI)
- *   --task-amend=TEXT          Additional text appended to the task prompt
- *
- * @param {string[]} args - Command arguments
+ * @param {object} values - Parsed option values from cli.parse()
+ * @param {string[]} args - Positional arguments
  */
-export async function runSuperviseCommand(args) {
-  const opts = parseSuperviseOptions(args);
+export async function runSuperviseCommand(values, _args) {
+  const opts = parseSuperviseOptions(values);
   // When --output is specified, stream text to stdout while writing NDJSON to file.
   // Otherwise, write NDJSON directly to stdout (backwards-compatible).

package/src/commands/tee.js CHANGED Viewed

@@ -9,9 +9,10 @@ import { createTeeWriter } from "../tee-writer.js";
  *
  * Usage: fit-eval tee [output.ndjson] < trace.ndjson
  *
- * @param {string[]} args - Command arguments (optional output file path)
+ * @param {object} values - Parsed option values from cli.parse()
+ * @param {string[]} args - Positional arguments (optional output file path)
  */
-export async function runTeeCommand(args) {
+export async function runTeeCommand(values, args) {
   const outputPath = args.find((a) => !a.startsWith("-")) ?? null;
   const fileStream = outputPath ? createWriteStream(outputPath) : null;