@smithers-orchestrator/cli 0.20.4 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -1,8 +1,10 @@
1
1
  #!/usr/bin/env bun
2
2
  import { setJsonMode } from "./util/logger.ts";
3
+ import { findFirstPositionalIndex, parseMcpSurfaceArgv, rewriteBareResumeFlagArgv } from "./argv-utils.js";
4
+ import { CLI_JSON_ARGUMENT_MAX_BYTES, parseJsonArgument, parseJsonInput } from "./json-args.js";
3
5
  import { resolve, dirname, basename } from "node:path";
4
6
  import { pathToFileURL } from "node:url";
5
- import { readFileSync, existsSync, openSync, statSync, mkdirSync, writeFileSync } from "node:fs";
7
+ import { readFileSync, existsSync, openSync, statSync } from "node:fs";
6
8
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
7
9
  import { Effect, Fiber } from "effect";
8
10
  import { Cli, Mcp as IncurMcp, z } from "incur";
@@ -39,7 +41,18 @@ import { listAccounts, removeAccount } from "@smithers-orchestrator/accounts";
39
41
  import { runAgentAdd, pingAccount } from "./agent-commands/runAgentAdd.js";
40
42
  import { agentAddWizard } from "./agent-commands/agentAddWizard.js";
41
43
  import { initWorkflowPack, getWorkflowFollowUpCtas } from "./workflow-pack.js";
42
- import { discoverWorkflows, resolveWorkflow, createWorkflowFile } from "./workflows.js";
44
+ import { discoverWorkflows, resolveWorkflow, createWorkflowFile, renderWorkflowSkill, writeWorkflowSkillFiles } from "./workflows.js";
45
+ import {
46
+ assertEvalRunIdsAvailable,
47
+ assertEvalReportWritable,
48
+ buildEvalPlan,
49
+ buildEvalReport,
50
+ evaluateEvalCaseResult,
51
+ loadEvalCases,
52
+ renderEvalPlan,
53
+ renderEvalReport,
54
+ writeEvalReport,
55
+ } from "./eval-suite.js";
43
56
  import { ask } from "./ask.js";
44
57
  import { runScheduler } from "./scheduler.js";
45
58
  import { resumeRunDetached } from "./resume-detached.js";
@@ -47,6 +60,7 @@ import { formatCliAgentCapabilityDoctorReport, getCliAgentCapabilityDoctorReport
47
60
  import { parseDurationMs, supervisorLoopEffect, } from "./supervisor.js";
48
61
  import { WATCH_MIN_INTERVAL_MS, runWatchLoop, watchIntervalSecondsToMs, } from "./watch.js";
49
62
  import { createSemanticMcpServer } from "./mcp/semantic-server.js";
63
+ import { parseTokenScopes, readSmithersTokenStore, smithersTokenStorePath, writeSmithersTokenStore, } from "./token-store.js";
50
64
  import pc from "picocolors";
51
65
  import crypto from "node:crypto";
52
66
  import React from "react";
@@ -105,43 +119,9 @@ function readPackageVersion() {
105
119
  return "unknown";
106
120
  }
107
121
  }
108
- function smithersTokenStorePath() {
109
- return process.env.SMITHERS_TOKEN_STORE ?? resolve(process.env.HOME ?? process.cwd(), ".smithers", "tokens.json");
110
- }
111
- function readSmithersTokenStore() {
112
- const path = smithersTokenStorePath();
113
- if (!existsSync(path)) {
114
- return { tokens: {} };
115
- }
116
- try {
117
- const parsed = JSON.parse(readFileSync(path, "utf8"));
118
- if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
119
- return { tokens: {} };
120
- }
121
- const tokens = parsed.tokens && typeof parsed.tokens === "object" && !Array.isArray(parsed.tokens)
122
- ? parsed.tokens
123
- : {};
124
- return { tokens };
125
- }
126
- catch {
127
- return { tokens: {} };
128
- }
129
- }
130
- function writeSmithersTokenStore(store) {
131
- const path = smithersTokenStorePath();
132
- mkdirSync(dirname(path), { recursive: true });
133
- writeFileSync(path, `${JSON.stringify(store, null, 2)}\n`, { mode: 0o600 });
134
- }
135
- function parseTokenScopes(raw) {
136
- return raw
137
- .split(/[,\s]+/)
138
- .map((scope) => scope.trim())
139
- .filter(Boolean);
140
- }
141
122
  const CLI_ARGUMENT_MAX_LENGTH = 4096;
142
123
  const CLI_IDENTIFIER_MAX_LENGTH = 256;
143
124
  const CLI_TEXT_ARGUMENT_MAX_LENGTH = 64 * 1024;
144
- const CLI_JSON_ARGUMENT_MAX_BYTES = 1024 * 1024;
145
125
  const CLI_HANDLER_BOUNDS_WRAPPED = Symbol("smithers.cliHandlerBoundsWrapped");
146
126
  /**
147
127
  * @param {string} path
@@ -240,55 +220,6 @@ function wrapCliCommandHandlersWithInputBounds(commands) {
240
220
  entry[CLI_HANDLER_BOUNDS_WRAPPED] = true;
241
221
  }
242
222
  }
243
- /**
244
- * @param {string | undefined} raw
245
- * @param {string} label
246
- * @param {FailFn} fail
247
- */
248
- function parseJsonInput(raw, label, fail) {
249
- if (!raw)
250
- return undefined;
251
- try {
252
- return JSON.parse(raw);
253
- }
254
- catch (err) {
255
- return fail({
256
- code: "INVALID_JSON",
257
- message: `Invalid JSON for ${label}: ${err?.message ?? String(err)}`,
258
- exitCode: 4,
259
- });
260
- }
261
- }
262
- /**
263
- * @param {string | undefined} raw
264
- * @param {FailFn} fail
265
- * @returns {Record<string, string | number | boolean> | undefined}
266
- */
267
- function parseAnnotations(raw, fail) {
268
- const parsed = parseJsonInput(raw, "annotations", fail);
269
- if (parsed === undefined)
270
- return undefined;
271
- if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
272
- return fail({
273
- code: "INVALID_ANNOTATIONS",
274
- message: "Run annotations must be a flat JSON object of string/number/boolean values",
275
- exitCode: 4,
276
- });
277
- }
278
- /** @type {Record<string, string | number | boolean>} */
279
- const annotations = {};
280
- for (const [key, value] of Object.entries(parsed)) {
281
- if (!["string", "number", "boolean"].includes(typeof value)) {
282
- return fail({
283
- code: "INVALID_ANNOTATIONS",
284
- message: `Run annotation ${key} must be a string, number, or boolean`,
285
- exitCode: 4,
286
- });
287
- }
288
- annotations[key] = /** @type {string | number | boolean} */ (value);
289
- }
290
- return annotations;
291
- }
292
223
  /**
293
224
  * @param {string | undefined} status
294
225
  */
@@ -1308,6 +1239,24 @@ const upOptions = z.object({
1308
1239
  authToken: z.string().optional().describe("Bearer token for HTTP auth (or set SMITHERS_API_KEY)"),
1309
1240
  metrics: z.boolean().default(true).describe("Expose /metrics endpoint (with --serve)"),
1310
1241
  });
1242
+ const evalOptions = z.object({
1243
+ cases: z.string().describe("JSON or JSONL eval case file"),
1244
+ suite: z.string().optional().describe("Stable suite ID used in run IDs and report paths"),
1245
+ runLabel: z.string().optional().describe("Run label appended to eval run IDs; defaults to current UTC timestamp plus a nonce"),
1246
+ dryRun: z.boolean().default(false).describe("Plan the suite without launching runs"),
1247
+ concurrency: z.number().int().min(1).max(16).default(1).describe("Number of eval cases to run at once"),
1248
+ maxCases: z.number().int().min(1).optional().describe("Run only the first N cases"),
1249
+ report: z.string().optional().describe("Write report JSON to this path"),
1250
+ force: z.boolean().default(false).describe("Overwrite an existing eval report"),
1251
+ includeOutput: z.boolean().default(true).describe("Include workflow outputs in the report"),
1252
+ maxConcurrency: z.number().int().min(1).optional().describe("Per-workflow max task concurrency"),
1253
+ root: z.string().optional().describe("Tool sandbox root directory"),
1254
+ log: z.boolean().default(true).describe("Enable NDJSON event log file output"),
1255
+ logDir: z.string().optional().describe("NDJSON event logs directory"),
1256
+ allowNetwork: z.boolean().default(false).describe("Allow bash tool network requests"),
1257
+ maxOutputBytes: z.number().int().min(1).optional().describe("Max bytes a single tool call can return"),
1258
+ toolTimeoutMs: z.number().int().min(1).optional().describe("Max wall-clock time per tool call in ms"),
1259
+ });
1311
1260
  const superviseOptions = z.object({
1312
1261
  dryRun: z.boolean().default(false).describe("Show which stale runs would be resumed, without acting"),
1313
1262
  interval: z.string().default("10s").describe("Poll interval (e.g. 10s, 30s, 1m)"),
@@ -1347,7 +1296,7 @@ const chatOptions = z.object({
1347
1296
  stderr: z.boolean().default(true).describe("Include agent stderr output"),
1348
1297
  });
1349
1298
  const chatCreateOptions = z.object({
1350
- agent: z.enum(["claude-code", "codex", "gemini"]).describe("CLI agent engine to launch"),
1299
+ agent: z.enum(["claude-code", "codex", "antigravity", "gemini"]).describe("CLI agent engine to launch"),
1351
1300
  cwd: z.string().optional().describe("Working directory for the chat session (default: current directory)"),
1352
1301
  });
1353
1302
  const inspectArgs = z.object({
@@ -1438,6 +1387,13 @@ const workflowPathArgs = z.object({
1438
1387
  const workflowDoctorArgs = z.object({
1439
1388
  name: z.string().optional().describe("Workflow ID"),
1440
1389
  });
1390
+ const workflowSkillArgs = z.object({
1391
+ name: z.string().optional().describe("Workflow ID, or omit to generate skills for all workflows"),
1392
+ });
1393
+ const workflowSkillOptions = z.object({
1394
+ output: z.string().optional().describe("Output file for one workflow, or output directory for all workflows"),
1395
+ force: z.boolean().default(false).describe("Overwrite existing skill files"),
1396
+ });
1441
1397
  const workflowRunOptions = upOptions.extend({
1442
1398
  prompt: z.string().optional().describe("Prompt text mapped to input.prompt when --input is omitted"),
1443
1399
  });
@@ -1455,6 +1411,54 @@ function normalizeWorkflowRunOptions(options) {
1455
1411
  root: options.root ?? ".",
1456
1412
  };
1457
1413
  }
1414
/**
 * Report whether the command line explicitly asks for machine-readable output.
 *
 * Both spellings of the flag are recognised: `--format <value>` and
 * `--format=<value>`. The first `--format` occurrence decides the answer, so
 * the two spellings behave the same when the flag is repeated (previously
 * `--format=yaml` was skipped while `--format yaml` ended the scan).
 *
 * @param {readonly string[]} [argv] Argument vector to scan; defaults to `process.argv`.
 * @returns {boolean} `true` only when the first `--format` value is `json` or `jsonl`.
 */
function formatRequestedJsonOutput(argv = process.argv) {
    const flag = "--format";
    const inlinePrefix = `${flag}=`;
    for (let index = 0; index < argv.length; index += 1) {
        const arg = argv[index];
        let value;
        if (arg === flag) {
            // Space-separated form: the value is the next token (undefined when the flag is last).
            value = argv[index + 1];
        }
        else if (typeof arg === "string" && arg.startsWith(inlinePrefix)) {
            value = arg.slice(inlinePrefix.length);
        }
        else {
            continue;
        }
        return value === "json" || value === "jsonl";
    }
    return false;
}
1427
/**
 * Build the default eval run label: a 14-digit UTC timestamp
 * (`YYYYMMDDHHMMSS`, taken from `Date#toISOString`) followed by `-` and the
 * first 8 characters of a random UUID, so labels created in the same second
 * still differ.
 *
 * @param {Date} [now] Instant to stamp the label with; defaults to the current time.
 * @returns {string} Label of the form `20240102030405-1a2b3c4d`.
 * @throws {RangeError} When `now` is an invalid date (raised by `toISOString`).
 */
function defaultEvalRunLabel(now = new Date()) {
    // Strip the ISO separators, then keep everything down to whole seconds.
    const timestamp = now.toISOString().replace(/[-:TZ.]/g, "").slice(0, 14);
    const nonce = crypto.randomUUID().slice(0, 8);
    return `${timestamp}-${nonce}`;
}
1431
/**
 * Turn the `<workflow>` argument of the eval command into a workflow path.
 *
 * The argument is first tried as a path relative to the current working
 * directory. It is only accepted as a path when it points at a regular file:
 * a directory that happens to share its name with the argument must not
 * shadow the `resolveWorkflow` lookup. Anything else is handed to
 * `resolveWorkflow`, and the `entryFile` of its result is returned.
 *
 * @param {string} workflowInput Path to a workflow file, or an identifier understood by `resolveWorkflow`.
 * @returns {string} `workflowInput` unchanged when it names an existing file, otherwise the resolved entry file.
 */
function resolveWorkflowPathForEval(workflowInput) {
    const cwd = process.cwd();
    const candidate = resolve(cwd, workflowInput);
    // existsSync alone is also true for directories, which cannot be loaded as a workflow.
    if (existsSync(candidate) && statSync(candidate).isFile()) {
        return workflowInput;
    }
    return resolveWorkflow(workflowInput, cwd).entryFile;
}
1441
/**
 * Map `items` through an async `worker` with at most `limit` calls in flight.
 *
 * A fixed set of runners pulls indexes from a shared cursor, so results land
 * at the index of their input regardless of completion order. When any worker
 * call rejects, the remaining runners stop picking up new items (calls that
 * are already in flight are left to settle) and the returned promise rejects
 * with that first error.
 *
 * @template T
 * @template R
 * @param {T[]} items Inputs to process.
 * @param {number} limit Maximum number of concurrent worker calls; must be >= 1 (fractions are floored).
 * @param {(item: T, index: number) => Promise<R>} worker Async mapper invoked once per item.
 * @returns {Promise<R[]>} Results in input order.
 * @throws {RangeError} When `limit` is not a number >= 1; previously such a limit
 * started no runners and silently returned an array of holes.
 */
async function runWithLimit(items, limit, worker) {
    if (typeof limit !== "number" || !(limit >= 1)) {
        throw new RangeError(`runWithLimit: limit must be a number >= 1, received ${String(limit)}`);
    }
    const results = new Array(items.length);
    const runnerCount = Math.min(Math.floor(limit), items.length);
    let cursor = 0;
    let failed = false;
    const runNext = async () => {
        while (!failed && cursor < items.length) {
            // Claim the index synchronously so no two runners take the same item.
            const index = cursor;
            cursor += 1;
            try {
                results[index] = await worker(items[index], index);
            }
            catch (err) {
                // Tell the sibling runners to stop dispatching, then surface the error.
                failed = true;
                throw err;
            }
        }
    };
    await Promise.all(Array.from({ length: runnerCount }, () => runNext()));
    return results;
}
1458
1462
  /**
1459
1463
  * @param {string} intervalRaw
1460
1464
  * @param {string} staleThresholdRaw
@@ -1515,8 +1519,42 @@ function normalizeEventsQuery(options) {
1515
1519
  async function executeUpCommand(c, workflowPath, options, fail) {
1516
1520
  try {
1517
1521
  const resolvedWorkflowPath = resolve(process.cwd(), workflowPath);
1518
- const input = parseJsonInput(options.input, "input", fail) ?? {};
1519
- const annotations = parseAnnotations(options.annotations, fail);
1522
+ let input;
1523
+ let annotations;
1524
+ try {
1525
+ input = parseJsonArgument(options.input, "input") ?? {};
1526
+ const parsedAnnotations = parseJsonArgument(options.annotations, "annotations");
1527
+ if (parsedAnnotations === undefined) {
1528
+ annotations = undefined;
1529
+ }
1530
+ else if (!parsedAnnotations || typeof parsedAnnotations !== "object" || Array.isArray(parsedAnnotations)) {
1531
+ return fail({
1532
+ code: "INVALID_ANNOTATIONS",
1533
+ message: "Run annotations must be a flat JSON object of string/number/boolean values",
1534
+ exitCode: 4,
1535
+ });
1536
+ }
1537
+ else {
1538
+ annotations = {};
1539
+ for (const [key, value] of Object.entries(parsedAnnotations)) {
1540
+ if (!["string", "number", "boolean"].includes(typeof value)) {
1541
+ return fail({
1542
+ code: "INVALID_ANNOTATIONS",
1543
+ message: `Run annotation ${key} must be a string, number, or boolean`,
1544
+ exitCode: 4,
1545
+ });
1546
+ }
1547
+ annotations[key] = /** @type {string | number | boolean} */ (value);
1548
+ }
1549
+ }
1550
+ }
1551
+ catch (err) {
1552
+ return fail({
1553
+ code: err instanceof SmithersError ? err.code : "INVALID_JSON",
1554
+ message: err?.message ?? String(err),
1555
+ exitCode: 4,
1556
+ });
1557
+ }
1520
1558
  const { resume, resumeRunId } = normalizeResumeOption(options.resume);
1521
1559
  const runId = options.runId ?? resumeRunId;
1522
1560
  // Detached mode: spawn ourselves as a background process
@@ -1526,9 +1564,9 @@ async function executeUpCommand(c, workflowPath, options, fail) {
1526
1564
  if (runId)
1527
1565
  childArgs.push("--run-id", runId);
1528
1566
  if (options.input)
1529
- childArgs.push("--input", options.input);
1567
+ childArgs.push("--input", options.input === "-" ? JSON.stringify(input) : options.input);
1530
1568
  if (options.annotations)
1531
- childArgs.push("--annotations", options.annotations);
1569
+ childArgs.push("--annotations", options.annotations === "-" ? JSON.stringify(annotations ?? {}) : options.annotations);
1532
1570
  if (options.maxConcurrency)
1533
1571
  childArgs.push("--max-concurrency", String(options.maxConcurrency));
1534
1572
  if (options.root)
@@ -1611,6 +1649,13 @@ async function executeUpCommand(c, workflowPath, options, fail) {
1611
1649
  exitCode: 4,
1612
1650
  });
1613
1651
  }
1652
+ if (Boolean(options.resumeClaimOwner) !== Boolean(options.resumeClaimHeartbeat)) {
1653
+ return fail({
1654
+ code: "INVALID_RESUME_CLAIM",
1655
+ message: "--resume-claim-owner and --resume-claim-heartbeat must be provided together.",
1656
+ exitCode: 4,
1657
+ });
1658
+ }
1614
1659
  const workflow = await loadWorkflow(workflowPath);
1615
1660
  ensureSmithersTables(workflow.db);
1616
1661
  if (options.hot) {
@@ -1644,13 +1689,6 @@ async function executeUpCommand(c, workflowPath, options, fail) {
1644
1689
  const logDir = options.log ? options.logDir : null;
1645
1690
  const onProgress = buildProgressReporter();
1646
1691
  const abort = setupAbortSignal();
1647
- if (Boolean(options.resumeClaimOwner) !== Boolean(options.resumeClaimHeartbeat)) {
1648
- return fail({
1649
- code: "INVALID_RESUME_CLAIM",
1650
- message: "--resume-claim-owner and --resume-claim-heartbeat must be provided together.",
1651
- exitCode: 4,
1652
- });
1653
- }
1654
1692
  const resumeClaim = options.resumeClaimOwner && options.resumeClaimHeartbeat
1655
1693
  ? {
1656
1694
  claimOwnerId: options.resumeClaimOwner,
@@ -1870,6 +1908,49 @@ const workflowCli = Cli.create({
1870
1908
  });
1871
1909
  }
1872
1910
  },
1911
+ })
1912
+ .command("inspect", {
1913
+ description: "Show workflow metadata and an agent-facing skill preview.",
1914
+ args: workflowPathArgs,
1915
+ run(c) {
1916
+ const workflow = resolveWorkflow(c.args.name, process.cwd());
1917
+ return c.ok({
1918
+ workflow,
1919
+ skillPreview: renderWorkflowSkill(workflow, { root: process.cwd() }),
1920
+ });
1921
+ },
1922
+ })
1923
+ .command("skills", {
1924
+ description: "Generate agent-facing skill docs for local workflows.",
1925
+ args: workflowSkillArgs,
1926
+ options: workflowSkillOptions,
1927
+ run(c) {
1928
+ const fail = (opts) => {
1929
+ commandExitOverride = opts.exitCode ?? 1;
1930
+ return c.error(opts);
1931
+ };
1932
+ try {
1933
+ return c.ok(writeWorkflowSkillFiles(process.cwd(), {
1934
+ workflowId: c.args.name ?? "all",
1935
+ output: c.options.output,
1936
+ force: c.options.force,
1937
+ }));
1938
+ }
1939
+ catch (err) {
1940
+ if (err instanceof SmithersError) {
1941
+ return fail({
1942
+ code: err.code,
1943
+ message: err.message,
1944
+ exitCode: 4,
1945
+ });
1946
+ }
1947
+ return fail({
1948
+ code: "WORKFLOW_SKILLS_FAILED",
1949
+ message: err?.message ?? String(err),
1950
+ exitCode: 1,
1951
+ });
1952
+ }
1953
+ },
1873
1954
  })
1874
1955
  .command("doctor", {
1875
1956
  description: "Inspect workflow discovery, preload files, and detected agents.",
@@ -2039,7 +2120,7 @@ const agentsCli = Cli.create({
2039
2120
  description: "Register a Smithers agent account (interactive wizard, or non-interactive via flags).",
2040
2121
  options: z.object({
2041
2122
  provider: z.enum([
2042
- "claude-code", "codex", "gemini", "kimi",
2123
+ "claude-code", "antigravity", "codex", "gemini", "kimi",
2043
2124
  "anthropic-api", "openai-api", "gemini-api",
2044
2125
  ]).optional().describe("Provider id; omit to launch the interactive wizard"),
2045
2126
  label: z.string().optional().describe("Unique label, e.g. 'claude-work'"),
@@ -2622,6 +2703,141 @@ const cli = Cli.create({
2622
2703
  };
2623
2704
  return executeUpCommand(c, c.args.workflow, c.options, fail);
2624
2705
  },
2706
+ })
2707
+ // =========================================================================
2708
+ // smithers eval <workflow>
2709
+ // =========================================================================
2710
+ .command("eval", {
2711
+ description: "Run a workflow over a JSON/JSONL eval suite and write a regression report.",
2712
+ args: workflowArgs,
2713
+ options: evalOptions,
2714
+ alias: { cases: "c", suite: "s", dryRun: "n", concurrency: "j", report: "r" },
2715
+ async run(c) {
2716
+ const fail = (opts) => {
2717
+ commandExitOverride = opts.exitCode ?? 1;
2718
+ return c.error(opts);
2719
+ };
2720
+ try {
2721
+ const workflowPath = resolveWorkflowPathForEval(c.args.workflow);
2722
+ const loadedCases = loadEvalCases(process.cwd(), c.options.cases, {
2723
+ maxCases: c.options.maxCases,
2724
+ });
2725
+ const plan = buildEvalPlan({
2726
+ suiteId: c.options.suite,
2727
+ runLabel: c.options.runLabel ?? defaultEvalRunLabel(),
2728
+ workflowPath,
2729
+ casesPath: c.options.cases,
2730
+ loadedCases,
2731
+ });
2732
+ const wantsStructured = c.format === "json" || c.format === "jsonl" || formatRequestedJsonOutput();
2733
+ if (c.options.dryRun) {
2734
+ if (wantsStructured) {
2735
+ return c.ok({ suite: plan });
2736
+ }
2737
+ process.stdout.write(`${renderEvalPlan(plan)}\n`);
2738
+ return c.ok(undefined);
2739
+ }
2740
+ assertEvalReportWritable(process.cwd(), plan.suiteId, {
2741
+ path: c.options.report,
2742
+ force: c.options.force,
2743
+ });
2744
+ const workflow = await loadWorkflow(workflowPath);
2745
+ ensureSmithersTables(workflow.db);
2746
+ await assertEvalRunIdsAvailable(new SmithersDb(workflow.db), plan.cases);
2747
+ setupSqliteCleanup(workflow);
2748
+ const schema = resolveSchema(workflow.db);
2749
+ const resolvedWorkflowPath = resolve(process.cwd(), workflowPath);
2750
+ const rootDir = c.options.root ? resolve(process.cwd(), c.options.root) : dirname(resolvedWorkflowPath);
2751
+ const logDir = c.options.log ? c.options.logDir : null;
2752
+ const abort = setupAbortSignal();
2753
+ const startedAtMs = Date.now();
2754
+ const results = await runWithLimit(plan.cases, c.options.concurrency, async (testCase) => {
2755
+ const caseStartedAtMs = Date.now();
2756
+ process.stderr.write(`[eval:${plan.suiteId}] ${testCase.id} -> ${testCase.runId}\n`);
2757
+ try {
2758
+ const result = await Effect.runPromise(runWorkflow(workflow, {
2759
+ input: testCase.input,
2760
+ runId: testCase.runId,
2761
+ workflowPath: resolvedWorkflowPath,
2762
+ maxConcurrency: c.options.maxConcurrency,
2763
+ rootDir,
2764
+ logDir,
2765
+ allowNetwork: c.options.allowNetwork,
2766
+ maxOutputBytes: c.options.maxOutputBytes,
2767
+ toolTimeoutMs: c.options.toolTimeoutMs,
2768
+ annotations: {
2769
+ suiteId: plan.suiteId,
2770
+ caseId: testCase.id,
2771
+ ...testCase.annotations,
2772
+ },
2773
+ signal: abort.signal,
2774
+ }));
2775
+ const output = await loadOutputs(workflow.db, schema, testCase.runId);
2776
+ const durationMs = Date.now() - caseStartedAtMs;
2777
+ const evaluation = evaluateEvalCaseResult(testCase, {
2778
+ ...result,
2779
+ output,
2780
+ });
2781
+ return {
2782
+ caseId: testCase.id,
2783
+ runId: testCase.runId,
2784
+ expectedStatus: testCase.expected.status,
2785
+ status: result.status,
2786
+ passed: evaluation.passed,
2787
+ assertions: evaluation.assertions,
2788
+ durationMs,
2789
+ input: testCase.input,
2790
+ ...(c.options.includeOutput ? { output } : {}),
2791
+ metadata: testCase.metadata,
2792
+ };
2793
+ }
2794
+ catch (err) {
2795
+ const errorMessage = err?.message ?? String(err);
2796
+ const durationMs = Date.now() - caseStartedAtMs;
2797
+ const evaluation = evaluateEvalCaseResult(testCase, {
2798
+ status: "error",
2799
+ error: err,
2800
+ });
2801
+ return {
2802
+ caseId: testCase.id,
2803
+ runId: testCase.runId,
2804
+ expectedStatus: testCase.expected.status,
2805
+ status: "error",
2806
+ passed: evaluation.passed,
2807
+ assertions: evaluation.assertions,
2808
+ durationMs,
2809
+ input: testCase.input,
2810
+ error: errorMessage,
2811
+ metadata: testCase.metadata,
2812
+ };
2813
+ }
2814
+ });
2815
+ const finishedAtMs = Date.now();
2816
+ let report = buildEvalReport({
2817
+ plan,
2818
+ results,
2819
+ startedAtMs,
2820
+ finishedAtMs,
2821
+ });
2822
+ const reportPath = writeEvalReport(process.cwd(), report, {
2823
+ path: c.options.report,
2824
+ force: c.options.force,
2825
+ });
2826
+ report = { ...report, reportPath };
2827
+ process.exitCode = report.summary.failed > 0 ? 1 : 0;
2828
+ if (wantsStructured) {
2829
+ return c.ok({ eval: report });
2830
+ }
2831
+ process.stdout.write(`${renderEvalReport(report)}\n`);
2832
+ return c.ok(undefined);
2833
+ }
2834
+ catch (err) {
2835
+ if (err instanceof SmithersError) {
2836
+ return fail({ code: err.code, message: err.message, exitCode: 4 });
2837
+ }
2838
+ return fail({ code: "EVAL_FAILED", message: err?.message ?? String(err), exitCode: 1 });
2839
+ }
2840
+ },
2625
2841
  })
2626
2842
  // =========================================================================
2627
2843
  // smithers supervise
@@ -4487,7 +4703,7 @@ const cli = Cli.create({
4487
4703
  question: z.string().optional().describe("The question to ask"),
4488
4704
  }),
4489
4705
  options: z.object({
4490
- agent: z.enum(["claude", "codex", "gemini", "kimi", "pi"]).optional().describe("Explicitly select which agent CLI to use"),
4706
+ agent: z.enum(["claude", "codex", "antigravity", "gemini", "kimi", "pi"]).optional().describe("Explicitly select which agent CLI to use"),
4491
4707
  listAgents: z.boolean().default(false).describe("List detected agents plus their bootstrap mode and exit"),
4492
4708
  dumpPrompt: z.boolean().default(false).describe("Print the generated system prompt and exit"),
4493
4709
  toolSurface: z.enum(["semantic", "raw"]).default("semantic").describe("Choose which Smithers MCP tool surface to expose"),
@@ -4989,10 +5205,10 @@ wrapCliCommandHandlersWithInputBounds(cliCommands);
4989
5205
  // Main
4990
5206
  // ---------------------------------------------------------------------------
4991
5207
  const KNOWN_COMMANDS = new Set([
4992
- "init", "up", "supervise", "down", "ps", "logs", "events", "chat", "inspect", "node", "why", "approve", "deny",
4993
- "cancel", "graph", "revert", "scores", "observability", "workflow", "ask", "cron", "chat-create",
4994
- "replay", "diff", "fork", "timeline", "memory", "openapi", "token", "agents", "alerts",
4995
- "tree", "output", "rewind", "gui",
5208
+ ...cliCommands.keys(),
5209
+ "completions",
5210
+ "mcp",
5211
+ "skills",
4996
5212
  ]);
4997
5213
  /**
4998
5214
  * Rewrite `smithers .` or `smithers <path>` (when path looks like a directory) to `smithers gui <path>`.
@@ -5031,54 +5247,15 @@ function resolveCliColor(mode, stream) {
5031
5247
  if (process.env.NO_COLOR !== undefined && process.env.NO_COLOR.length > 0) return false;
5032
5248
  return Boolean(stream.isTTY);
5033
5249
  }
5034
- const BUILTIN_FLAGS_WITH_VALUES = new Set([
5035
- "--format",
5036
- "--filter-output",
5037
- "--token-limit",
5038
- "--token-offset",
5039
- ]);
5040
5250
  const WORKFLOW_UTILITY_COMMANDS = new Set([
5041
5251
  "run",
5042
5252
  "list",
5043
5253
  "path",
5044
5254
  "create",
5255
+ "inspect",
5256
+ "skills",
5045
5257
  "doctor",
5046
5258
  ]);
5047
- /**
5048
- * @param {string | undefined} value
5049
- * @returns {McpSurface}
5050
- */
5051
- function normalizeMcpSurface(value) {
5052
- const surface = value?.trim().toLowerCase();
5053
- if (surface === undefined || surface.length === 0) {
5054
- throw new Error("Missing value for --surface. Expected semantic, raw, or both.");
5055
- }
5056
- if (surface === "semantic" || surface === "raw" || surface === "both") {
5057
- return surface;
5058
- }
5059
- throw new Error(`Invalid --surface value: ${value}. Expected semantic, raw, or both.`);
5060
- }
5061
- /**
5062
- * @param {string[]} argv
5063
- */
5064
- function parseMcpSurfaceArgv(argv) {
5065
- let surface = "semantic";
5066
- const filtered = [];
5067
- for (let index = 0; index < argv.length; index++) {
5068
- const arg = argv[index];
5069
- if (arg === "--surface") {
5070
- surface = normalizeMcpSurface(argv[index + 1]);
5071
- index += 1;
5072
- continue;
5073
- }
5074
- if (arg.startsWith("--surface=")) {
5075
- surface = normalizeMcpSurface(arg.slice("--surface=".length));
5076
- continue;
5077
- }
5078
- filtered.push(arg);
5079
- }
5080
- return { surface, argv: filtered };
5081
- }
5082
5259
  /**
5083
5260
  * @param {ReturnType<typeof createSemanticMcpServer>} server
5084
5261
  */
@@ -5103,22 +5280,6 @@ function registerRawToolsOnMcpServer(server) {
5103
5280
  });
5104
5281
  }
5105
5282
  }
5106
- /**
5107
- * @param {string[]} argv
5108
- * @returns {number}
5109
- */
5110
- function findFirstPositionalIndex(argv, startIndex = 0) {
5111
- for (let index = startIndex; index < argv.length; index++) {
5112
- const arg = argv[index];
5113
- if (!arg.startsWith("-")) {
5114
- return index;
5115
- }
5116
- if (BUILTIN_FLAGS_WITH_VALUES.has(arg)) {
5117
- index++;
5118
- }
5119
- }
5120
- return -1;
5121
- }
5122
5283
  /**
5123
5284
  * @param {string[]} argv
5124
5285
  */
@@ -5287,17 +5448,6 @@ function rewriteEventsJsonFlagArgv(argv) {
5287
5448
  }
5288
5449
  return argv.map((arg) => (arg === "--json" ? "-j" : arg));
5289
5450
  }
5290
- /**
5291
- * Incur treats union-typed options as value-bearing flags, so a bare
5292
- * `--resume --run-id value` would consume `--run-id` as the resume value.
5293
- *
5294
- * @param {string[]} argv
5295
- */
5296
- function rewriteBareResumeFlagArgv(argv) {
5297
- return argv.map((arg, index) => arg === "--resume" && (argv[index + 1] === undefined || argv[index + 1]?.startsWith("-"))
5298
- ? "--resume=true"
5299
- : arg);
5300
- }
5301
5451
  /**
5302
5452
  * @param {unknown} value
5303
5453
  */
@@ -5326,7 +5476,7 @@ const CHAT_CREATE_PROMPT = [
5326
5476
  'When you are completely finished and want to hand control back to Smithers, return ONLY this raw JSON object with no prose, markdown, or code fence: {}.',
5327
5477
  ].join("\n\n");
5328
5478
  /**
5329
- * @param {"claude-code" | "codex" | "gemini"} agentId
5479
+ * @param {"claude-code" | "codex" | "antigravity" | "gemini"} agentId
5330
5480
  * @param {string} cwd
5331
5481
  */
5332
5482
  async function createChatAgent(agentId, cwd) {
@@ -5346,6 +5496,12 @@ async function createChatAgent(agentId, cwd) {
5346
5496
  skipGitRepoCheck: true,
5347
5497
  });
5348
5498
  }
5499
+ case "antigravity": {
5500
+ const { AntigravityAgent } = await import("@smithers-orchestrator/agents/AntigravityAgent");
5501
+ return new AntigravityAgent({
5502
+ cwd,
5503
+ });
5504
+ }
5349
5505
  case "gemini": {
5350
5506
  const { GeminiAgent } = await import("@smithers-orchestrator/agents/GeminiAgent");
5351
5507
  return new GeminiAgent({
@@ -5356,7 +5512,7 @@ async function createChatAgent(agentId, cwd) {
5356
5512
  }
5357
5513
  }
5358
5514
  /**
5359
- * @param {"claude-code" | "codex" | "gemini"} agentId
5515
+ * @param {"claude-code" | "codex" | "antigravity" | "gemini"} agentId
5360
5516
  * @param {string} cwd
5361
5517
  * @returns {Promise<import("@smithers-orchestrator/components/SmithersWorkflow").SmithersWorkflow<any>>}
5362
5518
  */
@@ -5461,6 +5617,12 @@ async function main() {
5461
5617
  ...argv.slice(firstPositionalIndex),
5462
5618
  ];
5463
5619
  }
5620
+ const commandIndex = findFirstPositionalIndex(argv);
5621
+ const command = commandIndex >= 0 ? argv[commandIndex] : undefined;
5622
+ if (command && !KNOWN_COMMANDS.has(command)) {
5623
+ console.error(`Unknown command: ${command}`);
5624
+ process.exit(4);
5625
+ }
5464
5626
  argv = rewriteBareResumeFlagArgv(argv);
5465
5627
  // --mcp mode: the MCP server needs to stay alive listening on stdin.
5466
5628
  if (argv.includes("--mcp")) {