@smithers-orchestrator/cli 0.20.4 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-detection.d.ts +16 -3
- package/dist/argv-utils.d.ts +21 -0
- package/dist/eval-suite.d.ts +201 -0
- package/dist/hijack.d.ts +1 -1
- package/dist/json-args.d.ts +24 -0
- package/dist/token-store.d.ts +8 -0
- package/dist/workflows.d.ts +30 -1
- package/package.json +16 -16
- package/src/AgentAvailability.ts +3 -1
- package/src/AskOptions.ts +1 -1
- package/src/DiscoveredWorkflow.ts +4 -0
- package/src/NativeHijackEngine.ts +1 -0
- package/src/agent-commands/agentAddWizard.js +16 -3
- package/src/agent-commands/regenerateAgentsTsIfPresent.js +15 -2
- package/src/agent-commands/runAgentAdd.js +14 -2
- package/src/agent-detection.js +123 -22
- package/src/argv-utils.js +73 -0
- package/src/ask.js +13 -2
- package/src/eval-suite.js +560 -0
- package/src/hijack.js +9 -0
- package/src/index.js +335 -173
- package/src/json-args.js +59 -0
- package/src/mcp/semantic-tools.js +9 -1
- package/src/token-store.js +39 -0
- package/src/workflow-pack.js +238 -10
- package/src/workflows.js +193 -5
package/src/index.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
import { setJsonMode } from "./util/logger.ts";
|
|
3
|
+
import { findFirstPositionalIndex, parseMcpSurfaceArgv, rewriteBareResumeFlagArgv } from "./argv-utils.js";
|
|
4
|
+
import { CLI_JSON_ARGUMENT_MAX_BYTES, parseJsonArgument, parseJsonInput } from "./json-args.js";
|
|
3
5
|
import { resolve, dirname, basename } from "node:path";
|
|
4
6
|
import { pathToFileURL } from "node:url";
|
|
5
|
-
import { readFileSync, existsSync, openSync, statSync
|
|
7
|
+
import { readFileSync, existsSync, openSync, statSync } from "node:fs";
|
|
6
8
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
7
9
|
import { Effect, Fiber } from "effect";
|
|
8
10
|
import { Cli, Mcp as IncurMcp, z } from "incur";
|
|
@@ -39,7 +41,18 @@ import { listAccounts, removeAccount } from "@smithers-orchestrator/accounts";
|
|
|
39
41
|
import { runAgentAdd, pingAccount } from "./agent-commands/runAgentAdd.js";
|
|
40
42
|
import { agentAddWizard } from "./agent-commands/agentAddWizard.js";
|
|
41
43
|
import { initWorkflowPack, getWorkflowFollowUpCtas } from "./workflow-pack.js";
|
|
42
|
-
import { discoverWorkflows, resolveWorkflow, createWorkflowFile } from "./workflows.js";
|
|
44
|
+
import { discoverWorkflows, resolveWorkflow, createWorkflowFile, renderWorkflowSkill, writeWorkflowSkillFiles } from "./workflows.js";
|
|
45
|
+
import {
|
|
46
|
+
assertEvalRunIdsAvailable,
|
|
47
|
+
assertEvalReportWritable,
|
|
48
|
+
buildEvalPlan,
|
|
49
|
+
buildEvalReport,
|
|
50
|
+
evaluateEvalCaseResult,
|
|
51
|
+
loadEvalCases,
|
|
52
|
+
renderEvalPlan,
|
|
53
|
+
renderEvalReport,
|
|
54
|
+
writeEvalReport,
|
|
55
|
+
} from "./eval-suite.js";
|
|
43
56
|
import { ask } from "./ask.js";
|
|
44
57
|
import { runScheduler } from "./scheduler.js";
|
|
45
58
|
import { resumeRunDetached } from "./resume-detached.js";
|
|
@@ -47,6 +60,7 @@ import { formatCliAgentCapabilityDoctorReport, getCliAgentCapabilityDoctorReport
|
|
|
47
60
|
import { parseDurationMs, supervisorLoopEffect, } from "./supervisor.js";
|
|
48
61
|
import { WATCH_MIN_INTERVAL_MS, runWatchLoop, watchIntervalSecondsToMs, } from "./watch.js";
|
|
49
62
|
import { createSemanticMcpServer } from "./mcp/semantic-server.js";
|
|
63
|
+
import { parseTokenScopes, readSmithersTokenStore, smithersTokenStorePath, writeSmithersTokenStore, } from "./token-store.js";
|
|
50
64
|
import pc from "picocolors";
|
|
51
65
|
import crypto from "node:crypto";
|
|
52
66
|
import React from "react";
|
|
@@ -105,43 +119,9 @@ function readPackageVersion() {
|
|
|
105
119
|
return "unknown";
|
|
106
120
|
}
|
|
107
121
|
}
|
|
108
|
-
function smithersTokenStorePath() {
|
|
109
|
-
return process.env.SMITHERS_TOKEN_STORE ?? resolve(process.env.HOME ?? process.cwd(), ".smithers", "tokens.json");
|
|
110
|
-
}
|
|
111
|
-
function readSmithersTokenStore() {
|
|
112
|
-
const path = smithersTokenStorePath();
|
|
113
|
-
if (!existsSync(path)) {
|
|
114
|
-
return { tokens: {} };
|
|
115
|
-
}
|
|
116
|
-
try {
|
|
117
|
-
const parsed = JSON.parse(readFileSync(path, "utf8"));
|
|
118
|
-
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
|
|
119
|
-
return { tokens: {} };
|
|
120
|
-
}
|
|
121
|
-
const tokens = parsed.tokens && typeof parsed.tokens === "object" && !Array.isArray(parsed.tokens)
|
|
122
|
-
? parsed.tokens
|
|
123
|
-
: {};
|
|
124
|
-
return { tokens };
|
|
125
|
-
}
|
|
126
|
-
catch {
|
|
127
|
-
return { tokens: {} };
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
function writeSmithersTokenStore(store) {
|
|
131
|
-
const path = smithersTokenStorePath();
|
|
132
|
-
mkdirSync(dirname(path), { recursive: true });
|
|
133
|
-
writeFileSync(path, `${JSON.stringify(store, null, 2)}\n`, { mode: 0o600 });
|
|
134
|
-
}
|
|
135
|
-
function parseTokenScopes(raw) {
|
|
136
|
-
return raw
|
|
137
|
-
.split(/[,\s]+/)
|
|
138
|
-
.map((scope) => scope.trim())
|
|
139
|
-
.filter(Boolean);
|
|
140
|
-
}
|
|
141
122
|
const CLI_ARGUMENT_MAX_LENGTH = 4096;
|
|
142
123
|
const CLI_IDENTIFIER_MAX_LENGTH = 256;
|
|
143
124
|
const CLI_TEXT_ARGUMENT_MAX_LENGTH = 64 * 1024;
|
|
144
|
-
const CLI_JSON_ARGUMENT_MAX_BYTES = 1024 * 1024;
|
|
145
125
|
const CLI_HANDLER_BOUNDS_WRAPPED = Symbol("smithers.cliHandlerBoundsWrapped");
|
|
146
126
|
/**
|
|
147
127
|
* @param {string} path
|
|
@@ -240,55 +220,6 @@ function wrapCliCommandHandlersWithInputBounds(commands) {
|
|
|
240
220
|
entry[CLI_HANDLER_BOUNDS_WRAPPED] = true;
|
|
241
221
|
}
|
|
242
222
|
}
|
|
243
|
-
/**
|
|
244
|
-
* @param {string | undefined} raw
|
|
245
|
-
* @param {string} label
|
|
246
|
-
* @param {FailFn} fail
|
|
247
|
-
*/
|
|
248
|
-
function parseJsonInput(raw, label, fail) {
|
|
249
|
-
if (!raw)
|
|
250
|
-
return undefined;
|
|
251
|
-
try {
|
|
252
|
-
return JSON.parse(raw);
|
|
253
|
-
}
|
|
254
|
-
catch (err) {
|
|
255
|
-
return fail({
|
|
256
|
-
code: "INVALID_JSON",
|
|
257
|
-
message: `Invalid JSON for ${label}: ${err?.message ?? String(err)}`,
|
|
258
|
-
exitCode: 4,
|
|
259
|
-
});
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
/**
|
|
263
|
-
* @param {string | undefined} raw
|
|
264
|
-
* @param {FailFn} fail
|
|
265
|
-
* @returns {Record<string, string | number | boolean> | undefined}
|
|
266
|
-
*/
|
|
267
|
-
function parseAnnotations(raw, fail) {
|
|
268
|
-
const parsed = parseJsonInput(raw, "annotations", fail);
|
|
269
|
-
if (parsed === undefined)
|
|
270
|
-
return undefined;
|
|
271
|
-
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
|
|
272
|
-
return fail({
|
|
273
|
-
code: "INVALID_ANNOTATIONS",
|
|
274
|
-
message: "Run annotations must be a flat JSON object of string/number/boolean values",
|
|
275
|
-
exitCode: 4,
|
|
276
|
-
});
|
|
277
|
-
}
|
|
278
|
-
/** @type {Record<string, string | number | boolean>} */
|
|
279
|
-
const annotations = {};
|
|
280
|
-
for (const [key, value] of Object.entries(parsed)) {
|
|
281
|
-
if (!["string", "number", "boolean"].includes(typeof value)) {
|
|
282
|
-
return fail({
|
|
283
|
-
code: "INVALID_ANNOTATIONS",
|
|
284
|
-
message: `Run annotation ${key} must be a string, number, or boolean`,
|
|
285
|
-
exitCode: 4,
|
|
286
|
-
});
|
|
287
|
-
}
|
|
288
|
-
annotations[key] = /** @type {string | number | boolean} */ (value);
|
|
289
|
-
}
|
|
290
|
-
return annotations;
|
|
291
|
-
}
|
|
292
223
|
/**
|
|
293
224
|
* @param {string | undefined} status
|
|
294
225
|
*/
|
|
@@ -1308,6 +1239,24 @@ const upOptions = z.object({
|
|
|
1308
1239
|
authToken: z.string().optional().describe("Bearer token for HTTP auth (or set SMITHERS_API_KEY)"),
|
|
1309
1240
|
metrics: z.boolean().default(true).describe("Expose /metrics endpoint (with --serve)"),
|
|
1310
1241
|
});
|
|
1242
|
+
const evalOptions = z.object({
|
|
1243
|
+
cases: z.string().describe("JSON or JSONL eval case file"),
|
|
1244
|
+
suite: z.string().optional().describe("Stable suite ID used in run IDs and report paths"),
|
|
1245
|
+
runLabel: z.string().optional().describe("Run label appended to eval run IDs; defaults to current UTC timestamp plus a nonce"),
|
|
1246
|
+
dryRun: z.boolean().default(false).describe("Plan the suite without launching runs"),
|
|
1247
|
+
concurrency: z.number().int().min(1).max(16).default(1).describe("Number of eval cases to run at once"),
|
|
1248
|
+
maxCases: z.number().int().min(1).optional().describe("Run only the first N cases"),
|
|
1249
|
+
report: z.string().optional().describe("Write report JSON to this path"),
|
|
1250
|
+
force: z.boolean().default(false).describe("Overwrite an existing eval report"),
|
|
1251
|
+
includeOutput: z.boolean().default(true).describe("Include workflow outputs in the report"),
|
|
1252
|
+
maxConcurrency: z.number().int().min(1).optional().describe("Per-workflow max task concurrency"),
|
|
1253
|
+
root: z.string().optional().describe("Tool sandbox root directory"),
|
|
1254
|
+
log: z.boolean().default(true).describe("Enable NDJSON event log file output"),
|
|
1255
|
+
logDir: z.string().optional().describe("NDJSON event logs directory"),
|
|
1256
|
+
allowNetwork: z.boolean().default(false).describe("Allow bash tool network requests"),
|
|
1257
|
+
maxOutputBytes: z.number().int().min(1).optional().describe("Max bytes a single tool call can return"),
|
|
1258
|
+
toolTimeoutMs: z.number().int().min(1).optional().describe("Max wall-clock time per tool call in ms"),
|
|
1259
|
+
});
|
|
1311
1260
|
const superviseOptions = z.object({
|
|
1312
1261
|
dryRun: z.boolean().default(false).describe("Show which stale runs would be resumed, without acting"),
|
|
1313
1262
|
interval: z.string().default("10s").describe("Poll interval (e.g. 10s, 30s, 1m)"),
|
|
@@ -1347,7 +1296,7 @@ const chatOptions = z.object({
|
|
|
1347
1296
|
stderr: z.boolean().default(true).describe("Include agent stderr output"),
|
|
1348
1297
|
});
|
|
1349
1298
|
const chatCreateOptions = z.object({
|
|
1350
|
-
agent: z.enum(["claude-code", "codex", "gemini"]).describe("CLI agent engine to launch"),
|
|
1299
|
+
agent: z.enum(["claude-code", "codex", "antigravity", "gemini"]).describe("CLI agent engine to launch"),
|
|
1351
1300
|
cwd: z.string().optional().describe("Working directory for the chat session (default: current directory)"),
|
|
1352
1301
|
});
|
|
1353
1302
|
const inspectArgs = z.object({
|
|
@@ -1438,6 +1387,13 @@ const workflowPathArgs = z.object({
|
|
|
1438
1387
|
const workflowDoctorArgs = z.object({
|
|
1439
1388
|
name: z.string().optional().describe("Workflow ID"),
|
|
1440
1389
|
});
|
|
1390
|
+
const workflowSkillArgs = z.object({
|
|
1391
|
+
name: z.string().optional().describe("Workflow ID, or omit to generate skills for all workflows"),
|
|
1392
|
+
});
|
|
1393
|
+
const workflowSkillOptions = z.object({
|
|
1394
|
+
output: z.string().optional().describe("Output file for one workflow, or output directory for all workflows"),
|
|
1395
|
+
force: z.boolean().default(false).describe("Overwrite existing skill files"),
|
|
1396
|
+
});
|
|
1441
1397
|
const workflowRunOptions = upOptions.extend({
|
|
1442
1398
|
prompt: z.string().optional().describe("Prompt text mapped to input.prompt when --input is omitted"),
|
|
1443
1399
|
});
|
|
@@ -1455,6 +1411,54 @@ function normalizeWorkflowRunOptions(options) {
|
|
|
1455
1411
|
root: options.root ?? ".",
|
|
1456
1412
|
};
|
|
1457
1413
|
}
|
|
1414
|
+
/**
 * Report whether the command line explicitly asks for structured output,
 * i.e. `--format json` / `--format jsonl` or the `--format=json` /
 * `--format=jsonl` spelling.
 *
 * Scanning rules:
 * - The first space-separated `--format <value>` is decisive: the function
 *   returns right there, `true` only when the value is `json` or `jsonl`.
 * - A `--format=<value>` token only ever answers `true`; other values are
 *   skipped and scanning continues.
 * - Scanning stops at a bare `--`, so positional text after the
 *   end-of-options marker is never mistaken for a flag.
 *
 * @param {readonly string[]} [argv] Arguments to scan; defaults to `process.argv`.
 * @returns {boolean} `true` when a structured (`json`/`jsonl`) format was requested.
 */
function formatRequestedJsonOutput(argv = process.argv) {
    for (let index = 0; index < argv.length; index += 1) {
        const arg = argv[index];
        if (arg === "--") {
            // End of options: everything after this is positional data.
            break;
        }
        if (arg === "--format") {
            const value = argv[index + 1];
            return value === "json" || value === "jsonl";
        }
        if (arg === "--format=json" || arg === "--format=jsonl") {
            return true;
        }
    }
    return false;
}
|
|
1427
|
+
/**
 * Build the label used for eval run IDs when the caller supplies none: a
 * compact UTC timestamp (`YYYYMMDDHHMMSS`) followed by `-` and the first
 * eight characters of a random UUID, e.g. `20240102030405-1a2b3c4d`.
 *
 * @param {Date} [now] Instant to stamp the label with; defaults to the current
 *   time. Injectable so the timestamp prefix can be made reproducible.
 * @returns {string} The generated label.
 */
function defaultEvalRunLabel(now = new Date()) {
    // "2024-01-02T03:04:05.678Z" -> "20240102030405678"; keeping the first 14
    // characters drops the milliseconds.
    const timestamp = now.toISOString().replace(/[-:TZ.]/g, "").slice(0, 14);
    // Random suffix — presumably so labels created within the same second
    // still differ (the CLI help calls it a "nonce").
    const nonce = crypto.randomUUID().slice(0, 8);
    return `${timestamp}-${nonce}`;
}
|
|
1431
|
+
/**
 * Turn the `<workflow>` argument of `smithers eval` into a workflow path.
 *
 * An argument that names something existing on disk (resolved against the
 * current working directory) is handed back unchanged. Anything else is
 * looked up through `resolveWorkflow`, and the `entryFile` of the result is
 * returned.
 *
 * @param {string} workflowInput Path or workflow identifier as given on the command line.
 * @returns The untouched input when it exists on disk, otherwise the resolved workflow's `entryFile`.
 */
function resolveWorkflowPathForEval(workflowInput) {
    const cwd = process.cwd();
    const existsOnDisk = existsSync(resolve(cwd, workflowInput));
    return existsOnDisk
        ? workflowInput
        : resolveWorkflow(workflowInput, cwd).entryFile;
}
|
|
1441
|
+
/**
 * Run `worker` over every item with at most `limit` invocations in flight and
 * collect the results in input order.
 *
 * Up to `limit` lanes are started. Each lane repeatedly claims the next
 * unclaimed index and awaits the worker for it, so every result is stored at
 * the index of its item regardless of completion order.
 *
 * `limit` is normalised to a whole number of at least 1: fractional values are
 * floored, and zero, negative or non-numeric values fall back to one lane
 * (sequential execution) instead of silently running nothing.
 *
 * If a worker rejects, the returned promise rejects with that error. Lanes
 * that are still running are not cancelled and keep claiming remaining items.
 *
 * @template T
 * @template R
 * @param {T[]} items
 * @param {number} limit Maximum number of concurrent worker invocations.
 * @param {(item: T, index: number) => Promise<R>} worker
 * @returns {Promise<R[]>}
 */
async function runWithLimit(items, limit, worker) {
    const results = new Array(items.length);
    const requestedLanes = Math.floor(Number(limit));
    const laneCount = Math.min(requestedLanes >= 1 ? requestedLanes : 1, items.length);
    let cursor = 0;
    const runLane = async () => {
        while (cursor < items.length) {
            // Claim the index before awaiting so no other lane picks it up.
            const index = cursor;
            cursor += 1;
            results[index] = await worker(items[index], index);
        }
    };
    await Promise.all(Array.from({ length: laneCount }, () => runLane()));
    return results;
}
|
|
1458
1462
|
/**
|
|
1459
1463
|
* @param {string} intervalRaw
|
|
1460
1464
|
* @param {string} staleThresholdRaw
|
|
@@ -1515,8 +1519,42 @@ function normalizeEventsQuery(options) {
|
|
|
1515
1519
|
async function executeUpCommand(c, workflowPath, options, fail) {
|
|
1516
1520
|
try {
|
|
1517
1521
|
const resolvedWorkflowPath = resolve(process.cwd(), workflowPath);
|
|
1518
|
-
|
|
1519
|
-
|
|
1522
|
+
let input;
|
|
1523
|
+
let annotations;
|
|
1524
|
+
try {
|
|
1525
|
+
input = parseJsonArgument(options.input, "input") ?? {};
|
|
1526
|
+
const parsedAnnotations = parseJsonArgument(options.annotations, "annotations");
|
|
1527
|
+
if (parsedAnnotations === undefined) {
|
|
1528
|
+
annotations = undefined;
|
|
1529
|
+
}
|
|
1530
|
+
else if (!parsedAnnotations || typeof parsedAnnotations !== "object" || Array.isArray(parsedAnnotations)) {
|
|
1531
|
+
return fail({
|
|
1532
|
+
code: "INVALID_ANNOTATIONS",
|
|
1533
|
+
message: "Run annotations must be a flat JSON object of string/number/boolean values",
|
|
1534
|
+
exitCode: 4,
|
|
1535
|
+
});
|
|
1536
|
+
}
|
|
1537
|
+
else {
|
|
1538
|
+
annotations = {};
|
|
1539
|
+
for (const [key, value] of Object.entries(parsedAnnotations)) {
|
|
1540
|
+
if (!["string", "number", "boolean"].includes(typeof value)) {
|
|
1541
|
+
return fail({
|
|
1542
|
+
code: "INVALID_ANNOTATIONS",
|
|
1543
|
+
message: `Run annotation ${key} must be a string, number, or boolean`,
|
|
1544
|
+
exitCode: 4,
|
|
1545
|
+
});
|
|
1546
|
+
}
|
|
1547
|
+
annotations[key] = /** @type {string | number | boolean} */ (value);
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
1550
|
+
}
|
|
1551
|
+
catch (err) {
|
|
1552
|
+
return fail({
|
|
1553
|
+
code: err instanceof SmithersError ? err.code : "INVALID_JSON",
|
|
1554
|
+
message: err?.message ?? String(err),
|
|
1555
|
+
exitCode: 4,
|
|
1556
|
+
});
|
|
1557
|
+
}
|
|
1520
1558
|
const { resume, resumeRunId } = normalizeResumeOption(options.resume);
|
|
1521
1559
|
const runId = options.runId ?? resumeRunId;
|
|
1522
1560
|
// Detached mode: spawn ourselves as a background process
|
|
@@ -1526,9 +1564,9 @@ async function executeUpCommand(c, workflowPath, options, fail) {
|
|
|
1526
1564
|
if (runId)
|
|
1527
1565
|
childArgs.push("--run-id", runId);
|
|
1528
1566
|
if (options.input)
|
|
1529
|
-
childArgs.push("--input", options.input);
|
|
1567
|
+
childArgs.push("--input", options.input === "-" ? JSON.stringify(input) : options.input);
|
|
1530
1568
|
if (options.annotations)
|
|
1531
|
-
childArgs.push("--annotations", options.annotations);
|
|
1569
|
+
childArgs.push("--annotations", options.annotations === "-" ? JSON.stringify(annotations ?? {}) : options.annotations);
|
|
1532
1570
|
if (options.maxConcurrency)
|
|
1533
1571
|
childArgs.push("--max-concurrency", String(options.maxConcurrency));
|
|
1534
1572
|
if (options.root)
|
|
@@ -1611,6 +1649,13 @@ async function executeUpCommand(c, workflowPath, options, fail) {
|
|
|
1611
1649
|
exitCode: 4,
|
|
1612
1650
|
});
|
|
1613
1651
|
}
|
|
1652
|
+
if (Boolean(options.resumeClaimOwner) !== Boolean(options.resumeClaimHeartbeat)) {
|
|
1653
|
+
return fail({
|
|
1654
|
+
code: "INVALID_RESUME_CLAIM",
|
|
1655
|
+
message: "--resume-claim-owner and --resume-claim-heartbeat must be provided together.",
|
|
1656
|
+
exitCode: 4,
|
|
1657
|
+
});
|
|
1658
|
+
}
|
|
1614
1659
|
const workflow = await loadWorkflow(workflowPath);
|
|
1615
1660
|
ensureSmithersTables(workflow.db);
|
|
1616
1661
|
if (options.hot) {
|
|
@@ -1644,13 +1689,6 @@ async function executeUpCommand(c, workflowPath, options, fail) {
|
|
|
1644
1689
|
const logDir = options.log ? options.logDir : null;
|
|
1645
1690
|
const onProgress = buildProgressReporter();
|
|
1646
1691
|
const abort = setupAbortSignal();
|
|
1647
|
-
if (Boolean(options.resumeClaimOwner) !== Boolean(options.resumeClaimHeartbeat)) {
|
|
1648
|
-
return fail({
|
|
1649
|
-
code: "INVALID_RESUME_CLAIM",
|
|
1650
|
-
message: "--resume-claim-owner and --resume-claim-heartbeat must be provided together.",
|
|
1651
|
-
exitCode: 4,
|
|
1652
|
-
});
|
|
1653
|
-
}
|
|
1654
1692
|
const resumeClaim = options.resumeClaimOwner && options.resumeClaimHeartbeat
|
|
1655
1693
|
? {
|
|
1656
1694
|
claimOwnerId: options.resumeClaimOwner,
|
|
@@ -1870,6 +1908,49 @@ const workflowCli = Cli.create({
|
|
|
1870
1908
|
});
|
|
1871
1909
|
}
|
|
1872
1910
|
},
|
|
1911
|
+
})
|
|
1912
|
+
.command("inspect", {
|
|
1913
|
+
description: "Show workflow metadata and an agent-facing skill preview.",
|
|
1914
|
+
args: workflowPathArgs,
|
|
1915
|
+
run(c) {
|
|
1916
|
+
const workflow = resolveWorkflow(c.args.name, process.cwd());
|
|
1917
|
+
return c.ok({
|
|
1918
|
+
workflow,
|
|
1919
|
+
skillPreview: renderWorkflowSkill(workflow, { root: process.cwd() }),
|
|
1920
|
+
});
|
|
1921
|
+
},
|
|
1922
|
+
})
|
|
1923
|
+
.command("skills", {
|
|
1924
|
+
description: "Generate agent-facing skill docs for local workflows.",
|
|
1925
|
+
args: workflowSkillArgs,
|
|
1926
|
+
options: workflowSkillOptions,
|
|
1927
|
+
run(c) {
|
|
1928
|
+
const fail = (opts) => {
|
|
1929
|
+
commandExitOverride = opts.exitCode ?? 1;
|
|
1930
|
+
return c.error(opts);
|
|
1931
|
+
};
|
|
1932
|
+
try {
|
|
1933
|
+
return c.ok(writeWorkflowSkillFiles(process.cwd(), {
|
|
1934
|
+
workflowId: c.args.name ?? "all",
|
|
1935
|
+
output: c.options.output,
|
|
1936
|
+
force: c.options.force,
|
|
1937
|
+
}));
|
|
1938
|
+
}
|
|
1939
|
+
catch (err) {
|
|
1940
|
+
if (err instanceof SmithersError) {
|
|
1941
|
+
return fail({
|
|
1942
|
+
code: err.code,
|
|
1943
|
+
message: err.message,
|
|
1944
|
+
exitCode: 4,
|
|
1945
|
+
});
|
|
1946
|
+
}
|
|
1947
|
+
return fail({
|
|
1948
|
+
code: "WORKFLOW_SKILLS_FAILED",
|
|
1949
|
+
message: err?.message ?? String(err),
|
|
1950
|
+
exitCode: 1,
|
|
1951
|
+
});
|
|
1952
|
+
}
|
|
1953
|
+
},
|
|
1873
1954
|
})
|
|
1874
1955
|
.command("doctor", {
|
|
1875
1956
|
description: "Inspect workflow discovery, preload files, and detected agents.",
|
|
@@ -2039,7 +2120,7 @@ const agentsCli = Cli.create({
|
|
|
2039
2120
|
description: "Register a Smithers agent account (interactive wizard, or non-interactive via flags).",
|
|
2040
2121
|
options: z.object({
|
|
2041
2122
|
provider: z.enum([
|
|
2042
|
-
"claude-code", "codex", "gemini", "kimi",
|
|
2123
|
+
"claude-code", "antigravity", "codex", "gemini", "kimi",
|
|
2043
2124
|
"anthropic-api", "openai-api", "gemini-api",
|
|
2044
2125
|
]).optional().describe("Provider id; omit to launch the interactive wizard"),
|
|
2045
2126
|
label: z.string().optional().describe("Unique label, e.g. 'claude-work'"),
|
|
@@ -2622,6 +2703,141 @@ const cli = Cli.create({
|
|
|
2622
2703
|
};
|
|
2623
2704
|
return executeUpCommand(c, c.args.workflow, c.options, fail);
|
|
2624
2705
|
},
|
|
2706
|
+
})
|
|
2707
|
+
// =========================================================================
|
|
2708
|
+
// smithers eval <workflow>
|
|
2709
|
+
// =========================================================================
|
|
2710
|
+
.command("eval", {
|
|
2711
|
+
description: "Run a workflow over a JSON/JSONL eval suite and write a regression report.",
|
|
2712
|
+
args: workflowArgs,
|
|
2713
|
+
options: evalOptions,
|
|
2714
|
+
alias: { cases: "c", suite: "s", dryRun: "n", concurrency: "j", report: "r" },
|
|
2715
|
+
async run(c) {
|
|
2716
|
+
const fail = (opts) => {
|
|
2717
|
+
commandExitOverride = opts.exitCode ?? 1;
|
|
2718
|
+
return c.error(opts);
|
|
2719
|
+
};
|
|
2720
|
+
try {
|
|
2721
|
+
const workflowPath = resolveWorkflowPathForEval(c.args.workflow);
|
|
2722
|
+
const loadedCases = loadEvalCases(process.cwd(), c.options.cases, {
|
|
2723
|
+
maxCases: c.options.maxCases,
|
|
2724
|
+
});
|
|
2725
|
+
const plan = buildEvalPlan({
|
|
2726
|
+
suiteId: c.options.suite,
|
|
2727
|
+
runLabel: c.options.runLabel ?? defaultEvalRunLabel(),
|
|
2728
|
+
workflowPath,
|
|
2729
|
+
casesPath: c.options.cases,
|
|
2730
|
+
loadedCases,
|
|
2731
|
+
});
|
|
2732
|
+
const wantsStructured = c.format === "json" || c.format === "jsonl" || formatRequestedJsonOutput();
|
|
2733
|
+
if (c.options.dryRun) {
|
|
2734
|
+
if (wantsStructured) {
|
|
2735
|
+
return c.ok({ suite: plan });
|
|
2736
|
+
}
|
|
2737
|
+
process.stdout.write(`${renderEvalPlan(plan)}\n`);
|
|
2738
|
+
return c.ok(undefined);
|
|
2739
|
+
}
|
|
2740
|
+
assertEvalReportWritable(process.cwd(), plan.suiteId, {
|
|
2741
|
+
path: c.options.report,
|
|
2742
|
+
force: c.options.force,
|
|
2743
|
+
});
|
|
2744
|
+
const workflow = await loadWorkflow(workflowPath);
|
|
2745
|
+
ensureSmithersTables(workflow.db);
|
|
2746
|
+
await assertEvalRunIdsAvailable(new SmithersDb(workflow.db), plan.cases);
|
|
2747
|
+
setupSqliteCleanup(workflow);
|
|
2748
|
+
const schema = resolveSchema(workflow.db);
|
|
2749
|
+
const resolvedWorkflowPath = resolve(process.cwd(), workflowPath);
|
|
2750
|
+
const rootDir = c.options.root ? resolve(process.cwd(), c.options.root) : dirname(resolvedWorkflowPath);
|
|
2751
|
+
const logDir = c.options.log ? c.options.logDir : null;
|
|
2752
|
+
const abort = setupAbortSignal();
|
|
2753
|
+
const startedAtMs = Date.now();
|
|
2754
|
+
const results = await runWithLimit(plan.cases, c.options.concurrency, async (testCase) => {
|
|
2755
|
+
const caseStartedAtMs = Date.now();
|
|
2756
|
+
process.stderr.write(`[eval:${plan.suiteId}] ${testCase.id} -> ${testCase.runId}\n`);
|
|
2757
|
+
try {
|
|
2758
|
+
const result = await Effect.runPromise(runWorkflow(workflow, {
|
|
2759
|
+
input: testCase.input,
|
|
2760
|
+
runId: testCase.runId,
|
|
2761
|
+
workflowPath: resolvedWorkflowPath,
|
|
2762
|
+
maxConcurrency: c.options.maxConcurrency,
|
|
2763
|
+
rootDir,
|
|
2764
|
+
logDir,
|
|
2765
|
+
allowNetwork: c.options.allowNetwork,
|
|
2766
|
+
maxOutputBytes: c.options.maxOutputBytes,
|
|
2767
|
+
toolTimeoutMs: c.options.toolTimeoutMs,
|
|
2768
|
+
annotations: {
|
|
2769
|
+
suiteId: plan.suiteId,
|
|
2770
|
+
caseId: testCase.id,
|
|
2771
|
+
...testCase.annotations,
|
|
2772
|
+
},
|
|
2773
|
+
signal: abort.signal,
|
|
2774
|
+
}));
|
|
2775
|
+
const output = await loadOutputs(workflow.db, schema, testCase.runId);
|
|
2776
|
+
const durationMs = Date.now() - caseStartedAtMs;
|
|
2777
|
+
const evaluation = evaluateEvalCaseResult(testCase, {
|
|
2778
|
+
...result,
|
|
2779
|
+
output,
|
|
2780
|
+
});
|
|
2781
|
+
return {
|
|
2782
|
+
caseId: testCase.id,
|
|
2783
|
+
runId: testCase.runId,
|
|
2784
|
+
expectedStatus: testCase.expected.status,
|
|
2785
|
+
status: result.status,
|
|
2786
|
+
passed: evaluation.passed,
|
|
2787
|
+
assertions: evaluation.assertions,
|
|
2788
|
+
durationMs,
|
|
2789
|
+
input: testCase.input,
|
|
2790
|
+
...(c.options.includeOutput ? { output } : {}),
|
|
2791
|
+
metadata: testCase.metadata,
|
|
2792
|
+
};
|
|
2793
|
+
}
|
|
2794
|
+
catch (err) {
|
|
2795
|
+
const errorMessage = err?.message ?? String(err);
|
|
2796
|
+
const durationMs = Date.now() - caseStartedAtMs;
|
|
2797
|
+
const evaluation = evaluateEvalCaseResult(testCase, {
|
|
2798
|
+
status: "error",
|
|
2799
|
+
error: err,
|
|
2800
|
+
});
|
|
2801
|
+
return {
|
|
2802
|
+
caseId: testCase.id,
|
|
2803
|
+
runId: testCase.runId,
|
|
2804
|
+
expectedStatus: testCase.expected.status,
|
|
2805
|
+
status: "error",
|
|
2806
|
+
passed: evaluation.passed,
|
|
2807
|
+
assertions: evaluation.assertions,
|
|
2808
|
+
durationMs,
|
|
2809
|
+
input: testCase.input,
|
|
2810
|
+
error: errorMessage,
|
|
2811
|
+
metadata: testCase.metadata,
|
|
2812
|
+
};
|
|
2813
|
+
}
|
|
2814
|
+
});
|
|
2815
|
+
const finishedAtMs = Date.now();
|
|
2816
|
+
let report = buildEvalReport({
|
|
2817
|
+
plan,
|
|
2818
|
+
results,
|
|
2819
|
+
startedAtMs,
|
|
2820
|
+
finishedAtMs,
|
|
2821
|
+
});
|
|
2822
|
+
const reportPath = writeEvalReport(process.cwd(), report, {
|
|
2823
|
+
path: c.options.report,
|
|
2824
|
+
force: c.options.force,
|
|
2825
|
+
});
|
|
2826
|
+
report = { ...report, reportPath };
|
|
2827
|
+
process.exitCode = report.summary.failed > 0 ? 1 : 0;
|
|
2828
|
+
if (wantsStructured) {
|
|
2829
|
+
return c.ok({ eval: report });
|
|
2830
|
+
}
|
|
2831
|
+
process.stdout.write(`${renderEvalReport(report)}\n`);
|
|
2832
|
+
return c.ok(undefined);
|
|
2833
|
+
}
|
|
2834
|
+
catch (err) {
|
|
2835
|
+
if (err instanceof SmithersError) {
|
|
2836
|
+
return fail({ code: err.code, message: err.message, exitCode: 4 });
|
|
2837
|
+
}
|
|
2838
|
+
return fail({ code: "EVAL_FAILED", message: err?.message ?? String(err), exitCode: 1 });
|
|
2839
|
+
}
|
|
2840
|
+
},
|
|
2625
2841
|
})
|
|
2626
2842
|
// =========================================================================
|
|
2627
2843
|
// smithers supervise
|
|
@@ -4487,7 +4703,7 @@ const cli = Cli.create({
|
|
|
4487
4703
|
question: z.string().optional().describe("The question to ask"),
|
|
4488
4704
|
}),
|
|
4489
4705
|
options: z.object({
|
|
4490
|
-
agent: z.enum(["claude", "codex", "gemini", "kimi", "pi"]).optional().describe("Explicitly select which agent CLI to use"),
|
|
4706
|
+
agent: z.enum(["claude", "codex", "antigravity", "gemini", "kimi", "pi"]).optional().describe("Explicitly select which agent CLI to use"),
|
|
4491
4707
|
listAgents: z.boolean().default(false).describe("List detected agents plus their bootstrap mode and exit"),
|
|
4492
4708
|
dumpPrompt: z.boolean().default(false).describe("Print the generated system prompt and exit"),
|
|
4493
4709
|
toolSurface: z.enum(["semantic", "raw"]).default("semantic").describe("Choose which Smithers MCP tool surface to expose"),
|
|
@@ -4989,10 +5205,10 @@ wrapCliCommandHandlersWithInputBounds(cliCommands);
|
|
|
4989
5205
|
// Main
|
|
4990
5206
|
// ---------------------------------------------------------------------------
|
|
4991
5207
|
const KNOWN_COMMANDS = new Set([
|
|
4992
|
-
|
|
4993
|
-
"
|
|
4994
|
-
"
|
|
4995
|
-
"
|
|
5208
|
+
...cliCommands.keys(),
|
|
5209
|
+
"completions",
|
|
5210
|
+
"mcp",
|
|
5211
|
+
"skills",
|
|
4996
5212
|
]);
|
|
4997
5213
|
/**
|
|
4998
5214
|
* Rewrite `smithers .` or `smithers <path>` (when path looks like a directory) to `smithers gui <path>`.
|
|
@@ -5031,54 +5247,15 @@ function resolveCliColor(mode, stream) {
|
|
|
5031
5247
|
if (process.env.NO_COLOR !== undefined && process.env.NO_COLOR.length > 0) return false;
|
|
5032
5248
|
return Boolean(stream.isTTY);
|
|
5033
5249
|
}
|
|
5034
|
-
const BUILTIN_FLAGS_WITH_VALUES = new Set([
|
|
5035
|
-
"--format",
|
|
5036
|
-
"--filter-output",
|
|
5037
|
-
"--token-limit",
|
|
5038
|
-
"--token-offset",
|
|
5039
|
-
]);
|
|
5040
5250
|
const WORKFLOW_UTILITY_COMMANDS = new Set([
|
|
5041
5251
|
"run",
|
|
5042
5252
|
"list",
|
|
5043
5253
|
"path",
|
|
5044
5254
|
"create",
|
|
5255
|
+
"inspect",
|
|
5256
|
+
"skills",
|
|
5045
5257
|
"doctor",
|
|
5046
5258
|
]);
|
|
5047
|
-
/**
|
|
5048
|
-
* @param {string | undefined} value
|
|
5049
|
-
* @returns {McpSurface}
|
|
5050
|
-
*/
|
|
5051
|
-
function normalizeMcpSurface(value) {
|
|
5052
|
-
const surface = value?.trim().toLowerCase();
|
|
5053
|
-
if (surface === undefined || surface.length === 0) {
|
|
5054
|
-
throw new Error("Missing value for --surface. Expected semantic, raw, or both.");
|
|
5055
|
-
}
|
|
5056
|
-
if (surface === "semantic" || surface === "raw" || surface === "both") {
|
|
5057
|
-
return surface;
|
|
5058
|
-
}
|
|
5059
|
-
throw new Error(`Invalid --surface value: ${value}. Expected semantic, raw, or both.`);
|
|
5060
|
-
}
|
|
5061
|
-
/**
|
|
5062
|
-
* @param {string[]} argv
|
|
5063
|
-
*/
|
|
5064
|
-
function parseMcpSurfaceArgv(argv) {
|
|
5065
|
-
let surface = "semantic";
|
|
5066
|
-
const filtered = [];
|
|
5067
|
-
for (let index = 0; index < argv.length; index++) {
|
|
5068
|
-
const arg = argv[index];
|
|
5069
|
-
if (arg === "--surface") {
|
|
5070
|
-
surface = normalizeMcpSurface(argv[index + 1]);
|
|
5071
|
-
index += 1;
|
|
5072
|
-
continue;
|
|
5073
|
-
}
|
|
5074
|
-
if (arg.startsWith("--surface=")) {
|
|
5075
|
-
surface = normalizeMcpSurface(arg.slice("--surface=".length));
|
|
5076
|
-
continue;
|
|
5077
|
-
}
|
|
5078
|
-
filtered.push(arg);
|
|
5079
|
-
}
|
|
5080
|
-
return { surface, argv: filtered };
|
|
5081
|
-
}
|
|
5082
5259
|
/**
|
|
5083
5260
|
* @param {ReturnType<typeof createSemanticMcpServer>} server
|
|
5084
5261
|
*/
|
|
@@ -5103,22 +5280,6 @@ function registerRawToolsOnMcpServer(server) {
|
|
|
5103
5280
|
});
|
|
5104
5281
|
}
|
|
5105
5282
|
}
|
|
5106
|
-
/**
|
|
5107
|
-
* @param {string[]} argv
|
|
5108
|
-
* @returns {number}
|
|
5109
|
-
*/
|
|
5110
|
-
function findFirstPositionalIndex(argv, startIndex = 0) {
|
|
5111
|
-
for (let index = startIndex; index < argv.length; index++) {
|
|
5112
|
-
const arg = argv[index];
|
|
5113
|
-
if (!arg.startsWith("-")) {
|
|
5114
|
-
return index;
|
|
5115
|
-
}
|
|
5116
|
-
if (BUILTIN_FLAGS_WITH_VALUES.has(arg)) {
|
|
5117
|
-
index++;
|
|
5118
|
-
}
|
|
5119
|
-
}
|
|
5120
|
-
return -1;
|
|
5121
|
-
}
|
|
5122
5283
|
/**
|
|
5123
5284
|
* @param {string[]} argv
|
|
5124
5285
|
*/
|
|
@@ -5287,17 +5448,6 @@ function rewriteEventsJsonFlagArgv(argv) {
|
|
|
5287
5448
|
}
|
|
5288
5449
|
return argv.map((arg) => (arg === "--json" ? "-j" : arg));
|
|
5289
5450
|
}
|
|
5290
|
-
/**
|
|
5291
|
-
* Incur treats union-typed options as value-bearing flags, so a bare
|
|
5292
|
-
* `--resume --run-id value` would consume `--run-id` as the resume value.
|
|
5293
|
-
*
|
|
5294
|
-
* @param {string[]} argv
|
|
5295
|
-
*/
|
|
5296
|
-
function rewriteBareResumeFlagArgv(argv) {
|
|
5297
|
-
return argv.map((arg, index) => arg === "--resume" && (argv[index + 1] === undefined || argv[index + 1]?.startsWith("-"))
|
|
5298
|
-
? "--resume=true"
|
|
5299
|
-
: arg);
|
|
5300
|
-
}
|
|
5301
5451
|
/**
|
|
5302
5452
|
* @param {unknown} value
|
|
5303
5453
|
*/
|
|
@@ -5326,7 +5476,7 @@ const CHAT_CREATE_PROMPT = [
|
|
|
5326
5476
|
'When you are completely finished and want to hand control back to Smithers, return ONLY this raw JSON object with no prose, markdown, or code fence: {}.',
|
|
5327
5477
|
].join("\n\n");
|
|
5328
5478
|
/**
|
|
5329
|
-
* @param {"claude-code" | "codex" | "gemini"} agentId
|
|
5479
|
+
* @param {"claude-code" | "codex" | "antigravity" | "gemini"} agentId
|
|
5330
5480
|
* @param {string} cwd
|
|
5331
5481
|
*/
|
|
5332
5482
|
async function createChatAgent(agentId, cwd) {
|
|
@@ -5346,6 +5496,12 @@ async function createChatAgent(agentId, cwd) {
|
|
|
5346
5496
|
skipGitRepoCheck: true,
|
|
5347
5497
|
});
|
|
5348
5498
|
}
|
|
5499
|
+
case "antigravity": {
|
|
5500
|
+
const { AntigravityAgent } = await import("@smithers-orchestrator/agents/AntigravityAgent");
|
|
5501
|
+
return new AntigravityAgent({
|
|
5502
|
+
cwd,
|
|
5503
|
+
});
|
|
5504
|
+
}
|
|
5349
5505
|
case "gemini": {
|
|
5350
5506
|
const { GeminiAgent } = await import("@smithers-orchestrator/agents/GeminiAgent");
|
|
5351
5507
|
return new GeminiAgent({
|
|
@@ -5356,7 +5512,7 @@ async function createChatAgent(agentId, cwd) {
|
|
|
5356
5512
|
}
|
|
5357
5513
|
}
|
|
5358
5514
|
/**
|
|
5359
|
-
* @param {"claude-code" | "codex" | "gemini"} agentId
|
|
5515
|
+
* @param {"claude-code" | "codex" | "antigravity" | "gemini"} agentId
|
|
5360
5516
|
* @param {string} cwd
|
|
5361
5517
|
* @returns {Promise<import("@smithers-orchestrator/components/SmithersWorkflow").SmithersWorkflow<any>>}
|
|
5362
5518
|
*/
|
|
@@ -5461,6 +5617,12 @@ async function main() {
|
|
|
5461
5617
|
...argv.slice(firstPositionalIndex),
|
|
5462
5618
|
];
|
|
5463
5619
|
}
|
|
5620
|
+
const commandIndex = findFirstPositionalIndex(argv);
|
|
5621
|
+
const command = commandIndex >= 0 ? argv[commandIndex] : undefined;
|
|
5622
|
+
if (command && !KNOWN_COMMANDS.has(command)) {
|
|
5623
|
+
console.error(`Unknown command: ${command}`);
|
|
5624
|
+
process.exit(4);
|
|
5625
|
+
}
|
|
5464
5626
|
argv = rewriteBareResumeFlagArgv(argv);
|
|
5465
5627
|
// --mcp mode: the MCP server needs to stay alive listening on stdin.
|
|
5466
5628
|
if (argv.includes("--mcp")) {
|