@f-o-h/cli 0.1.70 → 0.1.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/foh.js +589 -204
- package/package.json +1 -1
package/dist/foh.js
CHANGED
|
@@ -6046,7 +6046,7 @@ var require_compile = __commonJS({
|
|
|
6046
6046
|
const schOrFunc = root.refs[ref];
|
|
6047
6047
|
if (schOrFunc)
|
|
6048
6048
|
return schOrFunc;
|
|
6049
|
-
let _sch =
|
|
6049
|
+
let _sch = resolve14.call(this, root, ref);
|
|
6050
6050
|
if (_sch === void 0) {
|
|
6051
6051
|
const schema2 = (_a2 = root.localRefs) === null || _a2 === void 0 ? void 0 : _a2[ref];
|
|
6052
6052
|
const { schemaId } = this.opts;
|
|
@@ -6073,7 +6073,7 @@ var require_compile = __commonJS({
|
|
|
6073
6073
|
/**
 * Tell whether two schema environments refer to the same compilation target.
 *
 * Two environments match only when their `schema`, `root` and `baseId`
 * properties are all strictly equal (`===`), so object-valued properties are
 * compared by identity.
 *
 * @param {object} s1 - First environment; `schema`, `root`, `baseId` are read.
 * @param {object} s2 - Second environment; the same three properties are read.
 * @returns {boolean} `true` when all three properties are strictly equal.
 */
function sameSchemaEnv(s1, s2) {
  if (s1.schema !== s2.schema) {
    return false;
  }
  if (s1.root !== s2.root) {
    return false;
  }
  return s1.baseId === s2.baseId;
}
|
|
6076
|
-
function
|
|
6076
|
+
function resolve14(root, ref) {
|
|
6077
6077
|
let sch;
|
|
6078
6078
|
while (typeof (sch = this.refs[ref]) == "string")
|
|
6079
6079
|
ref = sch;
|
|
@@ -6648,55 +6648,55 @@ var require_fast_uri = __commonJS({
|
|
|
6648
6648
|
}
|
|
6649
6649
|
return uri;
|
|
6650
6650
|
}
|
|
6651
|
-
function
|
|
6651
|
+
/**
 * Resolve `relativeURI` against `baseURI` and return the serialized result.
 *
 * Both inputs are parsed with a copy of `options` whose `scheme` defaults to
 * the string "null" (a caller-supplied `scheme` overrides it). The parsed
 * components are combined by `resolveComponent` with its fourth argument set
 * to `true`, and the result is serialized with `skipEscape` switched on.
 *
 * @param {string} baseURI - URI to resolve against.
 * @param {string} relativeURI - Reference to resolve.
 * @param {object} [options] - Optional settings; copied, never mutated.
 * @returns {*} Whatever `serialize` returns for the resolved components.
 */
function resolve14(baseURI, relativeURI, options) {
  const schemelessOptions = { scheme: "null" };
  if (options) {
    Object.assign(schemelessOptions, options);
  }
  const baseComponents = parse3(baseURI, schemelessOptions);
  const relativeComponents = parse3(relativeURI, schemelessOptions);
  const target = resolveComponent(baseComponents, relativeComponents, schemelessOptions, true);
  // The flag is set only after parsing and resolving, i.e. it affects serialization alone.
  schemelessOptions.skipEscape = true;
  return serialize(target, schemelessOptions);
}
|
|
6657
|
-
function resolveComponent(base,
|
|
6657
|
+
function resolveComponent(base, relative4, options, skipNormalization) {
|
|
6658
6658
|
const target = {};
|
|
6659
6659
|
if (!skipNormalization) {
|
|
6660
6660
|
base = parse3(serialize(base, options), options);
|
|
6661
|
-
|
|
6661
|
+
relative4 = parse3(serialize(relative4, options), options);
|
|
6662
6662
|
}
|
|
6663
6663
|
options = options || {};
|
|
6664
|
-
if (!options.tolerant &&
|
|
6665
|
-
target.scheme =
|
|
6666
|
-
target.userinfo =
|
|
6667
|
-
target.host =
|
|
6668
|
-
target.port =
|
|
6669
|
-
target.path = removeDotSegments(
|
|
6670
|
-
target.query =
|
|
6664
|
+
if (!options.tolerant && relative4.scheme) {
|
|
6665
|
+
target.scheme = relative4.scheme;
|
|
6666
|
+
target.userinfo = relative4.userinfo;
|
|
6667
|
+
target.host = relative4.host;
|
|
6668
|
+
target.port = relative4.port;
|
|
6669
|
+
target.path = removeDotSegments(relative4.path || "");
|
|
6670
|
+
target.query = relative4.query;
|
|
6671
6671
|
} else {
|
|
6672
|
-
if (
|
|
6673
|
-
target.userinfo =
|
|
6674
|
-
target.host =
|
|
6675
|
-
target.port =
|
|
6676
|
-
target.path = removeDotSegments(
|
|
6677
|
-
target.query =
|
|
6672
|
+
if (relative4.userinfo !== void 0 || relative4.host !== void 0 || relative4.port !== void 0) {
|
|
6673
|
+
target.userinfo = relative4.userinfo;
|
|
6674
|
+
target.host = relative4.host;
|
|
6675
|
+
target.port = relative4.port;
|
|
6676
|
+
target.path = removeDotSegments(relative4.path || "");
|
|
6677
|
+
target.query = relative4.query;
|
|
6678
6678
|
} else {
|
|
6679
|
-
if (!
|
|
6679
|
+
if (!relative4.path) {
|
|
6680
6680
|
target.path = base.path;
|
|
6681
|
-
if (
|
|
6682
|
-
target.query =
|
|
6681
|
+
if (relative4.query !== void 0) {
|
|
6682
|
+
target.query = relative4.query;
|
|
6683
6683
|
} else {
|
|
6684
6684
|
target.query = base.query;
|
|
6685
6685
|
}
|
|
6686
6686
|
} else {
|
|
6687
|
-
if (
|
|
6688
|
-
target.path = removeDotSegments(
|
|
6687
|
+
if (relative4.path[0] === "/") {
|
|
6688
|
+
target.path = removeDotSegments(relative4.path);
|
|
6689
6689
|
} else {
|
|
6690
6690
|
if ((base.userinfo !== void 0 || base.host !== void 0 || base.port !== void 0) && !base.path) {
|
|
6691
|
-
target.path = "/" +
|
|
6691
|
+
target.path = "/" + relative4.path;
|
|
6692
6692
|
} else if (!base.path) {
|
|
6693
|
-
target.path =
|
|
6693
|
+
target.path = relative4.path;
|
|
6694
6694
|
} else {
|
|
6695
|
-
target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) +
|
|
6695
|
+
target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative4.path;
|
|
6696
6696
|
}
|
|
6697
6697
|
target.path = removeDotSegments(target.path);
|
|
6698
6698
|
}
|
|
6699
|
-
target.query =
|
|
6699
|
+
target.query = relative4.query;
|
|
6700
6700
|
}
|
|
6701
6701
|
target.userinfo = base.userinfo;
|
|
6702
6702
|
target.host = base.host;
|
|
@@ -6704,7 +6704,7 @@ var require_fast_uri = __commonJS({
|
|
|
6704
6704
|
}
|
|
6705
6705
|
target.scheme = base.scheme;
|
|
6706
6706
|
}
|
|
6707
|
-
target.fragment =
|
|
6707
|
+
target.fragment = relative4.fragment;
|
|
6708
6708
|
return target;
|
|
6709
6709
|
}
|
|
6710
6710
|
function equal(uriA, uriB, options) {
|
|
@@ -6875,7 +6875,7 @@ var require_fast_uri = __commonJS({
|
|
|
6875
6875
|
var fastUri = {
|
|
6876
6876
|
SCHEMES,
|
|
6877
6877
|
normalize,
|
|
6878
|
-
resolve:
|
|
6878
|
+
resolve: resolve14,
|
|
6879
6879
|
resolveComponent,
|
|
6880
6880
|
equal,
|
|
6881
6881
|
serialize,
|
|
@@ -10172,21 +10172,21 @@ async function promptLine(label, {
|
|
|
10172
10172
|
allowEmpty = false,
|
|
10173
10173
|
defaultValue
|
|
10174
10174
|
} = {}) {
|
|
10175
|
-
return await new Promise((
|
|
10175
|
+
return await new Promise((resolve14) => {
|
|
10176
10176
|
const suffix = defaultValue ? ` [${defaultValue}]` : "";
|
|
10177
10177
|
const rl = (0, import_readline.createInterface)({ input: process.stdin, output: process.stdout, terminal: true });
|
|
10178
10178
|
rl.question(`${label}${suffix}: `, (answer) => {
|
|
10179
10179
|
rl.close();
|
|
10180
10180
|
const value = String(answer ?? "").trim();
|
|
10181
10181
|
if (!value && typeof defaultValue === "string") {
|
|
10182
|
-
|
|
10182
|
+
resolve14(defaultValue);
|
|
10183
10183
|
return;
|
|
10184
10184
|
}
|
|
10185
10185
|
if (!value && !allowEmpty) {
|
|
10186
|
-
|
|
10186
|
+
resolve14("");
|
|
10187
10187
|
return;
|
|
10188
10188
|
}
|
|
10189
|
-
|
|
10189
|
+
resolve14(value);
|
|
10190
10190
|
});
|
|
10191
10191
|
});
|
|
10192
10192
|
}
|
|
@@ -10194,7 +10194,7 @@ async function promptSecret(label) {
|
|
|
10194
10194
|
if (!process.stdin.isTTY || !process.stdout.isTTY || typeof process.stdin.setRawMode !== "function") {
|
|
10195
10195
|
return await promptLine(label);
|
|
10196
10196
|
}
|
|
10197
|
-
return await new Promise((
|
|
10197
|
+
return await new Promise((resolve14) => {
|
|
10198
10198
|
const stdin = process.stdin;
|
|
10199
10199
|
const stdout = process.stdout;
|
|
10200
10200
|
const wasRaw = Boolean(stdin.isRaw);
|
|
@@ -10208,7 +10208,7 @@ async function promptSecret(label) {
|
|
|
10208
10208
|
const finish = () => {
|
|
10209
10209
|
cleanup();
|
|
10210
10210
|
stdout.write("\n");
|
|
10211
|
-
|
|
10211
|
+
resolve14(value);
|
|
10212
10212
|
};
|
|
10213
10213
|
const onData = (chunk) => {
|
|
10214
10214
|
const text = typeof chunk === "string" ? chunk : chunk.toString("utf8");
|
|
@@ -10217,7 +10217,7 @@ async function promptSecret(label) {
|
|
|
10217
10217
|
cleanup();
|
|
10218
10218
|
process.exitCode = 130;
|
|
10219
10219
|
stdout.write("\n");
|
|
10220
|
-
return
|
|
10220
|
+
return resolve14("");
|
|
10221
10221
|
}
|
|
10222
10222
|
if (char === "\r" || char === "\n") {
|
|
10223
10223
|
finish();
|
|
@@ -10490,7 +10490,7 @@ async function storeAuthenticatedSession(params) {
|
|
|
10490
10490
|
return output;
|
|
10491
10491
|
}
|
|
10492
10492
|
/**
 * Return a promise that settles after a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Fulfils with `undefined` once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
10495
10495
|
function hasExplicitTimeoutFlag(argv = process.argv) {
|
|
10496
10496
|
return argv.some((arg) => arg === "--timeout-seconds" || arg.startsWith("--timeout-seconds="));
|
|
@@ -11048,7 +11048,7 @@ async function pollUntil(check2, opts) {
|
|
|
11048
11048
|
}
|
|
11049
11049
|
}
|
|
11050
11050
|
/**
 * Promise-based delay: fulfils once a `setTimeout` of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Fulfils with `undefined` when the timer fires.
 */
function sleep2(ms) {
  const executor = (done) => {
    setTimeout(done, ms);
  };
  return new Promise(executor);
}
|
|
11053
11053
|
|
|
11054
11054
|
// src/commands/compliance.ts
|
|
@@ -14172,8 +14172,8 @@ function registerAgentGuardrailCommands(agent) {
|
|
|
14172
14172
|
try {
|
|
14173
14173
|
rule = JSON.parse(opts.rule);
|
|
14174
14174
|
} catch {
|
|
14175
|
-
const { readFileSync:
|
|
14176
|
-
rule = JSON.parse(
|
|
14175
|
+
const { readFileSync: readFileSync17 } = await import("fs");
|
|
14176
|
+
rule = JSON.parse(readFileSync17(opts.rule, "utf-8"));
|
|
14177
14177
|
}
|
|
14178
14178
|
const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
|
|
14179
14179
|
method: "POST",
|
|
@@ -14719,9 +14719,9 @@ function registerAgent(program3) {
|
|
|
14719
14719
|
process.stdout.write(yaml);
|
|
14720
14720
|
return;
|
|
14721
14721
|
}
|
|
14722
|
-
const { writeFileSync:
|
|
14722
|
+
const { writeFileSync: writeFileSync14 } = await import("fs");
|
|
14723
14723
|
const outputPath = opts.output ?? "tenant.yaml";
|
|
14724
|
-
|
|
14724
|
+
writeFileSync14(
|
|
14725
14725
|
outputPath,
|
|
14726
14726
|
`# tenant.yaml - Front Of House agent manifest
|
|
14727
14727
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -16173,11 +16173,11 @@ function registerVoice(program3) {
|
|
|
16173
16173
|
}
|
|
16174
16174
|
const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
|
|
16175
16175
|
const audio = Buffer.from(await res.arrayBuffer());
|
|
16176
|
-
const { mkdirSync:
|
|
16177
|
-
const { dirname:
|
|
16178
|
-
const absolutePath =
|
|
16179
|
-
|
|
16180
|
-
|
|
16176
|
+
const { mkdirSync: mkdirSync9, writeFileSync: writeFileSync14 } = await import("fs");
|
|
16177
|
+
const { dirname: dirname12, resolve: resolve14 } = await import("path");
|
|
16178
|
+
const absolutePath = resolve14(outputPath);
|
|
16179
|
+
mkdirSync9(dirname12(absolutePath), { recursive: true });
|
|
16180
|
+
writeFileSync14(absolutePath, audio);
|
|
16181
16181
|
format({
|
|
16182
16182
|
status: "ok",
|
|
16183
16183
|
provider,
|
|
@@ -30668,7 +30668,7 @@ var Protocol = class {
|
|
|
30668
30668
|
return;
|
|
30669
30669
|
}
|
|
30670
30670
|
const pollInterval = task2.pollInterval ?? this._options?.defaultTaskPollInterval ?? 1e3;
|
|
30671
|
-
await new Promise((
|
|
30671
|
+
await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
|
|
30672
30672
|
options?.signal?.throwIfAborted();
|
|
30673
30673
|
}
|
|
30674
30674
|
} catch (error2) {
|
|
@@ -30685,7 +30685,7 @@ var Protocol = class {
|
|
|
30685
30685
|
*/
|
|
30686
30686
|
request(request, resultSchema, options) {
|
|
30687
30687
|
const { relatedRequestId, resumptionToken, onresumptiontoken, task, relatedTask } = options ?? {};
|
|
30688
|
-
return new Promise((
|
|
30688
|
+
return new Promise((resolve14, reject) => {
|
|
30689
30689
|
const earlyReject = (error2) => {
|
|
30690
30690
|
reject(error2);
|
|
30691
30691
|
};
|
|
@@ -30763,7 +30763,7 @@ var Protocol = class {
|
|
|
30763
30763
|
if (!parseResult.success) {
|
|
30764
30764
|
reject(parseResult.error);
|
|
30765
30765
|
} else {
|
|
30766
|
-
|
|
30766
|
+
resolve14(parseResult.data);
|
|
30767
30767
|
}
|
|
30768
30768
|
} catch (error2) {
|
|
30769
30769
|
reject(error2);
|
|
@@ -31024,12 +31024,12 @@ var Protocol = class {
|
|
|
31024
31024
|
}
|
|
31025
31025
|
} catch {
|
|
31026
31026
|
}
|
|
31027
|
-
return new Promise((
|
|
31027
|
+
return new Promise((resolve14, reject) => {
|
|
31028
31028
|
if (signal.aborted) {
|
|
31029
31029
|
reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
|
|
31030
31030
|
return;
|
|
31031
31031
|
}
|
|
31032
|
-
const timeoutId = setTimeout(
|
|
31032
|
+
const timeoutId = setTimeout(resolve14, interval);
|
|
31033
31033
|
signal.addEventListener("abort", () => {
|
|
31034
31034
|
clearTimeout(timeoutId);
|
|
31035
31035
|
reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
|
|
@@ -32129,7 +32129,7 @@ var McpServer = class {
|
|
|
32129
32129
|
let task = createTaskResult.task;
|
|
32130
32130
|
const pollInterval = task.pollInterval ?? 5e3;
|
|
32131
32131
|
while (task.status !== "completed" && task.status !== "failed" && task.status !== "cancelled") {
|
|
32132
|
-
await new Promise((
|
|
32132
|
+
await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
|
|
32133
32133
|
const updatedTask = await extra.taskStore.getTask(taskId);
|
|
32134
32134
|
if (!updatedTask) {
|
|
32135
32135
|
throw new McpError(ErrorCode.InternalError, `Task ${taskId} not found during polling`);
|
|
@@ -32778,19 +32778,19 @@ var StdioServerTransport = class {
|
|
|
32778
32778
|
this.onclose?.();
|
|
32779
32779
|
}
|
|
32780
32780
|
send(message) {
|
|
32781
|
-
return new Promise((
|
|
32781
|
+
return new Promise((resolve14) => {
|
|
32782
32782
|
const json3 = serializeMessage(message);
|
|
32783
32783
|
if (this._stdout.write(json3)) {
|
|
32784
|
-
|
|
32784
|
+
resolve14();
|
|
32785
32785
|
} else {
|
|
32786
|
-
this._stdout.once("drain",
|
|
32786
|
+
this._stdout.once("drain", resolve14);
|
|
32787
32787
|
}
|
|
32788
32788
|
});
|
|
32789
32789
|
}
|
|
32790
32790
|
};
|
|
32791
32791
|
|
|
32792
32792
|
// src/lib/cli-version.ts
|
|
32793
|
-
var CLI_VERSION = "0.1.
|
|
32793
|
+
var CLI_VERSION = "0.1.71";
|
|
32794
32794
|
|
|
32795
32795
|
// src/commands/mcp-serve.ts
|
|
32796
32796
|
var DEFAULT_TIMEOUT_MS = 12e4;
|
|
@@ -32975,7 +32975,7 @@ async function runFohCli(params) {
|
|
|
32975
32975
|
effectiveArgv.push("--json");
|
|
32976
32976
|
}
|
|
32977
32977
|
const command = `foh ${effectiveArgv.join(" ")}`;
|
|
32978
|
-
return await new Promise((
|
|
32978
|
+
return await new Promise((resolve14) => {
|
|
32979
32979
|
const child = (0, import_node_child_process.spawn)(process.execPath, [cliEntry, ...effectiveArgv], {
|
|
32980
32980
|
stdio: ["ignore", "pipe", "pipe"],
|
|
32981
32981
|
env: {
|
|
@@ -33000,7 +33000,7 @@ async function runFohCli(params) {
|
|
|
33000
33000
|
});
|
|
33001
33001
|
child.once("error", (error2) => {
|
|
33002
33002
|
clearTimeout(timeoutHandle);
|
|
33003
|
-
|
|
33003
|
+
resolve14({
|
|
33004
33004
|
ok: false,
|
|
33005
33005
|
command,
|
|
33006
33006
|
argv: effectiveArgv,
|
|
@@ -33016,7 +33016,7 @@ async function runFohCli(params) {
|
|
|
33016
33016
|
const stderrText = finalizeBoundedText(stderrBuffer);
|
|
33017
33017
|
const exitCode = Number.isFinite(code ?? NaN) ? Number(code) : 1;
|
|
33018
33018
|
const stdoutJson = tryParseJson(stdoutText);
|
|
33019
|
-
|
|
33019
|
+
resolve14({
|
|
33020
33020
|
ok: !timedOut && exitCode === 0,
|
|
33021
33021
|
command,
|
|
33022
33022
|
argv: effectiveArgv,
|
|
@@ -35186,8 +35186,8 @@ function registerSetup(program3) {
|
|
|
35186
35186
|
}
|
|
35187
35187
|
try {
|
|
35188
35188
|
const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
|
|
35189
|
-
const { writeFileSync:
|
|
35190
|
-
|
|
35189
|
+
const { writeFileSync: writeFileSync14 } = await import("fs");
|
|
35190
|
+
writeFileSync14(
|
|
35191
35191
|
"tenant.yaml",
|
|
35192
35192
|
`# tenant.yaml - Front Of House agent manifest
|
|
35193
35193
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -35357,8 +35357,8 @@ function registerSim(program3) {
|
|
|
35357
35357
|
}
|
|
35358
35358
|
const cert = response.certificate;
|
|
35359
35359
|
if (opts.out) {
|
|
35360
|
-
const { writeFileSync:
|
|
35361
|
-
|
|
35360
|
+
const { writeFileSync: writeFileSync14 } = await import("fs");
|
|
35361
|
+
writeFileSync14(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
|
|
35362
35362
|
process.stderr.write(` Certificate written to ${opts.out}
|
|
35363
35363
|
`);
|
|
35364
35364
|
}
|
|
@@ -35408,8 +35408,8 @@ function registerSim(program3) {
|
|
|
35408
35408
|
});
|
|
35409
35409
|
}
|
|
35410
35410
|
if (opts.out) {
|
|
35411
|
-
const { writeFileSync:
|
|
35412
|
-
|
|
35411
|
+
const { writeFileSync: writeFileSync14 } = await import("fs");
|
|
35412
|
+
writeFileSync14(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
|
|
35413
35413
|
process.stderr.write(` Final certificate written to ${opts.out}
|
|
35414
35414
|
`);
|
|
35415
35415
|
}
|
|
@@ -38469,7 +38469,7 @@ async function runSelf(args, apiUrlOverride) {
|
|
|
38469
38469
|
if (apiUrlOverride && !spawnArgs.includes("--api-url")) {
|
|
38470
38470
|
spawnArgs.push("--api-url", apiUrlOverride);
|
|
38471
38471
|
}
|
|
38472
|
-
return await new Promise((
|
|
38472
|
+
return await new Promise((resolve14, reject) => {
|
|
38473
38473
|
const child = (0, import_child_process2.spawn)(process.execPath, [process.argv[1], ...spawnArgs], {
|
|
38474
38474
|
stdio: "inherit",
|
|
38475
38475
|
env: {
|
|
@@ -38479,7 +38479,7 @@ async function runSelf(args, apiUrlOverride) {
|
|
|
38479
38479
|
}
|
|
38480
38480
|
});
|
|
38481
38481
|
child.once("error", reject);
|
|
38482
|
-
child.once("close", (code) =>
|
|
38482
|
+
child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
|
|
38483
38483
|
});
|
|
38484
38484
|
}
|
|
38485
38485
|
function shouldUseInteractiveHome(argv) {
|
|
@@ -38857,17 +38857,17 @@ function detectUpdateAvailability(currentVersion, cwd = process.cwd()) {
|
|
|
38857
38857
|
async function applyRepoUpdate(repoRoot) {
|
|
38858
38858
|
const scriptPath = (0, import_path9.join)(repoRoot, "scripts", "Install-FohCli.ps1");
|
|
38859
38859
|
if (process.platform === "win32") {
|
|
38860
|
-
return await new Promise((
|
|
38860
|
+
return await new Promise((resolve14, reject) => {
|
|
38861
38861
|
const child = (0, import_child_process3.spawn)(
|
|
38862
38862
|
"powershell",
|
|
38863
38863
|
["-ExecutionPolicy", "Bypass", "-File", scriptPath],
|
|
38864
38864
|
{ stdio: "inherit" }
|
|
38865
38865
|
);
|
|
38866
38866
|
child.once("error", reject);
|
|
38867
|
-
child.once("close", (code) =>
|
|
38867
|
+
child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
|
|
38868
38868
|
});
|
|
38869
38869
|
}
|
|
38870
|
-
return await new Promise((
|
|
38870
|
+
return await new Promise((resolve14, reject) => {
|
|
38871
38871
|
const child = (0, import_child_process3.spawn)(
|
|
38872
38872
|
"corepack",
|
|
38873
38873
|
["pnpm", "cli:install:global"],
|
|
@@ -38877,7 +38877,7 @@ async function applyRepoUpdate(repoRoot) {
|
|
|
38877
38877
|
}
|
|
38878
38878
|
);
|
|
38879
38879
|
child.once("error", reject);
|
|
38880
|
-
child.once("close", (code) =>
|
|
38880
|
+
child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
|
|
38881
38881
|
});
|
|
38882
38882
|
}
|
|
38883
38883
|
function shouldShowUpdateNotice(argv = process.argv) {
|
|
@@ -39013,8 +39013,8 @@ function registerUpdate(program3) {
|
|
|
39013
39013
|
}
|
|
39014
39014
|
|
|
39015
39015
|
// src/commands/eval.ts
|
|
39016
|
-
var
|
|
39017
|
-
var
|
|
39016
|
+
var import_fs20 = require("fs");
|
|
39017
|
+
var import_path19 = require("path");
|
|
39018
39018
|
var import_child_process6 = require("child_process");
|
|
39019
39019
|
|
|
39020
39020
|
// src/lib/external-agent-artifact-safety.ts
|
|
@@ -39349,9 +39349,9 @@ function readCommandRecords(runDir) {
|
|
|
39349
39349
|
}
|
|
39350
39350
|
|
|
39351
39351
|
// src/lib/external-agent-executor.ts
|
|
39352
|
-
var
|
|
39352
|
+
var import_fs19 = require("fs");
|
|
39353
39353
|
var import_os2 = require("os");
|
|
39354
|
-
var
|
|
39354
|
+
var import_path18 = require("path");
|
|
39355
39355
|
var import_child_process5 = require("child_process");
|
|
39356
39356
|
|
|
39357
39357
|
// src/lib/external-agent-executor-env.ts
|
|
@@ -39518,40 +39518,394 @@ function copyExternalAgentCommandCaptureArtifacts(input) {
|
|
|
39518
39518
|
}
|
|
39519
39519
|
|
|
39520
39520
|
// src/lib/external-agent-executor-classification.ts
|
|
39521
|
+
var import_fs17 = require("fs");
|
|
39522
|
+
var import_path16 = require("path");
|
|
39523
|
+
|
|
39524
|
+
// src/lib/external-agent-run-summary.ts
|
|
39521
39525
|
var import_fs16 = require("fs");
|
|
39522
39526
|
var import_path15 = require("path");
|
|
39527
|
+
// Keys that validateExternalAgentRun requires to be present on a run artifact.
var REQUIRED_RUN_FIELDS = [
  "schema_version", "run_id", "status",
  "model_provider", "model_name", "prompt_version",
  "started_at", "manual_intervention_count",
  "environment", "public_entrypoints",
  "commands_run", "docs_pages_used", "artifacts"
];
// The only values accepted for a run's `status` field.
var VALID_STATUSES = /* @__PURE__ */ new Set(["pass", "hold", "fail"]);
// Matches https://frontofhouse.okii.uk/ URLs; a match ends at whitespace, a quote,
// a backtick, or one of ) < > , ; \ ] }.
var DOC_URL_RE = /https:\/\/frontofhouse\.okii\.uk\/[^\s"'`)<>,;\\\]}]*/g;
|
|
39544
|
+
/**
 * Render a value as a single token for a shell command line.
 *
 * The value is stringified. If it consists solely of characters from
 * `A-Z a-z 0-9 _ . / : = @ -` (at least one) it is returned untouched;
 * otherwise it is wrapped in double quotes with every `"`, `$` and backtick
 * preceded by a backslash.
 *
 * NOTE(review): backslashes themselves are not escaped, so a value ending in
 * `\` yields `...\"` — confirm whether callers can pass such values.
 *
 * @param {*} value - Value to quote; coerced with `String()`.
 * @returns {string} The bare or double-quoted token.
 */
function quoteShellArg(value) {
  const text = String(value);
  const isBareWord = /^[A-Za-z0-9_./:=@-]+$/.test(text);
  if (isBareWord) {
    return text;
  }
  const escaped = text.replace(/(["$`])/g, "\\$1");
  return `"${escaped}"`;
}
|
|
39549
|
+
/**
 * Build the `foh eval external-agent summary` command line for a runs root.
 *
 * The command writes to `latest-summary.json` (`--out`) and
 * `summary.report.json` (`--report`), both joined onto `root`, and always ends
 * with `--json`. Each path is passed through `quoteShellArg`.
 *
 * @param {string} root - Directory given to `--root`.
 * @returns {string} The space-separated command string.
 */
function externalAgentSummaryCommand(root) {
  const summaryPath = (0, import_path15.join)(root, "latest-summary.json");
  const reportPath = (0, import_path15.join)(root, "summary.report.json");
  const rootArg = quoteShellArg(root);
  const outArg = quoteShellArg(summaryPath);
  const reportArg = quoteShellArg(reportPath);
  return `foh eval external-agent summary --root ${rootArg} --out ${outArg} --report ${reportArg} --json`;
}
|
|
39566
|
+
/**
 * Read a UTF-8 file and parse it as JSON, dropping one leading U+FEFF (BOM).
 *
 * @param {string} filePath - File to read synchronously.
 * @returns {*} The parsed JSON value.
 * @throws Propagates errors from `readFileSync` and `JSON.parse`.
 */
function readJson(filePath) {
  const raw = (0, import_fs16.readFileSync)(filePath, "utf8");
  const withoutBom = raw.replace(/^\uFEFF/, "");
  return JSON.parse(withoutBom);
}
|
|
39569
|
+
/**
 * Read a newline-delimited JSON file and return the object records it holds.
 *
 * Lines are split on `\n` or `\r\n` and trimmed. Blank lines, lines that fail
 * to parse, and lines whose parsed value is not a non-array object are all
 * skipped silently. A missing file yields an empty array.
 *
 * @param {string} filePath - File to read synchronously.
 * @returns {object[]} Parsed records, in file order.
 */
function readNdjson(filePath) {
  if (!(0, import_fs16.existsSync)(filePath)) return [];
  const records = [];
  for (const rawLine of (0, import_fs16.readFileSync)(filePath, "utf8").split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Unparseable lines are dropped rather than reported.
      continue;
    }
    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
      records.push(parsed);
    }
  }
  return records;
}
|
|
39580
|
+
/**
 * Return `value` when it is a truthy, non-array object; otherwise `null`.
 *
 * @param {*} value - Candidate value.
 * @returns {object|null} The same reference, or `null`.
 */
function asObject(value) {
  if (!value) {
    return null;
  }
  if (typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  return value;
}
|
|
39583
|
+
/**
 * Return `value` itself when it is an array, otherwise a new empty array.
 *
 * @param {*} value - Candidate value.
 * @returns {Array} The same array reference, or `[]`.
 */
function toArray2(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
|
|
39586
|
+
/**
 * Add `amount` to the counter stored in `map3` under a stringified key.
 *
 * A falsy `key` is counted under "unknown". A missing (or falsy) existing
 * count is treated as 0.
 *
 * @param {Map<string, number>} map3 - Counter map, mutated in place.
 * @param {*} key - Bucket name; coerced with `String()`.
 * @param {number} [amount=1] - Increment to apply.
 * @returns {void}
 */
function increment(map3, key, amount = 1) {
  const bucket = String(key || "unknown");
  const current = map3.get(bucket) || 0;
  map3.set(bucket, current + amount);
}
|
|
39590
|
+
/**
 * Turn a counter map into `{ key, count }` rows ordered by count descending,
 * with ties broken by `key.localeCompare` ascending.
 *
 * @param {Map<string, number>} map3 - Counter map; not modified.
 * @returns {{key: string, count: number}[]} Newly built, sorted rows.
 */
function ranked(map3) {
  const rows = [];
  for (const [key, count] of map3.entries()) {
    rows.push({ key, count });
  }
  rows.sort((left, right) => {
    const byCount = right.count - left.count;
    return byCount || left.key.localeCompare(right.key);
  });
  return rows;
}
|
|
39593
|
+
/**
 * Extract the distinct https://frontofhouse.okii.uk/ URLs found in `text`.
 *
 * Every `DOC_URL_RE` match has trailing `.`, `?`, `!` and `:` characters
 * removed; results that still start with the site prefix are de-duplicated
 * and returned in default `sort()` order.
 *
 * @param {*} text - Text to scan; falsy values are treated as "".
 * @returns {string[]} Sorted, unique URLs.
 */
function collectDocUrls(text) {
  const matches = String(text || "").match(DOC_URL_RE) || [];
  const unique = new Set();
  for (const match of matches) {
    const trimmed = match.replace(/[.?!:]+$/g, "");
    if (trimmed.startsWith("https://frontofhouse.okii.uk/")) {
      unique.add(trimmed);
    }
  }
  return Array.from(unique).sort();
}
|
|
39596
|
+
/**
 * Collect the paths of every file named exactly `run.json` under `root`.
 *
 * Directories are walked iteratively with an explicit work list. Only entries
 * reported as directories are descended into and only entries reported as
 * regular files are matched. A missing `root` yields an empty array.
 *
 * @param {string} root - Directory to search.
 * @returns {string[]} Matching paths in default `sort()` order.
 * @throws Propagates errors from `readdirSync`.
 */
function findRunFiles(root) {
  if (!(0, import_fs16.existsSync)(root)) return [];
  const found = [];
  const pending = [root];
  while (pending.length !== 0) {
    const dir = pending.pop();
    if (!dir) continue;
    const entries = (0, import_fs16.readdirSync)(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = (0, import_path15.join)(dir, entry.name);
      if (entry.isDirectory()) {
        pending.push(fullPath);
        continue;
      }
      if (entry.isFile() && entry.name === "run.json") {
        found.push(fullPath);
      }
    }
  }
  found.sort();
  return found;
}
|
|
39614
|
+
/**
 * Validate a parsed run artifact and list every problem found.
 *
 * A value that is not a non-array object produces a single `run_not_object`
 * finding. Otherwise the checks run in a fixed order: required keys,
 * `schema_version`, `status`, the failure reason required for hold/fail,
 * `manual_intervention_count`, then the array/object shape checks.
 *
 * @param {*} value - Parsed contents of a run artifact.
 * @returns {object[]} Findings (each has an `id`); empty when the run is valid.
 */
function validateExternalAgentRun(value) {
  const run = asObject(value);
  if (!run) {
    return [{ id: "run_not_object", detail: "run artifact must be an object" }];
  }
  const findings = [];
  const report = (finding) => {
    findings.push(finding);
  };
  for (const field of REQUIRED_RUN_FIELDS) {
    if (field in run) continue;
    report({ id: "required_field_missing", field });
  }
  if (run.schema_version !== "external_agent_run.v1") {
    report({ id: "schema_version_invalid", expected: "external_agent_run.v1", actual: run.schema_version ?? null });
  }
  if (!VALID_STATUSES.has(String(run.status || ""))) {
    report({ id: "status_invalid", expected: Array.from(VALID_STATUSES), actual: run.status ?? null });
  }
  // Only hold/fail runs must explain themselves with a non-blank reason code.
  const needsReason = run.status === "hold" || run.status === "fail";
  if (needsReason && !String(run.failure_reason_code || "").trim()) {
    report({ id: "failure_reason_code_missing" });
  }
  const interventions = run.manual_intervention_count;
  const interventionsValid = Number.isInteger(interventions) && Number(interventions) >= 0;
  if (!interventionsValid) {
    report({ id: "manual_intervention_count_invalid" });
  }
  if (!Array.isArray(run.commands_run)) {
    report({ id: "commands_run_invalid" });
  }
  if (!Array.isArray(run.docs_pages_used)) {
    report({ id: "docs_pages_used_invalid" });
  }
  if (!asObject(run.environment)) {
    report({ id: "environment_invalid" });
  }
  if (!asObject(run.artifacts)) {
    report({ id: "artifacts_invalid" });
  }
  if (toArray2(run.public_entrypoints).length === 0) {
    report({ id: "public_entrypoints_missing" });
  }
  return findings;
}
|
|
39640
|
+
/**
 * Derive a numeric sort key for a run from its timestamps.
 *
 * Uses `ended_at` when truthy, otherwise `started_at`, parsed with
 * `Date.parse`. Anything that does not parse to a finite number gives 0.
 *
 * @param {object} run - Run record; `ended_at` and `started_at` are read.
 * @returns {number} Epoch milliseconds, or 0.
 */
function runSortTime(run) {
  const stamp = String(run.ended_at || run.started_at || "");
  const millis = Date.parse(stamp);
  if (!Number.isFinite(millis)) {
    return 0;
  }
  return millis;
}
|
|
39645
|
+
/**
 * Compute the cohort id of a run file from its location beneath `root`.
 *
 * The run file's directory is taken relative to `root`, with backslashes
 * normalised to `/`. No path segments gives "."; a first segment shaped like
 * `YYYY-MM-DD` followed by another segment gives "<date>/<next>"; any other
 * layout gives the first segment.
 *
 * @param {string} root - Root directory of all runs.
 * @param {string} runPath - Path of the run file.
 * @returns {string} The cohort id.
 */
function cohortIdForRunPath(root, runPath) {
  const runDir = (0, import_path15.dirname)(runPath);
  const segments = (0, import_path15.relative)(root, runDir)
    .replaceAll("\\", "/")
    .split("/")
    .filter(Boolean);
  if (segments.length === 0) {
    return ".";
  }
  const [first, second] = segments;
  const startsWithDate = /^\d{4}-\d{2}-\d{2}$/.test(first);
  if (startsWithDate && second) {
    return `${first}/${second}`;
  }
  return first;
}
|
|
39652
|
+
/**
 * Load every run file under `root` and split them into valid and invalid.
 *
 * Valid runs become `{ path, run, cohort_id, sort_time }` records. Runs with
 * validation findings, and files that throw while being read or parsed, go to
 * `invalid_runs` with a `/`-separated path relative to `cwd`. Every thrown
 * error is recorded under the id `json_parse_failed`.
 *
 * @param {string} root - Directory searched for run files.
 * @param {string} cwd - Base directory for the reported relative paths.
 * @returns {{records: object[], invalid_runs: object[]}} Both lists.
 */
function readRunRecords(root, cwd) {
  const records = [];
  const invalid_runs = [];
  // The display path is computed only when a run is actually rejected.
  const reject = (file, findings) => {
    invalid_runs.push({ path: (0, import_path15.relative)(cwd, file).replaceAll("\\", "/"), findings });
  };
  for (const file2 of findRunFiles(root)) {
    try {
      const run = readJson(file2);
      const findings = validateExternalAgentRun(run);
      if (findings.length > 0) {
        reject(file2, findings);
        continue;
      }
      records.push({
        path: file2,
        run,
        cohort_id: cohortIdForRunPath(root, file2),
        sort_time: runSortTime(run)
      });
    } catch (error2) {
      const detail = error2 instanceof Error ? error2.message : String(error2);
      reject(file2, [{ id: "json_parse_failed", detail }]);
    }
  }
  return { records, invalid_runs };
}
|
|
39679
|
+
/**
 * Return the cohort id of the most recent run record.
 *
 * Records are ordered by `sort_time` descending, ties broken by `path`
 * descending (`localeCompare`). The input array is left untouched.
 *
 * @param {object[]} records - Records with `sort_time`, `path`, `cohort_id`.
 * @returns {*} The first record's `cohort_id`, or `null` when there are no
 *   records or that value is nullish.
 */
function latestCohortId(records) {
  const newestFirst = records.slice();
  newestFirst.sort((a, b) => {
    const byTime = b.sort_time - a.sort_time;
    return byTime || b.path.localeCompare(a.path);
  });
  const newest = newestFirst[0];
  return newest?.cohort_id ?? null;
}
|
|
39682
|
+
/**
 * Map a failure reason code to the subsystem that owns it.
 *
 * The code is lower-cased and tested against substring groups in a fixed
 * order; the first group containing any match decides the owner. Codes that
 * match no group are attributed to "product_ux".
 *
 * @param {*} reasonCode - Reason code; falsy values are treated as "".
 * @returns {string} The owning subsystem id.
 */
function ownerSubsystemFor(reasonCode) {
  const reason = String(reasonCode || "").toLowerCase();
  // Order matters: earlier groups take precedence over later ones.
  const rules = [
    [["simulation", "certification", "scenario"], "dojo_certification"],
    [["contact_phone", "voice_contact", "provider_capacity", "byon"], "voice_contact"],
    [["exec_policy", "policy_blocked", "sandbox", "runner", "codex"], "infra_runner"],
    [["api", "http_4", "http_5", "404", "500", "roundtrip"], "api_contract"],
    [["cli", "command", "flag"], "cli"],
    [["docs", "unclear", "not_found"], "docs"],
    [["auth", "org", "config"], "infra_runner"],
    [["runtime", "widget", "proof"], "runtime"]
  ];
  for (const [needles, owner] of rules) {
    if (needles.some((needle) => reason.includes(needle))) {
      return owner;
    }
  }
  return "product_ux";
}
|
|
39694
|
+
/**
 * Pick the recommended fix category for a failure reason code.
 *
 * The decision is based on the owner returned by `ownerSubsystemFor`; owners
 * without a dedicated category fall back to "fix_config".
 *
 * @param {*} reasonCode - Reason code forwarded to `ownerSubsystemFor`.
 * @returns {string} The fix category id.
 */
function recommendedFixFor(reasonCode) {
  switch (ownerSubsystemFor(reasonCode)) {
    case "api_contract":
      return "fix_api";
    case "cli":
      return "fix_cli";
    case "docs":
      return "fix_docs";
    case "runtime":
      return "fix_runtime";
    case "dojo_certification":
      return "add_test";
    default:
      return "fix_config";
  }
}
|
|
39703
|
+
/**
 * Reduce a command log to one record per command.
 *
 * Records are grouped by `command_id`, or by "<recorded_at>:<command>" when
 * the id is falsy. Within a group the most recent record whose `phase` is
 * "completed" wins; if none is completed, the first record seen is kept.
 * Output follows the order in which each group first appeared.
 *
 * @param {object[]} records - Command log entries in log order.
 * @returns {object[]} One record per command.
 */
function collapseCommandRecords(records) {
  // A Map keeps first-insertion order, which is the output order required here.
  const chosenById = new Map();
  for (const entry of records) {
    const id = String(entry.command_id || `${entry.recorded_at || ""}:${entry.command || ""}`);
    const existing = chosenById.get(id);
    if (entry.phase === "completed" || !existing) {
      chosenById.set(id, entry);
    }
  }
  return Array.from(chosenById.values()).filter((entry) => Boolean(entry));
}
|
|
39714
|
+
/**
 * Summarise the log files stored next to a run file.
 *
 * Reads `commands.ndjson` (collapsed to one record per command) and
 * `codex-exec.jsonl` from the run file's directory and derives command
 * counts, durations, ranked reason codes, the ten slowest commands, the doc
 * pages observed, and codex command-execution counts.
 *
 * @param {string} runPath - Path of the run file; its directory is scanned.
 * @param {object} run - Parsed run; `run_id` and `docs_pages_used` are read.
 * @param {string} cwd - Base directory for the reported `run_path`.
 * @returns {object} The summary fields listed in the returned literal.
 */
function analyzeRunArtifacts(runPath, run, cwd) {
  const runDir = (0, import_path15.dirname)(runPath);
  const commandLog = readNdjson((0, import_path15.join)(runDir, "commands.ndjson"));
  const commands = collapseCommandRecords(commandLog);
  const reasonCounts = /* @__PURE__ */ new Map();
  const timedSteps = [];
  let completedCount = 0;
  let durationSum = 0;
  for (const command of commands) {
    const isCompleted = command.phase === "completed" || command.completed_at;
    if (isCompleted) {
      completedCount += 1;
    }
    const { duration_ms: durationMs } = command;
    if (typeof durationMs === "number") {
      durationSum += durationMs;
      timedSteps.push({
        run_id: run.run_id,
        run_path: (0, import_path15.relative)(cwd, runPath).replaceAll("\\", "/"),
        command: command.command || "",
        duration_ms: durationMs,
        status: command.status || null,
        reason_code: command.reason_code || null,
        check_reason_codes: Array.isArray(command.check_reason_codes) ? command.check_reason_codes : []
      });
    }
    // The command's own reason code and each truthy check reason code count once.
    if (command.reason_code) {
      increment(reasonCounts, command.reason_code);
    }
    for (const reasonCode of toArray2(command.check_reason_codes)) {
      if (reasonCode) {
        increment(reasonCounts, reasonCode);
      }
    }
  }
  // Every timed command was pushed exactly once, so the count is the list length.
  const timedCount = timedSteps.length;
  const codexEvents = readNdjson((0, import_path15.join)(runDir, "codex-exec.jsonl"));
  const docs = /* @__PURE__ */ new Set(toArray2(run.docs_pages_used).map(String));
  let codexCompleted = 0;
  let codexFailed = 0;
  for (const event of codexEvents) {
    // Prefer the nested `item` object when present, else inspect the event itself.
    const item = asObject(event.item) || event;
    if (item.type === "command_execution" && item.status === "completed") {
      codexCompleted += 1;
      const exitCode = item.exit_code;
      if (typeof exitCode === "number" && exitCode !== 0) {
        codexFailed += 1;
      }
    }
    for (const url2 of collectDocUrls(JSON.stringify(event))) {
      docs.add(url2);
    }
  }
  timedSteps.sort((a, b) => Number(b.duration_ms) - Number(a.duration_ms));
  return {
    command_log_present: (0, import_fs16.existsSync)((0, import_path15.join)(runDir, "commands.ndjson")),
    command_count: commands.length,
    completed_command_count: completedCount,
    missing_completion_count: Math.max(0, commands.length - completedCount),
    commands_with_duration_count: timedCount,
    total_command_duration_ms: durationSum,
    command_reason_codes: ranked(reasonCounts),
    slow_steps: timedSteps.slice(0, 10),
    docs_pages_observed: Array.from(docs).sort(),
    codex_command_execution_completed_count: codexCompleted,
    codex_failed_exit_code_count: codexFailed
  };
}
|
|
39772
|
+
/**
 * Aggregate the external-agent runs found under `options.root` into one
 * `external_agent_run_summary.v1` object.
 *
 * When `options.cohortId` is set, or `options.currentBaselineOnly` selects the
 * latest cohort, only runs of that cohort are counted and invalid runs are
 * reported as empty.
 *
 * @param {object} options - Reads `cwd`, `root`, `cohortId`, `currentBaselineOnly`.
 * @returns {object} The summary, including command and Codex telemetry totals,
 *   ranked failure reasons, and recommended fixes.
 */
function summarizeExternalAgentRuns(options) {
  const cwd = (0, import_path15.resolve)(options.cwd || process.cwd());
  const root = (0, import_path15.resolve)(cwd, options.root);
  const toRelativePosix = (target) => (0, import_path15.relative)(cwd, target).replaceAll("\\", "/");
  const loaded = readRunRecords(root, cwd);

  let selectedCohortId = options.cohortId;
  if (!selectedCohortId) {
    selectedCohortId = options.currentBaselineOnly ? latestCohortId(loaded.records) : null;
  }
  const records = selectedCohortId
    ? loaded.records.filter((candidate) => candidate.cohort_id === selectedCohortId)
    : loaded.records;

  const statusCounts = new Map();
  const modelCounts = new Map();
  const failureCounts = new Map();
  const commandReasonCounts = new Map();
  const docsCounts = new Map();
  const slowSteps = [];

  // Numeric artifact fields that are simply summed across all runs.
  const summedFields = [
    "command_count",
    "completed_command_count",
    "missing_completion_count",
    "commands_with_duration_count",
    "total_command_duration_ms",
    "codex_command_execution_completed_count",
    "codex_failed_exit_code_count"
  ];
  const sums = Object.fromEntries(summedFields.map((field) => [field, 0]));
  let manualInterventions = 0;
  let commandLogRunCount = 0;

  for (const { run, path: runPath } of records) {
    increment(statusCounts, run.status);
    increment(modelCounts, `${run.model_provider}/${run.model_name}`);
    manualInterventions += Number(run.manual_intervention_count || 0);
    if (run.status !== "pass") {
      increment(failureCounts, run.failure_reason_code || "unknown");
    }

    const artifacts = analyzeRunArtifacts(runPath, run, cwd);
    if (artifacts.command_log_present) {
      commandLogRunCount += 1;
    }
    for (const field of summedFields) {
      sums[field] += Number(artifacts[field] || 0);
    }
    for (const step of toArray2(artifacts.slow_steps)) {
      slowSteps.push(step);
    }
    for (const ranking of toArray2(artifacts.command_reason_codes)) {
      const entry = asObject(ranking);
      if (entry) {
        increment(commandReasonCounts, entry.key, Number(entry.count || 1));
      }
    }
    for (const page of toArray2(artifacts.docs_pages_observed)) {
      increment(docsCounts, page);
    }
  }

  const topFailures = ranked(failureCounts);
  const recommendedFixes = topFailures.map(({ key, count }) => ({
    reason_code: key,
    count,
    recommended_fix: recommendedFixFor(key),
    owner_subsystem: ownerSubsystemFor(key)
  }));
  const nextRecommendedFix = recommendedFixes[0] || null;

  // Slowest first; ties are ordered by command text.
  const bySlowestThenCommand = (a, b) => {
    const durationGap = Number(b.duration_ms || 0) - Number(a.duration_ms || 0);
    return durationGap || String(a.command || "").localeCompare(String(b.command || ""));
  };

  return {
    schema_version: "external_agent_run_summary.v1",
    generated_at: new Date().toISOString(),
    root: toRelativePosix(root) || ".",
    cohort_id: selectedCohortId,
    current_baseline_only: Boolean(selectedCohortId),
    run_count: records.length,
    invalid_run_count: selectedCohortId ? 0 : loaded.invalid_runs.length,
    status_counts: Object.fromEntries(statusCounts),
    model_counts: ranked(modelCounts),
    manual_intervention_count: manualInterventions,
    top_failure_reason_codes: topFailures,
    docs_pages_observed: ranked(docsCounts),
    command_telemetry: {
      run_count_with_command_log: commandLogRunCount,
      command_count: sums.command_count,
      completed_command_count: sums.completed_command_count,
      missing_completion_count: sums.missing_completion_count,
      commands_with_duration_count: sums.commands_with_duration_count,
      total_command_duration_ms: sums.total_command_duration_ms,
      command_reason_codes: ranked(commandReasonCounts),
      slow_steps: slowSteps.sort(bySlowestThenCommand).slice(0, 20)
    },
    codex_telemetry: {
      command_execution_completed_count: sums.codex_command_execution_completed_count,
      failed_exit_code_count: sums.codex_failed_exit_code_count
    },
    recommended_fixes: recommendedFixes,
    next_recommended_fix: nextRecommendedFix,
    fix_selection_policy: {
      mode: "coherent_failure_cluster_first",
      rule: "Fix the highest-impact owner subsystem locally with focused proof, then rerun the same prompt once externally.",
      run_failure_weight: 3,
      command_reason_weight: 1
    },
    next_commands: nextRecommendedFix
      ? ["foh bug improve --from external-agent-run --file <run_dir>/run.json --json"]
      : [],
    invalid_runs: selectedCohortId ? [] : loaded.invalid_runs,
    run_paths: records.map((entry) => toRelativePosix(entry.path)).sort()
  };
}
|
|
39864
|
+
/**
 * Build the external-agent run summary, wrap it in a `script_report.v1`
 * report, and optionally write both to disk.
 *
 * The report status is "failed" when the summary lists any invalid runs.
 *
 * @param {object} options - Passed to `summarizeExternalAgentRuns`; `out` and
 *   `report` are output paths resolved against `cwd` (or the process cwd).
 * @returns {{summary: object, report: object}}
 */
function runExternalAgentRunSummary(options) {
  const summary = summarizeExternalAgentRuns(options);
  const invalidRuns = toArray2(summary.invalid_runs);
  const hasNoRuns = Number(summary.run_count || 0) === 0;

  const report = {
    report_schema_version: "script_report.v1",
    script: "foh eval external-agent summary",
    checked_at: new Date().toISOString(),
    status: invalidRuns.length > 0 ? "failed" : "passed",
    errors: invalidRuns.map((invalidRun) => {
      const details = asObject(invalidRun);
      return `${details?.path || "unknown"}: ${JSON.stringify(details?.findings || [])}`;
    }),
    warnings: hasNoRuns ? ["no external-agent run artifacts found"] : [],
    report: summary
  };

  // Writes pretty-printed JSON with a trailing newline, creating parent directories first.
  const writeJsonFile = (target, payload) => {
    const destination = (0, import_path15.resolve)(options.cwd || process.cwd(), target);
    (0, import_fs16.mkdirSync)((0, import_path15.dirname)(destination), { recursive: true });
    (0, import_fs16.writeFileSync)(destination, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
  };

  if (options.out) {
    writeJsonFile(options.out, summary);
  }
  if (options.report) {
    writeJsonFile(options.report, report);
  }
  return { summary, report };
}
|
|
39892
|
+
|
|
39893
|
+
// src/lib/external-agent-executor-classification.ts
|
|
39523
39894
|
/**
 * Report whether `<runDir>/proof.json` exists and records a passing result.
 *
 * A proof passes when `ok` is exactly `true` or `status` is "pass" or
 * "passed". A missing, unreadable, or unparsable file counts as not passing.
 *
 * @param {string} runDir - Directory expected to contain proof.json.
 * @returns {boolean}
 */
function proofArtifactPasses(runDir) {
  const proofFile = (0, import_path16.join)(runDir, "proof.json");
  if ((0, import_fs17.existsSync)(proofFile)) {
    try {
      const proof = JSON.parse((0, import_fs17.readFileSync)(proofFile, "utf8"));
      if (proof.ok === true) return true;
      return proof.status === "pass" || proof.status === "passed";
    } catch {
      // Read or parse failures are treated the same as a missing proof.
      return false;
    }
  }
  return false;
}
|
|
39533
39904
|
/**
 * Read a file as UTF-8 text, or return an empty string when it does not exist.
 *
 * @param {string} path2 - File to read.
 * @returns {string}
 */
function readIfExists(path2) {
  if (!(0, import_fs17.existsSync)(path2)) {
    return "";
  }
  return (0, import_fs17.readFileSync)(path2, "utf8");
}
|
|
39536
39907
|
/**
 * Reduce an artifact path to its file name (the last path segment).
 *
 * @param {string} path2 - Path of an artifact file.
 * @returns {string}
 */
function relativeArtifactName(path2) {
  const fileName = (0, import_path16.basename)(path2);
  return fileName;
}
|
|
39556
39910
|
function classifyExternalAgentRun(input) {
|
|
39557
39911
|
if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
|
|
@@ -39697,13 +40051,13 @@ function buildExecutedExternalAgentRunArtifact(input) {
|
|
|
39697
40051
|
},
|
|
39698
40052
|
artifacts: {
|
|
39699
40053
|
terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
|
|
39700
|
-
command_log: (0,
|
|
39701
|
-
proof_bundle: (0,
|
|
39702
|
-
replay_packet: (0,
|
|
39703
|
-
knowledge_packet: (0,
|
|
40054
|
+
command_log: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
|
|
40055
|
+
proof_bundle: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
|
|
40056
|
+
replay_packet: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
|
|
40057
|
+
knowledge_packet: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
|
|
39704
40058
|
improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
|
|
39705
40059
|
agent_metadata: agentMetadata.path,
|
|
39706
|
-
notes: (0,
|
|
40060
|
+
notes: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
|
|
39707
40061
|
runner_last_message: relativeArtifactName(input.run.outputs.last_message),
|
|
39708
40062
|
runner_stderr: relativeArtifactName(input.run.outputs.stderr),
|
|
39709
40063
|
codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
|
|
@@ -39711,25 +40065,25 @@ function buildExecutedExternalAgentRunArtifact(input) {
|
|
|
39711
40065
|
artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
|
|
39712
40066
|
},
|
|
39713
40067
|
summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
|
|
39714
|
-
next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0,
|
|
40068
|
+
next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path16.dirname)(input.run.run_dir))] : [
|
|
39715
40069
|
"foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
|
|
39716
40070
|
"foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
|
|
39717
|
-
externalAgentSummaryCommand((0,
|
|
40071
|
+
externalAgentSummaryCommand((0, import_path16.dirname)(input.run.run_dir))
|
|
39718
40072
|
]
|
|
39719
40073
|
};
|
|
39720
40074
|
}
|
|
39721
40075
|
|
|
39722
40076
|
// src/lib/external-agent-runner-execution.ts
|
|
39723
40077
|
var import_child_process4 = require("child_process");
|
|
39724
|
-
var
|
|
39725
|
-
var
|
|
40078
|
+
var import_fs18 = require("fs");
|
|
40079
|
+
var import_path17 = require("path");
|
|
39726
40080
|
/**
 * Choose how to launch a runner command.
 *
 * On Windows, when `command` ends in `.cmd` and a known JavaScript entrypoint
 * exists under the shim's directory, the entrypoint is run with the current
 * Node executable instead. Otherwise the command and args are returned as-is.
 *
 * @param {string} command - Runner executable or shim path.
 * @param {string[]} args - Arguments for the runner.
 * @returns {{command: string, args: string[]}}
 */
function buildCommandInvocation(command, args) {
  const isWindowsCmdShim = process.platform === "win32" && command.toLowerCase().endsWith(".cmd");
  if (!isWindowsCmdShim) {
    return { command, args };
  }
  const binDir = (0, import_path17.dirname)(command);
  // Checked in order: the Codex entrypoint wins over Gemini when both exist.
  const knownEntrypoints = [
    { segments: ["node_modules", "@openai", "codex", "bin", "codex.js"], nodeArgs: [] },
    { segments: ["node_modules", "@google", "gemini-cli", "bundle", "gemini.js"], nodeArgs: ["--no-warnings=DEP0040"] }
  ];
  for (const { segments, nodeArgs } of knownEntrypoints) {
    const entrypoint = (0, import_path17.join)(binDir, ...segments);
    if ((0, import_fs18.existsSync)(entrypoint)) {
      return { command: process.execPath, args: [...nodeArgs, entrypoint, ...args] };
    }
  }
  return { command, args };
}
|
|
@@ -39744,8 +40098,8 @@ function spawnExternalAgentRunner(input) {
|
|
|
39744
40098
|
stdio: ["pipe", "pipe", "pipe"],
|
|
39745
40099
|
windowsHide: true
|
|
39746
40100
|
});
|
|
39747
|
-
const stdout = (0,
|
|
39748
|
-
const stderr = (0,
|
|
40101
|
+
const stdout = (0, import_fs18.createWriteStream)(input.stdoutPath, { flags: "w" });
|
|
40102
|
+
const stderr = (0, import_fs18.createWriteStream)(input.stderrPath, { flags: "w" });
|
|
39749
40103
|
child.stdout.pipe(stdout);
|
|
39750
40104
|
child.stderr.pipe(stderr);
|
|
39751
40105
|
child.stdin.end(input.prompt);
|
|
@@ -39857,14 +40211,14 @@ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}
|
|
|
39857
40211
|
};
|
|
39858
40212
|
}
|
|
39859
40213
|
/**
 * Resolve a path to absolute form for comparison; on Windows the result is
 * also lower-cased.
 *
 * @param {string} path2 - Path to normalize.
 * @returns {string}
 */
function normalizeForCompare(path2) {
  const absolute = (0, import_path18.resolve)(path2);
  if (process.platform !== "win32") {
    return absolute;
  }
  return absolute.toLowerCase();
}
|
|
39863
40217
|
/**
 * Report whether `childPath` is `parentPath` itself or lies beneath it.
 *
 * Both paths are normalized with `normalizeForCompare` before comparison.
 *
 * @param {string} childPath - Candidate path.
 * @param {string} parentPath - Directory that may contain the candidate.
 * @returns {boolean}
 */
function isPathInside(childPath, parentPath) {
  const child = normalizeForCompare(childPath);
  const parent = normalizeForCompare(parentPath);
  const rel = (0, import_path18.relative)(parent, child);
  if (rel === "") return true;
  // An absolute result means no relative route exists (e.g. another Windows drive).
  if ((0, import_path18.isAbsolute)(rel)) return false;
  // Only a leading ".." *segment* escapes the parent. A plain startsWith("..")
  // would also reject real children whose name begins with two dots ("..cache").
  return rel !== ".." && !rel.startsWith(`..${import_path18.sep}`);
}
|
|
39869
40223
|
function requireString(value, field) {
|
|
39870
40224
|
if (typeof value !== "string" || value.trim() === "") {
|
|
@@ -39873,10 +40227,10 @@ function requireString(value, field) {
|
|
|
39873
40227
|
return value;
|
|
39874
40228
|
}
|
|
39875
40229
|
function readBatch(batchPath) {
|
|
39876
|
-
if (!(0,
|
|
40230
|
+
if (!(0, import_fs19.existsSync)(batchPath)) {
|
|
39877
40231
|
throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
|
|
39878
40232
|
}
|
|
39879
|
-
const parsed = JSON.parse((0,
|
|
40233
|
+
const parsed = JSON.parse((0, import_fs19.readFileSync)(batchPath, "utf8"));
|
|
39880
40234
|
if (parsed.schema_version !== "external_agent_batch_plan.v1") {
|
|
39881
40235
|
throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
|
|
39882
40236
|
}
|
|
@@ -39913,8 +40267,8 @@ function resolveCodexProbeCommand() {
|
|
|
39913
40267
|
if (process.platform !== "win32") return "codex";
|
|
39914
40268
|
const appData = process.env.APPDATA;
|
|
39915
40269
|
if (appData) {
|
|
39916
|
-
const appDataShim = (0,
|
|
39917
|
-
if ((0,
|
|
40270
|
+
const appDataShim = (0, import_path18.join)(appData, "npm", "codex.cmd");
|
|
40271
|
+
if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
|
|
39918
40272
|
}
|
|
39919
40273
|
return "codex.cmd";
|
|
39920
40274
|
}
|
|
@@ -39925,8 +40279,8 @@ function resolveGeminiProbeCommand() {
|
|
|
39925
40279
|
if (process.platform !== "win32") return "gemini";
|
|
39926
40280
|
const appData = process.env.APPDATA;
|
|
39927
40281
|
if (appData) {
|
|
39928
|
-
const appDataShim = (0,
|
|
39929
|
-
if ((0,
|
|
40282
|
+
const appDataShim = (0, import_path18.join)(appData, "npm", "gemini.cmd");
|
|
40283
|
+
if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
|
|
39930
40284
|
}
|
|
39931
40285
|
return "gemini.cmd";
|
|
39932
40286
|
}
|
|
@@ -40197,34 +40551,34 @@ function safeRunId(value) {
|
|
|
40197
40551
|
return value.toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "run";
|
|
40198
40552
|
}
|
|
40199
40553
|
/**
 * Determine the workspace root for external-agent runs.
 *
 * An explicit `input.workspaceRoot` is resolved and returned. Otherwise the
 * root is placed under the OS temp directory, keyed by the sanitized base
 * names of the private repo root and the batch path.
 *
 * @param {object} input - Reads `workspaceRoot`, `batchPath`, `privateRepoRoot`.
 * @returns {string} Absolute workspace root path.
 */
function resolveWorkspaceRoot(input) {
  if (input.workspaceRoot) {
    return (0, import_path18.resolve)(input.workspaceRoot);
  }
  // Base name of the resolved path, with unsafe character runs collapsed to "-".
  const toSafeStem = (target) => {
    const absolute = (0, import_path18.resolve)(target);
    return (0, import_path18.basename)(absolute).replace(/[^a-zA-Z0-9_.-]+/g, "-");
  };
  const batchStem = toSafeStem(input.batchPath);
  const repoStem = toSafeStem(input.privateRepoRoot);
  const tempRoot = (0, import_os2.tmpdir)();
  return (0, import_path18.resolve)(tempRoot, "foh-external-agent-workspaces", repoStem, batchStem);
}
|
|
40205
40559
|
/**
 * Walk upward from `startPath` and return the first directory that contains
 * a `.git` entry.
 *
 * @param {string} startPath - Directory to start from (resolved to absolute).
 * @returns {string|null} The matching directory, or null once the filesystem
 *   root has been checked without a match.
 */
function findNearestGitRoot(startPath) {
  let candidate = (0, import_path18.resolve)(startPath);
  let parent = candidate;
  do {
    candidate = parent;
    if ((0, import_fs19.existsSync)((0, import_path18.join)(candidate, ".git"))) {
      return candidate;
    }
    parent = (0, import_path18.dirname)(candidate);
    // dirname() of the filesystem root is the root itself, which ends the walk.
  } while (parent !== candidate);
  return null;
}
|
|
40214
40568
|
/**
 * Resolve the private repository root.
 *
 * Order of preference: the explicit root, then the nearest git root above
 * `input.cwd` (or the process cwd), then a sentinel path under that directory.
 *
 * @param {object} input - Reads `explicitPrivateRepoRoot` and `cwd`.
 * @returns {{root: string, explicit: boolean}} `explicit` is true only when
 *   the caller supplied the root.
 */
function resolvePrivateRepoRoot(input) {
  const { explicitPrivateRepoRoot } = input;
  if (explicitPrivateRepoRoot) {
    return { root: (0, import_path18.resolve)(explicitPrivateRepoRoot), explicit: true };
  }
  const startDir = (0, import_path18.resolve)(input.cwd || process.cwd());
  const detectedRoot = findNearestGitRoot(startDir);
  const root = detectedRoot || (0, import_path18.join)(startDir, ".foh-no-private-repo-root-sentinel");
  return { root, explicit: false };
}
|
|
40226
40580
|
/**
 * Infer the prompt version from the text of a prompt file.
 *
 * @param {string} promptPath - Prompt file, read as UTF-8.
 * @returns {string} "blank-setup.v1" when the file contains the marker
 *   sentence, otherwise "unknown".
 */
function promptVersionFromPath(promptPath) {
  const blankSetupMarker = "Do not assume access to the private source repository";
  const promptText = (0, import_fs19.readFileSync)(promptPath, "utf8");
  return promptText.includes(blankSetupMarker) ? "blank-setup.v1" : "unknown";
}
|
|
@@ -40233,7 +40587,7 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
40233
40587
|
if (runner !== "codex" && runner !== "gemini") {
|
|
40234
40588
|
throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
|
|
40235
40589
|
}
|
|
40236
|
-
const batchPath = (0,
|
|
40590
|
+
const batchPath = (0, import_path18.resolve)(options.batchPath);
|
|
40237
40591
|
const batch = readBatch(batchPath);
|
|
40238
40592
|
const runnerProbe = validateRunner(options, runner);
|
|
40239
40593
|
const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
|
|
@@ -40252,17 +40606,17 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
40252
40606
|
`Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
|
|
40253
40607
|
);
|
|
40254
40608
|
}
|
|
40255
|
-
(0,
|
|
40256
|
-
const batchDir = (0,
|
|
40609
|
+
(0, import_fs19.mkdirSync)(workspaceRoot, { recursive: true });
|
|
40610
|
+
const batchDir = (0, import_path18.resolve)(String(batch.batch_dir || (0, import_path18.resolve)(batchPath, "..")));
|
|
40257
40611
|
const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
|
|
40258
40612
|
const runs = batch.runs.map((run) => {
|
|
40259
40613
|
const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
|
|
40260
|
-
const runDir = (0,
|
|
40261
|
-
const promptPath = (0,
|
|
40262
|
-
const workspaceDir = (0,
|
|
40263
|
-
(0,
|
|
40264
|
-
(0,
|
|
40265
|
-
(0,
|
|
40614
|
+
const runDir = (0, import_path18.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
|
|
40615
|
+
const promptPath = (0, import_path18.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
|
|
40616
|
+
const workspaceDir = (0, import_path18.join)(workspaceRoot, runId);
|
|
40617
|
+
(0, import_fs19.mkdirSync)(workspaceDir, { recursive: true });
|
|
40618
|
+
(0, import_fs19.writeFileSync)(
|
|
40619
|
+
(0, import_path18.join)(workspaceDir, "README.md"),
|
|
40266
40620
|
[
|
|
40267
40621
|
"# FOH External-Agent Workspace",
|
|
40268
40622
|
"",
|
|
@@ -40280,11 +40634,11 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
40280
40634
|
});
|
|
40281
40635
|
const promptVersion = String(env[EXTERNAL_AGENT_PROMPT_VERSION_ENV] || "unknown");
|
|
40282
40636
|
const outputStem = runner === "gemini" ? "gemini" : "codex";
|
|
40283
|
-
const jsonlPath = (0,
|
|
40284
|
-
const lastMessagePath = (0,
|
|
40285
|
-
const stderrPath = (0,
|
|
40286
|
-
const runPath = (0,
|
|
40287
|
-
const artifactSafetyPath = (0,
|
|
40637
|
+
const jsonlPath = (0, import_path18.join)(runDir, `${outputStem}-exec.jsonl`);
|
|
40638
|
+
const lastMessagePath = (0, import_path18.join)(runDir, `${outputStem}-last-message.md`);
|
|
40639
|
+
const stderrPath = (0, import_path18.join)(runDir, `${outputStem}-stderr.txt`);
|
|
40640
|
+
const runPath = (0, import_path18.join)(runDir, "run.json");
|
|
40641
|
+
const artifactSafetyPath = (0, import_path18.join)(runDir, "artifact-safety.json");
|
|
40288
40642
|
const args = runner === "gemini" ? [
|
|
40289
40643
|
...runnerProbe.globalArgs,
|
|
40290
40644
|
...runnerProbe.execArgs
|
|
@@ -40375,9 +40729,9 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
40375
40729
|
};
|
|
40376
40730
|
}
|
|
40377
40731
|
/**
 * Write the executor plan to `<plan.batch_dir>/executor-plan.json` as
 * pretty-printed JSON with a trailing newline, creating the directory if needed.
 *
 * @param {object} plan - Executor plan; `batch_dir` selects the output directory.
 * @returns {string} Path of the written plan file.
 */
function writeExternalAgentExecutorPlan(plan) {
  const { batch_dir: batchDir } = plan;
  const planFile = (0, import_path18.join)(batchDir, "executor-plan.json");
  (0, import_fs19.mkdirSync)(batchDir, { recursive: true });
  const serialized = `${JSON.stringify(plan, null, 2)}\n`;
  (0, import_fs19.writeFileSync)(planFile, serialized, "utf8");
  return planFile;
}
|
|
@@ -40392,7 +40746,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40392
40746
|
if (authPreflight && !authPreflight.ok) {
|
|
40393
40747
|
const endedAt2 = (/* @__PURE__ */ new Date()).toISOString();
|
|
40394
40748
|
const blockedResults = plan.runs.map((run) => {
|
|
40395
|
-
(0,
|
|
40749
|
+
(0, import_fs19.mkdirSync)(run.run_dir, { recursive: true });
|
|
40396
40750
|
const runArtifact = buildExecutedExternalAgentRunArtifact({
|
|
40397
40751
|
run,
|
|
40398
40752
|
startedAt,
|
|
@@ -40403,7 +40757,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40403
40757
|
timedOut: false,
|
|
40404
40758
|
durationMs: 0
|
|
40405
40759
|
});
|
|
40406
|
-
(0,
|
|
40760
|
+
(0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
|
|
40407
40761
|
`, "utf8");
|
|
40408
40762
|
return {
|
|
40409
40763
|
run_id: run.run_id,
|
|
@@ -40430,8 +40784,8 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40430
40784
|
}
|
|
40431
40785
|
for (const run of plan.runs) {
|
|
40432
40786
|
const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40433
|
-
const commandCaptureDir = (0,
|
|
40434
|
-
(0,
|
|
40787
|
+
const commandCaptureDir = (0, import_path18.join)(run.workspace_dir, ".foh-capture");
|
|
40788
|
+
(0, import_fs19.mkdirSync)(commandCaptureDir, { recursive: true });
|
|
40435
40789
|
const env = buildCodexExecutorEnv({
|
|
40436
40790
|
sourceEnv: options.env,
|
|
40437
40791
|
runDir: commandCaptureDir,
|
|
@@ -40442,7 +40796,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40442
40796
|
args: run.args,
|
|
40443
40797
|
cwd: run.workspace_dir,
|
|
40444
40798
|
env,
|
|
40445
|
-
prompt: (0,
|
|
40799
|
+
prompt: (0, import_fs19.readFileSync)(run.prompt_path, "utf8"),
|
|
40446
40800
|
stdoutPath: run.outputs.jsonl,
|
|
40447
40801
|
stderrPath: run.outputs.stderr,
|
|
40448
40802
|
timeoutMs: plan.timeout_minutes * 60 * 1e3
|
|
@@ -40455,7 +40809,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40455
40809
|
privateRepoRoot,
|
|
40456
40810
|
writeRedacted: true
|
|
40457
40811
|
});
|
|
40458
|
-
(0,
|
|
40812
|
+
(0, import_fs19.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
|
|
40459
40813
|
`, "utf8");
|
|
40460
40814
|
const runEndedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40461
40815
|
const classification = classifyExternalAgentRun({
|
|
@@ -40474,7 +40828,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40474
40828
|
timedOut: spawned.timedOut,
|
|
40475
40829
|
durationMs: spawned.durationMs
|
|
40476
40830
|
});
|
|
40477
|
-
(0,
|
|
40831
|
+
(0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
|
|
40478
40832
|
`, "utf8");
|
|
40479
40833
|
results.push({
|
|
40480
40834
|
run_id: run.run_id,
|
|
@@ -40523,13 +40877,13 @@ function defaultRunDir(modelName, promptVersion) {
|
|
|
40523
40877
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
|
|
40524
40878
|
const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
|
|
40525
40879
|
const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
|
|
40526
|
-
return (0,
|
|
40880
|
+
return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
|
|
40527
40881
|
}
|
|
40528
40882
|
/**
 * Build the default output directory for a batch plan:
 * `test-results/external-agent-runs/<YYYY-MM-DD>/batch-<prompt>-<timestamp>`,
 * resolved against the process working directory.
 *
 * @param {string} [promptVersion] - Prompt version; falls back to
 *   DEFAULT_PROMPT_VERSION when falsy. Lower-cased, with unsafe character
 *   runs replaced by "-".
 * @returns {string} Absolute directory path.
 */
function defaultBatchDir(promptVersion) {
  // Take one clock sample so the date folder and the timestamp suffix can
  // never disagree when the call straddles midnight (UTC).
  const nowIso = (/* @__PURE__ */ new Date()).toISOString();
  const date4 = nowIso.slice(0, 10);
  const stamp = nowIso.replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
  return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
}
|
|
40534
40888
|
function safeSlug(value) {
|
|
40535
40889
|
return String(value || "unknown").toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "unknown";
|
|
@@ -40543,20 +40897,6 @@ function scanArtifactsCommand(runDir, privateRepoRoot) {
|
|
|
40543
40897
|
const privateRootArg = privateRepoRoot ? ` --private-repo-root ${quoteArg(privateRepoRoot)}` : "";
|
|
40544
40898
|
return `foh eval external-agent scan-artifacts --run-dir ${quoteArg(runDir)}${privateRootArg} --write-redacted --json`;
|
|
40545
40899
|
}
|
|
40546
|
-
function externalAgentSummaryCommand2(root) {
|
|
40547
|
-
const summaryPath = (0, import_path18.join)(root, "latest-summary.json");
|
|
40548
|
-
const reportPath = (0, import_path18.join)(root, "summary.report.json");
|
|
40549
|
-
return [
|
|
40550
|
-
"node",
|
|
40551
|
-
"scripts/summarize-external-agent-runs.mjs",
|
|
40552
|
-
"--root",
|
|
40553
|
-
quoteArg(root),
|
|
40554
|
-
"--out",
|
|
40555
|
-
quoteArg(summaryPath),
|
|
40556
|
-
"--report",
|
|
40557
|
-
quoteArg(reportPath)
|
|
40558
|
-
].join(" ");
|
|
40559
|
-
}
|
|
40560
40900
|
function executorRecoveryCommands(reasonCode, runner) {
|
|
40561
40901
|
const normalizedRunner = String(runner || "codex").trim().toLowerCase();
|
|
40562
40902
|
if (reasonCode === "external_agent_runner_binary_missing") {
|
|
@@ -40659,14 +40999,14 @@ function writePrompt(runDir, promptVersion, context = {}) {
|
|
|
40659
40999
|
replayPromptContext(context.replayFile),
|
|
40660
41000
|
knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
|
|
40661
41001
|
].join("");
|
|
40662
|
-
const path2 = (0,
|
|
40663
|
-
(0,
|
|
41002
|
+
const path2 = (0, import_path19.join)(runDir, "prompt.txt");
|
|
41003
|
+
(0, import_fs20.writeFileSync)(path2, `${prompt}
|
|
40664
41004
|
`, "utf8");
|
|
40665
41005
|
return path2;
|
|
40666
41006
|
}
|
|
40667
41007
|
/**
 * Write the session object to `<runDir>/session.json` as pretty-printed JSON
 * with a trailing newline.
 *
 * @param {string} runDir - Directory that receives session.json.
 * @param {object} session - Session data to serialize.
 * @returns {string} Path of the written file.
 */
function writeSession(runDir, session) {
  const sessionFile = (0, import_path19.join)(runDir, "session.json");
  const serialized = `${JSON.stringify(session, null, 2)}\n`;
  (0, import_fs20.writeFileSync)(sessionFile, serialized, "utf8");
  return sessionFile;
}
|
|
@@ -40742,9 +41082,9 @@ function buildRunArtifact(input) {
|
|
|
40742
41082
|
notes: "notes.md"
|
|
40743
41083
|
},
|
|
40744
41084
|
summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
|
|
40745
|
-
next_commands: status === "pass" ? [
|
|
40746
|
-
`foh bug improve --from external-agent-run --file ${(0,
|
|
40747
|
-
|
|
41085
|
+
next_commands: status === "pass" ? [externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))] : [
|
|
41086
|
+
`foh bug improve --from external-agent-run --file ${(0, import_path19.join)(input.runDir, "run.json")} --out ${(0, import_path19.join)(input.runDir, "improvement-packet.json")} --json`,
|
|
41087
|
+
externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))
|
|
40748
41088
|
]
|
|
40749
41089
|
};
|
|
40750
41090
|
}
|
|
@@ -40753,16 +41093,16 @@ function registerEval(program3) {
|
|
|
40753
41093
|
const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
|
|
40754
41094
|
external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
|
|
40755
41095
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
40756
|
-
const batchDir = (0,
|
|
40757
|
-
const replayFile = opts.replayFile ? (0,
|
|
41096
|
+
const batchDir = (0, import_path19.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
|
|
41097
|
+
const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
|
|
40758
41098
|
const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
|
|
40759
41099
|
const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
|
|
40760
41100
|
const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
|
|
40761
|
-
(0,
|
|
40762
|
-
const
|
|
41101
|
+
(0, import_fs20.mkdirSync)(batchDir, { recursive: true });
|
|
41102
|
+
const runs2 = models.map((model, index) => {
|
|
40763
41103
|
const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
|
|
40764
|
-
const runDir = (0,
|
|
40765
|
-
(0,
|
|
41104
|
+
const runDir = (0, import_path19.join)(batchDir, runId);
|
|
41105
|
+
(0, import_fs20.mkdirSync)(runDir, { recursive: true });
|
|
40766
41106
|
const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
|
|
40767
41107
|
const commandArgs = [
|
|
40768
41108
|
"eval",
|
|
@@ -40805,23 +41145,23 @@ function registerEval(program3) {
|
|
|
40805
41145
|
expected_answer: expectedAnswer ?? null,
|
|
40806
41146
|
workspace_type: String(opts.workspaceType || "clean-no-repo"),
|
|
40807
41147
|
agent_shell: String(opts.agentShell || "vscode-terminal"),
|
|
40808
|
-
run_count:
|
|
40809
|
-
runs,
|
|
40810
|
-
summary_command:
|
|
41148
|
+
run_count: runs2.length,
|
|
41149
|
+
runs: runs2,
|
|
41150
|
+
summary_command: externalAgentSummaryCommand(batchDir)
|
|
40811
41151
|
};
|
|
40812
|
-
const batchPath = (0,
|
|
40813
|
-
(0,
|
|
41152
|
+
const batchPath = (0, import_path19.join)(batchDir, "batch.json");
|
|
41153
|
+
(0, import_fs20.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
|
|
40814
41154
|
`, "utf8");
|
|
40815
41155
|
format(cliEnvelope({
|
|
40816
41156
|
schemaVersion: "external_agent_batch_plan_result.v1",
|
|
40817
41157
|
status: "exported",
|
|
40818
41158
|
reasonCode: "external_agent_batch_plan_created",
|
|
40819
|
-
summary: `External-agent batch plan created for ${
|
|
41159
|
+
summary: `External-agent batch plan created for ${runs2.length} model(s).`,
|
|
40820
41160
|
artifacts: {
|
|
40821
41161
|
batch: batchPath
|
|
40822
41162
|
},
|
|
40823
41163
|
nextCommands: [
|
|
40824
|
-
...
|
|
41164
|
+
...runs2.map((run) => run.launch_command),
|
|
40825
41165
|
batch.summary_command
|
|
40826
41166
|
],
|
|
40827
41167
|
extra: { batch }
|
|
@@ -40830,11 +41170,11 @@ function registerEval(program3) {
|
|
|
40830
41170
|
external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
|
|
40831
41171
|
const status = normalizeStatus(opts.status);
|
|
40832
41172
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
40833
|
-
const runDir = (0,
|
|
40834
|
-
const replayFile = opts.replayFile ? (0,
|
|
41173
|
+
const runDir = (0, import_path19.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
|
|
41174
|
+
const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
|
|
40835
41175
|
const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
|
|
40836
41176
|
const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
|
|
40837
|
-
(0,
|
|
41177
|
+
(0, import_fs20.mkdirSync)(runDir, { recursive: true });
|
|
40838
41178
|
const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
|
|
40839
41179
|
const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
|
|
40840
41180
|
const shell = inferShell(opts.shell);
|
|
@@ -40860,7 +41200,7 @@ function registerEval(program3) {
|
|
|
40860
41200
|
}
|
|
40861
41201
|
};
|
|
40862
41202
|
writeSession(runDir, session);
|
|
40863
|
-
(0,
|
|
41203
|
+
(0, import_fs20.writeFileSync)((0, import_path19.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
|
|
40864
41204
|
let shellExitCode = null;
|
|
40865
41205
|
if (opts.shell !== false) {
|
|
40866
41206
|
process.stdout.write(`
|
|
@@ -40882,8 +41222,8 @@ Exit the shell to finalize run.json.
|
|
|
40882
41222
|
shellExitCode = typeof result.status === "number" ? result.status : null;
|
|
40883
41223
|
}
|
|
40884
41224
|
const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
|
|
40885
|
-
const runPath = (0,
|
|
40886
|
-
(0,
|
|
41225
|
+
const runPath = (0, import_path19.join)(runDir, "run.json");
|
|
41226
|
+
(0, import_fs20.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
|
|
40887
41227
|
`, "utf8");
|
|
40888
41228
|
format(cliEnvelope({
|
|
40889
41229
|
schemaVersion: "external_agent_capture_result.v1",
|
|
@@ -40893,12 +41233,57 @@ Exit the shell to finalize run.json.
|
|
|
40893
41233
|
artifacts: {
|
|
40894
41234
|
run: runPath,
|
|
40895
41235
|
prompt: promptPath,
|
|
40896
|
-
commands: (0,
|
|
41236
|
+
commands: (0, import_path19.join)(runDir, "commands.ndjson")
|
|
40897
41237
|
},
|
|
40898
41238
|
nextCommands: artifact.next_commands,
|
|
40899
41239
|
extra: { run: artifact }
|
|
40900
41240
|
}), { json: Boolean(opts.json) });
|
|
40901
41241
|
});
|
|
41242
|
+
external.command("summary").description("Summarize external_agent_run.v1 artifacts from a clean external-agent run root").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
|
|
41243
|
+
const { summary, report } = runExternalAgentRunSummary({
|
|
41244
|
+
root: String(opts.root),
|
|
41245
|
+
out: opts.out ? String(opts.out) : void 0,
|
|
41246
|
+
report: opts.report ? String(opts.report) : void 0,
|
|
41247
|
+
currentBaselineOnly: Boolean(opts.currentBaselineOnly),
|
|
41248
|
+
cohortId: opts.cohort ? String(opts.cohort) : null
|
|
41249
|
+
});
|
|
41250
|
+
format(cliEnvelope({
|
|
41251
|
+
schemaVersion: "external_agent_run_summary_result.v1",
|
|
41252
|
+
status: report.status === "passed" ? "pass" : "fail",
|
|
41253
|
+
reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
|
|
41254
|
+
summary: `External-agent summary covers ${summary.run_count} run(s).`,
|
|
41255
|
+
artifacts: {
|
|
41256
|
+
summary: opts.out ? String(opts.out) : null,
|
|
41257
|
+
report: opts.report ? String(opts.report) : null
|
|
41258
|
+
},
|
|
41259
|
+
nextCommands: summary.next_commands,
|
|
41260
|
+
extra: { external_agent_summary: summary, report }
|
|
41261
|
+
}), { json: Boolean(opts.json) });
|
|
41262
|
+
if (report.status !== "passed") process.exitCode = 1;
|
|
41263
|
+
});
|
|
41264
|
+
const runs = external.command("runs").description("Compatibility namespace for external-agent run artifact utilities");
|
|
41265
|
+
runs.command("summary").description("Compatibility alias for `foh eval external-agent summary`").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
|
|
41266
|
+
const { summary, report } = runExternalAgentRunSummary({
|
|
41267
|
+
root: String(opts.root),
|
|
41268
|
+
out: opts.out ? String(opts.out) : void 0,
|
|
41269
|
+
report: opts.report ? String(opts.report) : void 0,
|
|
41270
|
+
currentBaselineOnly: Boolean(opts.currentBaselineOnly),
|
|
41271
|
+
cohortId: opts.cohort ? String(opts.cohort) : null
|
|
41272
|
+
});
|
|
41273
|
+
format(cliEnvelope({
|
|
41274
|
+
schemaVersion: "external_agent_run_summary_result.v1",
|
|
41275
|
+
status: report.status === "passed" ? "pass" : "fail",
|
|
41276
|
+
reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
|
|
41277
|
+
summary: `External-agent summary covers ${summary.run_count} run(s).`,
|
|
41278
|
+
artifacts: {
|
|
41279
|
+
summary: opts.out ? String(opts.out) : null,
|
|
41280
|
+
report: opts.report ? String(opts.report) : null
|
|
41281
|
+
},
|
|
41282
|
+
nextCommands: summary.next_commands,
|
|
41283
|
+
extra: { external_agent_summary: summary, report }
|
|
41284
|
+
}), { json: Boolean(opts.json) });
|
|
41285
|
+
if (report.status !== "passed") process.exitCode = 1;
|
|
41286
|
+
});
|
|
40902
41287
|
external.command("scan-artifacts").description("Scan and redact external-agent run artifacts before they are promoted into improvement loops").requiredOption("--run-dir <path>", "External-agent run artifact directory").option("--private-repo-root <path>", "Private repository root that must not appear in artifacts").option("--write-redacted", "Write .redacted copies next to scanned artifacts").option("--json", "Output as JSON").action(async (opts) => {
|
|
40903
41288
|
const report = scanExternalAgentArtifacts({
|
|
40904
41289
|
runDir: String(opts.runDir),
|
|
@@ -40956,8 +41341,8 @@ Exit the shell to finalize run.json.
|
|
|
40956
41341
|
requireExplicitEvalAuth: true,
|
|
40957
41342
|
minimumEvalAuthTtlMs: (plan.timeout_minutes + 5) * 60 * 1e3
|
|
40958
41343
|
});
|
|
40959
|
-
const resultPath = (0,
|
|
40960
|
-
(0,
|
|
41344
|
+
const resultPath = (0, import_path19.join)(plan.batch_dir, "execution-result.json");
|
|
41345
|
+
(0, import_fs20.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
|
|
40961
41346
|
`, "utf8");
|
|
40962
41347
|
format(cliEnvelope({
|
|
40963
41348
|
schemaVersion: "external_agent_execution_result.v1",
|
|
@@ -40974,7 +41359,7 @@ Exit the shell to finalize run.json.
|
|
|
40974
41359
|
plan.runs.find((item) => item.run_id === run.run_id)?.run_dir || ".",
|
|
40975
41360
|
plan.private_repo_root_explicit ? plan.private_repo_root : void 0
|
|
40976
41361
|
)),
|
|
40977
|
-
|
|
41362
|
+
externalAgentSummaryCommand(plan.batch_dir)
|
|
40978
41363
|
],
|
|
40979
41364
|
extra: { result }
|
|
40980
41365
|
}), { json: Boolean(opts.json) });
|