@f-o-h/cli 0.1.70 → 0.1.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/foh.js +589 -204
  2. package/package.json +1 -1
package/dist/foh.js CHANGED
@@ -6046,7 +6046,7 @@ var require_compile = __commonJS({
6046
6046
  const schOrFunc = root.refs[ref];
6047
6047
  if (schOrFunc)
6048
6048
  return schOrFunc;
6049
- let _sch = resolve13.call(this, root, ref);
6049
+ let _sch = resolve14.call(this, root, ref);
6050
6050
  if (_sch === void 0) {
6051
6051
  const schema2 = (_a2 = root.localRefs) === null || _a2 === void 0 ? void 0 : _a2[ref];
6052
6052
  const { schemaId } = this.opts;
@@ -6073,7 +6073,7 @@ var require_compile = __commonJS({
6073
6073
  function sameSchemaEnv(s1, s2) {
6074
6074
  return s1.schema === s2.schema && s1.root === s2.root && s1.baseId === s2.baseId;
6075
6075
  }
6076
- function resolve13(root, ref) {
6076
+ function resolve14(root, ref) {
6077
6077
  let sch;
6078
6078
  while (typeof (sch = this.refs[ref]) == "string")
6079
6079
  ref = sch;
@@ -6648,55 +6648,55 @@ var require_fast_uri = __commonJS({
6648
6648
  }
6649
6649
  return uri;
6650
6650
  }
6651
/**
 * Resolve a relative URI string against a base URI string.
 *
 * Both inputs are parsed with a copy of the caller's options whose default
 * `scheme` is "null"; the resolved components are then serialized with
 * `skipEscape` switched on.
 *
 * @param {string} baseURI - URI to resolve against.
 * @param {string} relativeURI - URI reference to resolve.
 * @param {object} [options] - Parser/serializer options; never mutated.
 * @returns {string} The serialized, resolved URI.
 */
function resolve14(baseURI, relativeURI, options) {
  // Spreading a falsy `options` contributes nothing, matching the original ternary.
  const schemelessOptions = { scheme: "null", ...options };
  const baseComponents = parse3(baseURI, schemelessOptions);
  const relativeComponents = parse3(relativeURI, schemelessOptions);
  const resolved = resolveComponent(baseComponents, relativeComponents, schemelessOptions, true);
  // Only the serialization step runs with skipEscape; parsing above did not see it.
  schemelessOptions.skipEscape = true;
  return serialize(resolved, schemelessOptions);
}
6657
- function resolveComponent(base, relative3, options, skipNormalization) {
6657
+ function resolveComponent(base, relative4, options, skipNormalization) {
6658
6658
  const target = {};
6659
6659
  if (!skipNormalization) {
6660
6660
  base = parse3(serialize(base, options), options);
6661
- relative3 = parse3(serialize(relative3, options), options);
6661
+ relative4 = parse3(serialize(relative4, options), options);
6662
6662
  }
6663
6663
  options = options || {};
6664
- if (!options.tolerant && relative3.scheme) {
6665
- target.scheme = relative3.scheme;
6666
- target.userinfo = relative3.userinfo;
6667
- target.host = relative3.host;
6668
- target.port = relative3.port;
6669
- target.path = removeDotSegments(relative3.path || "");
6670
- target.query = relative3.query;
6664
+ if (!options.tolerant && relative4.scheme) {
6665
+ target.scheme = relative4.scheme;
6666
+ target.userinfo = relative4.userinfo;
6667
+ target.host = relative4.host;
6668
+ target.port = relative4.port;
6669
+ target.path = removeDotSegments(relative4.path || "");
6670
+ target.query = relative4.query;
6671
6671
  } else {
6672
- if (relative3.userinfo !== void 0 || relative3.host !== void 0 || relative3.port !== void 0) {
6673
- target.userinfo = relative3.userinfo;
6674
- target.host = relative3.host;
6675
- target.port = relative3.port;
6676
- target.path = removeDotSegments(relative3.path || "");
6677
- target.query = relative3.query;
6672
+ if (relative4.userinfo !== void 0 || relative4.host !== void 0 || relative4.port !== void 0) {
6673
+ target.userinfo = relative4.userinfo;
6674
+ target.host = relative4.host;
6675
+ target.port = relative4.port;
6676
+ target.path = removeDotSegments(relative4.path || "");
6677
+ target.query = relative4.query;
6678
6678
  } else {
6679
- if (!relative3.path) {
6679
+ if (!relative4.path) {
6680
6680
  target.path = base.path;
6681
- if (relative3.query !== void 0) {
6682
- target.query = relative3.query;
6681
+ if (relative4.query !== void 0) {
6682
+ target.query = relative4.query;
6683
6683
  } else {
6684
6684
  target.query = base.query;
6685
6685
  }
6686
6686
  } else {
6687
- if (relative3.path[0] === "/") {
6688
- target.path = removeDotSegments(relative3.path);
6687
+ if (relative4.path[0] === "/") {
6688
+ target.path = removeDotSegments(relative4.path);
6689
6689
  } else {
6690
6690
  if ((base.userinfo !== void 0 || base.host !== void 0 || base.port !== void 0) && !base.path) {
6691
- target.path = "/" + relative3.path;
6691
+ target.path = "/" + relative4.path;
6692
6692
  } else if (!base.path) {
6693
- target.path = relative3.path;
6693
+ target.path = relative4.path;
6694
6694
  } else {
6695
- target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative3.path;
6695
+ target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative4.path;
6696
6696
  }
6697
6697
  target.path = removeDotSegments(target.path);
6698
6698
  }
6699
- target.query = relative3.query;
6699
+ target.query = relative4.query;
6700
6700
  }
6701
6701
  target.userinfo = base.userinfo;
6702
6702
  target.host = base.host;
@@ -6704,7 +6704,7 @@ var require_fast_uri = __commonJS({
6704
6704
  }
6705
6705
  target.scheme = base.scheme;
6706
6706
  }
6707
- target.fragment = relative3.fragment;
6707
+ target.fragment = relative4.fragment;
6708
6708
  return target;
6709
6709
  }
6710
6710
  function equal(uriA, uriB, options) {
@@ -6875,7 +6875,7 @@ var require_fast_uri = __commonJS({
6875
6875
  var fastUri = {
6876
6876
  SCHEMES,
6877
6877
  normalize,
6878
- resolve: resolve13,
6878
+ resolve: resolve14,
6879
6879
  resolveComponent,
6880
6880
  equal,
6881
6881
  serialize,
@@ -10172,21 +10172,21 @@ async function promptLine(label, {
10172
10172
  allowEmpty = false,
10173
10173
  defaultValue
10174
10174
  } = {}) {
10175
- return await new Promise((resolve13) => {
10175
+ return await new Promise((resolve14) => {
10176
10176
  const suffix = defaultValue ? ` [${defaultValue}]` : "";
10177
10177
  const rl = (0, import_readline.createInterface)({ input: process.stdin, output: process.stdout, terminal: true });
10178
10178
  rl.question(`${label}${suffix}: `, (answer) => {
10179
10179
  rl.close();
10180
10180
  const value = String(answer ?? "").trim();
10181
10181
  if (!value && typeof defaultValue === "string") {
10182
- resolve13(defaultValue);
10182
+ resolve14(defaultValue);
10183
10183
  return;
10184
10184
  }
10185
10185
  if (!value && !allowEmpty) {
10186
- resolve13("");
10186
+ resolve14("");
10187
10187
  return;
10188
10188
  }
10189
- resolve13(value);
10189
+ resolve14(value);
10190
10190
  });
10191
10191
  });
10192
10192
  }
@@ -10194,7 +10194,7 @@ async function promptSecret(label) {
10194
10194
  if (!process.stdin.isTTY || !process.stdout.isTTY || typeof process.stdin.setRawMode !== "function") {
10195
10195
  return await promptLine(label);
10196
10196
  }
10197
- return await new Promise((resolve13) => {
10197
+ return await new Promise((resolve14) => {
10198
10198
  const stdin = process.stdin;
10199
10199
  const stdout = process.stdout;
10200
10200
  const wasRaw = Boolean(stdin.isRaw);
@@ -10208,7 +10208,7 @@ async function promptSecret(label) {
10208
10208
  const finish = () => {
10209
10209
  cleanup();
10210
10210
  stdout.write("\n");
10211
- resolve13(value);
10211
+ resolve14(value);
10212
10212
  };
10213
10213
  const onData = (chunk) => {
10214
10214
  const text = typeof chunk === "string" ? chunk : chunk.toString("utf8");
@@ -10217,7 +10217,7 @@ async function promptSecret(label) {
10217
10217
  cleanup();
10218
10218
  process.exitCode = 130;
10219
10219
  stdout.write("\n");
10220
- return resolve13("");
10220
+ return resolve14("");
10221
10221
  }
10222
10222
  if (char === "\r" || char === "\n") {
10223
10223
  finish();
@@ -10490,7 +10490,7 @@ async function storeAuthenticatedSession(params) {
10490
10490
  return output;
10491
10491
  }
10492
10492
  function sleep(ms) {
10493
- return new Promise((resolve13) => setTimeout(resolve13, ms));
10493
+ return new Promise((resolve14) => setTimeout(resolve14, ms));
10494
10494
  }
10495
10495
  function hasExplicitTimeoutFlag(argv = process.argv) {
10496
10496
  return argv.some((arg) => arg === "--timeout-seconds" || arg.startsWith("--timeout-seconds="));
@@ -11048,7 +11048,7 @@ async function pollUntil(check2, opts) {
11048
11048
  }
11049
11049
  }
11050
11050
  function sleep2(ms) {
11051
- return new Promise((resolve13) => setTimeout(resolve13, ms));
11051
+ return new Promise((resolve14) => setTimeout(resolve14, ms));
11052
11052
  }
11053
11053
 
11054
11054
  // src/commands/compliance.ts
@@ -14172,8 +14172,8 @@ function registerAgentGuardrailCommands(agent) {
14172
14172
  try {
14173
14173
  rule = JSON.parse(opts.rule);
14174
14174
  } catch {
14175
- const { readFileSync: readFileSync16 } = await import("fs");
14176
- rule = JSON.parse(readFileSync16(opts.rule, "utf-8"));
14175
+ const { readFileSync: readFileSync17 } = await import("fs");
14176
+ rule = JSON.parse(readFileSync17(opts.rule, "utf-8"));
14177
14177
  }
14178
14178
  const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
14179
14179
  method: "POST",
@@ -14719,9 +14719,9 @@ function registerAgent(program3) {
14719
14719
  process.stdout.write(yaml);
14720
14720
  return;
14721
14721
  }
14722
- const { writeFileSync: writeFileSync13 } = await import("fs");
14722
+ const { writeFileSync: writeFileSync14 } = await import("fs");
14723
14723
  const outputPath = opts.output ?? "tenant.yaml";
14724
- writeFileSync13(
14724
+ writeFileSync14(
14725
14725
  outputPath,
14726
14726
  `# tenant.yaml - Front Of House agent manifest
14727
14727
  # Edit this file and run: foh plan tenant.yaml
@@ -16173,11 +16173,11 @@ function registerVoice(program3) {
16173
16173
  }
16174
16174
  const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
16175
16175
  const audio = Buffer.from(await res.arrayBuffer());
16176
- const { mkdirSync: mkdirSync8, writeFileSync: writeFileSync13 } = await import("fs");
16177
- const { dirname: dirname11, resolve: resolve13 } = await import("path");
16178
- const absolutePath = resolve13(outputPath);
16179
- mkdirSync8(dirname11(absolutePath), { recursive: true });
16180
- writeFileSync13(absolutePath, audio);
16176
+ const { mkdirSync: mkdirSync9, writeFileSync: writeFileSync14 } = await import("fs");
16177
+ const { dirname: dirname12, resolve: resolve14 } = await import("path");
16178
+ const absolutePath = resolve14(outputPath);
16179
+ mkdirSync9(dirname12(absolutePath), { recursive: true });
16180
+ writeFileSync14(absolutePath, audio);
16181
16181
  format({
16182
16182
  status: "ok",
16183
16183
  provider,
@@ -30668,7 +30668,7 @@ var Protocol = class {
30668
30668
  return;
30669
30669
  }
30670
30670
  const pollInterval = task2.pollInterval ?? this._options?.defaultTaskPollInterval ?? 1e3;
30671
- await new Promise((resolve13) => setTimeout(resolve13, pollInterval));
30671
+ await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
30672
30672
  options?.signal?.throwIfAborted();
30673
30673
  }
30674
30674
  } catch (error2) {
@@ -30685,7 +30685,7 @@ var Protocol = class {
30685
30685
  */
30686
30686
  request(request, resultSchema, options) {
30687
30687
  const { relatedRequestId, resumptionToken, onresumptiontoken, task, relatedTask } = options ?? {};
30688
- return new Promise((resolve13, reject) => {
30688
+ return new Promise((resolve14, reject) => {
30689
30689
  const earlyReject = (error2) => {
30690
30690
  reject(error2);
30691
30691
  };
@@ -30763,7 +30763,7 @@ var Protocol = class {
30763
30763
  if (!parseResult.success) {
30764
30764
  reject(parseResult.error);
30765
30765
  } else {
30766
- resolve13(parseResult.data);
30766
+ resolve14(parseResult.data);
30767
30767
  }
30768
30768
  } catch (error2) {
30769
30769
  reject(error2);
@@ -31024,12 +31024,12 @@ var Protocol = class {
31024
31024
  }
31025
31025
  } catch {
31026
31026
  }
31027
- return new Promise((resolve13, reject) => {
31027
+ return new Promise((resolve14, reject) => {
31028
31028
  if (signal.aborted) {
31029
31029
  reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
31030
31030
  return;
31031
31031
  }
31032
- const timeoutId = setTimeout(resolve13, interval);
31032
+ const timeoutId = setTimeout(resolve14, interval);
31033
31033
  signal.addEventListener("abort", () => {
31034
31034
  clearTimeout(timeoutId);
31035
31035
  reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
@@ -32129,7 +32129,7 @@ var McpServer = class {
32129
32129
  let task = createTaskResult.task;
32130
32130
  const pollInterval = task.pollInterval ?? 5e3;
32131
32131
  while (task.status !== "completed" && task.status !== "failed" && task.status !== "cancelled") {
32132
- await new Promise((resolve13) => setTimeout(resolve13, pollInterval));
32132
+ await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
32133
32133
  const updatedTask = await extra.taskStore.getTask(taskId);
32134
32134
  if (!updatedTask) {
32135
32135
  throw new McpError(ErrorCode.InternalError, `Task ${taskId} not found during polling`);
@@ -32778,19 +32778,19 @@ var StdioServerTransport = class {
32778
32778
  this.onclose?.();
32779
32779
  }
32780
32780
  send(message) {
32781
- return new Promise((resolve13) => {
32781
+ return new Promise((resolve14) => {
32782
32782
  const json3 = serializeMessage(message);
32783
32783
  if (this._stdout.write(json3)) {
32784
- resolve13();
32784
+ resolve14();
32785
32785
  } else {
32786
- this._stdout.once("drain", resolve13);
32786
+ this._stdout.once("drain", resolve14);
32787
32787
  }
32788
32788
  });
32789
32789
  }
32790
32790
  };
32791
32791
 
32792
32792
  // src/lib/cli-version.ts
32793
- var CLI_VERSION = "0.1.70";
32793
+ var CLI_VERSION = "0.1.71";
32794
32794
 
32795
32795
  // src/commands/mcp-serve.ts
32796
32796
  var DEFAULT_TIMEOUT_MS = 12e4;
@@ -32975,7 +32975,7 @@ async function runFohCli(params) {
32975
32975
  effectiveArgv.push("--json");
32976
32976
  }
32977
32977
  const command = `foh ${effectiveArgv.join(" ")}`;
32978
- return await new Promise((resolve13) => {
32978
+ return await new Promise((resolve14) => {
32979
32979
  const child = (0, import_node_child_process.spawn)(process.execPath, [cliEntry, ...effectiveArgv], {
32980
32980
  stdio: ["ignore", "pipe", "pipe"],
32981
32981
  env: {
@@ -33000,7 +33000,7 @@ async function runFohCli(params) {
33000
33000
  });
33001
33001
  child.once("error", (error2) => {
33002
33002
  clearTimeout(timeoutHandle);
33003
- resolve13({
33003
+ resolve14({
33004
33004
  ok: false,
33005
33005
  command,
33006
33006
  argv: effectiveArgv,
@@ -33016,7 +33016,7 @@ async function runFohCli(params) {
33016
33016
  const stderrText = finalizeBoundedText(stderrBuffer);
33017
33017
  const exitCode = Number.isFinite(code ?? NaN) ? Number(code) : 1;
33018
33018
  const stdoutJson = tryParseJson(stdoutText);
33019
- resolve13({
33019
+ resolve14({
33020
33020
  ok: !timedOut && exitCode === 0,
33021
33021
  command,
33022
33022
  argv: effectiveArgv,
@@ -35186,8 +35186,8 @@ function registerSetup(program3) {
35186
35186
  }
35187
35187
  try {
35188
35188
  const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
35189
- const { writeFileSync: writeFileSync13 } = await import("fs");
35190
- writeFileSync13(
35189
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35190
+ writeFileSync14(
35191
35191
  "tenant.yaml",
35192
35192
  `# tenant.yaml - Front Of House agent manifest
35193
35193
  # Edit this file and run: foh plan tenant.yaml
@@ -35357,8 +35357,8 @@ function registerSim(program3) {
35357
35357
  }
35358
35358
  const cert = response.certificate;
35359
35359
  if (opts.out) {
35360
- const { writeFileSync: writeFileSync13 } = await import("fs");
35361
- writeFileSync13(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35360
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35361
+ writeFileSync14(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35362
35362
  process.stderr.write(` Certificate written to ${opts.out}
35363
35363
  `);
35364
35364
  }
@@ -35408,8 +35408,8 @@ function registerSim(program3) {
35408
35408
  });
35409
35409
  }
35410
35410
  if (opts.out) {
35411
- const { writeFileSync: writeFileSync13 } = await import("fs");
35412
- writeFileSync13(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35411
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35412
+ writeFileSync14(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35413
35413
  process.stderr.write(` Final certificate written to ${opts.out}
35414
35414
  `);
35415
35415
  }
@@ -38469,7 +38469,7 @@ async function runSelf(args, apiUrlOverride) {
38469
38469
  if (apiUrlOverride && !spawnArgs.includes("--api-url")) {
38470
38470
  spawnArgs.push("--api-url", apiUrlOverride);
38471
38471
  }
38472
- return await new Promise((resolve13, reject) => {
38472
+ return await new Promise((resolve14, reject) => {
38473
38473
  const child = (0, import_child_process2.spawn)(process.execPath, [process.argv[1], ...spawnArgs], {
38474
38474
  stdio: "inherit",
38475
38475
  env: {
@@ -38479,7 +38479,7 @@ async function runSelf(args, apiUrlOverride) {
38479
38479
  }
38480
38480
  });
38481
38481
  child.once("error", reject);
38482
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38482
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38483
38483
  });
38484
38484
  }
38485
38485
  function shouldUseInteractiveHome(argv) {
@@ -38857,17 +38857,17 @@ function detectUpdateAvailability(currentVersion, cwd = process.cwd()) {
38857
38857
  async function applyRepoUpdate(repoRoot) {
38858
38858
  const scriptPath = (0, import_path9.join)(repoRoot, "scripts", "Install-FohCli.ps1");
38859
38859
  if (process.platform === "win32") {
38860
- return await new Promise((resolve13, reject) => {
38860
+ return await new Promise((resolve14, reject) => {
38861
38861
  const child = (0, import_child_process3.spawn)(
38862
38862
  "powershell",
38863
38863
  ["-ExecutionPolicy", "Bypass", "-File", scriptPath],
38864
38864
  { stdio: "inherit" }
38865
38865
  );
38866
38866
  child.once("error", reject);
38867
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38867
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38868
38868
  });
38869
38869
  }
38870
- return await new Promise((resolve13, reject) => {
38870
+ return await new Promise((resolve14, reject) => {
38871
38871
  const child = (0, import_child_process3.spawn)(
38872
38872
  "corepack",
38873
38873
  ["pnpm", "cli:install:global"],
@@ -38877,7 +38877,7 @@ async function applyRepoUpdate(repoRoot) {
38877
38877
  }
38878
38878
  );
38879
38879
  child.once("error", reject);
38880
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38880
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38881
38881
  });
38882
38882
  }
38883
38883
  function shouldShowUpdateNotice(argv = process.argv) {
@@ -39013,8 +39013,8 @@ function registerUpdate(program3) {
39013
39013
  }
39014
39014
 
39015
39015
  // src/commands/eval.ts
39016
- var import_fs19 = require("fs");
39017
- var import_path18 = require("path");
39016
+ var import_fs20 = require("fs");
39017
+ var import_path19 = require("path");
39018
39018
  var import_child_process6 = require("child_process");
39019
39019
 
39020
39020
  // src/lib/external-agent-artifact-safety.ts
@@ -39349,9 +39349,9 @@ function readCommandRecords(runDir) {
39349
39349
  }
39350
39350
 
39351
39351
  // src/lib/external-agent-executor.ts
39352
- var import_fs18 = require("fs");
39352
+ var import_fs19 = require("fs");
39353
39353
  var import_os2 = require("os");
39354
- var import_path17 = require("path");
39354
+ var import_path18 = require("path");
39355
39355
  var import_child_process5 = require("child_process");
39356
39356
 
39357
39357
  // src/lib/external-agent-executor-env.ts
@@ -39518,40 +39518,394 @@ function copyExternalAgentCommandCaptureArtifacts(input) {
39518
39518
  }
39519
39519
 
39520
39520
  // src/lib/external-agent-executor-classification.ts
39521
+ var import_fs17 = require("fs");
39522
+ var import_path16 = require("path");
39523
+
39524
+ // src/lib/external-agent-run-summary.ts
39521
39525
  var import_fs16 = require("fs");
39522
39526
  var import_path15 = require("path");
39527
// Keys that must be present on a run.json artifact (checked with `in`).
var REQUIRED_RUN_FIELDS = [
  "schema_version", "run_id", "status",
  "model_provider", "model_name", "prompt_version",
  "started_at", "manual_intervention_count",
  "environment", "public_entrypoints",
  "commands_run", "docs_pages_used", "artifacts"
];
// The only values accepted for a run's `status`.
var VALID_STATUSES = new Set(["pass", "hold", "fail"]);
// Matches documentation URLs on frontofhouse.okii.uk; the match stops at
// whitespace, quotes, backticks, closing brackets, commas, semicolons and backslashes.
var DOC_URL_RE = /https:\/\/frontofhouse\.okii\.uk\/[^\s"'`)<>,;\\\]}]*/g;
39544
/**
 * Quote a value so it can be pasted into a POSIX-shell command line.
 *
 * Values made only of characters that are never special to the shell are
 * returned unchanged; everything else (including the empty string) is wrapped
 * in double quotes with the characters that stay special inside double quotes
 * backslash-escaped.
 *
 * @param {*} value - Value to render; coerced with `String`.
 * @returns {string} A single shell word representing `value`.
 */
function quoteShellArg(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
  // Inside double quotes the shell still interprets \ " $ and `. The backslash
  // must be escaped as well, otherwise a trailing or quote-adjacent backslash
  // would escape the closing quote (or un-escape an escaped one).
  const escaped = text.replace(/([\\"$`])/g, "\\$1");
  return `"${escaped}"`;
}
39549
/**
 * Build the `foh eval external-agent summary` command line for a run root.
 *
 * The summary is written to `latest-summary.json` and the report to
 * `summary.report.json`, both directly under `root`.
 *
 * @param {string} root - Directory holding the external-agent runs.
 * @returns {string} Space-joined command with each path shell-quoted.
 */
function externalAgentSummaryCommand(root) {
  const flagValues = [
    ["--root", root],
    ["--out", import_path15.join(root, "latest-summary.json")],
    ["--report", import_path15.join(root, "summary.report.json")]
  ];
  const words = ["foh", "eval", "external-agent", "summary"];
  for (const [flag, value] of flagValues) {
    words.push(flag, quoteShellArg(value));
  }
  words.push("--json");
  return words.join(" ");
}
39566
/**
 * Read a UTF-8 file and parse it as JSON, ignoring a leading byte-order mark.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function readJson(filePath) {
  const raw = import_fs16.readFileSync(filePath, "utf8");
  const withoutBom = raw.replace(/^\uFEFF/, "");
  return JSON.parse(withoutBom);
}
39569
/**
 * Read a newline-delimited JSON file into an array of plain objects.
 *
 * Blank lines, lines that are not valid JSON, and lines whose JSON value is
 * not a non-array object are skipped.
 *
 * @param {string} filePath - File to read.
 * @returns {object[]} Parsed records in file order; empty when the file is missing.
 */
function readNdjson(filePath) {
  if (!import_fs16.existsSync(filePath)) return [];
  const records = [];
  const lines = import_fs16.readFileSync(filePath, "utf8").split(/\r?\n/);
  for (const rawLine of lines) {
    const line = rawLine.trim();
    if (!line) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Malformed lines are dropped rather than failing the whole file.
      continue;
    }
    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
      records.push(parsed);
    }
  }
  return records;
}
39580
/**
 * Narrow a value to a non-array object.
 *
 * @param {*} value - Any value.
 * @returns {object|null} `value` itself when it is a truthy, non-array object; otherwise null.
 */
function asObject(value) {
  if (!value) return null;
  if (typeof value !== "object" || Array.isArray(value)) return null;
  return value;
}
39583
/**
 * Return the value when it is an array, otherwise a new empty array.
 *
 * @param {*} value - Any value.
 * @returns {Array} `value` itself (not a copy) or `[]`.
 */
function toArray2(value) {
  if (Array.isArray(value)) return value;
  return [];
}
39586
/**
 * Add to a counter stored in a Map, creating it on first use.
 *
 * @param {Map<string, number>} map3 - Counter map, mutated in place.
 * @param {*} key - Counter name; falsy keys are counted under "unknown".
 * @param {number} [amount=1] - Amount to add.
 * @returns {void}
 */
function increment(map3, key, amount = 1) {
  const bucket = String(key || "unknown");
  const current = map3.get(bucket) || 0;
  map3.set(bucket, current + amount);
}
39590
/**
 * Turn a counter map into a list ordered by count (highest first), with ties
 * broken by `localeCompare` on the key.
 *
 * @param {Map<string, number>} map3 - Counter map.
 * @returns {{key: string, count: number}[]} Ranked entries.
 */
function ranked(map3) {
  const rows = [];
  for (const [key, count] of map3.entries()) {
    rows.push({ key, count });
  }
  rows.sort((left, right) => {
    const byCount = right.count - left.count;
    if (byCount) return byCount;
    return left.key.localeCompare(right.key);
  });
  return rows;
}
39593
/**
 * Extract the distinct documentation URLs mentioned in a piece of text.
 *
 * Each match of `DOC_URL_RE` has trailing `.`, `?`, `!` and `:` characters
 * removed before de-duplication.
 *
 * @param {*} text - Text to scan; falsy values are treated as empty.
 * @returns {string[]} Unique URLs in sorted order.
 */
function collectDocUrls(text) {
  const matches = String(text || "").match(DOC_URL_RE) || [];
  const unique = new Set();
  for (const match of matches) {
    const url2 = match.replace(/[.?!:]+$/g, "");
    if (url2.startsWith("https://frontofhouse.okii.uk/")) unique.add(url2);
  }
  return Array.from(unique).sort();
}
39596
/**
 * Locate every file named `run.json` beneath a directory, at any depth.
 *
 * @param {string} root - Directory to search.
 * @returns {string[]} Sorted paths (each built by joining onto `root`);
 *   empty when `root` does not exist.
 */
function findRunFiles(root) {
  const found = [];
  const walk = (directory) => {
    const entries = import_fs16.readdirSync(directory, { withFileTypes: true });
    for (const entry of entries) {
      const absolute = import_path15.join(directory, entry.name);
      if (entry.isDirectory()) {
        walk(absolute);
      } else if (entry.isFile() && entry.name === "run.json") {
        found.push(absolute);
      }
    }
  };
  if (import_fs16.existsSync(root)) walk(root);
  // Sorting makes the result independent of traversal order.
  return found.sort();
}
39614
/**
 * Check a parsed run artifact against the `external_agent_run.v1` shape.
 *
 * @param {*} value - Parsed content of a run.json file.
 * @returns {object[]} One finding per problem (each has an `id`); an empty
 *   array means the run passed every check.
 */
function validateExternalAgentRun(value) {
  const run = asObject(value);
  if (!run) {
    return [{ id: "run_not_object", detail: "run artifact must be an object" }];
  }
  const findings = [];
  const report = (id, extra = {}) => {
    findings.push({ id, ...extra });
  };

  REQUIRED_RUN_FIELDS.forEach((field) => {
    if (!(field in run)) report("required_field_missing", { field });
  });

  const expectedSchema = "external_agent_run.v1";
  if (run.schema_version !== expectedSchema) {
    report("schema_version_invalid", { expected: expectedSchema, actual: run.schema_version ?? null });
  }
  if (!VALID_STATUSES.has(String(run.status || ""))) {
    report("status_invalid", { expected: Array.from(VALID_STATUSES), actual: run.status ?? null });
  }

  // Non-passing runs must explain themselves with a non-blank reason code.
  const needsReason = run.status === "hold" || run.status === "fail";
  const reason = String(run.failure_reason_code || "").trim();
  if (needsReason && !reason) report("failure_reason_code_missing");

  const interventions = run.manual_intervention_count;
  if (!Number.isInteger(interventions) || Number(interventions) < 0) {
    report("manual_intervention_count_invalid");
  }

  if (!Array.isArray(run.commands_run)) report("commands_run_invalid");
  if (!Array.isArray(run.docs_pages_used)) report("docs_pages_used_invalid");
  if (!asObject(run.environment)) report("environment_invalid");
  if (!asObject(run.artifacts)) report("artifacts_invalid");
  if (toArray2(run.public_entrypoints).length === 0) report("public_entrypoints_missing");
  return findings;
}
39640
/**
 * Compute the timestamp used to order runs chronologically.
 *
 * `ended_at` is preferred, then `started_at`. A timestamp that is present but
 * cannot be parsed no longer hides the other one: the first candidate that
 * `Date.parse` accepts wins.
 *
 * @param {{ended_at?: *, started_at?: *}} run - Run artifact.
 * @returns {number} Milliseconds since the epoch, or 0 when neither field parses.
 */
function runSortTime(run) {
  for (const candidate of [run.ended_at, run.started_at]) {
    if (!candidate) continue;
    const time3 = Date.parse(String(candidate));
    if (Number.isFinite(time3)) return time3;
  }
  return 0;
}
39645
/**
 * Derive a cohort identifier from where a run.json sits beneath the root.
 *
 * The identifier is the first path segment of the run's directory relative
 * to `root`; when that segment looks like `YYYY-MM-DD` and a second segment
 * exists, the identifier is `<date>/<second segment>`. A run directly in
 * `root` belongs to cohort ".".
 *
 * @param {string} root - Root directory of all runs.
 * @param {string} runPath - Path of the run.json file.
 * @returns {string} Cohort identifier using forward slashes.
 */
function cohortIdForRunPath(root, runPath) {
  const runDir = import_path15.dirname(runPath);
  const segments = import_path15
    .relative(root, runDir)
    .replaceAll("\\", "/")
    .split("/")
    .filter(Boolean);
  if (segments.length === 0) return ".";
  const [first, second] = segments;
  const isDateFolder = /^\d{4}-\d{2}-\d{2}$/.test(first);
  return isDateFolder && second ? `${first}/${second}` : first;
}
39652
/**
 * Load and validate every run.json found under `root`.
 *
 * @param {string} root - Directory searched for run.json files.
 * @param {string} cwd - Base directory used to print paths of invalid runs.
 * @returns {{records: object[], invalid_runs: object[]}} `records` holds the
 *   runs that passed validation (with their path, cohort id and sort time);
 *   `invalid_runs` lists the rejected files together with their findings.
 */
function readRunRecords(root, cwd) {
  const records = [];
  const invalid_runs = [];
  const displayPath = (file) => import_path15.relative(cwd, file).replaceAll("\\", "/");
  for (const file2 of findRunFiles(root)) {
    try {
      const run = readJson(file2);
      const findings = validateExternalAgentRun(run);
      if (findings.length > 0) {
        invalid_runs.push({ path: displayPath(file2), findings });
      } else {
        records.push({
          path: file2,
          run,
          cohort_id: cohortIdForRunPath(root, file2),
          sort_time: runSortTime(run)
        });
      }
    } catch (error2) {
      // Any exception while loading a file is reported under this single id.
      const detail = error2 instanceof Error ? error2.message : String(error2);
      invalid_runs.push({
        path: displayPath(file2),
        findings: [{ id: "json_parse_failed", detail }]
      });
    }
  }
  return { records, invalid_runs };
}
39679
/**
 * Find the cohort of the most recent run record.
 *
 * Records are compared by `sort_time` (largest first); ties go to the record
 * whose `path` orders last under `localeCompare`.
 *
 * @param {{path: string, cohort_id: string, sort_time: number}[]} records - Run records.
 * @returns {string|null} Cohort id of the newest record, or null when there is none.
 */
function latestCohortId(records) {
  const newestFirst = (a, b) => b.sort_time - a.sort_time || b.path.localeCompare(a.path);
  let newest = records[0];
  for (const candidate of records.slice(1)) {
    // Strict comparison keeps the earliest record among exact ties.
    if (newestFirst(candidate, newest) < 0) newest = candidate;
  }
  return newest?.cohort_id ?? null;
}
39682
/**
 * Map a failure reason code to the subsystem that owns it.
 *
 * The lower-cased code is tested against keyword groups in a fixed order and
 * the first group containing a matching substring decides the owner.
 *
 * @param {*} reasonCode - Reason code; falsy values are treated as empty.
 * @returns {string} Owning subsystem, "product_ux" when nothing matches.
 */
function ownerSubsystemFor(reasonCode) {
  const reason = String(reasonCode || "").toLowerCase();
  // Order matters: earlier rows win when a code contains several keywords.
  const routing = [
    ["dojo_certification", ["simulation", "certification", "scenario"]],
    ["voice_contact", ["contact_phone", "voice_contact", "provider_capacity", "byon"]],
    ["infra_runner", ["exec_policy", "policy_blocked", "sandbox", "runner", "codex"]],
    ["api_contract", ["api", "http_4", "http_5", "404", "500", "roundtrip"]],
    ["cli", ["cli", "command", "flag"]],
    ["docs", ["docs", "unclear", "not_found"]],
    ["infra_runner", ["auth", "org", "config"]],
    ["runtime", ["runtime", "widget", "proof"]]
  ];
  for (const [owner, keywords] of routing) {
    if (keywords.some((keyword) => reason.includes(keyword))) return owner;
  }
  return "product_ux";
}
39694
/**
 * Suggest the kind of fix for a failure reason code, based on the subsystem
 * that `ownerSubsystemFor` assigns to it.
 *
 * @param {*} reasonCode - Reason code.
 * @returns {string} Fix category; "fix_config" for owners without a specific one.
 */
function recommendedFixFor(reasonCode) {
  const fixByOwner = new Map([
    ["api_contract", "fix_api"],
    ["cli", "fix_cli"],
    ["docs", "fix_docs"],
    ["runtime", "fix_runtime"],
    ["dojo_certification", "add_test"]
  ]);
  return fixByOwner.get(ownerSubsystemFor(reasonCode)) ?? "fix_config";
}
39703
/**
 * Reduce a command log to one record per command.
 *
 * Records are grouped by `command_id`, or by `recorded_at:command` when no id
 * is set. Within a group the latest record whose phase is "completed" wins;
 * if none is completed, the first record seen is kept. Output follows the
 * order in which each command first appeared.
 *
 * @param {object[]} records - Raw command log records.
 * @returns {object[]} One record per command.
 */
function collapseCommandRecords(records) {
  // A Map keeps keys in first-insertion order, which is the order we report.
  const chosenById = new Map();
  for (const entry of records) {
    const fallbackId = `${entry.recorded_at || ""}:${entry.command || ""}`;
    const id = String(entry.command_id || fallbackId);
    const existing = chosenById.get(id);
    const keep = entry.phase === "completed" ? entry : existing || entry;
    chosenById.set(id, keep);
  }
  return Array.from(chosenById.values()).filter(Boolean);
}
39714
/**
 * Gather statistics from the log files stored next to a run.json.
 *
 * Reads `commands.ndjson` (command timings and reason codes) and
 * `codex-exec.jsonl` (completed command executions and documentation URLs)
 * from the run's directory.
 *
 * @param {string} runPath - Path of the run.json file.
 * @param {object} run - Parsed run artifact; `run_id` and `docs_pages_used` are read.
 * @param {string} cwd - Base directory used to print the run path in slow steps.
 * @returns {object} Command counts and durations, ranked reason codes, the ten
 *   slowest steps, observed documentation pages and codex execution counters.
 */
function analyzeRunArtifacts(runPath, run, cwd) {
  const runDir = import_path15.dirname(runPath);
  const commandLogPath = import_path15.join(runDir, "commands.ndjson");
  const commands = collapseCommandRecords(readNdjson(commandLogPath));

  const reasonCounts = new Map();
  const slowSteps = [];
  let completed = 0;
  let withDuration = 0;
  let totalDuration = 0;
  for (const command of commands) {
    const finished = command.phase === "completed" || command.completed_at;
    if (finished) completed += 1;

    const duration = command.duration_ms;
    if (typeof duration === "number") {
      withDuration += 1;
      totalDuration += duration;
      slowSteps.push({
        run_id: run.run_id,
        run_path: import_path15.relative(cwd, runPath).replaceAll("\\", "/"),
        command: command.command || "",
        duration_ms: duration,
        status: command.status || null,
        reason_code: command.reason_code || null,
        check_reason_codes: Array.isArray(command.check_reason_codes) ? command.check_reason_codes : []
      });
    }

    if (command.reason_code) increment(reasonCounts, command.reason_code);
    toArray2(command.check_reason_codes)
      .filter((reasonCode) => reasonCode)
      .forEach((reasonCode) => increment(reasonCounts, reasonCode));
  }

  const codexDocs = new Set();
  let codexCommandExecutions = 0;
  let codexFailedExitCodes = 0;
  for (const event of readNdjson(import_path15.join(runDir, "codex-exec.jsonl"))) {
    // Use the nested `item` when it is an object, otherwise the event itself.
    const item = asObject(event.item) || event;
    const isCompletedExecution = item.type === "command_execution" && item.status === "completed";
    if (isCompletedExecution) {
      codexCommandExecutions += 1;
      const failed = typeof item.exit_code === "number" && item.exit_code !== 0;
      if (failed) codexFailedExitCodes += 1;
    }
    // URLs are searched for in the serialized form of the whole event.
    collectDocUrls(JSON.stringify(event)).forEach((url2) => codexDocs.add(url2));
  }

  const declaredDocs = toArray2(run.docs_pages_used).map(String);
  const docs = new Set([...declaredDocs, ...codexDocs]);
  const slowestFirst = slowSteps.sort((a, b) => Number(b.duration_ms) - Number(a.duration_ms));

  return {
    command_log_present: import_fs16.existsSync(commandLogPath),
    command_count: commands.length,
    completed_command_count: completed,
    missing_completion_count: Math.max(0, commands.length - completed),
    commands_with_duration_count: withDuration,
    total_command_duration_ms: totalDuration,
    command_reason_codes: ranked(reasonCounts),
    slow_steps: slowestFirst.slice(0, 10),
    docs_pages_observed: Array.from(docs).sort(),
    codex_command_execution_completed_count: codexCommandExecutions,
    codex_failed_exit_code_count: codexFailedExitCodes
  };
}
39772
/**
 * Builds an `external_agent_run_summary.v1` document from the run records
 * found under `options.root`.
 *
 * @param {object} options
 * @param {string} [options.cwd] Base directory; falls back to `process.cwd()`.
 * @param {string} options.root Run-record directory, resolved against `cwd`.
 * @param {string} [options.cohortId] Restrict the summary to this cohort id.
 * @param {boolean} [options.currentBaselineOnly] When no `cohortId` is given,
 *   restrict the summary to whatever `latestCohortId` returns for the records.
 * @returns {object} Status/model/failure tallies, command and codex telemetry,
 *   recommended fixes derived from the ranked failure reasons, and the
 *   cwd-relative (forward-slash) paths of the summarized runs.
 */
function summarizeExternalAgentRuns(options) {
  const cwd = (0, import_path15.resolve)(options.cwd || process.cwd());
  const root = (0, import_path15.resolve)(cwd, options.root);
  const loaded = readRunRecords(root, cwd);

  // Report paths relative to cwd with forward slashes regardless of platform.
  const toPosixRelative = (target) => (0, import_path15.relative)(cwd, target).replaceAll("\\", "/");
  const asCount = (value) => Number(value || 0);

  let selectedCohortId = options.cohortId;
  if (!selectedCohortId) {
    selectedCohortId = options.currentBaselineOnly ? latestCohortId(loaded.records) : null;
  }
  const records = selectedCohortId
    ? loaded.records.filter((candidate) => candidate.cohort_id === selectedCohortId)
    : loaded.records;

  const statusCounts = /* @__PURE__ */ new Map();
  const modelCounts = /* @__PURE__ */ new Map();
  const failureCounts = /* @__PURE__ */ new Map();
  const commandReasonCounts = /* @__PURE__ */ new Map();
  const docsCounts = /* @__PURE__ */ new Map();
  const slowSteps = [];
  const totals = {
    manualInterventions: 0,
    commands: 0,
    completedCommands: 0,
    missingCompletions: 0,
    commandsWithDuration: 0,
    commandDurationMs: 0,
    runsWithCommandLog: 0,
    codexExecutions: 0,
    codexFailedExits: 0
  };

  for (const entry of records) {
    const run = entry.run;
    increment(statusCounts, run.status);
    increment(modelCounts, `${run.model_provider}/${run.model_name}`);
    totals.manualInterventions += asCount(run.manual_intervention_count);
    if (run.status !== "pass") {
      increment(failureCounts, run.failure_reason_code || "unknown");
    }

    const artifacts = analyzeRunArtifacts(entry.path, run, cwd);
    if (artifacts.command_log_present) {
      totals.runsWithCommandLog += 1;
    }
    totals.commands += asCount(artifacts.command_count);
    totals.completedCommands += asCount(artifacts.completed_command_count);
    totals.missingCompletions += asCount(artifacts.missing_completion_count);
    totals.commandsWithDuration += asCount(artifacts.commands_with_duration_count);
    totals.commandDurationMs += asCount(artifacts.total_command_duration_ms);
    totals.codexExecutions += asCount(artifacts.codex_command_execution_completed_count);
    totals.codexFailedExits += asCount(artifacts.codex_failed_exit_code_count);

    for (const step of toArray2(artifacts.slow_steps)) {
      slowSteps.push(step);
    }
    for (const rankedReason of toArray2(artifacts.command_reason_codes)) {
      const reason = asObject(rankedReason);
      if (reason) {
        // Per-run reason codes arrive pre-counted; fold their counts in.
        increment(commandReasonCounts, reason.key, Number(reason.count || 1));
      }
    }
    for (const page of toArray2(artifacts.docs_pages_observed)) {
      increment(docsCounts, page);
    }
  }

  const topFailures = ranked(failureCounts);
  const commandReasonCodes = ranked(commandReasonCounts);
  const recommendedFixes = topFailures.map(({ key, count }) => ({
    reason_code: key,
    count,
    recommended_fix: recommendedFixFor(key),
    owner_subsystem: ownerSubsystemFor(key)
  }));
  const nextRecommendedFix = recommendedFixes[0] || null;

  // Longest steps first; ties are ordered by command text.
  const bySlowestThenCommand = (left, right) => {
    const durationDelta = Number(right.duration_ms || 0) - Number(left.duration_ms || 0);
    return durationDelta || String(left.command || "").localeCompare(String(right.command || ""));
  };

  return {
    schema_version: "external_agent_run_summary.v1",
    generated_at: (/* @__PURE__ */ new Date()).toISOString(),
    root: toPosixRelative(root) || ".",
    cohort_id: selectedCohortId,
    current_baseline_only: Boolean(selectedCohortId),
    run_count: records.length,
    // Invalid runs are only reported for unfiltered (non-cohort) summaries.
    invalid_run_count: selectedCohortId ? 0 : loaded.invalid_runs.length,
    status_counts: Object.fromEntries(statusCounts),
    model_counts: ranked(modelCounts),
    manual_intervention_count: totals.manualInterventions,
    top_failure_reason_codes: topFailures,
    docs_pages_observed: ranked(docsCounts),
    command_telemetry: {
      run_count_with_command_log: totals.runsWithCommandLog,
      command_count: totals.commands,
      completed_command_count: totals.completedCommands,
      missing_completion_count: totals.missingCompletions,
      commands_with_duration_count: totals.commandsWithDuration,
      total_command_duration_ms: totals.commandDurationMs,
      command_reason_codes: commandReasonCodes,
      slow_steps: slowSteps.sort(bySlowestThenCommand).slice(0, 20)
    },
    codex_telemetry: {
      command_execution_completed_count: totals.codexExecutions,
      failed_exit_code_count: totals.codexFailedExits
    },
    recommended_fixes: recommendedFixes,
    next_recommended_fix: nextRecommendedFix,
    fix_selection_policy: {
      mode: "coherent_failure_cluster_first",
      rule: "Fix the highest-impact owner subsystem locally with focused proof, then rerun the same prompt once externally.",
      run_failure_weight: 3,
      command_reason_weight: 1
    },
    next_commands: nextRecommendedFix ? ["foh bug improve --from external-agent-run --file <run_dir>/run.json --json"] : [],
    invalid_runs: selectedCohortId ? [] : loaded.invalid_runs,
    run_paths: records.map((entry) => toPosixRelative(entry.path)).sort()
  };
}
39864
/**
 * Produces the external-agent run summary plus a `script_report.v1` wrapper
 * and optionally persists each as pretty-printed JSON.
 *
 * @param {object} options Same options as `summarizeExternalAgentRuns`, plus:
 * @param {string} [options.out] File to receive the summary JSON.
 * @param {string} [options.report] File to receive the report JSON.
 *   Both paths are resolved against `options.cwd` (or `process.cwd()`), and
 *   parent directories are created as needed.
 * @returns {{summary: object, report: object}}
 */
function runExternalAgentRunSummary(options) {
  const summary = summarizeExternalAgentRuns(options);
  const invalidRuns = toArray2(summary.invalid_runs);

  const describeInvalidRun = (entry) => {
    const details = asObject(entry);
    return `${details?.path || "unknown"}: ${JSON.stringify(details?.findings || [])}`;
  };

  const report = {
    report_schema_version: "script_report.v1",
    script: "foh eval external-agent summary",
    checked_at: (/* @__PURE__ */ new Date()).toISOString(),
    // Any invalid run record fails the report.
    status: invalidRuns.length > 0 ? "failed" : "passed",
    errors: invalidRuns.map(describeInvalidRun),
    warnings: Number(summary.run_count || 0) === 0 ? ["no external-agent run artifacts found"] : [],
    report: summary
  };

  // Serialize `payload` to `target`, creating the parent directory first.
  const writeJsonFile = (target, payload) => {
    const destination = (0, import_path15.resolve)(options.cwd || process.cwd(), target);
    (0, import_fs16.mkdirSync)((0, import_path15.dirname)(destination), { recursive: true });
    (0, import_fs16.writeFileSync)(destination, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
  };

  if (options.out) {
    writeJsonFile(options.out, summary);
  }
  if (options.report) {
    writeJsonFile(options.report, report);
  }
  return { summary, report };
}
39892
+
39893
+ // src/lib/external-agent-executor-classification.ts
39523
39894
  function proofArtifactPasses(runDir) {
39524
- const proofPath = (0, import_path15.join)(runDir, "proof.json");
39525
- if (!(0, import_fs16.existsSync)(proofPath)) return false;
39895
+ const proofPath = (0, import_path16.join)(runDir, "proof.json");
39896
+ if (!(0, import_fs17.existsSync)(proofPath)) return false;
39526
39897
  try {
39527
- const parsed = JSON.parse((0, import_fs16.readFileSync)(proofPath, "utf8"));
39898
+ const parsed = JSON.parse((0, import_fs17.readFileSync)(proofPath, "utf8"));
39528
39899
  return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
39529
39900
  } catch {
39530
39901
  return false;
39531
39902
  }
39532
39903
  }
39533
39904
  function readIfExists(path2) {
39534
- return (0, import_fs16.existsSync)(path2) ? (0, import_fs16.readFileSync)(path2, "utf8") : "";
39905
+ return (0, import_fs17.existsSync)(path2) ? (0, import_fs17.readFileSync)(path2, "utf8") : "";
39535
39906
  }
39536
39907
  function relativeArtifactName(path2) {
39537
- return (0, import_path15.basename)(path2);
39538
- }
39539
- function externalAgentSummaryCommand(root) {
39540
- return [
39541
- "node",
39542
- "scripts/summarize-external-agent-runs.mjs",
39543
- "--root",
39544
- quoteShellArg(root),
39545
- "--out",
39546
- quoteShellArg((0, import_path15.join)(root, "latest-summary.json")),
39547
- "--report",
39548
- quoteShellArg((0, import_path15.join)(root, "summary.report.json"))
39549
- ].join(" ");
39550
- }
39551
- function quoteShellArg(value) {
39552
- const text = String(value);
39553
- if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
39554
- return `"${text.replace(/(["$`])/g, "\\$1")}"`;
39908
+ return (0, import_path16.basename)(path2);
39555
39909
  }
39556
39910
  function classifyExternalAgentRun(input) {
39557
39911
  if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
@@ -39697,13 +40051,13 @@ function buildExecutedExternalAgentRunArtifact(input) {
39697
40051
  },
39698
40052
  artifacts: {
39699
40053
  terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
39700
- command_log: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
39701
- proof_bundle: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
39702
- replay_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
39703
- knowledge_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
40054
+ command_log: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
40055
+ proof_bundle: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
40056
+ replay_packet: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
40057
+ knowledge_packet: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
39704
40058
  improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
39705
40059
  agent_metadata: agentMetadata.path,
39706
- notes: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
40060
+ notes: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
39707
40061
  runner_last_message: relativeArtifactName(input.run.outputs.last_message),
39708
40062
  runner_stderr: relativeArtifactName(input.run.outputs.stderr),
39709
40063
  codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
@@ -39711,25 +40065,25 @@ function buildExecutedExternalAgentRunArtifact(input) {
39711
40065
  artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
39712
40066
  },
39713
40067
  summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
39714
- next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))] : [
40068
+ next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path16.dirname)(input.run.run_dir))] : [
39715
40069
  "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
39716
40070
  "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
39717
- externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))
40071
+ externalAgentSummaryCommand((0, import_path16.dirname)(input.run.run_dir))
39718
40072
  ]
39719
40073
  };
39720
40074
  }
39721
40075
 
39722
40076
  // src/lib/external-agent-runner-execution.ts
39723
40077
  var import_child_process4 = require("child_process");
39724
- var import_fs17 = require("fs");
39725
- var import_path16 = require("path");
40078
+ var import_fs18 = require("fs");
40079
+ var import_path17 = require("path");
39726
40080
  function buildCommandInvocation(command, args) {
39727
40081
  if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
39728
- const binDir = (0, import_path16.dirname)(command);
39729
- const codexEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
39730
- if ((0, import_fs17.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
39731
- const geminiEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
39732
- if ((0, import_fs17.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
40082
+ const binDir = (0, import_path17.dirname)(command);
40083
+ const codexEntrypoint = (0, import_path17.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
40084
+ if ((0, import_fs18.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
40085
+ const geminiEntrypoint = (0, import_path17.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
40086
+ if ((0, import_fs18.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
39733
40087
  }
39734
40088
  return { command, args };
39735
40089
  }
@@ -39744,8 +40098,8 @@ function spawnExternalAgentRunner(input) {
39744
40098
  stdio: ["pipe", "pipe", "pipe"],
39745
40099
  windowsHide: true
39746
40100
  });
39747
- const stdout = (0, import_fs17.createWriteStream)(input.stdoutPath, { flags: "w" });
39748
- const stderr = (0, import_fs17.createWriteStream)(input.stderrPath, { flags: "w" });
40101
+ const stdout = (0, import_fs18.createWriteStream)(input.stdoutPath, { flags: "w" });
40102
+ const stderr = (0, import_fs18.createWriteStream)(input.stderrPath, { flags: "w" });
39749
40103
  child.stdout.pipe(stdout);
39750
40104
  child.stderr.pipe(stderr);
39751
40105
  child.stdin.end(input.prompt);
@@ -39857,14 +40211,14 @@ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}
39857
40211
  };
39858
40212
  }
39859
40213
  function normalizeForCompare(path2) {
39860
- const resolved = (0, import_path17.resolve)(path2);
40214
+ const resolved = (0, import_path18.resolve)(path2);
39861
40215
  return process.platform === "win32" ? resolved.toLowerCase() : resolved;
39862
40216
  }
39863
40217
  function isPathInside(childPath, parentPath) {
39864
40218
  const child = normalizeForCompare(childPath);
39865
40219
  const parent = normalizeForCompare(parentPath);
39866
- const rel = (0, import_path17.relative)(parent, child);
39867
- return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path17.isAbsolute)(rel);
40220
+ const rel = (0, import_path18.relative)(parent, child);
40221
+ return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path18.isAbsolute)(rel);
39868
40222
  }
39869
40223
  function requireString(value, field) {
39870
40224
  if (typeof value !== "string" || value.trim() === "") {
@@ -39873,10 +40227,10 @@ function requireString(value, field) {
39873
40227
  return value;
39874
40228
  }
39875
40229
  function readBatch(batchPath) {
39876
- if (!(0, import_fs18.existsSync)(batchPath)) {
40230
+ if (!(0, import_fs19.existsSync)(batchPath)) {
39877
40231
  throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
39878
40232
  }
39879
- const parsed = JSON.parse((0, import_fs18.readFileSync)(batchPath, "utf8"));
40233
+ const parsed = JSON.parse((0, import_fs19.readFileSync)(batchPath, "utf8"));
39880
40234
  if (parsed.schema_version !== "external_agent_batch_plan.v1") {
39881
40235
  throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
39882
40236
  }
@@ -39913,8 +40267,8 @@ function resolveCodexProbeCommand() {
39913
40267
  if (process.platform !== "win32") return "codex";
39914
40268
  const appData = process.env.APPDATA;
39915
40269
  if (appData) {
39916
- const appDataShim = (0, import_path17.join)(appData, "npm", "codex.cmd");
39917
- if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
40270
+ const appDataShim = (0, import_path18.join)(appData, "npm", "codex.cmd");
40271
+ if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
39918
40272
  }
39919
40273
  return "codex.cmd";
39920
40274
  }
@@ -39925,8 +40279,8 @@ function resolveGeminiProbeCommand() {
39925
40279
  if (process.platform !== "win32") return "gemini";
39926
40280
  const appData = process.env.APPDATA;
39927
40281
  if (appData) {
39928
- const appDataShim = (0, import_path17.join)(appData, "npm", "gemini.cmd");
39929
- if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
40282
+ const appDataShim = (0, import_path18.join)(appData, "npm", "gemini.cmd");
40283
+ if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
39930
40284
  }
39931
40285
  return "gemini.cmd";
39932
40286
  }
@@ -40197,34 +40551,34 @@ function safeRunId(value) {
40197
40551
  return value.toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "run";
40198
40552
  }
40199
40553
  function resolveWorkspaceRoot(input) {
40200
- if (input.workspaceRoot) return (0, import_path17.resolve)(input.workspaceRoot);
40201
- const batchStem = (0, import_path17.basename)((0, import_path17.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40202
- const repoStem = (0, import_path17.basename)((0, import_path17.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40203
- return (0, import_path17.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
40554
+ if (input.workspaceRoot) return (0, import_path18.resolve)(input.workspaceRoot);
40555
+ const batchStem = (0, import_path18.basename)((0, import_path18.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40556
+ const repoStem = (0, import_path18.basename)((0, import_path18.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40557
+ return (0, import_path18.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
40204
40558
  }
40205
40559
  function findNearestGitRoot(startPath) {
40206
- let current = (0, import_path17.resolve)(startPath);
40560
+ let current = (0, import_path18.resolve)(startPath);
40207
40561
  while (true) {
40208
- if ((0, import_fs18.existsSync)((0, import_path17.join)(current, ".git"))) return current;
40209
- const parent = (0, import_path17.dirname)(current);
40562
+ if ((0, import_fs19.existsSync)((0, import_path18.join)(current, ".git"))) return current;
40563
+ const parent = (0, import_path18.dirname)(current);
40210
40564
  if (parent === current) return null;
40211
40565
  current = parent;
40212
40566
  }
40213
40567
  }
40214
40568
  function resolvePrivateRepoRoot(input) {
40215
40569
  if (input.explicitPrivateRepoRoot) {
40216
- return { root: (0, import_path17.resolve)(input.explicitPrivateRepoRoot), explicit: true };
40570
+ return { root: (0, import_path18.resolve)(input.explicitPrivateRepoRoot), explicit: true };
40217
40571
  }
40218
- const cwd = (0, import_path17.resolve)(input.cwd || process.cwd());
40572
+ const cwd = (0, import_path18.resolve)(input.cwd || process.cwd());
40219
40573
  const gitRoot = findNearestGitRoot(cwd);
40220
40574
  if (gitRoot) return { root: gitRoot, explicit: false };
40221
40575
  return {
40222
- root: (0, import_path17.join)(cwd, ".foh-no-private-repo-root-sentinel"),
40576
+ root: (0, import_path18.join)(cwd, ".foh-no-private-repo-root-sentinel"),
40223
40577
  explicit: false
40224
40578
  };
40225
40579
  }
40226
40580
  function promptVersionFromPath(promptPath) {
40227
- const raw = (0, import_fs18.readFileSync)(promptPath, "utf8");
40581
+ const raw = (0, import_fs19.readFileSync)(promptPath, "utf8");
40228
40582
  if (raw.includes("Do not assume access to the private source repository")) return "blank-setup.v1";
40229
40583
  return "unknown";
40230
40584
  }
@@ -40233,7 +40587,7 @@ function createExternalAgentExecutorPlan(options) {
40233
40587
  if (runner !== "codex" && runner !== "gemini") {
40234
40588
  throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
40235
40589
  }
40236
- const batchPath = (0, import_path17.resolve)(options.batchPath);
40590
+ const batchPath = (0, import_path18.resolve)(options.batchPath);
40237
40591
  const batch = readBatch(batchPath);
40238
40592
  const runnerProbe = validateRunner(options, runner);
40239
40593
  const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
@@ -40252,17 +40606,17 @@ function createExternalAgentExecutorPlan(options) {
40252
40606
  `Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
40253
40607
  );
40254
40608
  }
40255
- (0, import_fs18.mkdirSync)(workspaceRoot, { recursive: true });
40256
- const batchDir = (0, import_path17.resolve)(String(batch.batch_dir || (0, import_path17.resolve)(batchPath, "..")));
40609
+ (0, import_fs19.mkdirSync)(workspaceRoot, { recursive: true });
40610
+ const batchDir = (0, import_path18.resolve)(String(batch.batch_dir || (0, import_path18.resolve)(batchPath, "..")));
40257
40611
  const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
40258
40612
  const runs = batch.runs.map((run) => {
40259
40613
  const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
40260
- const runDir = (0, import_path17.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
40261
- const promptPath = (0, import_path17.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
40262
- const workspaceDir = (0, import_path17.join)(workspaceRoot, runId);
40263
- (0, import_fs18.mkdirSync)(workspaceDir, { recursive: true });
40264
- (0, import_fs18.writeFileSync)(
40265
- (0, import_path17.join)(workspaceDir, "README.md"),
40614
+ const runDir = (0, import_path18.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
40615
+ const promptPath = (0, import_path18.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
40616
+ const workspaceDir = (0, import_path18.join)(workspaceRoot, runId);
40617
+ (0, import_fs19.mkdirSync)(workspaceDir, { recursive: true });
40618
+ (0, import_fs19.writeFileSync)(
40619
+ (0, import_path18.join)(workspaceDir, "README.md"),
40266
40620
  [
40267
40621
  "# FOH External-Agent Workspace",
40268
40622
  "",
@@ -40280,11 +40634,11 @@ function createExternalAgentExecutorPlan(options) {
40280
40634
  });
40281
40635
  const promptVersion = String(env[EXTERNAL_AGENT_PROMPT_VERSION_ENV] || "unknown");
40282
40636
  const outputStem = runner === "gemini" ? "gemini" : "codex";
40283
- const jsonlPath = (0, import_path17.join)(runDir, `${outputStem}-exec.jsonl`);
40284
- const lastMessagePath = (0, import_path17.join)(runDir, `${outputStem}-last-message.md`);
40285
- const stderrPath = (0, import_path17.join)(runDir, `${outputStem}-stderr.txt`);
40286
- const runPath = (0, import_path17.join)(runDir, "run.json");
40287
- const artifactSafetyPath = (0, import_path17.join)(runDir, "artifact-safety.json");
40637
+ const jsonlPath = (0, import_path18.join)(runDir, `${outputStem}-exec.jsonl`);
40638
+ const lastMessagePath = (0, import_path18.join)(runDir, `${outputStem}-last-message.md`);
40639
+ const stderrPath = (0, import_path18.join)(runDir, `${outputStem}-stderr.txt`);
40640
+ const runPath = (0, import_path18.join)(runDir, "run.json");
40641
+ const artifactSafetyPath = (0, import_path18.join)(runDir, "artifact-safety.json");
40288
40642
  const args = runner === "gemini" ? [
40289
40643
  ...runnerProbe.globalArgs,
40290
40644
  ...runnerProbe.execArgs
@@ -40375,9 +40729,9 @@ function createExternalAgentExecutorPlan(options) {
40375
40729
  };
40376
40730
  }
40377
40731
  function writeExternalAgentExecutorPlan(plan) {
40378
- const path2 = (0, import_path17.join)(plan.batch_dir, "executor-plan.json");
40379
- (0, import_fs18.mkdirSync)(plan.batch_dir, { recursive: true });
40380
- (0, import_fs18.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40732
+ const path2 = (0, import_path18.join)(plan.batch_dir, "executor-plan.json");
40733
+ (0, import_fs19.mkdirSync)(plan.batch_dir, { recursive: true });
40734
+ (0, import_fs19.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40381
40735
  `, "utf8");
40382
40736
  return path2;
40383
40737
  }
@@ -40392,7 +40746,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40392
40746
  if (authPreflight && !authPreflight.ok) {
40393
40747
  const endedAt2 = (/* @__PURE__ */ new Date()).toISOString();
40394
40748
  const blockedResults = plan.runs.map((run) => {
40395
- (0, import_fs18.mkdirSync)(run.run_dir, { recursive: true });
40749
+ (0, import_fs19.mkdirSync)(run.run_dir, { recursive: true });
40396
40750
  const runArtifact = buildExecutedExternalAgentRunArtifact({
40397
40751
  run,
40398
40752
  startedAt,
@@ -40403,7 +40757,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40403
40757
  timedOut: false,
40404
40758
  durationMs: 0
40405
40759
  });
40406
- (0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40760
+ (0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40407
40761
  `, "utf8");
40408
40762
  return {
40409
40763
  run_id: run.run_id,
@@ -40430,8 +40784,8 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40430
40784
  }
40431
40785
  for (const run of plan.runs) {
40432
40786
  const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
40433
- const commandCaptureDir = (0, import_path17.join)(run.workspace_dir, ".foh-capture");
40434
- (0, import_fs18.mkdirSync)(commandCaptureDir, { recursive: true });
40787
+ const commandCaptureDir = (0, import_path18.join)(run.workspace_dir, ".foh-capture");
40788
+ (0, import_fs19.mkdirSync)(commandCaptureDir, { recursive: true });
40435
40789
  const env = buildCodexExecutorEnv({
40436
40790
  sourceEnv: options.env,
40437
40791
  runDir: commandCaptureDir,
@@ -40442,7 +40796,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40442
40796
  args: run.args,
40443
40797
  cwd: run.workspace_dir,
40444
40798
  env,
40445
- prompt: (0, import_fs18.readFileSync)(run.prompt_path, "utf8"),
40799
+ prompt: (0, import_fs19.readFileSync)(run.prompt_path, "utf8"),
40446
40800
  stdoutPath: run.outputs.jsonl,
40447
40801
  stderrPath: run.outputs.stderr,
40448
40802
  timeoutMs: plan.timeout_minutes * 60 * 1e3
@@ -40455,7 +40809,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40455
40809
  privateRepoRoot,
40456
40810
  writeRedacted: true
40457
40811
  });
40458
- (0, import_fs18.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40812
+ (0, import_fs19.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40459
40813
  `, "utf8");
40460
40814
  const runEndedAt = (/* @__PURE__ */ new Date()).toISOString();
40461
40815
  const classification = classifyExternalAgentRun({
@@ -40474,7 +40828,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40474
40828
  timedOut: spawned.timedOut,
40475
40829
  durationMs: spawned.durationMs
40476
40830
  });
40477
- (0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40831
+ (0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40478
40832
  `, "utf8");
40479
40833
  results.push({
40480
40834
  run_id: run.run_id,
@@ -40523,13 +40877,13 @@ function defaultRunDir(modelName, promptVersion) {
40523
40877
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40524
40878
  const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
40525
40879
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40526
- return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40880
+ return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40527
40881
  }
40528
40882
  function defaultBatchDir(promptVersion) {
40529
40883
  const date4 = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
40530
40884
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40531
40885
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40532
- return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40886
+ return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40533
40887
  }
40534
40888
  function safeSlug(value) {
40535
40889
  return String(value || "unknown").toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "unknown";
@@ -40543,20 +40897,6 @@ function scanArtifactsCommand(runDir, privateRepoRoot) {
40543
40897
  const privateRootArg = privateRepoRoot ? ` --private-repo-root ${quoteArg(privateRepoRoot)}` : "";
40544
40898
  return `foh eval external-agent scan-artifacts --run-dir ${quoteArg(runDir)}${privateRootArg} --write-redacted --json`;
40545
40899
  }
40546
- function externalAgentSummaryCommand2(root) {
40547
- const summaryPath = (0, import_path18.join)(root, "latest-summary.json");
40548
- const reportPath = (0, import_path18.join)(root, "summary.report.json");
40549
- return [
40550
- "node",
40551
- "scripts/summarize-external-agent-runs.mjs",
40552
- "--root",
40553
- quoteArg(root),
40554
- "--out",
40555
- quoteArg(summaryPath),
40556
- "--report",
40557
- quoteArg(reportPath)
40558
- ].join(" ");
40559
- }
40560
40900
  function executorRecoveryCommands(reasonCode, runner) {
40561
40901
  const normalizedRunner = String(runner || "codex").trim().toLowerCase();
40562
40902
  if (reasonCode === "external_agent_runner_binary_missing") {
@@ -40659,14 +40999,14 @@ function writePrompt(runDir, promptVersion, context = {}) {
40659
40999
  replayPromptContext(context.replayFile),
40660
41000
  knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
40661
41001
  ].join("");
40662
- const path2 = (0, import_path18.join)(runDir, "prompt.txt");
40663
- (0, import_fs19.writeFileSync)(path2, `${prompt}
41002
+ const path2 = (0, import_path19.join)(runDir, "prompt.txt");
41003
+ (0, import_fs20.writeFileSync)(path2, `${prompt}
40664
41004
  `, "utf8");
40665
41005
  return path2;
40666
41006
  }
40667
41007
  function writeSession(runDir, session) {
40668
- const path2 = (0, import_path18.join)(runDir, "session.json");
40669
- (0, import_fs19.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
41008
+ const path2 = (0, import_path19.join)(runDir, "session.json");
41009
+ (0, import_fs20.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
40670
41010
  `, "utf8");
40671
41011
  return path2;
40672
41012
  }
@@ -40742,9 +41082,9 @@ function buildRunArtifact(input) {
40742
41082
  notes: "notes.md"
40743
41083
  },
40744
41084
  summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
40745
- next_commands: status === "pass" ? [externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))] : [
40746
- `foh bug improve --from external-agent-run --file ${(0, import_path18.join)(input.runDir, "run.json")} --out ${(0, import_path18.join)(input.runDir, "improvement-packet.json")} --json`,
40747
- externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))
41085
+ next_commands: status === "pass" ? [externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))] : [
41086
+ `foh bug improve --from external-agent-run --file ${(0, import_path19.join)(input.runDir, "run.json")} --out ${(0, import_path19.join)(input.runDir, "improvement-packet.json")} --json`,
41087
+ externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))
40748
41088
  ]
40749
41089
  };
40750
41090
  }
@@ -40753,16 +41093,16 @@ function registerEval(program3) {
40753
41093
  const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
40754
41094
  external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
40755
41095
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40756
- const batchDir = (0, import_path18.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
40757
- const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
41096
+ const batchDir = (0, import_path19.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
41097
+ const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
40758
41098
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40759
41099
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40760
41100
  const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
40761
- (0, import_fs19.mkdirSync)(batchDir, { recursive: true });
40762
- const runs = models.map((model, index) => {
41101
+ (0, import_fs20.mkdirSync)(batchDir, { recursive: true });
41102
+ const runs2 = models.map((model, index) => {
40763
41103
  const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
40764
- const runDir = (0, import_path18.join)(batchDir, runId);
40765
- (0, import_fs19.mkdirSync)(runDir, { recursive: true });
41104
+ const runDir = (0, import_path19.join)(batchDir, runId);
41105
+ (0, import_fs20.mkdirSync)(runDir, { recursive: true });
40766
41106
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40767
41107
  const commandArgs = [
40768
41108
  "eval",
@@ -40805,23 +41145,23 @@ function registerEval(program3) {
40805
41145
  expected_answer: expectedAnswer ?? null,
40806
41146
  workspace_type: String(opts.workspaceType || "clean-no-repo"),
40807
41147
  agent_shell: String(opts.agentShell || "vscode-terminal"),
40808
- run_count: runs.length,
40809
- runs,
40810
- summary_command: externalAgentSummaryCommand2(batchDir)
41148
+ run_count: runs2.length,
41149
+ runs: runs2,
41150
+ summary_command: externalAgentSummaryCommand(batchDir)
40811
41151
  };
40812
- const batchPath = (0, import_path18.join)(batchDir, "batch.json");
40813
- (0, import_fs19.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
41152
+ const batchPath = (0, import_path19.join)(batchDir, "batch.json");
41153
+ (0, import_fs20.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
40814
41154
  `, "utf8");
40815
41155
  format(cliEnvelope({
40816
41156
  schemaVersion: "external_agent_batch_plan_result.v1",
40817
41157
  status: "exported",
40818
41158
  reasonCode: "external_agent_batch_plan_created",
40819
- summary: `External-agent batch plan created for ${runs.length} model(s).`,
41159
+ summary: `External-agent batch plan created for ${runs2.length} model(s).`,
40820
41160
  artifacts: {
40821
41161
  batch: batchPath
40822
41162
  },
40823
41163
  nextCommands: [
40824
- ...runs.map((run) => run.launch_command),
41164
+ ...runs2.map((run) => run.launch_command),
40825
41165
  batch.summary_command
40826
41166
  ],
40827
41167
  extra: { batch }
@@ -40830,11 +41170,11 @@ function registerEval(program3) {
40830
41170
  external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
40831
41171
  const status = normalizeStatus(opts.status);
40832
41172
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40833
- const runDir = (0, import_path18.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
40834
- const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
41173
+ const runDir = (0, import_path19.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
41174
+ const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
40835
41175
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40836
41176
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40837
- (0, import_fs19.mkdirSync)(runDir, { recursive: true });
41177
+ (0, import_fs20.mkdirSync)(runDir, { recursive: true });
40838
41178
  const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
40839
41179
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40840
41180
  const shell = inferShell(opts.shell);
@@ -40860,7 +41200,7 @@ function registerEval(program3) {
40860
41200
  }
40861
41201
  };
40862
41202
  writeSession(runDir, session);
40863
- (0, import_fs19.writeFileSync)((0, import_path18.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
41203
+ (0, import_fs20.writeFileSync)((0, import_path19.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
40864
41204
  let shellExitCode = null;
40865
41205
  if (opts.shell !== false) {
40866
41206
  process.stdout.write(`
@@ -40882,8 +41222,8 @@ Exit the shell to finalize run.json.
40882
41222
  shellExitCode = typeof result.status === "number" ? result.status : null;
40883
41223
  }
40884
41224
  const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
40885
- const runPath = (0, import_path18.join)(runDir, "run.json");
40886
- (0, import_fs19.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
41225
+ const runPath = (0, import_path19.join)(runDir, "run.json");
41226
+ (0, import_fs20.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
40887
41227
  `, "utf8");
40888
41228
  format(cliEnvelope({
40889
41229
  schemaVersion: "external_agent_capture_result.v1",
@@ -40893,12 +41233,57 @@ Exit the shell to finalize run.json.
40893
41233
  artifacts: {
40894
41234
  run: runPath,
40895
41235
  prompt: promptPath,
40896
- commands: (0, import_path18.join)(runDir, "commands.ndjson")
41236
+ commands: (0, import_path19.join)(runDir, "commands.ndjson")
40897
41237
  },
40898
41238
  nextCommands: artifact.next_commands,
40899
41239
  extra: { run: artifact }
40900
41240
  }), { json: Boolean(opts.json) });
40901
41241
  });
41242
+ external.command("summary").description("Summarize external_agent_run.v1 artifacts from a clean external-agent run root").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
41243
+ const { summary, report } = runExternalAgentRunSummary({
41244
+ root: String(opts.root),
41245
+ out: opts.out ? String(opts.out) : void 0,
41246
+ report: opts.report ? String(opts.report) : void 0,
41247
+ currentBaselineOnly: Boolean(opts.currentBaselineOnly),
41248
+ cohortId: opts.cohort ? String(opts.cohort) : null
41249
+ });
41250
+ format(cliEnvelope({
41251
+ schemaVersion: "external_agent_run_summary_result.v1",
41252
+ status: report.status === "passed" ? "pass" : "fail",
41253
+ reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
41254
+ summary: `External-agent summary covers ${summary.run_count} run(s).`,
41255
+ artifacts: {
41256
+ summary: opts.out ? String(opts.out) : null,
41257
+ report: opts.report ? String(opts.report) : null
41258
+ },
41259
+ nextCommands: summary.next_commands,
41260
+ extra: { external_agent_summary: summary, report }
41261
+ }), { json: Boolean(opts.json) });
41262
+ if (report.status !== "passed") process.exitCode = 1;
41263
+ });
41264
+ const runs = external.command("runs").description("Compatibility namespace for external-agent run artifact utilities");
41265
+ runs.command("summary").description("Compatibility alias for `foh eval external-agent summary`").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
41266
+ const { summary, report } = runExternalAgentRunSummary({
41267
+ root: String(opts.root),
41268
+ out: opts.out ? String(opts.out) : void 0,
41269
+ report: opts.report ? String(opts.report) : void 0,
41270
+ currentBaselineOnly: Boolean(opts.currentBaselineOnly),
41271
+ cohortId: opts.cohort ? String(opts.cohort) : null
41272
+ });
41273
+ format(cliEnvelope({
41274
+ schemaVersion: "external_agent_run_summary_result.v1",
41275
+ status: report.status === "passed" ? "pass" : "fail",
41276
+ reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
41277
+ summary: `External-agent summary covers ${summary.run_count} run(s).`,
41278
+ artifacts: {
41279
+ summary: opts.out ? String(opts.out) : null,
41280
+ report: opts.report ? String(opts.report) : null
41281
+ },
41282
+ nextCommands: summary.next_commands,
41283
+ extra: { external_agent_summary: summary, report }
41284
+ }), { json: Boolean(opts.json) });
41285
+ if (report.status !== "passed") process.exitCode = 1;
41286
+ });
40902
41287
  external.command("scan-artifacts").description("Scan and redact external-agent run artifacts before they are promoted into improvement loops").requiredOption("--run-dir <path>", "External-agent run artifact directory").option("--private-repo-root <path>", "Private repository root that must not appear in artifacts").option("--write-redacted", "Write .redacted copies next to scanned artifacts").option("--json", "Output as JSON").action(async (opts) => {
40903
41288
  const report = scanExternalAgentArtifacts({
40904
41289
  runDir: String(opts.runDir),
@@ -40956,8 +41341,8 @@ Exit the shell to finalize run.json.
40956
41341
  requireExplicitEvalAuth: true,
40957
41342
  minimumEvalAuthTtlMs: (plan.timeout_minutes + 5) * 60 * 1e3
40958
41343
  });
40959
- const resultPath = (0, import_path18.join)(plan.batch_dir, "execution-result.json");
40960
- (0, import_fs19.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
41344
+ const resultPath = (0, import_path19.join)(plan.batch_dir, "execution-result.json");
41345
+ (0, import_fs20.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
40961
41346
  `, "utf8");
40962
41347
  format(cliEnvelope({
40963
41348
  schemaVersion: "external_agent_execution_result.v1",
@@ -40974,7 +41359,7 @@ Exit the shell to finalize run.json.
40974
41359
  plan.runs.find((item) => item.run_id === run.run_id)?.run_dir || ".",
40975
41360
  plan.private_repo_root_explicit ? plan.private_repo_root : void 0
40976
41361
  )),
40977
- externalAgentSummaryCommand2(plan.batch_dir)
41362
+ externalAgentSummaryCommand(plan.batch_dir)
40978
41363
  ],
40979
41364
  extra: { result }
40980
41365
  }), { json: Boolean(opts.json) });