@f-o-h/cli 0.1.70 → 0.1.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/foh.js +648 -204
  2. package/package.json +3 -2
package/dist/foh.js CHANGED
@@ -6046,7 +6046,7 @@ var require_compile = __commonJS({
6046
6046
  const schOrFunc = root.refs[ref];
6047
6047
  if (schOrFunc)
6048
6048
  return schOrFunc;
6049
- let _sch = resolve13.call(this, root, ref);
6049
+ let _sch = resolve14.call(this, root, ref);
6050
6050
  if (_sch === void 0) {
6051
6051
  const schema2 = (_a2 = root.localRefs) === null || _a2 === void 0 ? void 0 : _a2[ref];
6052
6052
  const { schemaId } = this.opts;
@@ -6073,7 +6073,7 @@ var require_compile = __commonJS({
6073
6073
  function sameSchemaEnv(s1, s2) {
6074
6074
  return s1.schema === s2.schema && s1.root === s2.root && s1.baseId === s2.baseId;
6075
6075
  }
6076
- function resolve13(root, ref) {
6076
+ function resolve14(root, ref) {
6077
6077
  let sch;
6078
6078
  while (typeof (sch = this.refs[ref]) == "string")
6079
6079
  ref = sch;
@@ -6648,55 +6648,55 @@ var require_fast_uri = __commonJS({
6648
6648
  }
6649
6649
  return uri;
6650
6650
  }
6651
- function resolve13(baseURI, relativeURI, options) {
6651
+ function resolve14(baseURI, relativeURI, options) {
6652
6652
  const schemelessOptions = options ? Object.assign({ scheme: "null" }, options) : { scheme: "null" };
6653
6653
  const resolved = resolveComponent(parse3(baseURI, schemelessOptions), parse3(relativeURI, schemelessOptions), schemelessOptions, true);
6654
6654
  schemelessOptions.skipEscape = true;
6655
6655
  return serialize(resolved, schemelessOptions);
6656
6656
  }
6657
- function resolveComponent(base, relative3, options, skipNormalization) {
6657
+ function resolveComponent(base, relative4, options, skipNormalization) {
6658
6658
  const target = {};
6659
6659
  if (!skipNormalization) {
6660
6660
  base = parse3(serialize(base, options), options);
6661
- relative3 = parse3(serialize(relative3, options), options);
6661
+ relative4 = parse3(serialize(relative4, options), options);
6662
6662
  }
6663
6663
  options = options || {};
6664
- if (!options.tolerant && relative3.scheme) {
6665
- target.scheme = relative3.scheme;
6666
- target.userinfo = relative3.userinfo;
6667
- target.host = relative3.host;
6668
- target.port = relative3.port;
6669
- target.path = removeDotSegments(relative3.path || "");
6670
- target.query = relative3.query;
6664
+ if (!options.tolerant && relative4.scheme) {
6665
+ target.scheme = relative4.scheme;
6666
+ target.userinfo = relative4.userinfo;
6667
+ target.host = relative4.host;
6668
+ target.port = relative4.port;
6669
+ target.path = removeDotSegments(relative4.path || "");
6670
+ target.query = relative4.query;
6671
6671
  } else {
6672
- if (relative3.userinfo !== void 0 || relative3.host !== void 0 || relative3.port !== void 0) {
6673
- target.userinfo = relative3.userinfo;
6674
- target.host = relative3.host;
6675
- target.port = relative3.port;
6676
- target.path = removeDotSegments(relative3.path || "");
6677
- target.query = relative3.query;
6672
+ if (relative4.userinfo !== void 0 || relative4.host !== void 0 || relative4.port !== void 0) {
6673
+ target.userinfo = relative4.userinfo;
6674
+ target.host = relative4.host;
6675
+ target.port = relative4.port;
6676
+ target.path = removeDotSegments(relative4.path || "");
6677
+ target.query = relative4.query;
6678
6678
  } else {
6679
- if (!relative3.path) {
6679
+ if (!relative4.path) {
6680
6680
  target.path = base.path;
6681
- if (relative3.query !== void 0) {
6682
- target.query = relative3.query;
6681
+ if (relative4.query !== void 0) {
6682
+ target.query = relative4.query;
6683
6683
  } else {
6684
6684
  target.query = base.query;
6685
6685
  }
6686
6686
  } else {
6687
- if (relative3.path[0] === "/") {
6688
- target.path = removeDotSegments(relative3.path);
6687
+ if (relative4.path[0] === "/") {
6688
+ target.path = removeDotSegments(relative4.path);
6689
6689
  } else {
6690
6690
  if ((base.userinfo !== void 0 || base.host !== void 0 || base.port !== void 0) && !base.path) {
6691
- target.path = "/" + relative3.path;
6691
+ target.path = "/" + relative4.path;
6692
6692
  } else if (!base.path) {
6693
- target.path = relative3.path;
6693
+ target.path = relative4.path;
6694
6694
  } else {
6695
- target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative3.path;
6695
+ target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative4.path;
6696
6696
  }
6697
6697
  target.path = removeDotSegments(target.path);
6698
6698
  }
6699
- target.query = relative3.query;
6699
+ target.query = relative4.query;
6700
6700
  }
6701
6701
  target.userinfo = base.userinfo;
6702
6702
  target.host = base.host;
@@ -6704,7 +6704,7 @@ var require_fast_uri = __commonJS({
6704
6704
  }
6705
6705
  target.scheme = base.scheme;
6706
6706
  }
6707
- target.fragment = relative3.fragment;
6707
+ target.fragment = relative4.fragment;
6708
6708
  return target;
6709
6709
  }
6710
6710
  function equal(uriA, uriB, options) {
@@ -6875,7 +6875,7 @@ var require_fast_uri = __commonJS({
6875
6875
  var fastUri = {
6876
6876
  SCHEMES,
6877
6877
  normalize,
6878
- resolve: resolve13,
6878
+ resolve: resolve14,
6879
6879
  resolveComponent,
6880
6880
  equal,
6881
6881
  serialize,
@@ -10172,21 +10172,21 @@ async function promptLine(label, {
10172
10172
  allowEmpty = false,
10173
10173
  defaultValue
10174
10174
  } = {}) {
10175
- return await new Promise((resolve13) => {
10175
+ return await new Promise((resolve14) => {
10176
10176
  const suffix = defaultValue ? ` [${defaultValue}]` : "";
10177
10177
  const rl = (0, import_readline.createInterface)({ input: process.stdin, output: process.stdout, terminal: true });
10178
10178
  rl.question(`${label}${suffix}: `, (answer) => {
10179
10179
  rl.close();
10180
10180
  const value = String(answer ?? "").trim();
10181
10181
  if (!value && typeof defaultValue === "string") {
10182
- resolve13(defaultValue);
10182
+ resolve14(defaultValue);
10183
10183
  return;
10184
10184
  }
10185
10185
  if (!value && !allowEmpty) {
10186
- resolve13("");
10186
+ resolve14("");
10187
10187
  return;
10188
10188
  }
10189
- resolve13(value);
10189
+ resolve14(value);
10190
10190
  });
10191
10191
  });
10192
10192
  }
@@ -10194,7 +10194,7 @@ async function promptSecret(label) {
10194
10194
  if (!process.stdin.isTTY || !process.stdout.isTTY || typeof process.stdin.setRawMode !== "function") {
10195
10195
  return await promptLine(label);
10196
10196
  }
10197
- return await new Promise((resolve13) => {
10197
+ return await new Promise((resolve14) => {
10198
10198
  const stdin = process.stdin;
10199
10199
  const stdout = process.stdout;
10200
10200
  const wasRaw = Boolean(stdin.isRaw);
@@ -10208,7 +10208,7 @@ async function promptSecret(label) {
10208
10208
  const finish = () => {
10209
10209
  cleanup();
10210
10210
  stdout.write("\n");
10211
- resolve13(value);
10211
+ resolve14(value);
10212
10212
  };
10213
10213
  const onData = (chunk) => {
10214
10214
  const text = typeof chunk === "string" ? chunk : chunk.toString("utf8");
@@ -10217,7 +10217,7 @@ async function promptSecret(label) {
10217
10217
  cleanup();
10218
10218
  process.exitCode = 130;
10219
10219
  stdout.write("\n");
10220
- return resolve13("");
10220
+ return resolve14("");
10221
10221
  }
10222
10222
  if (char === "\r" || char === "\n") {
10223
10223
  finish();
@@ -10490,7 +10490,7 @@ async function storeAuthenticatedSession(params) {
10490
10490
  return output;
10491
10491
  }
10492
10492
  function sleep(ms) {
10493
- return new Promise((resolve13) => setTimeout(resolve13, ms));
10493
+ return new Promise((resolve14) => setTimeout(resolve14, ms));
10494
10494
  }
10495
10495
  function hasExplicitTimeoutFlag(argv = process.argv) {
10496
10496
  return argv.some((arg) => arg === "--timeout-seconds" || arg.startsWith("--timeout-seconds="));
@@ -11048,7 +11048,7 @@ async function pollUntil(check2, opts) {
11048
11048
  }
11049
11049
  }
11050
11050
  function sleep2(ms) {
11051
- return new Promise((resolve13) => setTimeout(resolve13, ms));
11051
+ return new Promise((resolve14) => setTimeout(resolve14, ms));
11052
11052
  }
11053
11053
 
11054
11054
  // src/commands/compliance.ts
@@ -14172,8 +14172,8 @@ function registerAgentGuardrailCommands(agent) {
14172
14172
  try {
14173
14173
  rule = JSON.parse(opts.rule);
14174
14174
  } catch {
14175
- const { readFileSync: readFileSync16 } = await import("fs");
14176
- rule = JSON.parse(readFileSync16(opts.rule, "utf-8"));
14175
+ const { readFileSync: readFileSync17 } = await import("fs");
14176
+ rule = JSON.parse(readFileSync17(opts.rule, "utf-8"));
14177
14177
  }
14178
14178
  const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
14179
14179
  method: "POST",
@@ -14719,9 +14719,9 @@ function registerAgent(program3) {
14719
14719
  process.stdout.write(yaml);
14720
14720
  return;
14721
14721
  }
14722
- const { writeFileSync: writeFileSync13 } = await import("fs");
14722
+ const { writeFileSync: writeFileSync14 } = await import("fs");
14723
14723
  const outputPath = opts.output ?? "tenant.yaml";
14724
- writeFileSync13(
14724
+ writeFileSync14(
14725
14725
  outputPath,
14726
14726
  `# tenant.yaml - Front Of House agent manifest
14727
14727
  # Edit this file and run: foh plan tenant.yaml
@@ -16173,11 +16173,11 @@ function registerVoice(program3) {
16173
16173
  }
16174
16174
  const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
16175
16175
  const audio = Buffer.from(await res.arrayBuffer());
16176
- const { mkdirSync: mkdirSync8, writeFileSync: writeFileSync13 } = await import("fs");
16177
- const { dirname: dirname11, resolve: resolve13 } = await import("path");
16178
- const absolutePath = resolve13(outputPath);
16179
- mkdirSync8(dirname11(absolutePath), { recursive: true });
16180
- writeFileSync13(absolutePath, audio);
16176
+ const { mkdirSync: mkdirSync9, writeFileSync: writeFileSync14 } = await import("fs");
16177
+ const { dirname: dirname12, resolve: resolve14 } = await import("path");
16178
+ const absolutePath = resolve14(outputPath);
16179
+ mkdirSync9(dirname12(absolutePath), { recursive: true });
16180
+ writeFileSync14(absolutePath, audio);
16181
16181
  format({
16182
16182
  status: "ok",
16183
16183
  provider,
@@ -30668,7 +30668,7 @@ var Protocol = class {
30668
30668
  return;
30669
30669
  }
30670
30670
  const pollInterval = task2.pollInterval ?? this._options?.defaultTaskPollInterval ?? 1e3;
30671
- await new Promise((resolve13) => setTimeout(resolve13, pollInterval));
30671
+ await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
30672
30672
  options?.signal?.throwIfAborted();
30673
30673
  }
30674
30674
  } catch (error2) {
@@ -30685,7 +30685,7 @@ var Protocol = class {
30685
30685
  */
30686
30686
  request(request, resultSchema, options) {
30687
30687
  const { relatedRequestId, resumptionToken, onresumptiontoken, task, relatedTask } = options ?? {};
30688
- return new Promise((resolve13, reject) => {
30688
+ return new Promise((resolve14, reject) => {
30689
30689
  const earlyReject = (error2) => {
30690
30690
  reject(error2);
30691
30691
  };
@@ -30763,7 +30763,7 @@ var Protocol = class {
30763
30763
  if (!parseResult.success) {
30764
30764
  reject(parseResult.error);
30765
30765
  } else {
30766
- resolve13(parseResult.data);
30766
+ resolve14(parseResult.data);
30767
30767
  }
30768
30768
  } catch (error2) {
30769
30769
  reject(error2);
@@ -31024,12 +31024,12 @@ var Protocol = class {
31024
31024
  }
31025
31025
  } catch {
31026
31026
  }
31027
- return new Promise((resolve13, reject) => {
31027
+ return new Promise((resolve14, reject) => {
31028
31028
  if (signal.aborted) {
31029
31029
  reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
31030
31030
  return;
31031
31031
  }
31032
- const timeoutId = setTimeout(resolve13, interval);
31032
+ const timeoutId = setTimeout(resolve14, interval);
31033
31033
  signal.addEventListener("abort", () => {
31034
31034
  clearTimeout(timeoutId);
31035
31035
  reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
@@ -32129,7 +32129,7 @@ var McpServer = class {
32129
32129
  let task = createTaskResult.task;
32130
32130
  const pollInterval = task.pollInterval ?? 5e3;
32131
32131
  while (task.status !== "completed" && task.status !== "failed" && task.status !== "cancelled") {
32132
- await new Promise((resolve13) => setTimeout(resolve13, pollInterval));
32132
+ await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
32133
32133
  const updatedTask = await extra.taskStore.getTask(taskId);
32134
32134
  if (!updatedTask) {
32135
32135
  throw new McpError(ErrorCode.InternalError, `Task ${taskId} not found during polling`);
@@ -32778,19 +32778,19 @@ var StdioServerTransport = class {
32778
32778
  this.onclose?.();
32779
32779
  }
32780
32780
  send(message) {
32781
- return new Promise((resolve13) => {
32781
+ return new Promise((resolve14) => {
32782
32782
  const json3 = serializeMessage(message);
32783
32783
  if (this._stdout.write(json3)) {
32784
- resolve13();
32784
+ resolve14();
32785
32785
  } else {
32786
- this._stdout.once("drain", resolve13);
32786
+ this._stdout.once("drain", resolve14);
32787
32787
  }
32788
32788
  });
32789
32789
  }
32790
32790
  };
32791
32791
 
32792
32792
  // src/lib/cli-version.ts
32793
- var CLI_VERSION = "0.1.70";
32793
+ var CLI_VERSION = "0.1.72";
32794
32794
 
32795
32795
  // src/commands/mcp-serve.ts
32796
32796
  var DEFAULT_TIMEOUT_MS = 12e4;
@@ -32975,7 +32975,7 @@ async function runFohCli(params) {
32975
32975
  effectiveArgv.push("--json");
32976
32976
  }
32977
32977
  const command = `foh ${effectiveArgv.join(" ")}`;
32978
- return await new Promise((resolve13) => {
32978
+ return await new Promise((resolve14) => {
32979
32979
  const child = (0, import_node_child_process.spawn)(process.execPath, [cliEntry, ...effectiveArgv], {
32980
32980
  stdio: ["ignore", "pipe", "pipe"],
32981
32981
  env: {
@@ -33000,7 +33000,7 @@ async function runFohCli(params) {
33000
33000
  });
33001
33001
  child.once("error", (error2) => {
33002
33002
  clearTimeout(timeoutHandle);
33003
- resolve13({
33003
+ resolve14({
33004
33004
  ok: false,
33005
33005
  command,
33006
33006
  argv: effectiveArgv,
@@ -33016,7 +33016,7 @@ async function runFohCli(params) {
33016
33016
  const stderrText = finalizeBoundedText(stderrBuffer);
33017
33017
  const exitCode = Number.isFinite(code ?? NaN) ? Number(code) : 1;
33018
33018
  const stdoutJson = tryParseJson(stdoutText);
33019
- resolve13({
33019
+ resolve14({
33020
33020
  ok: !timedOut && exitCode === 0,
33021
33021
  command,
33022
33022
  argv: effectiveArgv,
@@ -35186,8 +35186,8 @@ function registerSetup(program3) {
35186
35186
  }
35187
35187
  try {
35188
35188
  const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
35189
- const { writeFileSync: writeFileSync13 } = await import("fs");
35190
- writeFileSync13(
35189
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35190
+ writeFileSync14(
35191
35191
  "tenant.yaml",
35192
35192
  `# tenant.yaml - Front Of House agent manifest
35193
35193
  # Edit this file and run: foh plan tenant.yaml
@@ -35357,8 +35357,8 @@ function registerSim(program3) {
35357
35357
  }
35358
35358
  const cert = response.certificate;
35359
35359
  if (opts.out) {
35360
- const { writeFileSync: writeFileSync13 } = await import("fs");
35361
- writeFileSync13(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35360
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35361
+ writeFileSync14(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35362
35362
  process.stderr.write(` Certificate written to ${opts.out}
35363
35363
  `);
35364
35364
  }
@@ -35408,8 +35408,8 @@ function registerSim(program3) {
35408
35408
  });
35409
35409
  }
35410
35410
  if (opts.out) {
35411
- const { writeFileSync: writeFileSync13 } = await import("fs");
35412
- writeFileSync13(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35411
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35412
+ writeFileSync14(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35413
35413
  process.stderr.write(` Final certificate written to ${opts.out}
35414
35414
  `);
35415
35415
  }
@@ -38469,7 +38469,7 @@ async function runSelf(args, apiUrlOverride) {
38469
38469
  if (apiUrlOverride && !spawnArgs.includes("--api-url")) {
38470
38470
  spawnArgs.push("--api-url", apiUrlOverride);
38471
38471
  }
38472
- return await new Promise((resolve13, reject) => {
38472
+ return await new Promise((resolve14, reject) => {
38473
38473
  const child = (0, import_child_process2.spawn)(process.execPath, [process.argv[1], ...spawnArgs], {
38474
38474
  stdio: "inherit",
38475
38475
  env: {
@@ -38479,7 +38479,7 @@ async function runSelf(args, apiUrlOverride) {
38479
38479
  }
38480
38480
  });
38481
38481
  child.once("error", reject);
38482
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38482
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38483
38483
  });
38484
38484
  }
38485
38485
  function shouldUseInteractiveHome(argv) {
@@ -38857,17 +38857,17 @@ function detectUpdateAvailability(currentVersion, cwd = process.cwd()) {
38857
38857
  async function applyRepoUpdate(repoRoot) {
38858
38858
  const scriptPath = (0, import_path9.join)(repoRoot, "scripts", "Install-FohCli.ps1");
38859
38859
  if (process.platform === "win32") {
38860
- return await new Promise((resolve13, reject) => {
38860
+ return await new Promise((resolve14, reject) => {
38861
38861
  const child = (0, import_child_process3.spawn)(
38862
38862
  "powershell",
38863
38863
  ["-ExecutionPolicy", "Bypass", "-File", scriptPath],
38864
38864
  { stdio: "inherit" }
38865
38865
  );
38866
38866
  child.once("error", reject);
38867
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38867
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38868
38868
  });
38869
38869
  }
38870
- return await new Promise((resolve13, reject) => {
38870
+ return await new Promise((resolve14, reject) => {
38871
38871
  const child = (0, import_child_process3.spawn)(
38872
38872
  "corepack",
38873
38873
  ["pnpm", "cli:install:global"],
@@ -38877,7 +38877,7 @@ async function applyRepoUpdate(repoRoot) {
38877
38877
  }
38878
38878
  );
38879
38879
  child.once("error", reject);
38880
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38880
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38881
38881
  });
38882
38882
  }
38883
38883
  function shouldShowUpdateNotice(argv = process.argv) {
@@ -39013,8 +39013,8 @@ function registerUpdate(program3) {
39013
39013
  }
39014
39014
 
39015
39015
  // src/commands/eval.ts
39016
- var import_fs19 = require("fs");
39017
- var import_path18 = require("path");
39016
+ var import_fs20 = require("fs");
39017
+ var import_path19 = require("path");
39018
39018
  var import_child_process6 = require("child_process");
39019
39019
 
39020
39020
  // src/lib/external-agent-artifact-safety.ts
@@ -39349,9 +39349,9 @@ function readCommandRecords(runDir) {
39349
39349
  }
39350
39350
 
39351
39351
  // src/lib/external-agent-executor.ts
39352
- var import_fs18 = require("fs");
39352
+ var import_fs19 = require("fs");
39353
39353
  var import_os2 = require("os");
39354
- var import_path17 = require("path");
39354
+ var import_path18 = require("path");
39355
39355
  var import_child_process5 = require("child_process");
39356
39356
 
39357
39357
  // src/lib/external-agent-executor-env.ts
@@ -39518,40 +39518,453 @@ function copyExternalAgentCommandCaptureArtifacts(input) {
39518
39518
  }
39519
39519
 
39520
39520
  // src/lib/external-agent-executor-classification.ts
39521
+ var import_fs17 = require("fs");
39522
+ var import_path16 = require("path");
39523
+
39524
+ // src/lib/external-agent-run-summary.ts
39521
39525
  var import_fs16 = require("fs");
39522
39526
  var import_path15 = require("path");
39527
// Top-level keys that must be present on an `external_agent_run.v1` artifact.
var REQUIRED_RUN_FIELDS = [
  // identity and outcome
  "schema_version", "run_id", "status",
  // model and prompt provenance
  "model_provider", "model_name", "prompt_version",
  // timing and operator involvement
  "started_at", "manual_intervention_count",
  // context and evidence
  "environment", "public_entrypoints", "commands_run", "docs_pages_used", "artifacts"
];

// The only values accepted for a run's `status`.
var VALID_STATUSES = /* @__PURE__ */ new Set(["pass", "hold", "fail"]);

// Global matcher for documentation URLs on the frontofhouse.okii.uk host. The
// match stops at whitespace, quotes, backticks, closing brackets/parens, angle
// brackets, commas, semicolons and backslashes.
var DOC_URL_RE = /https:\/\/frontofhouse\.okii\.uk\/[^\s"'`)<>,;\\\]}]*/g;
39544
/**
 * Quote one value so it can be embedded in a POSIX-style shell command line.
 *
 * Values made up only of `[A-Za-z0-9_./:=@-]` are returned untouched. Anything
 * else (including the empty string) is wrapped in double quotes, with every
 * character that stays special inside double quotes backslash-escaped.
 *
 * @param {unknown} value - Value to quote; coerced with `String()`.
 * @returns {string} The bare or double-quoted argument.
 */
function quoteShellArg(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
  // Backslash has to be escaped along with `"`, `$` and backtick: otherwise a
  // trailing `\` (e.g. a Windows-style directory path) escapes the closing
  // quote, and an embedded `\$` would be re-read as an escaped `$`.
  const escaped = text.replace(/["$`\\]/g, "\\$&");
  return `"${escaped}"`;
}
39549
/**
 * Build the `foh eval external-agent summary` command line for a runs root.
 *
 * The summary is written to `<root>/latest-summary.json` and the report to
 * `<root>/summary.report.json`; each path argument goes through
 * `quoteShellArg`.
 *
 * @param {string} root - Directory holding the external-agent run artifacts.
 * @returns {string} Space-joined command string.
 */
function externalAgentSummaryCommand(root) {
  const summaryPath = (0, import_path15.join)(root, "latest-summary.json");
  const reportPath = (0, import_path15.join)(root, "summary.report.json");
  const argv = ["foh", "eval", "external-agent", "summary"];
  argv.push("--root", quoteShellArg(root));
  argv.push("--out", quoteShellArg(summaryPath));
  argv.push("--report", quoteShellArg(reportPath));
  argv.push("--json");
  return argv.join(" ");
}
39566
/**
 * Read a UTF-8 file and parse it as JSON, ignoring a leading byte-order mark.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
function readJson(filePath) {
  const raw = (0, import_fs16.readFileSync)(filePath, "utf8");
  const withoutBom = raw.replace(/^\uFEFF/, "");
  return JSON.parse(withoutBom);
}
39569
/**
 * Read a newline-delimited JSON file and return the object records it holds.
 *
 * A missing file yields an empty list. Blank lines, lines that fail to parse,
 * and lines whose value is not a plain (non-array) object are dropped.
 *
 * @param {string} filePath - NDJSON file to read.
 * @returns {object[]} Parsed object records, in file order.
 */
function readNdjson(filePath) {
  if (!(0, import_fs16.existsSync)(filePath)) return [];
  const records = [];
  const lines = (0, import_fs16.readFileSync)(filePath, "utf8").split(/\r?\n/);
  for (const rawLine of lines) {
    const line = rawLine.trim();
    if (!line) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Malformed lines are skipped rather than failing the whole file.
      continue;
    }
    const isRecord = parsed && typeof parsed === "object" && !Array.isArray(parsed);
    if (isRecord) records.push(parsed);
  }
  return records;
}
39580
/**
 * Narrow a value to a non-array object.
 *
 * @param {*} value - Anything.
 * @returns {object|null} `value` when it is a non-null, non-array object; otherwise `null`.
 */
function asObject(value) {
  if (value === null || typeof value !== "object") return null;
  return Array.isArray(value) ? null : value;
}
39583
/**
 * Return `value` unchanged when it is an array, otherwise a new empty array.
 *
 * @param {*} value - Anything.
 * @returns {Array} The same array instance, or `[]`.
 */
function toArray2(value) {
  if (Array.isArray(value)) return value;
  return [];
}
39586
/**
 * Add `amount` to the counter stored under `key` in a Map of counts.
 *
 * Keys are stringified; a falsy key is counted under "unknown".
 *
 * @param {Map<string, number>} map3 - Counter map, mutated in place.
 * @param {*} key - Bucket name.
 * @param {number} [amount=1] - Amount to add.
 * @returns {void}
 */
function increment(map3, key, amount = 1) {
  const bucket = String(key || "unknown");
  const current = map3.get(bucket) || 0;
  map3.set(bucket, current + amount);
}
39590
/**
 * Turn a Map of counts into `{ key, count }` rows, highest count first.
 * Rows with equal counts are ordered by `key` using `localeCompare`.
 *
 * @param {Map<string, number>} map3 - Counter map.
 * @returns {{ key: string, count: number }[]} Ranked rows.
 */
function ranked(map3) {
  const rows = [];
  for (const [key, count] of map3.entries()) {
    rows.push({ key, count });
  }
  rows.sort((left, right) => {
    const byCount = right.count - left.count;
    if (byCount) return byCount;
    return left.key.localeCompare(right.key);
  });
  return rows;
}
39593
/**
 * Extract the distinct documentation URLs mentioned in a piece of text.
 *
 * Each `DOC_URL_RE` match has trailing `.`, `?`, `!` and `:` characters
 * stripped, and is kept only if it still starts with the docs origin.
 *
 * @param {*} text - Text to scan; falsy values are treated as "".
 * @returns {string[]} Unique URLs in default `sort()` order.
 */
function collectDocUrls(text) {
  const unique = /* @__PURE__ */ new Set();
  const hits = String(text || "").match(DOC_URL_RE) || [];
  for (const hit of hits) {
    const trimmed = hit.replace(/[.?!:]+$/g, "");
    if (trimmed.startsWith("https://frontofhouse.okii.uk/")) unique.add(trimmed);
  }
  return Array.from(unique).sort();
}
39596
/**
 * Walk `root` recursively and list every external-agent run found beneath it.
 *
 * A directory containing `run.json` yields a real candidate. A directory that
 * contains `commands.ndjson` but no `run.json` yields a synthetic candidate
 * whose `path` points at the `run.json` that would live there.
 *
 * @param {string} root - Directory to scan; a missing path yields `[]`.
 * @returns {{ path: string, synthetic: boolean }[]} Candidates sorted by path.
 * @throws Whatever `readdirSync` throws for a directory it cannot list.
 */
function findRunCandidates(root) {
  if (!(0, import_fs16.existsSync)(root)) return [];
  const candidates = [];
  const seenRunDirs = /* @__PURE__ */ new Set();
  const captureDirs = [];
  const stack = [root];
  while (stack.length > 0) {
    const current = stack.pop();
    if (!current) continue;
    for (const entry of (0, import_fs16.readdirSync)(current, { withFileTypes: true })) {
      const absolute = (0, import_path15.join)(current, entry.name);
      if (entry.isDirectory()) {
        stack.push(absolute);
      } else if (entry.isFile() && entry.name === "run.json") {
        candidates.push({ path: absolute, synthetic: false });
        // Record the directory string being walked, the same string pushed to
        // `captureDirs`. Using dirname(absolute) instead yields a normalized
        // path, which differs from `current` for roots such as "runs/" or
        // "./runs" and made a root-level run also appear as a synthetic one.
        seenRunDirs.add(current);
      } else if (entry.isFile() && entry.name === "commands.ndjson") {
        captureDirs.push(current);
      }
    }
  }
  for (const captureDir of captureDirs) {
    if (seenRunDirs.has(captureDir)) continue;
    candidates.push({ path: (0, import_path15.join)(captureDir, "run.json"), synthetic: true });
  }
  return candidates.sort((a, b) => a.path.localeCompare(b.path));
}
39623
/**
 * Validate a parsed run artifact against the `external_agent_run.v1` shape.
 *
 * @param {*} value - Parsed `run.json` content (or a synthesized run).
 * @returns {object[]} Findings, each with an `id` plus optional detail fields;
 *   an empty array means the run passed every check.
 */
function validateExternalAgentRun(value) {
  const run = asObject(value);
  if (!run) {
    return [{ id: "run_not_object", detail: "run artifact must be an object" }];
  }
  const findings = [];
  // Presence check only: a required key holding null still counts as present.
  for (const field of REQUIRED_RUN_FIELDS) {
    if (field in run) continue;
    findings.push({ id: "required_field_missing", field });
  }
  if (run.schema_version !== "external_agent_run.v1") {
    findings.push({
      id: "schema_version_invalid",
      expected: "external_agent_run.v1",
      actual: run.schema_version ?? null
    });
  }
  if (!VALID_STATUSES.has(String(run.status || ""))) {
    findings.push({
      id: "status_invalid",
      expected: Array.from(VALID_STATUSES),
      actual: run.status ?? null
    });
  }
  // Non-passing runs must say why.
  const isNonPassing = run.status === "hold" || run.status === "fail";
  const hasReason = Boolean(String(run.failure_reason_code || "").trim());
  if (isNonPassing && !hasReason) {
    findings.push({ id: "failure_reason_code_missing" });
  }
  if (!Number.isInteger(run.manual_intervention_count) || Number(run.manual_intervention_count) < 0) {
    findings.push({ id: "manual_intervention_count_invalid" });
  }
  if (!Array.isArray(run.commands_run)) {
    findings.push({ id: "commands_run_invalid" });
  }
  if (!Array.isArray(run.docs_pages_used)) {
    findings.push({ id: "docs_pages_used_invalid" });
  }
  if (!asObject(run.environment)) {
    findings.push({ id: "environment_invalid" });
  }
  if (!asObject(run.artifacts)) {
    findings.push({ id: "artifacts_invalid" });
  }
  if (toArray2(run.public_entrypoints).length === 0) {
    findings.push({ id: "public_entrypoints_missing" });
  }
  return findings;
}
39649
/**
 * Timestamp used to order runs: `ended_at` when set, else `started_at`.
 *
 * @param {{ ended_at?: string, started_at?: string }} run - Run record.
 * @returns {number} Epoch milliseconds, or 0 when no parseable timestamp exists.
 */
function runSortTime(run) {
  const stamp = Date.parse(String(run.ended_at || run.started_at || ""));
  if (!Number.isFinite(stamp)) return 0;
  return stamp;
}
39654
/**
 * Find the raw timestamp string of the most recent command.
 *
 * Each command contributes `completed_at`, else `started_at`, else
 * `recorded_at`; unparseable values are ignored. When several commands share
 * the latest instant, the earliest one in the list wins.
 *
 * @param {object[]} commands - Command records.
 * @returns {string|null} The original timestamp string, or `null` if none parse.
 */
function latestCommandTime(commands) {
  let newest = null;
  for (const command of commands) {
    const raw = String(command.completed_at || command.started_at || command.recorded_at || "");
    const time = Date.parse(raw);
    if (!Number.isFinite(time)) continue;
    if (newest === null || time > newest.time) newest = { raw, time };
  }
  return newest === null ? null : newest.raw;
}
39658
/**
 * Find the raw timestamp string of the earliest command.
 *
 * Each command contributes `started_at`, else `recorded_at`, else
 * `completed_at`; unparseable values are ignored. When several commands share
 * the earliest instant, the earliest one in the list wins.
 *
 * @param {object[]} commands - Command records.
 * @returns {string|null} The original timestamp string, or `null` if none parse.
 */
function firstCommandTime(commands) {
  let oldest = null;
  for (const command of commands) {
    const raw = String(command.started_at || command.recorded_at || command.completed_at || "");
    const time = Date.parse(raw);
    if (!Number.isFinite(time)) continue;
    if (oldest === null || time < oldest.time) oldest = { raw, time };
  }
  return oldest === null ? null : oldest.raw;
}
39662
/**
 * Build a stand-in `external_agent_run.v1` record for a capture directory
 * that has `commands.ndjson` but no `run.json`.
 *
 * Commands come from `commands.ndjson`; blocker codes and docs pages come from
 * `external-agent-metadata.json` when that file exists. The run is "hold" when
 * the metadata lists at least one blocker reason code, otherwise "pass".
 *
 * @param {string} runPath - Path where `run.json` would be; only its directory is used.
 * @returns {object} The synthesized run record.
 * @throws If the metadata file exists but cannot be read or parsed.
 */
function synthesizeRunFromCapture(runPath) {
  const runDir = (0, import_path15.dirname)(runPath);
  const metadataPath = (0, import_path15.join)(runDir, "external-agent-metadata.json");
  const commands = collapseCommandRecords(readNdjson((0, import_path15.join)(runDir, "commands.ndjson")));
  const metadata = asObject((0, import_fs16.existsSync)(metadataPath) ? readJson(metadataPath) : {});

  const blockerCodes = toArray2(metadata?.blocker_reason_codes).map(String).filter(Boolean);
  // NOTE(review): status is "hold" only when a blocker code exists, so the
  // "external_agent_capture_unfinalized" fallback is never emitted as written.
  const holdReason = blockerCodes[0] || "external_agent_capture_unfinalized";
  const status = blockerCodes.length > 0 ? "hold" : "pass";

  const firstCommand = commands[0] || {};
  // With no parseable command timestamps, both ends fall back to the Unix epoch.
  const startedAt = firstCommandTime(commands) || (/* @__PURE__ */ new Date(0)).toISOString();
  const endedAt = latestCommandTime(commands) || startedAt;

  const docs = toArray2(metadata?.docs_pages_used).map(String).filter(Boolean);
  // Run id is the last three path segments of the capture directory.
  const trailingSegments = runDir.split(/[\\/]/).filter(Boolean).slice(-3);
  const runId = trailingSegments.join("-") || "capture-only-run";
  const commandsRun = commands.map((command) => String(command.command || "")).filter(Boolean);
  const metadataArtifact = (0, import_fs16.existsSync)(metadataPath) ? "external-agent-metadata.json" : null;

  return {
    schema_version: "external_agent_run.v1",
    run_id: runId,
    status,
    failure_reason_code: status === "pass" ? null : holdReason,
    model_provider: "unknown",
    model_name: "unknown",
    prompt_version: String(firstCommand.prompt_version || "unknown"),
    started_at: startedAt,
    ended_at: endedAt,
    manual_intervention_count: 0,
    environment: {
      foh_cli_version: firstCommand.cli_version || null,
      capture_only: true
    },
    public_entrypoints: [
      "https://frontofhouse.okii.uk",
      "npx --yes @f-o-h/cli@latest"
    ],
    commands_run: commandsRun,
    docs_pages_used: docs,
    artifacts: {
      command_log: "commands.ndjson",
      agent_metadata: metadataArtifact,
      capture_only: true
    },
    summary: "Synthetic run record created from external-agent capture artifacts because run.json was missing."
  };
}
39703
/**
 * Derive the cohort a run belongs to from where it sits under `root`.
 *
 * The cohort is the first path segment of the run directory relative to
 * `root`. When that segment is a `YYYY-MM-DD` date and a second segment
 * exists, the cohort is `<date>/<second segment>`. A run directly in `root`
 * gets ".".
 *
 * @param {string} root - Runs root directory.
 * @param {string} runPath - Path to the run's `run.json`.
 * @returns {string} Cohort identifier using forward slashes.
 */
function cohortIdForRunPath(root, runPath) {
  const runDir = (0, import_path15.dirname)(runPath);
  const segments = (0, import_path15.relative)(root, runDir).replaceAll("\\", "/").split("/").filter(Boolean);
  const [head, next] = segments;
  if (head === undefined) return ".";
  const isDateBucket = /^\d{4}-\d{2}-\d{2}$/.test(head);
  return isDateBucket && next ? `${head}/${next}` : head;
}
39710
/**
 * Load and validate every run found under `root`.
 *
 * Real candidates are read from `run.json`; synthetic ones are built from the
 * capture artifacts. Runs with validation findings, and runs whose loading
 * throws, are reported in `invalid_runs` with a path relative to `cwd`.
 *
 * @param {string} root - Runs root directory to scan.
 * @param {string} cwd - Base directory for the paths reported in `invalid_runs`.
 * @returns {{ records: object[], invalid_runs: object[] }} Valid records
 *   (`path`, `run`, `cohort_id`, `sort_time`) and rejected runs (`path`, `findings`).
 */
function readRunRecords(root, cwd) {
  const records = [];
  const invalid_runs = [];
  const displayPath = (file2) => (0, import_path15.relative)(cwd, file2).replaceAll("\\", "/");
  for (const { path: file2, synthetic } of findRunCandidates(root)) {
    try {
      const run = synthetic ? synthesizeRunFromCapture(file2) : readJson(file2);
      const findings = validateExternalAgentRun(run);
      if (findings.length === 0) {
        records.push({
          path: file2,
          run,
          cohort_id: cohortIdForRunPath(root, file2),
          sort_time: runSortTime(run)
        });
      } else {
        invalid_runs.push({ path: displayPath(file2), findings });
      }
    } catch (error2) {
      // Any failure while loading a run is reported under this one finding id.
      const detail = error2 instanceof Error ? error2.message : String(error2);
      invalid_runs.push({
        path: displayPath(file2),
        findings: [{ id: "json_parse_failed", detail }]
      });
    }
  }
  return { records, invalid_runs };
}
39738
/**
 * Pick the cohort of the most recent run record.
 *
 * Records are ordered by `sort_time` descending; ties go to the record whose
 * `path` compares last under `localeCompare`. The input is not mutated.
 *
 * @param {{ sort_time: number, path: string, cohort_id?: string }[]} records - Run records.
 * @returns {string|null} The newest record's `cohort_id`, or `null` if none.
 */
function latestCohortId(records) {
  const newestFirst = (left, right) => {
    const byTime = right.sort_time - left.sort_time;
    return byTime || right.path.localeCompare(left.path);
  };
  const [newest] = records.slice().sort(newestFirst);
  return newest?.cohort_id ?? null;
}
39741
/**
 * Map a failure reason code to the subsystem that owns it.
 *
 * Matching is a case-insensitive substring test against the keyword groups
 * below, checked top to bottom; the first group with a hit wins. Codes that
 * match nothing are attributed to "product_ux".
 *
 * @param {*} reasonCode - Failure reason code; falsy values are treated as "".
 * @returns {string} Owning subsystem identifier.
 */
function ownerSubsystemFor(reasonCode) {
  const reason = String(reasonCode || "").toLowerCase();
  // Order matters: earlier rows take precedence over later ones.
  const routes = [
    ["dojo_certification", ["simulation", "certification", "scenario"]],
    ["voice_contact", ["contact_phone", "voice_contact", "provider_capacity", "byon"]],
    ["infra_runner", ["exec_policy", "policy_blocked", "sandbox", "runner", "codex"]],
    ["api_contract", ["api", "http_4", "http_5", "404", "500", "roundtrip"]],
    ["cli", ["cli", "command", "flag"]],
    ["docs", ["docs", "unclear", "not_found"]],
    ["infra_runner", ["auth", "org", "config"]],
    ["runtime", ["runtime", "widget", "proof"]]
  ];
  for (const [owner, keywords] of routes) {
    if (keywords.some((keyword) => reason.includes(keyword))) return owner;
  }
  return "product_ux";
}
39753
/**
 * Chooses a recommended fix label for a failure reason code, based on the
 * owner subsystem returned by `ownerSubsystemFor`.
 *
 * @param {*} reasonCode Reason code to classify.
 * @returns {string} "fix_api", "fix_cli", "fix_docs", "fix_runtime",
 *   "add_test", or "fix_config" for every other owner.
 */
function recommendedFixFor(reasonCode) {
  switch (ownerSubsystemFor(reasonCode)) {
    case "api_contract":
      return "fix_api";
    case "cli":
      return "fix_cli";
    case "docs":
      return "fix_docs";
    case "runtime":
      return "fix_runtime";
    case "dojo_certification":
      return "add_test";
    default:
      // Remaining owners share the config fix.
      return "fix_config";
  }
}
39762
/**
 * Collapses multiple log records for the same command into one record each.
 *
 * Records are keyed by `command_id`, or by `"<recorded_at>:<command>"` when no
 * command id is present. For each key, a record whose `phase` is "completed"
 * replaces whatever was stored; any other record is kept only if nothing is
 * stored yet. The result preserves first-seen key order.
 *
 * @param {Iterable<object>} records Command log records.
 * @returns {object[]} One record per key, with falsy entries dropped.
 */
function collapseCommandRecords(records) {
  // A Map iterates in first-insertion order, and re-setting an existing key
  // keeps its position, so no separate ordering list is needed.
  const chosenByKey = new Map();
  for (const entry of records) {
    const key = String(entry.command_id || `${entry.recorded_at || ""}:${entry.command || ""}`);
    if (entry.phase === "completed") {
      chosenByKey.set(key, entry);
    } else {
      chosenByKey.set(key, chosenByKey.get(key) || entry);
    }
  }
  return Array.from(chosenByKey.values()).filter((entry) => Boolean(entry));
}
39773
/**
 * Builds per-run telemetry from the artifacts that sit next to a run file.
 *
 * Reads `commands.ndjson` (collapsed to one record per command) and
 * `codex-exec.jsonl` from the run file's directory, then reports command
 * counts, durations, reason-code tallies, the ten slowest steps, observed
 * documentation URLs and codex command-execution counters.
 *
 * @param {string} runPath Path of the run file; its directory is scanned.
 * @param {object} run Parsed run object (`run_id` and `docs_pages_used` are read).
 * @param {string} cwd Base directory used to relativize `runPath` in slow-step rows.
 * @returns {object} Artifact summary for this run.
 */
function analyzeRunArtifacts(runPath, run, cwd) {
  const artifactDir = (0, import_path15.dirname)(runPath);
  const commandLogPath = (0, import_path15.join)(artifactDir, "commands.ndjson");
  const commandRows = collapseCommandRecords(readNdjson(commandLogPath));
  const reasonTally = /* @__PURE__ */ new Map();
  const timedSteps = [];
  let finishedCount = 0;
  let timedCount = 0;
  let durationSum = 0;
  for (const row of commandRows) {
    // A command counts as completed by phase or by having a completion timestamp.
    if (row.phase === "completed" || row.completed_at) {
      finishedCount += 1;
    }
    if (typeof row.duration_ms === "number") {
      timedCount += 1;
      durationSum += row.duration_ms;
      const checkCodes = Array.isArray(row.check_reason_codes) ? row.check_reason_codes : [];
      timedSteps.push({
        run_id: run.run_id,
        run_path: (0, import_path15.relative)(cwd, runPath).replaceAll("\\", "/"),
        command: row.command || "",
        duration_ms: row.duration_ms,
        status: row.status || null,
        reason_code: row.reason_code || null,
        check_reason_codes: checkCodes
      });
    }
    if (row.reason_code) {
      increment(reasonTally, row.reason_code);
    }
    for (const checkCode of toArray2(row.check_reason_codes)) {
      if (checkCode) increment(reasonTally, checkCode);
    }
  }
  const transcriptEvents = readNdjson((0, import_path15.join)(artifactDir, "codex-exec.jsonl"));
  const transcriptDocs = /* @__PURE__ */ new Set();
  let executionCount = 0;
  let failedExitCount = 0;
  for (const event of transcriptEvents) {
    // Events may wrap their payload in `item`; otherwise the event itself is inspected.
    const payload = asObject(event.item) || event;
    const isCompletedExecution = payload.type === "command_execution" && payload.status === "completed";
    if (isCompletedExecution) {
      executionCount += 1;
      if (typeof payload.exit_code === "number" && payload.exit_code !== 0) {
        failedExitCount += 1;
      }
    }
    for (const docUrl of collectDocUrls(JSON.stringify(event))) {
      transcriptDocs.add(docUrl);
    }
  }
  // Union of the docs declared on the run and the docs seen in the transcript.
  const observedDocs = new Set(toArray2(run.docs_pages_used).map(String));
  for (const docUrl of transcriptDocs) {
    observedDocs.add(docUrl);
  }
  return {
    command_log_present: (0, import_fs16.existsSync)(commandLogPath),
    command_count: commandRows.length,
    completed_command_count: finishedCount,
    missing_completion_count: Math.max(0, commandRows.length - finishedCount),
    commands_with_duration_count: timedCount,
    total_command_duration_ms: durationSum,
    command_reason_codes: ranked(reasonTally),
    slow_steps: timedSteps.sort((left, right) => Number(right.duration_ms) - Number(left.duration_ms)).slice(0, 10),
    docs_pages_observed: Array.from(observedDocs).sort(),
    codex_command_execution_completed_count: executionCount,
    codex_failed_exit_code_count: failedExitCount
  };
}
39831
/**
 * Aggregates every external-agent run found under a root directory into a
 * single `external_agent_run_summary.v1` object.
 *
 * Options read here: `cwd` (defaults to `process.cwd()`), `root` (resolved
 * against `cwd`), `cohortId`, and `currentBaselineOnly`. When a cohort is
 * selected (explicitly, or as the latest cohort when `currentBaselineOnly` is
 * set), only that cohort's runs are summarized and invalid runs are reported
 * as empty.
 *
 * @param {object} options Summary options.
 * @returns {object} The summary: status/model/failure tallies, command and
 *   codex telemetry, recommended fixes, and the relative run paths.
 */
function summarizeExternalAgentRuns(options) {
  const cwd = (0, import_path15.resolve)(options.cwd || process.cwd());
  const root = (0, import_path15.resolve)(cwd, options.root);
  const toRelativePosix = (target) => (0, import_path15.relative)(cwd, target).replaceAll("\\", "/");
  const asCount = (value) => Number(value || 0);
  const loaded = readRunRecords(root, cwd);
  let selectedCohortId = options.cohortId;
  if (!selectedCohortId) {
    selectedCohortId = options.currentBaselineOnly ? latestCohortId(loaded.records) : null;
  }
  const records = selectedCohortId ? loaded.records.filter((candidate) => candidate.cohort_id === selectedCohortId) : loaded.records;
  const tallies = {
    status: /* @__PURE__ */ new Map(),
    model: /* @__PURE__ */ new Map(),
    failure: /* @__PURE__ */ new Map(),
    commandReason: /* @__PURE__ */ new Map(),
    docs: /* @__PURE__ */ new Map()
  };
  const totals = {
    manualInterventions: 0,
    commands: 0,
    completedCommands: 0,
    missingCompletions: 0,
    commandsWithDuration: 0,
    commandDurationMs: 0,
    runsWithCommandLog: 0,
    codexExecutions: 0,
    codexFailedExits: 0
  };
  const slowSteps = [];
  for (const entry of records) {
    const run = entry.run;
    increment(tallies.status, run.status);
    increment(tallies.model, `${run.model_provider}/${run.model_name}`);
    totals.manualInterventions += asCount(run.manual_intervention_count);
    if (run.status !== "pass") {
      increment(tallies.failure, run.failure_reason_code || "unknown");
    }
    const artifacts = analyzeRunArtifacts(entry.path, run, cwd);
    if (artifacts.command_log_present) {
      totals.runsWithCommandLog += 1;
    }
    totals.commands += asCount(artifacts.command_count);
    totals.completedCommands += asCount(artifacts.completed_command_count);
    totals.missingCompletions += asCount(artifacts.missing_completion_count);
    totals.commandsWithDuration += asCount(artifacts.commands_with_duration_count);
    totals.commandDurationMs += asCount(artifacts.total_command_duration_ms);
    totals.codexExecutions += asCount(artifacts.codex_command_execution_completed_count);
    totals.codexFailedExits += asCount(artifacts.codex_failed_exit_code_count);
    for (const step of toArray2(artifacts.slow_steps)) {
      slowSteps.push(step);
    }
    for (const rankedRow of toArray2(artifacts.command_reason_codes)) {
      const reasonEntry = asObject(rankedRow);
      if (reasonEntry) {
        increment(tallies.commandReason, reasonEntry.key, Number(reasonEntry.count || 1));
      }
    }
    for (const page of toArray2(artifacts.docs_pages_observed)) {
      increment(tallies.docs, page);
    }
  }
  const topFailures = ranked(tallies.failure);
  const commandReasonCodes = ranked(tallies.commandReason);
  const recommendedFixes = topFailures.map(({ key, count }) => ({
    reason_code: key,
    count,
    recommended_fix: recommendedFixFor(key),
    owner_subsystem: ownerSubsystemFor(key)
  }));
  const nextRecommendedFix = recommendedFixes[0] || null;
  // Slowest first; equal durations are ordered by command text.
  const bySlowest = (left, right) => {
    const durationDelta = Number(right.duration_ms || 0) - Number(left.duration_ms || 0);
    return durationDelta || String(left.command || "").localeCompare(String(right.command || ""));
  };
  return {
    schema_version: "external_agent_run_summary.v1",
    generated_at: (/* @__PURE__ */ new Date()).toISOString(),
    root: toRelativePosix(root) || ".",
    cohort_id: selectedCohortId,
    current_baseline_only: Boolean(selectedCohortId),
    run_count: records.length,
    invalid_run_count: selectedCohortId ? 0 : loaded.invalid_runs.length,
    status_counts: Object.fromEntries(tallies.status),
    model_counts: ranked(tallies.model),
    manual_intervention_count: totals.manualInterventions,
    top_failure_reason_codes: topFailures,
    docs_pages_observed: ranked(tallies.docs),
    command_telemetry: {
      run_count_with_command_log: totals.runsWithCommandLog,
      command_count: totals.commands,
      completed_command_count: totals.completedCommands,
      missing_completion_count: totals.missingCompletions,
      commands_with_duration_count: totals.commandsWithDuration,
      total_command_duration_ms: totals.commandDurationMs,
      command_reason_codes: commandReasonCodes,
      slow_steps: slowSteps.sort(bySlowest).slice(0, 20)
    },
    codex_telemetry: {
      command_execution_completed_count: totals.codexExecutions,
      failed_exit_code_count: totals.codexFailedExits
    },
    recommended_fixes: recommendedFixes,
    next_recommended_fix: nextRecommendedFix,
    fix_selection_policy: {
      mode: "coherent_failure_cluster_first",
      rule: "Fix the highest-impact owner subsystem locally with focused proof, then rerun the same prompt once externally.",
      run_failure_weight: 3,
      command_reason_weight: 1
    },
    next_commands: nextRecommendedFix ? ["foh bug improve --from external-agent-run --file <run_dir>/run.json --json"] : [],
    invalid_runs: selectedCohortId ? [] : loaded.invalid_runs,
    run_paths: records.map((entry) => toRelativePosix(entry.path)).sort()
  };
}
39923
/**
 * Produces the external-agent run summary plus a `script_report.v1` wrapper
 * and optionally writes each to disk.
 *
 * The report status is "failed" when the summary lists any invalid runs and
 * "passed" otherwise; a warning is added when no runs were found. When
 * `options.out` is set the summary is written there, and when `options.report`
 * is set the report is written there. Both paths are resolved against
 * `options.cwd` (or `process.cwd()`), and parent directories are created.
 *
 * @param {object} options Options forwarded to `summarizeExternalAgentRuns`,
 *   plus the optional `out` and `report` output paths.
 * @returns {{summary: object, report: object}}
 */
function runExternalAgentRunSummary(options) {
  const summary = summarizeExternalAgentRuns(options);
  const invalidRuns = toArray2(summary.invalid_runs);
  const describeInvalidRun = (entry) => {
    const details = asObject(entry);
    return `${details?.path || "unknown"}: ${JSON.stringify(details?.findings || [])}`;
  };
  const hasNoRuns = Number(summary.run_count || 0) === 0;
  const report = {
    report_schema_version: "script_report.v1",
    script: "foh eval external-agent summary",
    checked_at: (/* @__PURE__ */ new Date()).toISOString(),
    status: invalidRuns.length > 0 ? "failed" : "passed",
    errors: invalidRuns.map(describeInvalidRun),
    warnings: hasNoRuns ? ["no external-agent run artifacts found"] : [],
    report: summary
  };
  // Writes pretty-printed JSON with a trailing newline, creating parent directories first.
  const writeJsonFile = (target, payload) => {
    const baseDir = options.cwd || process.cwd();
    (0, import_fs16.mkdirSync)((0, import_path15.dirname)((0, import_path15.resolve)(baseDir, target)), { recursive: true });
    (0, import_fs16.writeFileSync)((0, import_path15.resolve)(baseDir, target), `${JSON.stringify(payload, null, 2)}\n`, "utf8");
  };
  if (options.out) {
    writeJsonFile(options.out, summary);
  }
  if (options.report) {
    writeJsonFile(options.report, report);
  }
  return { summary, report };
}
39951
+
39952
+ // src/lib/external-agent-executor-classification.ts
39523
39953
  function proofArtifactPasses(runDir) {
39524
- const proofPath = (0, import_path15.join)(runDir, "proof.json");
39525
- if (!(0, import_fs16.existsSync)(proofPath)) return false;
39954
+ const proofPath = (0, import_path16.join)(runDir, "proof.json");
39955
+ if (!(0, import_fs17.existsSync)(proofPath)) return false;
39526
39956
  try {
39527
- const parsed = JSON.parse((0, import_fs16.readFileSync)(proofPath, "utf8"));
39957
+ const parsed = JSON.parse((0, import_fs17.readFileSync)(proofPath, "utf8"));
39528
39958
  return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
39529
39959
  } catch {
39530
39960
  return false;
39531
39961
  }
39532
39962
  }
39533
39963
  function readIfExists(path2) {
39534
- return (0, import_fs16.existsSync)(path2) ? (0, import_fs16.readFileSync)(path2, "utf8") : "";
39964
+ return (0, import_fs17.existsSync)(path2) ? (0, import_fs17.readFileSync)(path2, "utf8") : "";
39535
39965
  }
39536
39966
  function relativeArtifactName(path2) {
39537
- return (0, import_path15.basename)(path2);
39538
- }
39539
- function externalAgentSummaryCommand(root) {
39540
- return [
39541
- "node",
39542
- "scripts/summarize-external-agent-runs.mjs",
39543
- "--root",
39544
- quoteShellArg(root),
39545
- "--out",
39546
- quoteShellArg((0, import_path15.join)(root, "latest-summary.json")),
39547
- "--report",
39548
- quoteShellArg((0, import_path15.join)(root, "summary.report.json"))
39549
- ].join(" ");
39550
- }
39551
- function quoteShellArg(value) {
39552
- const text = String(value);
39553
- if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
39554
- return `"${text.replace(/(["$`])/g, "\\$1")}"`;
39967
+ return (0, import_path16.basename)(path2);
39555
39968
  }
39556
39969
  function classifyExternalAgentRun(input) {
39557
39970
  if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
@@ -39697,13 +40110,13 @@ function buildExecutedExternalAgentRunArtifact(input) {
39697
40110
  },
39698
40111
  artifacts: {
39699
40112
  terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
39700
- command_log: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
39701
- proof_bundle: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
39702
- replay_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
39703
- knowledge_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
40113
+ command_log: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
40114
+ proof_bundle: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
40115
+ replay_packet: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
40116
+ knowledge_packet: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
39704
40117
  improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
39705
40118
  agent_metadata: agentMetadata.path,
39706
- notes: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
40119
+ notes: (0, import_fs17.existsSync)((0, import_path16.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
39707
40120
  runner_last_message: relativeArtifactName(input.run.outputs.last_message),
39708
40121
  runner_stderr: relativeArtifactName(input.run.outputs.stderr),
39709
40122
  codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
@@ -39711,25 +40124,25 @@ function buildExecutedExternalAgentRunArtifact(input) {
39711
40124
  artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
39712
40125
  },
39713
40126
  summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
39714
- next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))] : [
40127
+ next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path16.dirname)(input.run.run_dir))] : [
39715
40128
  "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
39716
40129
  "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
39717
- externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))
40130
+ externalAgentSummaryCommand((0, import_path16.dirname)(input.run.run_dir))
39718
40131
  ]
39719
40132
  };
39720
40133
  }
39721
40134
 
39722
40135
  // src/lib/external-agent-runner-execution.ts
39723
40136
  var import_child_process4 = require("child_process");
39724
- var import_fs17 = require("fs");
39725
- var import_path16 = require("path");
40137
+ var import_fs18 = require("fs");
40138
+ var import_path17 = require("path");
39726
40139
  function buildCommandInvocation(command, args) {
39727
40140
  if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
39728
- const binDir = (0, import_path16.dirname)(command);
39729
- const codexEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
39730
- if ((0, import_fs17.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
39731
- const geminiEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
39732
- if ((0, import_fs17.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
40141
+ const binDir = (0, import_path17.dirname)(command);
40142
+ const codexEntrypoint = (0, import_path17.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
40143
+ if ((0, import_fs18.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
40144
+ const geminiEntrypoint = (0, import_path17.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
40145
+ if ((0, import_fs18.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
39733
40146
  }
39734
40147
  return { command, args };
39735
40148
  }
@@ -39744,8 +40157,8 @@ function spawnExternalAgentRunner(input) {
39744
40157
  stdio: ["pipe", "pipe", "pipe"],
39745
40158
  windowsHide: true
39746
40159
  });
39747
- const stdout = (0, import_fs17.createWriteStream)(input.stdoutPath, { flags: "w" });
39748
- const stderr = (0, import_fs17.createWriteStream)(input.stderrPath, { flags: "w" });
40160
+ const stdout = (0, import_fs18.createWriteStream)(input.stdoutPath, { flags: "w" });
40161
+ const stderr = (0, import_fs18.createWriteStream)(input.stderrPath, { flags: "w" });
39749
40162
  child.stdout.pipe(stdout);
39750
40163
  child.stderr.pipe(stderr);
39751
40164
  child.stdin.end(input.prompt);
@@ -39857,14 +40270,14 @@ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}
39857
40270
  };
39858
40271
  }
39859
40272
  function normalizeForCompare(path2) {
39860
- const resolved = (0, import_path17.resolve)(path2);
40273
+ const resolved = (0, import_path18.resolve)(path2);
39861
40274
  return process.platform === "win32" ? resolved.toLowerCase() : resolved;
39862
40275
  }
39863
40276
  function isPathInside(childPath, parentPath) {
39864
40277
  const child = normalizeForCompare(childPath);
39865
40278
  const parent = normalizeForCompare(parentPath);
39866
- const rel = (0, import_path17.relative)(parent, child);
39867
- return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path17.isAbsolute)(rel);
40279
+ const rel = (0, import_path18.relative)(parent, child);
40280
+ return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path18.isAbsolute)(rel);
39868
40281
  }
39869
40282
  function requireString(value, field) {
39870
40283
  if (typeof value !== "string" || value.trim() === "") {
@@ -39873,10 +40286,10 @@ function requireString(value, field) {
39873
40286
  return value;
39874
40287
  }
39875
40288
  function readBatch(batchPath) {
39876
- if (!(0, import_fs18.existsSync)(batchPath)) {
40289
+ if (!(0, import_fs19.existsSync)(batchPath)) {
39877
40290
  throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
39878
40291
  }
39879
- const parsed = JSON.parse((0, import_fs18.readFileSync)(batchPath, "utf8"));
40292
+ const parsed = JSON.parse((0, import_fs19.readFileSync)(batchPath, "utf8"));
39880
40293
  if (parsed.schema_version !== "external_agent_batch_plan.v1") {
39881
40294
  throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
39882
40295
  }
@@ -39913,8 +40326,8 @@ function resolveCodexProbeCommand() {
39913
40326
  if (process.platform !== "win32") return "codex";
39914
40327
  const appData = process.env.APPDATA;
39915
40328
  if (appData) {
39916
- const appDataShim = (0, import_path17.join)(appData, "npm", "codex.cmd");
39917
- if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
40329
+ const appDataShim = (0, import_path18.join)(appData, "npm", "codex.cmd");
40330
+ if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
39918
40331
  }
39919
40332
  return "codex.cmd";
39920
40333
  }
@@ -39925,8 +40338,8 @@ function resolveGeminiProbeCommand() {
39925
40338
  if (process.platform !== "win32") return "gemini";
39926
40339
  const appData = process.env.APPDATA;
39927
40340
  if (appData) {
39928
- const appDataShim = (0, import_path17.join)(appData, "npm", "gemini.cmd");
39929
- if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
40341
+ const appDataShim = (0, import_path18.join)(appData, "npm", "gemini.cmd");
40342
+ if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
39930
40343
  }
39931
40344
  return "gemini.cmd";
39932
40345
  }
@@ -40197,34 +40610,34 @@ function safeRunId(value) {
40197
40610
  return value.toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "run";
40198
40611
  }
40199
40612
  function resolveWorkspaceRoot(input) {
40200
- if (input.workspaceRoot) return (0, import_path17.resolve)(input.workspaceRoot);
40201
- const batchStem = (0, import_path17.basename)((0, import_path17.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40202
- const repoStem = (0, import_path17.basename)((0, import_path17.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40203
- return (0, import_path17.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
40613
+ if (input.workspaceRoot) return (0, import_path18.resolve)(input.workspaceRoot);
40614
+ const batchStem = (0, import_path18.basename)((0, import_path18.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40615
+ const repoStem = (0, import_path18.basename)((0, import_path18.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40616
+ return (0, import_path18.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
40204
40617
  }
40205
40618
  function findNearestGitRoot(startPath) {
40206
- let current = (0, import_path17.resolve)(startPath);
40619
+ let current = (0, import_path18.resolve)(startPath);
40207
40620
  while (true) {
40208
- if ((0, import_fs18.existsSync)((0, import_path17.join)(current, ".git"))) return current;
40209
- const parent = (0, import_path17.dirname)(current);
40621
+ if ((0, import_fs19.existsSync)((0, import_path18.join)(current, ".git"))) return current;
40622
+ const parent = (0, import_path18.dirname)(current);
40210
40623
  if (parent === current) return null;
40211
40624
  current = parent;
40212
40625
  }
40213
40626
  }
40214
40627
  function resolvePrivateRepoRoot(input) {
40215
40628
  if (input.explicitPrivateRepoRoot) {
40216
- return { root: (0, import_path17.resolve)(input.explicitPrivateRepoRoot), explicit: true };
40629
+ return { root: (0, import_path18.resolve)(input.explicitPrivateRepoRoot), explicit: true };
40217
40630
  }
40218
- const cwd = (0, import_path17.resolve)(input.cwd || process.cwd());
40631
+ const cwd = (0, import_path18.resolve)(input.cwd || process.cwd());
40219
40632
  const gitRoot = findNearestGitRoot(cwd);
40220
40633
  if (gitRoot) return { root: gitRoot, explicit: false };
40221
40634
  return {
40222
- root: (0, import_path17.join)(cwd, ".foh-no-private-repo-root-sentinel"),
40635
+ root: (0, import_path18.join)(cwd, ".foh-no-private-repo-root-sentinel"),
40223
40636
  explicit: false
40224
40637
  };
40225
40638
  }
40226
40639
  function promptVersionFromPath(promptPath) {
40227
- const raw = (0, import_fs18.readFileSync)(promptPath, "utf8");
40640
+ const raw = (0, import_fs19.readFileSync)(promptPath, "utf8");
40228
40641
  if (raw.includes("Do not assume access to the private source repository")) return "blank-setup.v1";
40229
40642
  return "unknown";
40230
40643
  }
@@ -40233,7 +40646,7 @@ function createExternalAgentExecutorPlan(options) {
40233
40646
  if (runner !== "codex" && runner !== "gemini") {
40234
40647
  throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
40235
40648
  }
40236
- const batchPath = (0, import_path17.resolve)(options.batchPath);
40649
+ const batchPath = (0, import_path18.resolve)(options.batchPath);
40237
40650
  const batch = readBatch(batchPath);
40238
40651
  const runnerProbe = validateRunner(options, runner);
40239
40652
  const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
@@ -40252,17 +40665,17 @@ function createExternalAgentExecutorPlan(options) {
40252
40665
  `Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
40253
40666
  );
40254
40667
  }
40255
- (0, import_fs18.mkdirSync)(workspaceRoot, { recursive: true });
40256
- const batchDir = (0, import_path17.resolve)(String(batch.batch_dir || (0, import_path17.resolve)(batchPath, "..")));
40668
+ (0, import_fs19.mkdirSync)(workspaceRoot, { recursive: true });
40669
+ const batchDir = (0, import_path18.resolve)(String(batch.batch_dir || (0, import_path18.resolve)(batchPath, "..")));
40257
40670
  const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
40258
40671
  const runs = batch.runs.map((run) => {
40259
40672
  const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
40260
- const runDir = (0, import_path17.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
40261
- const promptPath = (0, import_path17.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
40262
- const workspaceDir = (0, import_path17.join)(workspaceRoot, runId);
40263
- (0, import_fs18.mkdirSync)(workspaceDir, { recursive: true });
40264
- (0, import_fs18.writeFileSync)(
40265
- (0, import_path17.join)(workspaceDir, "README.md"),
40673
+ const runDir = (0, import_path18.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
40674
+ const promptPath = (0, import_path18.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
40675
+ const workspaceDir = (0, import_path18.join)(workspaceRoot, runId);
40676
+ (0, import_fs19.mkdirSync)(workspaceDir, { recursive: true });
40677
+ (0, import_fs19.writeFileSync)(
40678
+ (0, import_path18.join)(workspaceDir, "README.md"),
40266
40679
  [
40267
40680
  "# FOH External-Agent Workspace",
40268
40681
  "",
@@ -40280,11 +40693,11 @@ function createExternalAgentExecutorPlan(options) {
40280
40693
  });
40281
40694
  const promptVersion = String(env[EXTERNAL_AGENT_PROMPT_VERSION_ENV] || "unknown");
40282
40695
  const outputStem = runner === "gemini" ? "gemini" : "codex";
40283
- const jsonlPath = (0, import_path17.join)(runDir, `${outputStem}-exec.jsonl`);
40284
- const lastMessagePath = (0, import_path17.join)(runDir, `${outputStem}-last-message.md`);
40285
- const stderrPath = (0, import_path17.join)(runDir, `${outputStem}-stderr.txt`);
40286
- const runPath = (0, import_path17.join)(runDir, "run.json");
40287
- const artifactSafetyPath = (0, import_path17.join)(runDir, "artifact-safety.json");
40696
+ const jsonlPath = (0, import_path18.join)(runDir, `${outputStem}-exec.jsonl`);
40697
+ const lastMessagePath = (0, import_path18.join)(runDir, `${outputStem}-last-message.md`);
40698
+ const stderrPath = (0, import_path18.join)(runDir, `${outputStem}-stderr.txt`);
40699
+ const runPath = (0, import_path18.join)(runDir, "run.json");
40700
+ const artifactSafetyPath = (0, import_path18.join)(runDir, "artifact-safety.json");
40288
40701
  const args = runner === "gemini" ? [
40289
40702
  ...runnerProbe.globalArgs,
40290
40703
  ...runnerProbe.execArgs
@@ -40375,9 +40788,9 @@ function createExternalAgentExecutorPlan(options) {
40375
40788
  };
40376
40789
  }
40377
40790
  function writeExternalAgentExecutorPlan(plan) {
40378
- const path2 = (0, import_path17.join)(plan.batch_dir, "executor-plan.json");
40379
- (0, import_fs18.mkdirSync)(plan.batch_dir, { recursive: true });
40380
- (0, import_fs18.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40791
+ const path2 = (0, import_path18.join)(plan.batch_dir, "executor-plan.json");
40792
+ (0, import_fs19.mkdirSync)(plan.batch_dir, { recursive: true });
40793
+ (0, import_fs19.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40381
40794
  `, "utf8");
40382
40795
  return path2;
40383
40796
  }
@@ -40392,7 +40805,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40392
40805
  if (authPreflight && !authPreflight.ok) {
40393
40806
  const endedAt2 = (/* @__PURE__ */ new Date()).toISOString();
40394
40807
  const blockedResults = plan.runs.map((run) => {
40395
- (0, import_fs18.mkdirSync)(run.run_dir, { recursive: true });
40808
+ (0, import_fs19.mkdirSync)(run.run_dir, { recursive: true });
40396
40809
  const runArtifact = buildExecutedExternalAgentRunArtifact({
40397
40810
  run,
40398
40811
  startedAt,
@@ -40403,7 +40816,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40403
40816
  timedOut: false,
40404
40817
  durationMs: 0
40405
40818
  });
40406
- (0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40819
+ (0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40407
40820
  `, "utf8");
40408
40821
  return {
40409
40822
  run_id: run.run_id,
@@ -40430,8 +40843,8 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40430
40843
  }
40431
40844
  for (const run of plan.runs) {
40432
40845
  const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
40433
- const commandCaptureDir = (0, import_path17.join)(run.workspace_dir, ".foh-capture");
40434
- (0, import_fs18.mkdirSync)(commandCaptureDir, { recursive: true });
40846
+ const commandCaptureDir = (0, import_path18.join)(run.workspace_dir, ".foh-capture");
40847
+ (0, import_fs19.mkdirSync)(commandCaptureDir, { recursive: true });
40435
40848
  const env = buildCodexExecutorEnv({
40436
40849
  sourceEnv: options.env,
40437
40850
  runDir: commandCaptureDir,
@@ -40442,7 +40855,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40442
40855
  args: run.args,
40443
40856
  cwd: run.workspace_dir,
40444
40857
  env,
40445
- prompt: (0, import_fs18.readFileSync)(run.prompt_path, "utf8"),
40858
+ prompt: (0, import_fs19.readFileSync)(run.prompt_path, "utf8"),
40446
40859
  stdoutPath: run.outputs.jsonl,
40447
40860
  stderrPath: run.outputs.stderr,
40448
40861
  timeoutMs: plan.timeout_minutes * 60 * 1e3
@@ -40455,7 +40868,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40455
40868
  privateRepoRoot,
40456
40869
  writeRedacted: true
40457
40870
  });
40458
- (0, import_fs18.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40871
+ (0, import_fs19.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40459
40872
  `, "utf8");
40460
40873
  const runEndedAt = (/* @__PURE__ */ new Date()).toISOString();
40461
40874
  const classification = classifyExternalAgentRun({
@@ -40474,7 +40887,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40474
40887
  timedOut: spawned.timedOut,
40475
40888
  durationMs: spawned.durationMs
40476
40889
  });
40477
- (0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40890
+ (0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40478
40891
  `, "utf8");
40479
40892
  results.push({
40480
40893
  run_id: run.run_id,
@@ -40523,13 +40936,13 @@ function defaultRunDir(modelName, promptVersion) {
40523
40936
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40524
40937
  const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
40525
40938
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40526
- return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40939
+ return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40527
40940
  }
40528
40941
  function defaultBatchDir(promptVersion) {
40529
40942
  const date4 = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
40530
40943
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40531
40944
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40532
- return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40945
+ return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40533
40946
  }
40534
40947
  function safeSlug(value) {
40535
40948
  return String(value || "unknown").toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "unknown";
@@ -40543,20 +40956,6 @@ function scanArtifactsCommand(runDir, privateRepoRoot) {
40543
40956
  const privateRootArg = privateRepoRoot ? ` --private-repo-root ${quoteArg(privateRepoRoot)}` : "";
40544
40957
  return `foh eval external-agent scan-artifacts --run-dir ${quoteArg(runDir)}${privateRootArg} --write-redacted --json`;
40545
40958
  }
40546
- function externalAgentSummaryCommand2(root) {
40547
- const summaryPath = (0, import_path18.join)(root, "latest-summary.json");
40548
- const reportPath = (0, import_path18.join)(root, "summary.report.json");
40549
- return [
40550
- "node",
40551
- "scripts/summarize-external-agent-runs.mjs",
40552
- "--root",
40553
- quoteArg(root),
40554
- "--out",
40555
- quoteArg(summaryPath),
40556
- "--report",
40557
- quoteArg(reportPath)
40558
- ].join(" ");
40559
- }
40560
40959
  function executorRecoveryCommands(reasonCode, runner) {
40561
40960
  const normalizedRunner = String(runner || "codex").trim().toLowerCase();
40562
40961
  if (reasonCode === "external_agent_runner_binary_missing") {
@@ -40659,14 +41058,14 @@ function writePrompt(runDir, promptVersion, context = {}) {
40659
41058
  replayPromptContext(context.replayFile),
40660
41059
  knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
40661
41060
  ].join("");
40662
- const path2 = (0, import_path18.join)(runDir, "prompt.txt");
40663
- (0, import_fs19.writeFileSync)(path2, `${prompt}
41061
+ const path2 = (0, import_path19.join)(runDir, "prompt.txt");
41062
+ (0, import_fs20.writeFileSync)(path2, `${prompt}
40664
41063
  `, "utf8");
40665
41064
  return path2;
40666
41065
  }
40667
41066
  function writeSession(runDir, session) {
40668
- const path2 = (0, import_path18.join)(runDir, "session.json");
40669
- (0, import_fs19.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
41067
+ const path2 = (0, import_path19.join)(runDir, "session.json");
41068
+ (0, import_fs20.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
40670
41069
  `, "utf8");
40671
41070
  return path2;
40672
41071
  }
@@ -40742,9 +41141,9 @@ function buildRunArtifact(input) {
40742
41141
  notes: "notes.md"
40743
41142
  },
40744
41143
  summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
40745
- next_commands: status === "pass" ? [externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))] : [
40746
- `foh bug improve --from external-agent-run --file ${(0, import_path18.join)(input.runDir, "run.json")} --out ${(0, import_path18.join)(input.runDir, "improvement-packet.json")} --json`,
40747
- externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))
41144
+ next_commands: status === "pass" ? [externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))] : [
41145
+ `foh bug improve --from external-agent-run --file ${(0, import_path19.join)(input.runDir, "run.json")} --out ${(0, import_path19.join)(input.runDir, "improvement-packet.json")} --json`,
41146
+ externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))
40748
41147
  ]
40749
41148
  };
40750
41149
  }
@@ -40753,16 +41152,16 @@ function registerEval(program3) {
40753
41152
  const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
40754
41153
  external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
40755
41154
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40756
- const batchDir = (0, import_path18.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
40757
- const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
41155
+ const batchDir = (0, import_path19.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
41156
+ const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
40758
41157
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40759
41158
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40760
41159
  const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
40761
- (0, import_fs19.mkdirSync)(batchDir, { recursive: true });
40762
- const runs = models.map((model, index) => {
41160
+ (0, import_fs20.mkdirSync)(batchDir, { recursive: true });
41161
+ const runs2 = models.map((model, index) => {
40763
41162
  const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
40764
- const runDir = (0, import_path18.join)(batchDir, runId);
40765
- (0, import_fs19.mkdirSync)(runDir, { recursive: true });
41163
+ const runDir = (0, import_path19.join)(batchDir, runId);
41164
+ (0, import_fs20.mkdirSync)(runDir, { recursive: true });
40766
41165
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40767
41166
  const commandArgs = [
40768
41167
  "eval",
@@ -40805,23 +41204,23 @@ function registerEval(program3) {
40805
41204
  expected_answer: expectedAnswer ?? null,
40806
41205
  workspace_type: String(opts.workspaceType || "clean-no-repo"),
40807
41206
  agent_shell: String(opts.agentShell || "vscode-terminal"),
40808
- run_count: runs.length,
40809
- runs,
40810
- summary_command: externalAgentSummaryCommand2(batchDir)
41207
+ run_count: runs2.length,
41208
+ runs: runs2,
41209
+ summary_command: externalAgentSummaryCommand(batchDir)
40811
41210
  };
40812
- const batchPath = (0, import_path18.join)(batchDir, "batch.json");
40813
- (0, import_fs19.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
41211
+ const batchPath = (0, import_path19.join)(batchDir, "batch.json");
41212
+ (0, import_fs20.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
40814
41213
  `, "utf8");
40815
41214
  format(cliEnvelope({
40816
41215
  schemaVersion: "external_agent_batch_plan_result.v1",
40817
41216
  status: "exported",
40818
41217
  reasonCode: "external_agent_batch_plan_created",
40819
- summary: `External-agent batch plan created for ${runs.length} model(s).`,
41218
+ summary: `External-agent batch plan created for ${runs2.length} model(s).`,
40820
41219
  artifacts: {
40821
41220
  batch: batchPath
40822
41221
  },
40823
41222
  nextCommands: [
40824
- ...runs.map((run) => run.launch_command),
41223
+ ...runs2.map((run) => run.launch_command),
40825
41224
  batch.summary_command
40826
41225
  ],
40827
41226
  extra: { batch }
@@ -40830,11 +41229,11 @@ function registerEval(program3) {
40830
41229
  external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
40831
41230
  const status = normalizeStatus(opts.status);
40832
41231
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40833
- const runDir = (0, import_path18.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
40834
- const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
41232
+ const runDir = (0, import_path19.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
41233
+ const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
40835
41234
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40836
41235
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40837
- (0, import_fs19.mkdirSync)(runDir, { recursive: true });
41236
+ (0, import_fs20.mkdirSync)(runDir, { recursive: true });
40838
41237
  const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
40839
41238
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40840
41239
  const shell = inferShell(opts.shell);
@@ -40860,7 +41259,7 @@ function registerEval(program3) {
40860
41259
  }
40861
41260
  };
40862
41261
  writeSession(runDir, session);
40863
- (0, import_fs19.writeFileSync)((0, import_path18.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
41262
+ (0, import_fs20.writeFileSync)((0, import_path19.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
40864
41263
  let shellExitCode = null;
40865
41264
  if (opts.shell !== false) {
40866
41265
  process.stdout.write(`
@@ -40882,8 +41281,8 @@ Exit the shell to finalize run.json.
40882
41281
  shellExitCode = typeof result.status === "number" ? result.status : null;
40883
41282
  }
40884
41283
  const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
40885
- const runPath = (0, import_path18.join)(runDir, "run.json");
40886
- (0, import_fs19.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
41284
+ const runPath = (0, import_path19.join)(runDir, "run.json");
41285
+ (0, import_fs20.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
40887
41286
  `, "utf8");
40888
41287
  format(cliEnvelope({
40889
41288
  schemaVersion: "external_agent_capture_result.v1",
@@ -40893,12 +41292,57 @@ Exit the shell to finalize run.json.
40893
41292
  artifacts: {
40894
41293
  run: runPath,
40895
41294
  prompt: promptPath,
40896
- commands: (0, import_path18.join)(runDir, "commands.ndjson")
41295
+ commands: (0, import_path19.join)(runDir, "commands.ndjson")
40897
41296
  },
40898
41297
  nextCommands: artifact.next_commands,
40899
41298
  extra: { run: artifact }
40900
41299
  }), { json: Boolean(opts.json) });
40901
41300
  });
41301
+ external.command("summary").description("Summarize external_agent_run.v1 artifacts from a clean external-agent run root").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
41302
+ const { summary, report } = runExternalAgentRunSummary({
41303
+ root: String(opts.root),
41304
+ out: opts.out ? String(opts.out) : void 0,
41305
+ report: opts.report ? String(opts.report) : void 0,
41306
+ currentBaselineOnly: Boolean(opts.currentBaselineOnly),
41307
+ cohortId: opts.cohort ? String(opts.cohort) : null
41308
+ });
41309
+ format(cliEnvelope({
41310
+ schemaVersion: "external_agent_run_summary_result.v1",
41311
+ status: report.status === "passed" ? "pass" : "fail",
41312
+ reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
41313
+ summary: `External-agent summary covers ${summary.run_count} run(s).`,
41314
+ artifacts: {
41315
+ summary: opts.out ? String(opts.out) : null,
41316
+ report: opts.report ? String(opts.report) : null
41317
+ },
41318
+ nextCommands: summary.next_commands,
41319
+ extra: { external_agent_summary: summary, report }
41320
+ }), { json: Boolean(opts.json) });
41321
+ if (report.status !== "passed") process.exitCode = 1;
41322
+ });
41323
+ const runs = external.command("runs").description("Compatibility namespace for external-agent run artifact utilities");
41324
+ runs.command("summary").description("Compatibility alias for `foh eval external-agent summary`").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
41325
+ const { summary, report } = runExternalAgentRunSummary({
41326
+ root: String(opts.root),
41327
+ out: opts.out ? String(opts.out) : void 0,
41328
+ report: opts.report ? String(opts.report) : void 0,
41329
+ currentBaselineOnly: Boolean(opts.currentBaselineOnly),
41330
+ cohortId: opts.cohort ? String(opts.cohort) : null
41331
+ });
41332
+ format(cliEnvelope({
41333
+ schemaVersion: "external_agent_run_summary_result.v1",
41334
+ status: report.status === "passed" ? "pass" : "fail",
41335
+ reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
41336
+ summary: `External-agent summary covers ${summary.run_count} run(s).`,
41337
+ artifacts: {
41338
+ summary: opts.out ? String(opts.out) : null,
41339
+ report: opts.report ? String(opts.report) : null
41340
+ },
41341
+ nextCommands: summary.next_commands,
41342
+ extra: { external_agent_summary: summary, report }
41343
+ }), { json: Boolean(opts.json) });
41344
+ if (report.status !== "passed") process.exitCode = 1;
41345
+ });
40902
41346
  external.command("scan-artifacts").description("Scan and redact external-agent run artifacts before they are promoted into improvement loops").requiredOption("--run-dir <path>", "External-agent run artifact directory").option("--private-repo-root <path>", "Private repository root that must not appear in artifacts").option("--write-redacted", "Write .redacted copies next to scanned artifacts").option("--json", "Output as JSON").action(async (opts) => {
40903
41347
  const report = scanExternalAgentArtifacts({
40904
41348
  runDir: String(opts.runDir),
@@ -40956,8 +41400,8 @@ Exit the shell to finalize run.json.
40956
41400
  requireExplicitEvalAuth: true,
40957
41401
  minimumEvalAuthTtlMs: (plan.timeout_minutes + 5) * 60 * 1e3
40958
41402
  });
40959
- const resultPath = (0, import_path18.join)(plan.batch_dir, "execution-result.json");
40960
- (0, import_fs19.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
41403
+ const resultPath = (0, import_path19.join)(plan.batch_dir, "execution-result.json");
41404
+ (0, import_fs20.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
40961
41405
  `, "utf8");
40962
41406
  format(cliEnvelope({
40963
41407
  schemaVersion: "external_agent_execution_result.v1",
@@ -40974,7 +41418,7 @@ Exit the shell to finalize run.json.
40974
41418
  plan.runs.find((item) => item.run_id === run.run_id)?.run_dir || ".",
40975
41419
  plan.private_repo_root_explicit ? plan.private_repo_root : void 0
40976
41420
  )),
40977
- externalAgentSummaryCommand2(plan.batch_dir)
41421
+ externalAgentSummaryCommand(plan.batch_dir)
40978
41422
  ],
40979
41423
  extra: { result }
40980
41424
  }), { json: Boolean(opts.json) });