@f-o-h/cli 0.1.69 → 0.1.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/foh.js +1201 -752
  2. package/package.json +1 -1
package/dist/foh.js CHANGED
@@ -6046,7 +6046,7 @@ var require_compile = __commonJS({
6046
6046
  const schOrFunc = root.refs[ref];
6047
6047
  if (schOrFunc)
6048
6048
  return schOrFunc;
6049
- let _sch = resolve13.call(this, root, ref);
6049
+ let _sch = resolve14.call(this, root, ref);
6050
6050
  if (_sch === void 0) {
6051
6051
  const schema2 = (_a2 = root.localRefs) === null || _a2 === void 0 ? void 0 : _a2[ref];
6052
6052
  const { schemaId } = this.opts;
@@ -6073,7 +6073,7 @@ var require_compile = __commonJS({
6073
6073
  function sameSchemaEnv(s1, s2) {
6074
6074
  return s1.schema === s2.schema && s1.root === s2.root && s1.baseId === s2.baseId;
6075
6075
  }
6076
- function resolve13(root, ref) {
6076
+ function resolve14(root, ref) {
6077
6077
  let sch;
6078
6078
  while (typeof (sch = this.refs[ref]) == "string")
6079
6079
  ref = sch;
@@ -6648,55 +6648,55 @@ var require_fast_uri = __commonJS({
6648
6648
  }
6649
6649
  return uri;
6650
6650
  }
6651
- function resolve13(baseURI, relativeURI, options) {
6651
+ function resolve14(baseURI, relativeURI, options) {
6652
6652
  const schemelessOptions = options ? Object.assign({ scheme: "null" }, options) : { scheme: "null" };
6653
6653
  const resolved = resolveComponent(parse3(baseURI, schemelessOptions), parse3(relativeURI, schemelessOptions), schemelessOptions, true);
6654
6654
  schemelessOptions.skipEscape = true;
6655
6655
  return serialize(resolved, schemelessOptions);
6656
6656
  }
6657
- function resolveComponent(base, relative3, options, skipNormalization) {
6657
+ function resolveComponent(base, relative4, options, skipNormalization) {
6658
6658
  const target = {};
6659
6659
  if (!skipNormalization) {
6660
6660
  base = parse3(serialize(base, options), options);
6661
- relative3 = parse3(serialize(relative3, options), options);
6661
+ relative4 = parse3(serialize(relative4, options), options);
6662
6662
  }
6663
6663
  options = options || {};
6664
- if (!options.tolerant && relative3.scheme) {
6665
- target.scheme = relative3.scheme;
6666
- target.userinfo = relative3.userinfo;
6667
- target.host = relative3.host;
6668
- target.port = relative3.port;
6669
- target.path = removeDotSegments(relative3.path || "");
6670
- target.query = relative3.query;
6664
+ if (!options.tolerant && relative4.scheme) {
6665
+ target.scheme = relative4.scheme;
6666
+ target.userinfo = relative4.userinfo;
6667
+ target.host = relative4.host;
6668
+ target.port = relative4.port;
6669
+ target.path = removeDotSegments(relative4.path || "");
6670
+ target.query = relative4.query;
6671
6671
  } else {
6672
- if (relative3.userinfo !== void 0 || relative3.host !== void 0 || relative3.port !== void 0) {
6673
- target.userinfo = relative3.userinfo;
6674
- target.host = relative3.host;
6675
- target.port = relative3.port;
6676
- target.path = removeDotSegments(relative3.path || "");
6677
- target.query = relative3.query;
6672
+ if (relative4.userinfo !== void 0 || relative4.host !== void 0 || relative4.port !== void 0) {
6673
+ target.userinfo = relative4.userinfo;
6674
+ target.host = relative4.host;
6675
+ target.port = relative4.port;
6676
+ target.path = removeDotSegments(relative4.path || "");
6677
+ target.query = relative4.query;
6678
6678
  } else {
6679
- if (!relative3.path) {
6679
+ if (!relative4.path) {
6680
6680
  target.path = base.path;
6681
- if (relative3.query !== void 0) {
6682
- target.query = relative3.query;
6681
+ if (relative4.query !== void 0) {
6682
+ target.query = relative4.query;
6683
6683
  } else {
6684
6684
  target.query = base.query;
6685
6685
  }
6686
6686
  } else {
6687
- if (relative3.path[0] === "/") {
6688
- target.path = removeDotSegments(relative3.path);
6687
+ if (relative4.path[0] === "/") {
6688
+ target.path = removeDotSegments(relative4.path);
6689
6689
  } else {
6690
6690
  if ((base.userinfo !== void 0 || base.host !== void 0 || base.port !== void 0) && !base.path) {
6691
- target.path = "/" + relative3.path;
6691
+ target.path = "/" + relative4.path;
6692
6692
  } else if (!base.path) {
6693
- target.path = relative3.path;
6693
+ target.path = relative4.path;
6694
6694
  } else {
6695
- target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative3.path;
6695
+ target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative4.path;
6696
6696
  }
6697
6697
  target.path = removeDotSegments(target.path);
6698
6698
  }
6699
- target.query = relative3.query;
6699
+ target.query = relative4.query;
6700
6700
  }
6701
6701
  target.userinfo = base.userinfo;
6702
6702
  target.host = base.host;
@@ -6704,7 +6704,7 @@ var require_fast_uri = __commonJS({
6704
6704
  }
6705
6705
  target.scheme = base.scheme;
6706
6706
  }
6707
- target.fragment = relative3.fragment;
6707
+ target.fragment = relative4.fragment;
6708
6708
  return target;
6709
6709
  }
6710
6710
  function equal(uriA, uriB, options) {
@@ -6875,7 +6875,7 @@ var require_fast_uri = __commonJS({
6875
6875
  var fastUri = {
6876
6876
  SCHEMES,
6877
6877
  normalize,
6878
- resolve: resolve13,
6878
+ resolve: resolve14,
6879
6879
  resolveComponent,
6880
6880
  equal,
6881
6881
  serialize,
@@ -10172,21 +10172,21 @@ async function promptLine(label, {
10172
10172
  allowEmpty = false,
10173
10173
  defaultValue
10174
10174
  } = {}) {
10175
- return await new Promise((resolve13) => {
10175
+ return await new Promise((resolve14) => {
10176
10176
  const suffix = defaultValue ? ` [${defaultValue}]` : "";
10177
10177
  const rl = (0, import_readline.createInterface)({ input: process.stdin, output: process.stdout, terminal: true });
10178
10178
  rl.question(`${label}${suffix}: `, (answer) => {
10179
10179
  rl.close();
10180
10180
  const value = String(answer ?? "").trim();
10181
10181
  if (!value && typeof defaultValue === "string") {
10182
- resolve13(defaultValue);
10182
+ resolve14(defaultValue);
10183
10183
  return;
10184
10184
  }
10185
10185
  if (!value && !allowEmpty) {
10186
- resolve13("");
10186
+ resolve14("");
10187
10187
  return;
10188
10188
  }
10189
- resolve13(value);
10189
+ resolve14(value);
10190
10190
  });
10191
10191
  });
10192
10192
  }
@@ -10194,7 +10194,7 @@ async function promptSecret(label) {
10194
10194
  if (!process.stdin.isTTY || !process.stdout.isTTY || typeof process.stdin.setRawMode !== "function") {
10195
10195
  return await promptLine(label);
10196
10196
  }
10197
- return await new Promise((resolve13) => {
10197
+ return await new Promise((resolve14) => {
10198
10198
  const stdin = process.stdin;
10199
10199
  const stdout = process.stdout;
10200
10200
  const wasRaw = Boolean(stdin.isRaw);
@@ -10208,7 +10208,7 @@ async function promptSecret(label) {
10208
10208
  const finish = () => {
10209
10209
  cleanup();
10210
10210
  stdout.write("\n");
10211
- resolve13(value);
10211
+ resolve14(value);
10212
10212
  };
10213
10213
  const onData = (chunk) => {
10214
10214
  const text = typeof chunk === "string" ? chunk : chunk.toString("utf8");
@@ -10217,7 +10217,7 @@ async function promptSecret(label) {
10217
10217
  cleanup();
10218
10218
  process.exitCode = 130;
10219
10219
  stdout.write("\n");
10220
- return resolve13("");
10220
+ return resolve14("");
10221
10221
  }
10222
10222
  if (char === "\r" || char === "\n") {
10223
10223
  finish();
@@ -10490,7 +10490,7 @@ async function storeAuthenticatedSession(params) {
10490
10490
  return output;
10491
10491
  }
10492
10492
  function sleep(ms) {
10493
- return new Promise((resolve13) => setTimeout(resolve13, ms));
10493
+ return new Promise((resolve14) => setTimeout(resolve14, ms));
10494
10494
  }
10495
10495
  function hasExplicitTimeoutFlag(argv = process.argv) {
10496
10496
  return argv.some((arg) => arg === "--timeout-seconds" || arg.startsWith("--timeout-seconds="));
@@ -11048,7 +11048,7 @@ async function pollUntil(check2, opts) {
11048
11048
  }
11049
11049
  }
11050
11050
  function sleep2(ms) {
11051
- return new Promise((resolve13) => setTimeout(resolve13, ms));
11051
+ return new Promise((resolve14) => setTimeout(resolve14, ms));
11052
11052
  }
11053
11053
 
11054
11054
  // src/commands/compliance.ts
@@ -14172,8 +14172,8 @@ function registerAgentGuardrailCommands(agent) {
14172
14172
  try {
14173
14173
  rule = JSON.parse(opts.rule);
14174
14174
  } catch {
14175
- const { readFileSync: readFileSync14 } = await import("fs");
14176
- rule = JSON.parse(readFileSync14(opts.rule, "utf-8"));
14175
+ const { readFileSync: readFileSync17 } = await import("fs");
14176
+ rule = JSON.parse(readFileSync17(opts.rule, "utf-8"));
14177
14177
  }
14178
14178
  const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
14179
14179
  method: "POST",
@@ -14323,7 +14323,7 @@ async function publishAgentFromCurrentDraft(agentId, options) {
14323
14323
  orgId
14324
14324
  });
14325
14325
  try {
14326
- await apiFetch(`/v1/console/agents/${agentId}/publish`, {
14326
+ return await apiFetch(`/v1/console/agents/${agentId}/publish`, {
14327
14327
  method: "POST",
14328
14328
  body: JSON.stringify({ flowDraft }),
14329
14329
  apiUrlOverride,
@@ -14399,7 +14399,7 @@ async function validateCertifyAndPublishAgent(opts) {
14399
14399
  remediation: `Run: foh agent validate --agent ${opts.agentId} to see details.`
14400
14400
  });
14401
14401
  }
14402
- await publishAgentFromCurrentDraft(opts.agentId, {
14402
+ const publish = await publishAgentFromCurrentDraft(opts.agentId, {
14403
14403
  apiUrlOverride: opts.apiUrlOverride,
14404
14404
  orgId: opts.orgId
14405
14405
  });
@@ -14409,7 +14409,7 @@ async function validateCertifyAndPublishAgent(opts) {
14409
14409
  status: "not_run",
14410
14410
  reason_code: "publish_consumes_existing_certification_evidence"
14411
14411
  },
14412
- publish: { ok: true }
14412
+ publish
14413
14413
  };
14414
14414
  }
14415
14415
 
@@ -14719,9 +14719,9 @@ function registerAgent(program3) {
14719
14719
  process.stdout.write(yaml);
14720
14720
  return;
14721
14721
  }
14722
- const { writeFileSync: writeFileSync12 } = await import("fs");
14722
+ const { writeFileSync: writeFileSync14 } = await import("fs");
14723
14723
  const outputPath = opts.output ?? "tenant.yaml";
14724
- writeFileSync12(
14724
+ writeFileSync14(
14725
14725
  outputPath,
14726
14726
  `# tenant.yaml - Front Of House agent manifest
14727
14727
  # Edit this file and run: foh plan tenant.yaml
@@ -15084,11 +15084,6 @@ function registerInstagramChannelCommands(instagram, addCommonOptions) {
15084
15084
  }));
15085
15085
  }
15086
15086
 
15087
- // src/commands/channel-whatsapp.ts
15088
- var import_node_crypto = require("node:crypto");
15089
- var import_node_fs = require("node:fs");
15090
- var path = __toESM(require("node:path"));
15091
-
15092
15087
  // src/commands/channel-whatsapp-helpers.ts
15093
15088
  function parsePositiveNumber(value, fallback) {
15094
15089
  if (value === void 0 || value === null || String(value).trim() === "") return fallback;
@@ -15215,7 +15210,11 @@ function buildReasonedNextSteps({
15215
15210
  return dedupeSteps(steps);
15216
15211
  }
15217
15212
 
15218
- // ../../scripts/lib/channel-live-proof-evaluator.mjs
15213
+ // src/commands/channel-whatsapp-live-proof.ts
15214
+ var import_node_fs = require("node:fs");
15215
+ var path = __toESM(require("node:path"));
15216
+
15217
+ // src/lib/channel-live-proof-evaluator.mjs
15219
15218
  function normalizeStatusValue(value) {
15220
15219
  return String(value || "").trim().toLowerCase();
15221
15220
  }
@@ -15282,55 +15281,7 @@ function evaluateChannelLiveProofArtifact({
15282
15281
  };
15283
15282
  }
15284
15283
 
15285
- // src/commands/channel-whatsapp.ts
15286
- var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
15287
- var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
15288
- var WHATSAPP_SENDER_MODEL = {
15289
- test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
15290
- production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
15291
- runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
15292
- };
15293
- function parseBooleanOption({
15294
- value,
15295
- fallback,
15296
- optionName,
15297
- step
15298
- }) {
15299
- if (typeof value === "boolean") return value;
15300
- const normalized = String(value ?? "").trim().toLowerCase();
15301
- if (!normalized) return fallback;
15302
- if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
15303
- if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
15304
- throw new FohError({
15305
- step,
15306
- error: `Invalid boolean value for ${optionName}: ${String(value)}`,
15307
- remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
15308
- });
15309
- }
15310
- async function runWhatsAppReadinessChecks({
15311
- orgId,
15312
- apiUrlOverride,
15313
- verifyToken
15314
- }) {
15315
- const status = await apiFetch("/v1/console/channels/whatsapp/status", {
15316
- orgId,
15317
- apiUrlOverride
15318
- });
15319
- const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
15320
- const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
15321
- method: "POST",
15322
- body: JSON.stringify(verifyPayload),
15323
- orgId,
15324
- apiUrlOverride
15325
- });
15326
- const test = await apiFetch("/v1/console/channels/whatsapp/test", {
15327
- method: "POST",
15328
- body: JSON.stringify({ dryRun: true }),
15329
- orgId,
15330
- apiUrlOverride
15331
- });
15332
- return { status, verify, test };
15333
- }
15284
+ // src/commands/channel-whatsapp-live-proof.ts
15334
15285
  function resolveLiveProof({
15335
15286
  enabled,
15336
15287
  artifactPathRaw,
@@ -15386,6 +15337,57 @@ function resolveLiveProof({
15386
15337
  freshness: evaluated.freshness
15387
15338
  };
15388
15339
  }
15340
+
15341
+ // src/commands/channel-whatsapp-setup.ts
15342
+ var import_node_crypto = require("node:crypto");
15343
+ var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
15344
+ var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
15345
+ var WHATSAPP_SENDER_MODEL = {
15346
+ test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
15347
+ production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
15348
+ runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
15349
+ };
15350
+ function parseBooleanOption({
15351
+ value,
15352
+ fallback,
15353
+ optionName,
15354
+ step
15355
+ }) {
15356
+ if (typeof value === "boolean") return value;
15357
+ const normalized = String(value ?? "").trim().toLowerCase();
15358
+ if (!normalized) return fallback;
15359
+ if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
15360
+ if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
15361
+ throw new FohError({
15362
+ step,
15363
+ error: `Invalid boolean value for ${optionName}: ${String(value)}`,
15364
+ remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
15365
+ });
15366
+ }
15367
+ async function runWhatsAppReadinessChecks({
15368
+ orgId,
15369
+ apiUrlOverride,
15370
+ verifyToken
15371
+ }) {
15372
+ const status = await apiFetch("/v1/console/channels/whatsapp/status", {
15373
+ orgId,
15374
+ apiUrlOverride
15375
+ });
15376
+ const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
15377
+ const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
15378
+ method: "POST",
15379
+ body: JSON.stringify(verifyPayload),
15380
+ orgId,
15381
+ apiUrlOverride
15382
+ });
15383
+ const test = await apiFetch("/v1/console/channels/whatsapp/test", {
15384
+ method: "POST",
15385
+ body: JSON.stringify({ dryRun: true }),
15386
+ orgId,
15387
+ apiUrlOverride
15388
+ });
15389
+ return { status, verify, test };
15390
+ }
15389
15391
  function buildWebhookUrl(apiBaseUrl) {
15390
15392
  return `${apiBaseUrl.replace(/\/$/, "")}/v1/whatsapp/webhook`;
15391
15393
  }
@@ -15522,6 +15524,8 @@ function assertProofPass(strict, reasons) {
15522
15524
  markCommandFailed(1);
15523
15525
  }
15524
15526
  }
15527
+
15528
+ // src/commands/channel-whatsapp.ts
15525
15529
  function registerWhatsAppChannelCommands(whatsapp, addCommonOptions) {
15526
15530
  addCommonOptions(
15527
15531
  whatsapp.command("start").description("Assess WhatsApp onboarding readiness and print fastest setup path")
@@ -16169,11 +16173,11 @@ function registerVoice(program3) {
16169
16173
  }
16170
16174
  const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
16171
16175
  const audio = Buffer.from(await res.arrayBuffer());
16172
- const { mkdirSync: mkdirSync8, writeFileSync: writeFileSync12 } = await import("fs");
16173
- const { dirname: dirname8, resolve: resolve13 } = await import("path");
16174
- const absolutePath = resolve13(outputPath);
16175
- mkdirSync8(dirname8(absolutePath), { recursive: true });
16176
- writeFileSync12(absolutePath, audio);
16176
+ const { mkdirSync: mkdirSync9, writeFileSync: writeFileSync14 } = await import("fs");
16177
+ const { dirname: dirname12, resolve: resolve14 } = await import("path");
16178
+ const absolutePath = resolve14(outputPath);
16179
+ mkdirSync9(dirname12(absolutePath), { recursive: true });
16180
+ writeFileSync14(absolutePath, audio);
16177
16181
  format({
16178
16182
  status: "ok",
16179
16183
  provider,
@@ -30664,7 +30668,7 @@ var Protocol = class {
30664
30668
  return;
30665
30669
  }
30666
30670
  const pollInterval = task2.pollInterval ?? this._options?.defaultTaskPollInterval ?? 1e3;
30667
- await new Promise((resolve13) => setTimeout(resolve13, pollInterval));
30671
+ await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
30668
30672
  options?.signal?.throwIfAborted();
30669
30673
  }
30670
30674
  } catch (error2) {
@@ -30681,7 +30685,7 @@ var Protocol = class {
30681
30685
  */
30682
30686
  request(request, resultSchema, options) {
30683
30687
  const { relatedRequestId, resumptionToken, onresumptiontoken, task, relatedTask } = options ?? {};
30684
- return new Promise((resolve13, reject) => {
30688
+ return new Promise((resolve14, reject) => {
30685
30689
  const earlyReject = (error2) => {
30686
30690
  reject(error2);
30687
30691
  };
@@ -30759,7 +30763,7 @@ var Protocol = class {
30759
30763
  if (!parseResult.success) {
30760
30764
  reject(parseResult.error);
30761
30765
  } else {
30762
- resolve13(parseResult.data);
30766
+ resolve14(parseResult.data);
30763
30767
  }
30764
30768
  } catch (error2) {
30765
30769
  reject(error2);
@@ -31020,12 +31024,12 @@ var Protocol = class {
31020
31024
  }
31021
31025
  } catch {
31022
31026
  }
31023
- return new Promise((resolve13, reject) => {
31027
+ return new Promise((resolve14, reject) => {
31024
31028
  if (signal.aborted) {
31025
31029
  reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
31026
31030
  return;
31027
31031
  }
31028
- const timeoutId = setTimeout(resolve13, interval);
31032
+ const timeoutId = setTimeout(resolve14, interval);
31029
31033
  signal.addEventListener("abort", () => {
31030
31034
  clearTimeout(timeoutId);
31031
31035
  reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
@@ -32125,7 +32129,7 @@ var McpServer = class {
32125
32129
  let task = createTaskResult.task;
32126
32130
  const pollInterval = task.pollInterval ?? 5e3;
32127
32131
  while (task.status !== "completed" && task.status !== "failed" && task.status !== "cancelled") {
32128
- await new Promise((resolve13) => setTimeout(resolve13, pollInterval));
32132
+ await new Promise((resolve14) => setTimeout(resolve14, pollInterval));
32129
32133
  const updatedTask = await extra.taskStore.getTask(taskId);
32130
32134
  if (!updatedTask) {
32131
32135
  throw new McpError(ErrorCode.InternalError, `Task ${taskId} not found during polling`);
@@ -32774,19 +32778,19 @@ var StdioServerTransport = class {
32774
32778
  this.onclose?.();
32775
32779
  }
32776
32780
  send(message) {
32777
- return new Promise((resolve13) => {
32781
+ return new Promise((resolve14) => {
32778
32782
  const json3 = serializeMessage(message);
32779
32783
  if (this._stdout.write(json3)) {
32780
- resolve13();
32784
+ resolve14();
32781
32785
  } else {
32782
- this._stdout.once("drain", resolve13);
32786
+ this._stdout.once("drain", resolve14);
32783
32787
  }
32784
32788
  });
32785
32789
  }
32786
32790
  };
32787
32791
 
32788
32792
  // src/lib/cli-version.ts
32789
- var CLI_VERSION = "0.1.69";
32793
+ var CLI_VERSION = "0.1.71";
32790
32794
 
32791
32795
  // src/commands/mcp-serve.ts
32792
32796
  var DEFAULT_TIMEOUT_MS = 12e4;
@@ -32971,7 +32975,7 @@ async function runFohCli(params) {
32971
32975
  effectiveArgv.push("--json");
32972
32976
  }
32973
32977
  const command = `foh ${effectiveArgv.join(" ")}`;
32974
- return await new Promise((resolve13) => {
32978
+ return await new Promise((resolve14) => {
32975
32979
  const child = (0, import_node_child_process.spawn)(process.execPath, [cliEntry, ...effectiveArgv], {
32976
32980
  stdio: ["ignore", "pipe", "pipe"],
32977
32981
  env: {
@@ -32996,7 +33000,7 @@ async function runFohCli(params) {
32996
33000
  });
32997
33001
  child.once("error", (error2) => {
32998
33002
  clearTimeout(timeoutHandle);
32999
- resolve13({
33003
+ resolve14({
33000
33004
  ok: false,
33001
33005
  command,
33002
33006
  argv: effectiveArgv,
@@ -33012,7 +33016,7 @@ async function runFohCli(params) {
33012
33016
  const stderrText = finalizeBoundedText(stderrBuffer);
33013
33017
  const exitCode = Number.isFinite(code ?? NaN) ? Number(code) : 1;
33014
33018
  const stdoutJson = tryParseJson(stdoutText);
33015
- resolve13({
33019
+ resolve14({
33016
33020
  ok: !timedOut && exitCode === 0,
33017
33021
  command,
33018
33022
  argv: effectiveArgv,
@@ -33806,6 +33810,35 @@ function readDraftKnowledgeText(draft) {
33806
33810
  const fromLegacy = typeof draft.knowledge_base === "string" ? draft.knowledge_base : "";
33807
33811
  return fromLegacy;
33808
33812
  }
33813
+ function normalizeKnowledgeText(value) {
33814
+ return value.replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n").trim();
33815
+ }
33816
+ function splitDraftKnowledgeSegments(value) {
33817
+ return value.replace(/\r\n?/g, "\n").split(/\n\s*---+\s*\n/g).map((segment) => normalizeKnowledgeText(segment)).filter(Boolean);
33818
+ }
33819
+ function buildDraftKnowledgeUpdate(existing, fileContent) {
33820
+ const normalizedContent = normalizeKnowledgeText(fileContent);
33821
+ if (normalizedContent.length === 0) {
33822
+ throw new FohError({
33823
+ step: "knowledge.ingest-file",
33824
+ error: "Knowledge file is empty after normalization",
33825
+ remediation: "Pass a file with non-empty text content.",
33826
+ statusCode: 400
33827
+ });
33828
+ }
33829
+ const existingSegments = splitDraftKnowledgeSegments(existing);
33830
+ const duplicate = existingSegments.includes(normalizedContent);
33831
+ const nextSegments = duplicate ? existingSegments : [...existingSegments, normalizedContent];
33832
+ const nextKnowledge = nextSegments.join("\n\n---\n\n");
33833
+ const normalizedExisting = existingSegments.join("\n\n---\n\n");
33834
+ return {
33835
+ nextKnowledge,
33836
+ normalizedContent,
33837
+ duplicate,
33838
+ shouldPatch: nextKnowledge !== normalizedExisting || normalizeKnowledgeText(existing) !== normalizedExisting,
33839
+ segmentCount: nextSegments.length
33840
+ };
33841
+ }
33809
33842
  function tokenize(value) {
33810
33843
  return value.toLowerCase().split(/[^a-z0-9]+/g).map((token) => token.trim()).filter((token) => token.length >= 3);
33811
33844
  }
@@ -33929,23 +33962,25 @@ function registerKnowledge(program3) {
33929
33962
  apiUrlOverride: opts.apiUrl
33930
33963
  });
33931
33964
  const existing = readDraftKnowledgeText(draft);
33932
- const nextKnowledge = existing.trim().length > 0 ? `${existing}
33933
-
33934
- ---
33935
- ${content}` : content;
33936
- await apiFetch(`/v1/console/agents/${opts.agent}/draft`, {
33937
- method: "PATCH",
33938
- body: JSON.stringify({
33939
- knowledge_base_raw: nextKnowledge,
33940
- knowledge_base: nextKnowledge
33941
- }),
33942
- orgId: opts.org,
33943
- apiUrlOverride: opts.apiUrl
33944
- });
33965
+ const update = buildDraftKnowledgeUpdate(existing, content);
33966
+ if (update.shouldPatch) {
33967
+ await apiFetch(`/v1/console/agents/${opts.agent}/draft`, {
33968
+ method: "PATCH",
33969
+ body: JSON.stringify({
33970
+ knowledge_base_raw: update.nextKnowledge,
33971
+ knowledge_base: update.nextKnowledge
33972
+ }),
33973
+ orgId: opts.org,
33974
+ apiUrlOverride: opts.apiUrl
33975
+ });
33976
+ }
33945
33977
  data = {
33946
33978
  ok: true,
33947
33979
  source: "agent_draft_direct",
33948
- length: nextKnowledge.length
33980
+ length: update.nextKnowledge.length,
33981
+ draft_knowledge_updated: update.shouldPatch,
33982
+ draft_knowledge_deduped: update.duplicate,
33983
+ segment_count: update.segmentCount
33949
33984
  };
33950
33985
  } else {
33951
33986
  data = await apiFetch("/v1/knowledge/documents", {
@@ -33953,7 +33988,7 @@ ${content}` : content;
33953
33988
  body: JSON.stringify({
33954
33989
  name: (0, import_path2.basename)(opts.file),
33955
33990
  source_type: "text",
33956
- source_value: content,
33991
+ source_value: normalizeKnowledgeText(content),
33957
33992
  agent_id: opts.agent
33958
33993
  }),
33959
33994
  orgId: opts.org,
@@ -34078,47 +34113,8 @@ function registerLeads(program3) {
34078
34113
  // src/commands/setup.ts
34079
34114
  var import_crypto3 = require("crypto");
34080
34115
 
34081
- // src/lib/signed-report.ts
34082
- var import_crypto2 = require("crypto");
34083
- var import_fs4 = require("fs");
34084
- var import_path3 = require("path");
34085
- function canonicalize(value) {
34086
- if (value === null || value === void 0) return null;
34087
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
34088
- if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
34089
- if (typeof value === "object") {
34090
- const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
34091
- return Object.fromEntries(sortedEntries);
34092
- }
34093
- return String(value);
34094
- }
34095
- function stableStringify(value) {
34096
- return JSON.stringify(canonicalize(value), null, 2) + "\n";
34097
- }
34098
- function sha256Hex(input) {
34099
- return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
34100
- }
34101
- function signReport(reportPayload) {
34102
- const canonical = stableStringify(reportPayload);
34103
- return {
34104
- ...reportPayload,
34105
- report_hash: {
34106
- algorithm: "sha256",
34107
- digest_hex: sha256Hex(canonical),
34108
- canonicalization: "sorted-json-v1",
34109
- verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
34110
- }
34111
- };
34112
- }
34113
- function writeSignedJsonArtifact(path2, value) {
34114
- const absolutePath = (0, import_path3.resolve)(path2);
34115
- (0, import_fs4.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
34116
- (0, import_fs4.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
34117
- return absolutePath;
34118
- }
34119
-
34120
34116
  // src/commands/manifest.ts
34121
- var import_fs5 = require("fs");
34117
+ var import_fs4 = require("fs");
34122
34118
  var import_picocolors3 = __toESM(require_picocolors());
34123
34119
  function formatDiff(diffs) {
34124
34120
  if (diffs.length === 0) return "No changes";
@@ -34154,7 +34150,7 @@ function formatDiff(diffs) {
34154
34150
  function loadManifestFile(filePath) {
34155
34151
  let raw;
34156
34152
  try {
34157
- raw = (0, import_fs5.readFileSync)(filePath, "utf-8");
34153
+ raw = (0, import_fs4.readFileSync)(filePath, "utf-8");
34158
34154
  } catch {
34159
34155
  throw new FohError({
34160
34156
  step: "manifest.load",
@@ -34379,7 +34375,126 @@ function normalizeAgentCertMode(value) {
34379
34375
  return agentCertModeValues.includes(value) ? value : "quick";
34380
34376
  }
34381
34377
 
34382
- // src/commands/setup.ts
34378
+ // src/commands/setup-apply.ts
34379
+ function extractGuardrailsList(response) {
34380
+ if (Array.isArray(response)) return response;
34381
+ if (Array.isArray(response.guardrails)) return response.guardrails;
34382
+ if (Array.isArray(response.rules)) return response.rules;
34383
+ return [];
34384
+ }
34385
+ function isMissingAgentTestsError(error2) {
34386
+ if (!(error2 instanceof FohError)) return false;
34387
+ if (error2.statusCode !== 404) return false;
34388
+ const text = [
34389
+ error2.error,
34390
+ error2.reasonCode,
34391
+ error2.detail?.error,
34392
+ error2.detail?.message,
34393
+ error2.detail?.reason_code
34394
+ ].filter((value) => value !== void 0 && value !== null).join(" ").toLowerCase();
34395
+ return text.includes("no tests found") || text.includes("tests_not_found") || text.includes("agent_tests_not_configured");
34396
+ }
34397
+ function errorReasonCode(error2) {
34398
+ const detail = error2.detail;
34399
+ const code = detail && typeof detail.code === "string" ? detail.code : void 0;
34400
+ const reasonCode = detail && typeof detail.reason_code === "string" ? detail.reason_code : void 0;
34401
+ return error2.reasonCode ?? reasonCode ?? code;
34402
+ }
34403
+ function isAgentLimitReachedError(error2) {
34404
+ const reasonCode = errorReasonCode(error2);
34405
+ const text = [
34406
+ reasonCode,
34407
+ error2.error,
34408
+ error2.detail?.error,
34409
+ error2.detail?.message
34410
+ ].filter((value) => value !== void 0 && value !== null).join(" ").toLowerCase();
34411
+ return text.includes("agent_limit_reached") || text.includes("agent limit reached");
34412
+ }
34413
+ function shouldReuseSingleAgentForEval() {
34414
+ return Boolean(process.env.FOH_EXTERNAL_AGENT_RUN_DIR) && isNoSpendPolicy();
34415
+ }
34416
+ async function rebaseEvalAgentDraftFromTemplate(params) {
34417
+ const preview = await apiFetch(`/v1/console/templates/${params.templateId}`, {
34418
+ orgId: params.orgId,
34419
+ apiUrlOverride: params.apiUrlOverride
34420
+ });
34421
+ const draft = preview.template?.draft_config;
34422
+ if (!draft || typeof draft !== "object" || Array.isArray(draft)) {
34423
+ throw new FohError({
34424
+ step: "create_agent",
34425
+ error: "Template preview did not return a draft_config for eval reuse.",
34426
+ remediation: `Run: foh templates show --template ${params.templateId} --json, then retry setup.`,
34427
+ reasonCode: "eval_agent_template_rebase_failed"
34428
+ });
34429
+ }
34430
+ await apiFetch(`/v1/console/agents/${params.agentId}/draft`, {
34431
+ method: "PATCH",
34432
+ body: JSON.stringify({ ...draft, name: params.agentName }),
34433
+ orgId: params.orgId,
34434
+ apiUrlOverride: params.apiUrlOverride
34435
+ });
34436
+ return {
34437
+ template_rebased: true,
34438
+ template_id: params.templateId,
34439
+ draft_keys: Object.keys(draft).sort()
34440
+ };
34441
+ }
34442
+
34443
+ // src/commands/setup-missing-options.ts
34444
+ function optionNameToFlag(key) {
34445
+ return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
34446
+ }
34447
+ function buildMissingOptionsPlan(missing, opts) {
34448
+ const missingFlags = missing.map(optionNameToFlag);
34449
+ const signInUrl = buildConsoleSignInUrl(resolveConsoleBaseUrl(opts.consoleUrl));
34450
+ return cliEnvelope({
34451
+ status: "blocked",
34452
+ reasonCode: "setup_required_options_missing",
34453
+ summary: "Setup requires an authenticated org, an agent template, and an agent name before it can mutate customer resources.",
34454
+ ids: {},
34455
+ nextCommands: [
34456
+ "foh auth signup --web --json",
34457
+ "foh auth login --web --json",
34458
+ ...buildCliAuthFallbackCommands(),
34459
+ "npx --yes @f-o-h/cli@latest templates list --category buyer --json",
34460
+ 'npx --yes @f-o-h/cli@latest setup --org <org-id> --agent-template <buyer-template-id> --agent-name "Demo Buyer Agent" --widget-domains <domain> --voice-provider openai --voice-id alloy --report-out setup-report.json --json'
34461
+ ],
34462
+ extra: {
34463
+ code: "setup_required_options_missing",
34464
+ missing_options: missingFlags,
34465
+ reason: "setup requires an authenticated org, an agent template, and an agent name before it can mutate customer resources",
34466
+ sign_in_url: signInUrl,
34467
+ text_fallback: buildCliAuthFallbackInstructions(signInUrl),
34468
+ ai_agent_instruction: [
34469
+ "Do not guess org IDs, template IDs, or customer domains.",
34470
+ "If no browser is available, print sign_in_url and ask the user to sign in.",
34471
+ "Use npx --yes @f-o-h/cli@latest for every CLI command; do not use unpinned npx @f-o-h/cli.",
34472
+ "After auth, discover orgs and certification-oriented buyer templates with the listed commands.",
34473
+ "Prefer UK Buyer Qualification or Viewing Booking; do not use greeting-only templates for proof/certification.",
34474
+ "Rerun setup only after all missing_options are resolved."
34475
+ ]
34476
+ }
34477
+ });
34478
+ }
34479
+ function emitMissingOptionsPlan(missing, opts) {
34480
+ const plan = buildMissingOptionsPlan(missing, { consoleUrl: opts.consoleUrl });
34481
+ if (resolveJsonMode({ json: opts.json })) {
34482
+ format(plan, { json: true });
34483
+ return;
34484
+ }
34485
+ const flags = missing.map(optionNameToFlag).join(", ");
34486
+ process.stderr.write(`error: required options missing: ${flags}
34487
+ `);
34488
+ process.stderr.write(` Sign in: ${plan.sign_in_url}
34489
+ `);
34490
+ process.stderr.write(" Remediation:\n");
34491
+ for (const command of plan.next_commands) {
34492
+ process.stderr.write(` ${command}
34493
+ `);
34494
+ }
34495
+ }
34496
+
34497
+ // src/commands/setup-plan.ts
34383
34498
  var SETUP_STEP_ORDER = [
34384
34499
  "check_credentials",
34385
34500
  "check_org_access",
@@ -34398,12 +34513,6 @@ var SETUP_STEP_ORDER = [
34398
34513
  "publish_agent",
34399
34514
  "emit_summary"
34400
34515
  ];
34401
- function extractGuardrailsList(response) {
34402
- if (Array.isArray(response)) return response;
34403
- if (Array.isArray(response.guardrails)) return response.guardrails;
34404
- if (Array.isArray(response.rules)) return response.rules;
34405
- return [];
34406
- }
34407
34516
  function resolveResumeIndex(resumeFromRaw) {
34408
34517
  if (!resumeFromRaw) {
34409
34518
  return { resumeFrom: null, resumeIndex: 0 };
@@ -34430,9 +34539,6 @@ function timedStepResult(result, startedAtIso, startedAtMs) {
34430
34539
  duration_ms: Math.max(0, Date.now() - startedAtMs)
34431
34540
  };
34432
34541
  }
34433
- function optionNameToFlag(key) {
34434
- return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
34435
- }
34436
34542
  function normalizeSetupPhoneMode(raw) {
34437
34543
  const value = String(raw || "purchase").trim().toLowerCase();
34438
34544
  if (value === "observe" || value === "skip" || value === "purchase") return value;
@@ -34453,112 +34559,57 @@ function complianceSkipDetail(phoneMode) {
34453
34559
  operator_note: "Compliance is only required before paid FOH-owned phone purchase."
34454
34560
  };
34455
34561
  }
34456
- function isMissingAgentTestsError(error2) {
34457
- if (!(error2 instanceof FohError)) return false;
34458
- if (error2.statusCode !== 404) return false;
34459
- const text = [
34460
- error2.error,
34461
- error2.reasonCode,
34462
- error2.detail?.error,
34463
- error2.detail?.message,
34464
- error2.detail?.reason_code
34465
- ].filter((value) => value !== void 0 && value !== null).join(" ").toLowerCase();
34466
- return text.includes("no tests found") || text.includes("tests_not_found") || text.includes("agent_tests_not_configured");
34467
- }
34468
- function errorReasonCode(error2) {
34469
- const detail = error2.detail;
34470
- const code = detail && typeof detail.code === "string" ? detail.code : void 0;
34471
- const reasonCode = detail && typeof detail.reason_code === "string" ? detail.reason_code : void 0;
34472
- return error2.reasonCode ?? reasonCode ?? code;
34562
+
34563
+ // src/lib/signed-report.ts
34564
+ var import_crypto2 = require("crypto");
34565
+ var import_fs5 = require("fs");
34566
+ var import_path3 = require("path");
34567
+ function canonicalize(value) {
34568
+ if (value === null || value === void 0) return null;
34569
+ if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
34570
+ if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
34571
+ if (typeof value === "object") {
34572
+ const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
34573
+ return Object.fromEntries(sortedEntries);
34574
+ }
34575
+ return String(value);
34473
34576
  }
34474
- function isAgentLimitReachedError(error2) {
34475
- const reasonCode = errorReasonCode(error2);
34476
- const text = [
34477
- reasonCode,
34478
- error2.error,
34479
- error2.detail?.error,
34480
- error2.detail?.message
34481
- ].filter((value) => value !== void 0 && value !== null).join(" ").toLowerCase();
34482
- return text.includes("agent_limit_reached") || text.includes("agent limit reached");
34577
+ function stableStringify(value) {
34578
+ return JSON.stringify(canonicalize(value), null, 2) + "\n";
34483
34579
  }
34484
- function shouldReuseSingleAgentForEval() {
34485
- return Boolean(process.env.FOH_EXTERNAL_AGENT_RUN_DIR) && isNoSpendPolicy();
34580
+ function sha256Hex(input) {
34581
+ return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
34486
34582
  }
34487
- async function rebaseEvalAgentDraftFromTemplate(params) {
34488
- const preview = await apiFetch(`/v1/console/templates/${params.templateId}`, {
34489
- orgId: params.orgId,
34490
- apiUrlOverride: params.apiUrlOverride
34491
- });
34492
- const draft = preview.template?.draft_config;
34493
- if (!draft || typeof draft !== "object" || Array.isArray(draft)) {
34494
- throw new FohError({
34495
- step: "create_agent",
34496
- error: "Template preview did not return a draft_config for eval reuse.",
34497
- remediation: `Run: foh templates show --template ${params.templateId} --json, then retry setup.`,
34498
- reasonCode: "eval_agent_template_rebase_failed"
34499
- });
34500
- }
34501
- await apiFetch(`/v1/console/agents/${params.agentId}/draft`, {
34502
- method: "PATCH",
34503
- body: JSON.stringify({ ...draft, name: params.agentName }),
34504
- orgId: params.orgId,
34505
- apiUrlOverride: params.apiUrlOverride
34506
- });
34583
+ function signReport(reportPayload) {
34584
+ const canonical = stableStringify(reportPayload);
34507
34585
  return {
34508
- template_rebased: true,
34509
- template_id: params.templateId,
34510
- draft_keys: Object.keys(draft).sort()
34586
+ ...reportPayload,
34587
+ report_hash: {
34588
+ algorithm: "sha256",
34589
+ digest_hex: sha256Hex(canonical),
34590
+ canonicalization: "sorted-json-v1",
34591
+ verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
34592
+ }
34511
34593
  };
34512
34594
  }
34513
- function buildMissingOptionsPlan(missing, opts) {
34514
- const missingFlags = missing.map(optionNameToFlag);
34515
- const signInUrl = buildConsoleSignInUrl(resolveConsoleBaseUrl(opts.consoleUrl));
34516
- return cliEnvelope({
34517
- status: "blocked",
34518
- reasonCode: "setup_required_options_missing",
34519
- summary: "Setup requires an authenticated org, an agent template, and an agent name before it can mutate customer resources.",
34520
- ids: {},
34521
- nextCommands: [
34522
- "foh auth signup --web --json",
34523
- "foh auth login --web --json",
34524
- ...buildCliAuthFallbackCommands(),
34525
- "npx --yes @f-o-h/cli@latest templates list --category buyer --json",
34526
- 'npx --yes @f-o-h/cli@latest setup --org <org-id> --agent-template <buyer-template-id> --agent-name "Demo Buyer Agent" --widget-domains <domain> --voice-provider openai --voice-id alloy --report-out setup-report.json --json'
34527
- ],
34528
- extra: {
34529
- code: "setup_required_options_missing",
34530
- missing_options: missingFlags,
34531
- reason: "setup requires an authenticated org, an agent template, and an agent name before it can mutate customer resources",
34532
- sign_in_url: signInUrl,
34533
- text_fallback: buildCliAuthFallbackInstructions(signInUrl),
34534
- ai_agent_instruction: [
34535
- "Do not guess org IDs, template IDs, or customer domains.",
34536
- "If no browser is available, print sign_in_url and ask the user to sign in.",
34537
- "Use npx --yes @f-o-h/cli@latest for every CLI command; do not use unpinned npx @f-o-h/cli.",
34538
- "After auth, discover orgs and certification-oriented buyer templates with the listed commands.",
34539
- "Prefer UK Buyer Qualification or Viewing Booking; do not use greeting-only templates for proof/certification.",
34540
- "Rerun setup only after all missing_options are resolved."
34541
- ]
34542
- }
34543
- });
34595
+ function writeSignedJsonArtifact(path2, value) {
34596
+ const absolutePath = (0, import_path3.resolve)(path2);
34597
+ (0, import_fs5.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
34598
+ (0, import_fs5.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
34599
+ return absolutePath;
34544
34600
  }
34545
- function emitMissingOptionsPlan(missing, opts) {
34546
- const plan = buildMissingOptionsPlan(missing, { consoleUrl: opts.consoleUrl });
34547
- if (resolveJsonMode({ json: opts.json })) {
34548
- format(plan, { json: true });
34549
- return;
34550
- }
34551
- const flags = missing.map(optionNameToFlag).join(", ");
34552
- process.stderr.write(`error: required options missing: ${flags}
34553
- `);
34554
- process.stderr.write(` Sign in: ${plan.sign_in_url}
34555
- `);
34556
- process.stderr.write(" Remediation:\n");
34557
- for (const command of plan.next_commands) {
34558
- process.stderr.write(` ${command}
34559
- `);
34560
- }
34601
+
34602
+ // src/commands/setup-report.ts
34603
+ function writeSetupRunReport(reportPayload, reportOut) {
34604
+ const signed = signReport(reportPayload);
34605
+ const reportPath = reportOut ? writeSignedJsonArtifact(String(reportOut), signed) : null;
34606
+ return {
34607
+ reportHash: signed.report_hash.digest_hex,
34608
+ reportPath
34609
+ };
34561
34610
  }
34611
+
34612
+ // src/commands/setup.ts
34562
34613
  function registerSetup(program3) {
34563
34614
  program3.command("setup").description("Fully provision a new agency customer in one command").option("--org <id>", "Org ID (default: stored org from foh org use)").option("--agent-template <id>", "Agent template ID (e.g. viewing-request)").option("--agent-name <name>", "Name for the new agent").option("--phone-country <cc>", "Phone number country code", "GB").option("--phone-area-code <code>", "Phone area code preference").option("--phone-mode <mode>", "Phone path: observe, skip, or purchase", "purchase").option("--widget-domains <domains>", "Comma-separated widget domain allowlist").option("--voice-provider <p>", "TTS provider: openai, azure, twilio").option("--voice-id <id>", "Voice ID").option("--skip-compliance", "Skip compliance submission and wait").option("--skip-voice", "Skip voice configuration").option("--skip-tests", "Skip smoke tests").option("--cert-mode <m>", "Simulation cert mode: quick, full, stress", "quick").option("--cert-adaptive-runs <n>", "Adaptive run count for certification loop", "30").option("--cert-max-improvement-rounds <n>", "Max instruction improvement rounds in cert loop (0-5)", "1").option("--resume-from <step>", `Resume from a setup step (${SETUP_STEP_ORDER.join(", ")})`).option("--report-out <path>", "Optional path to write signed setup run report JSON").option("--dry-run", "Print all steps that would run without making any API calls").option("--api-url <url>", "API base URL override").option("--console-url <url>", "Console sign-in URL override").option("--json", "Output as JSON").action(async (opts) => {
34564
34615
  if (!opts.org) {
@@ -34644,12 +34695,7 @@ function registerSetup(program3) {
34644
34695
  steps: completed,
34645
34696
  failure: failure ?? null
34646
34697
  };
34647
- const signed = signReport(reportPayload);
34648
- const reportPath = opts.reportOut ? writeSignedJsonArtifact(String(opts.reportOut), signed) : null;
34649
- return {
34650
- reportHash: signed.report_hash.digest_hex,
34651
- reportPath
34652
- };
34698
+ return writeSetupRunReport(reportPayload, opts.reportOut);
34653
34699
  };
34654
34700
  const shouldResumeSkip = (stepName) => {
34655
34701
  if (!resumeState.resumeFrom) return false;
@@ -35140,8 +35186,8 @@ function registerSetup(program3) {
35140
35186
  }
35141
35187
  try {
35142
35188
  const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
35143
- const { writeFileSync: writeFileSync12 } = await import("fs");
35144
- writeFileSync12(
35189
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35190
+ writeFileSync14(
35145
35191
  "tenant.yaml",
35146
35192
  `# tenant.yaml - Front Of House agent manifest
35147
35193
  # Edit this file and run: foh plan tenant.yaml
@@ -35311,8 +35357,8 @@ function registerSim(program3) {
35311
35357
  }
35312
35358
  const cert = response.certificate;
35313
35359
  if (opts.out) {
35314
- const { writeFileSync: writeFileSync12 } = await import("fs");
35315
- writeFileSync12(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35360
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35361
+ writeFileSync14(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35316
35362
  process.stderr.write(` Certificate written to ${opts.out}
35317
35363
  `);
35318
35364
  }
@@ -35362,8 +35408,8 @@ function registerSim(program3) {
35362
35408
  });
35363
35409
  }
35364
35410
  if (opts.out) {
35365
- const { writeFileSync: writeFileSync12 } = await import("fs");
35366
- writeFileSync12(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35411
+ const { writeFileSync: writeFileSync14 } = await import("fs");
35412
+ writeFileSync14(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35367
35413
  process.stderr.write(` Final certificate written to ${opts.out}
35368
35414
  `);
35369
35415
  }
@@ -38423,7 +38469,7 @@ async function runSelf(args, apiUrlOverride) {
38423
38469
  if (apiUrlOverride && !spawnArgs.includes("--api-url")) {
38424
38470
  spawnArgs.push("--api-url", apiUrlOverride);
38425
38471
  }
38426
- return await new Promise((resolve13, reject) => {
38472
+ return await new Promise((resolve14, reject) => {
38427
38473
  const child = (0, import_child_process2.spawn)(process.execPath, [process.argv[1], ...spawnArgs], {
38428
38474
  stdio: "inherit",
38429
38475
  env: {
@@ -38433,7 +38479,7 @@ async function runSelf(args, apiUrlOverride) {
38433
38479
  }
38434
38480
  });
38435
38481
  child.once("error", reject);
38436
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38482
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38437
38483
  });
38438
38484
  }
38439
38485
  function shouldUseInteractiveHome(argv) {
@@ -38811,17 +38857,17 @@ function detectUpdateAvailability(currentVersion, cwd = process.cwd()) {
38811
38857
  async function applyRepoUpdate(repoRoot) {
38812
38858
  const scriptPath = (0, import_path9.join)(repoRoot, "scripts", "Install-FohCli.ps1");
38813
38859
  if (process.platform === "win32") {
38814
- return await new Promise((resolve13, reject) => {
38860
+ return await new Promise((resolve14, reject) => {
38815
38861
  const child = (0, import_child_process3.spawn)(
38816
38862
  "powershell",
38817
38863
  ["-ExecutionPolicy", "Bypass", "-File", scriptPath],
38818
38864
  { stdio: "inherit" }
38819
38865
  );
38820
38866
  child.once("error", reject);
38821
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38867
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38822
38868
  });
38823
38869
  }
38824
- return await new Promise((resolve13, reject) => {
38870
+ return await new Promise((resolve14, reject) => {
38825
38871
  const child = (0, import_child_process3.spawn)(
38826
38872
  "corepack",
38827
38873
  ["pnpm", "cli:install:global"],
@@ -38831,7 +38877,7 @@ async function applyRepoUpdate(repoRoot) {
38831
38877
  }
38832
38878
  );
38833
38879
  child.once("error", reject);
38834
- child.once("close", (code) => resolve13(typeof code === "number" ? code : 1));
38880
+ child.once("close", (code) => resolve14(typeof code === "number" ? code : 1));
38835
38881
  });
38836
38882
  }
38837
38883
  function shouldShowUpdateNotice(argv = process.argv) {
@@ -38967,9 +39013,9 @@ function registerUpdate(program3) {
38967
39013
  }
38968
39014
 
38969
39015
  // src/commands/eval.ts
38970
- var import_fs16 = require("fs");
38971
- var import_path14 = require("path");
38972
- var import_child_process5 = require("child_process");
39016
+ var import_fs20 = require("fs");
39017
+ var import_path19 = require("path");
39018
+ var import_child_process6 = require("child_process");
38973
39019
 
38974
39020
  // src/lib/external-agent-artifact-safety.ts
38975
39021
  var import_fs12 = require("fs");
@@ -39303,63 +39349,13 @@ function readCommandRecords(runDir) {
39303
39349
  }
39304
39350
 
39305
39351
  // src/lib/external-agent-executor.ts
39306
- var import_fs15 = require("fs");
39352
+ var import_fs19 = require("fs");
39307
39353
  var import_os2 = require("os");
39308
- var import_path13 = require("path");
39309
- var import_child_process4 = require("child_process");
39354
+ var import_path18 = require("path");
39355
+ var import_child_process5 = require("child_process");
39310
39356
 
39311
- // src/lib/external-agent-metadata.ts
39312
- var import_fs14 = require("fs");
39357
+ // src/lib/external-agent-executor-env.ts
39313
39358
  var import_path12 = require("path");
39314
- var EXTERNAL_AGENT_METADATA_FILENAMES = [
39315
- "external-agent-metadata.json",
39316
- "agent-metadata.json"
39317
- ];
39318
- var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
39319
- function normalizeDocUrl(value) {
39320
- const raw = typeof value === "string" ? value : value && typeof value === "object" && typeof value.url === "string" ? String(value.url) : "";
39321
- const url2 = raw.trim().replace(/[.?!:]+$/g, "");
39322
- if (!PUBLIC_DOC_URL_RE.test(url2)) return null;
39323
- return url2;
39324
- }
39325
- function collectDocsFrom(value, docs) {
39326
- if (Array.isArray(value)) {
39327
- for (const entry of value) {
39328
- const url2 = normalizeDocUrl(entry);
39329
- if (url2) docs.add(url2);
39330
- }
39331
- }
39332
- }
39333
- function readExternalAgentMetadata(runDir) {
39334
- for (const filename of EXTERNAL_AGENT_METADATA_FILENAMES) {
39335
- const path2 = (0, import_path12.join)(runDir, filename);
39336
- if (!(0, import_fs14.existsSync)(path2)) continue;
39337
- try {
39338
- const parsed = JSON.parse((0, import_fs14.readFileSync)(path2, "utf8"));
39339
- const docs = /* @__PURE__ */ new Set();
39340
- collectDocsFrom(parsed.docs_pages_used, docs);
39341
- collectDocsFrom(parsed.docs_pages_observed, docs);
39342
- collectDocsFrom(parsed.docs_used, docs);
39343
- collectDocsFrom(parsed.public_docs_used, docs);
39344
- return {
39345
- path: filename,
39346
- docs_pages_used: Array.from(docs).sort()
39347
- };
39348
- } catch {
39349
- return {
39350
- path: filename,
39351
- docs_pages_used: []
39352
- };
39353
- }
39354
- }
39355
- return {
39356
- path: null,
39357
- docs_pages_used: []
39358
- };
39359
- }
39360
-
39361
- // src/lib/external-agent-executor.ts
39362
- var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
39363
39359
  var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
39364
39360
  "SUPABASE_",
39365
39361
  "DATABASE_",
@@ -39403,15 +39399,6 @@ var EXTERNAL_AGENT_EVAL_AUTH_ENV_MAP = {
39403
39399
  FOH_EXTERNAL_AGENT_EVAL_API_URL: "FOH_API_URL",
39404
39400
  FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT: "FOH_TOKEN_EXPIRES_AT"
39405
39401
  };
39406
- var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
39407
- var ExternalAgentExecutorError = class extends Error {
39408
- reasonCode;
39409
- constructor(reasonCode, message) {
39410
- super(message);
39411
- this.name = "ExternalAgentExecutorError";
39412
- this.reasonCode = reasonCode;
39413
- }
39414
- };
39415
39402
  function isDeniedEnvKey(key) {
39416
39403
  const upper = key.toUpperCase();
39417
39404
  if (CODEX_EXECUTOR_DENIED_ENV_NAMES.some((name) => upper === name)) return true;
@@ -39432,7 +39419,7 @@ function buildCodexExecutorEnv(input) {
39432
39419
  env[childKey] = value;
39433
39420
  }
39434
39421
  }
39435
- env.npm_config_cache = (0, import_path13.join)((0, import_path13.dirname)(input.runDir), ".npm-cache");
39422
+ env.npm_config_cache = (0, import_path12.join)((0, import_path12.dirname)(input.runDir), ".npm-cache");
39436
39423
  env.npm_config_prefer_online = "true";
39437
39424
  env.npm_config_update_notifier = "false";
39438
39425
  env.npm_config_yes = "true";
@@ -39442,6 +39429,723 @@ function buildCodexExecutorEnv(input) {
39442
39429
  env.FOH_CLI_SUPPRESS_BANNER = "1";
39443
39430
  return env;
39444
39431
  }
39432
+
39433
+ // src/lib/external-agent-executor-artifacts.ts
39434
+ var import_fs15 = require("fs");
39435
+ var import_path14 = require("path");
39436
+
39437
+ // src/lib/external-agent-metadata.ts
39438
+ var import_fs14 = require("fs");
39439
+ var import_path13 = require("path");
39440
+ var EXTERNAL_AGENT_METADATA_FILENAMES = [
39441
+ "external-agent-metadata.json",
39442
+ "agent-metadata.json"
39443
+ ];
39444
+ var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
39445
+ function normalizeDocUrl(value) {
39446
+ const raw = typeof value === "string" ? value : value && typeof value === "object" && typeof value.url === "string" ? String(value.url) : "";
39447
+ const url2 = raw.trim().replace(/[.?!:]+$/g, "");
39448
+ if (!PUBLIC_DOC_URL_RE.test(url2)) return null;
39449
+ return url2;
39450
+ }
39451
+ function collectDocsFrom(value, docs) {
39452
+ if (Array.isArray(value)) {
39453
+ for (const entry of value) {
39454
+ const url2 = normalizeDocUrl(entry);
39455
+ if (url2) docs.add(url2);
39456
+ }
39457
+ }
39458
+ }
39459
+ function readExternalAgentMetadata(runDir) {
39460
+ for (const filename of EXTERNAL_AGENT_METADATA_FILENAMES) {
39461
+ const path2 = (0, import_path13.join)(runDir, filename);
39462
+ if (!(0, import_fs14.existsSync)(path2)) continue;
39463
+ try {
39464
+ const parsed = JSON.parse((0, import_fs14.readFileSync)(path2, "utf8"));
39465
+ const docs = /* @__PURE__ */ new Set();
39466
+ collectDocsFrom(parsed.docs_pages_used, docs);
39467
+ collectDocsFrom(parsed.docs_pages_observed, docs);
39468
+ collectDocsFrom(parsed.docs_used, docs);
39469
+ collectDocsFrom(parsed.public_docs_used, docs);
39470
+ return {
39471
+ path: filename,
39472
+ docs_pages_used: Array.from(docs).sort()
39473
+ };
39474
+ } catch {
39475
+ return {
39476
+ path: filename,
39477
+ docs_pages_used: []
39478
+ };
39479
+ }
39480
+ }
39481
+ return {
39482
+ path: null,
39483
+ docs_pages_used: []
39484
+ };
39485
+ }
39486
+
39487
+ // src/lib/external-agent-executor-artifacts.ts
39488
+ function redactArtifactFile(path2, input = {}) {
39489
+ if (!(0, import_fs15.existsSync)(path2)) return;
39490
+ const original = (0, import_fs15.readFileSync)(path2, "utf8");
39491
+ const redacted = redactExternalAgentArtifactText(original, input);
39492
+ if (redacted !== original) (0, import_fs15.writeFileSync)(path2, redacted, "utf8");
39493
+ }
39494
+ function redactExternalAgentOutputArtifacts(run, input = {}) {
39495
+ redactArtifactFile(run.outputs.jsonl, input);
39496
+ redactArtifactFile(run.outputs.last_message, input);
39497
+ redactArtifactFile(run.outputs.stderr, input);
39498
+ redactArtifactFile((0, import_path14.join)(run.run_dir, "commands.ndjson"), input);
39499
+ if (!(0, import_fs15.existsSync)(run.run_dir)) return;
39500
+ for (const name of (0, import_fs15.readdirSync)(run.run_dir)) {
39501
+ if (name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")) {
39502
+ redactArtifactFile((0, import_path14.join)(run.run_dir, name), input);
39503
+ }
39504
+ }
39505
+ }
39506
+ function copyExternalAgentCommandCaptureArtifacts(input) {
39507
+ const commandLog = (0, import_path14.join)(input.captureDir, "commands.ndjson");
39508
+ if ((0, import_fs15.existsSync)(commandLog)) {
39509
+ (0, import_fs15.writeFileSync)((0, import_path14.join)(input.runDir, "commands.ndjson"), (0, import_fs15.readFileSync)(commandLog, "utf8"), "utf8");
39510
+ }
39511
+ for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
39512
+ if (name.startsWith("command-output-cmd_")) {
39513
+ (0, import_fs15.copyFileSync)((0, import_path14.join)(input.captureDir, name), (0, import_path14.join)(input.runDir, name));
39514
+ } else if (EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
39515
+ (0, import_fs15.copyFileSync)((0, import_path14.join)(input.captureDir, name), (0, import_path14.join)(input.runDir, name));
39516
+ }
39517
+ }
39518
+ }
39519
+
39520
+ // src/lib/external-agent-executor-classification.ts
39521
+ var import_fs17 = require("fs");
39522
+ var import_path16 = require("path");
39523
+
39524
+ // src/lib/external-agent-run-summary.ts
39525
+ var import_fs16 = require("fs");
39526
+ var import_path15 = require("path");
39527
+ var REQUIRED_RUN_FIELDS = [
39528
+ "schema_version",
39529
+ "run_id",
39530
+ "status",
39531
+ "model_provider",
39532
+ "model_name",
39533
+ "prompt_version",
39534
+ "started_at",
39535
+ "manual_intervention_count",
39536
+ "environment",
39537
+ "public_entrypoints",
39538
+ "commands_run",
39539
+ "docs_pages_used",
39540
+ "artifacts"
39541
+ ];
39542
+ var VALID_STATUSES = /* @__PURE__ */ new Set(["pass", "hold", "fail"]);
39543
+ var DOC_URL_RE = /https:\/\/frontofhouse\.okii\.uk\/[^\s"'`)<>,;\\\]}]*/g;
39544
+ function quoteShellArg(value) {
39545
+ const text = String(value);
39546
+ if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
39547
+ return `"${text.replace(/(["$`])/g, "\\$1")}"`;
39548
+ }
39549
+ function externalAgentSummaryCommand(root) {
39550
+ const summaryPath = (0, import_path15.join)(root, "latest-summary.json");
39551
+ const reportPath = (0, import_path15.join)(root, "summary.report.json");
39552
+ return [
39553
+ "foh",
39554
+ "eval",
39555
+ "external-agent",
39556
+ "summary",
39557
+ "--root",
39558
+ quoteShellArg(root),
39559
+ "--out",
39560
+ quoteShellArg(summaryPath),
39561
+ "--report",
39562
+ quoteShellArg(reportPath),
39563
+ "--json"
39564
+ ].join(" ");
39565
+ }
39566
+ function readJson(filePath) {
39567
+ return JSON.parse((0, import_fs16.readFileSync)(filePath, "utf8").replace(/^\uFEFF/, ""));
39568
+ }
39569
+ function readNdjson(filePath) {
39570
+ if (!(0, import_fs16.existsSync)(filePath)) return [];
39571
+ return (0, import_fs16.readFileSync)(filePath, "utf8").split(/\r?\n/).map((line) => line.trim()).filter(Boolean).map((line) => {
39572
+ try {
39573
+ const parsed = JSON.parse(line);
39574
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : null;
39575
+ } catch {
39576
+ return null;
39577
+ }
39578
+ }).filter((record2) => Boolean(record2));
39579
+ }
39580
+ function asObject(value) {
39581
+ return value && typeof value === "object" && !Array.isArray(value) ? value : null;
39582
+ }
39583
+ function toArray2(value) {
39584
+ return Array.isArray(value) ? value : [];
39585
+ }
39586
/**
 * Add `amount` to the counter stored under `key` in a Map.
 *
 * The key is stringified; falsy keys are counted under "unknown".
 *
 * @param {Map<string, number>} map3 - Counter map, mutated in place.
 * @param {unknown} key - Counter name.
 * @param {number} [amount=1] - Amount to add.
 */
function increment(map3, key, amount = 1) {
  const bucket = String(key || "unknown");
  const current = map3.get(bucket) || 0;
  map3.set(bucket, current + amount);
}
39590
/**
 * Convert a counter Map into `{ key, count }` rows ordered by descending count,
 * with ties broken by key via localeCompare.
 *
 * @param {Map<string, number>} map3 - Counter map.
 * @returns {{ key: string, count: number }[]} Ranked rows.
 */
function ranked(map3) {
  const rows = [];
  for (const [key, count] of map3.entries()) {
    rows.push({ key, count });
  }
  rows.sort((left, right) => {
    const byCount = right.count - left.count;
    return byCount || left.key.localeCompare(right.key);
  });
  return rows;
}
39593
/**
 * Extract the distinct frontofhouse.okii.uk URLs mentioned in a piece of text.
 *
 * Trailing `.`, `?`, `!` and `:` characters are stripped from each match, and a
 * match is kept only if it still starts with the site prefix afterwards.
 *
 * @param {unknown} text - Text to scan; falsy values are treated as empty.
 * @returns {string[]} Unique URLs, sorted.
 */
function collectDocUrls(text) {
  const matches = String(text || "").match(DOC_URL_RE) || [];
  const unique = /* @__PURE__ */ new Set();
  for (const match of matches) {
    const trimmed = match.replace(/[.?!:]+$/g, "");
    if (trimmed.startsWith("https://frontofhouse.okii.uk/")) {
      unique.add(trimmed);
    }
  }
  return Array.from(unique).sort();
}
39596
/**
 * Recursively collect every file named `run.json` beneath a directory.
 *
 * @param {string} root - Directory to walk; a missing path yields an empty list.
 * @returns {string[]} Sorted paths of the run.json files found.
 */
function findRunFiles(root) {
  if (!(0, import_fs16.existsSync)(root)) return [];
  const found = [];
  const visit = (directory) => {
    const entries = (0, import_fs16.readdirSync)(directory, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = (0, import_path15.join)(directory, entry.name);
      if (entry.isDirectory()) {
        visit(fullPath);
      } else if (entry.isFile() && entry.name === "run.json") {
        found.push(fullPath);
      }
    }
  };
  visit(root);
  // Traversal order is irrelevant: the result is sorted before it is returned.
  return found.sort();
}
39614
/**
 * Validate a parsed run artifact against the `external_agent_run.v1` expectations.
 *
 * @param {unknown} value - Parsed contents of a run.json file.
 * @returns {object[]} Findings (each with an `id`); empty when the run is valid.
 */
function validateExternalAgentRun(value) {
  const run = asObject(value);
  if (!run) {
    return [{ id: "run_not_object", detail: "run artifact must be an object" }];
  }
  const findings = REQUIRED_RUN_FIELDS.filter((field) => !(field in run)).map((field) => ({ id: "required_field_missing", field }));
  if (run.schema_version !== "external_agent_run.v1") {
    findings.push({ id: "schema_version_invalid", expected: "external_agent_run.v1", actual: run.schema_version ?? null });
  }
  const { status } = run;
  if (!VALID_STATUSES.has(String(status || ""))) {
    findings.push({ id: "status_invalid", expected: Array.from(VALID_STATUSES), actual: status ?? null });
  }
  // Non-passing runs must say why they did not pass.
  const needsReason = status === "hold" || status === "fail";
  if (needsReason && !String(run.failure_reason_code || "").trim()) {
    findings.push({ id: "failure_reason_code_missing" });
  }
  const interventions = run.manual_intervention_count;
  const interventionsValid = Number.isInteger(interventions) && Number(interventions) >= 0;
  if (!interventionsValid) {
    findings.push({ id: "manual_intervention_count_invalid" });
  }
  const shapeChecks = [
    [Array.isArray(run.commands_run), "commands_run_invalid"],
    [Array.isArray(run.docs_pages_used), "docs_pages_used_invalid"],
    [asObject(run.environment) !== null, "environment_invalid"],
    [asObject(run.artifacts) !== null, "artifacts_invalid"],
    [toArray2(run.public_entrypoints).length > 0, "public_entrypoints_missing"]
  ];
  for (const [ok, id] of shapeChecks) {
    if (!ok) findings.push({ id });
  }
  return findings;
}
39640
/**
 * Compute the timestamp used to order runs: `ended_at`, falling back to `started_at`.
 *
 * @param {{ ended_at?: unknown, started_at?: unknown }} run - Run artifact.
 * @returns {number} Date.parse result in milliseconds, or 0 when it is not finite.
 */
function runSortTime(run) {
  const stamp = String(run.ended_at || run.started_at || "");
  const parsed = Date.parse(stamp);
  if (!Number.isFinite(parsed)) {
    return 0;
  }
  return parsed;
}
39645
/**
 * Derive a cohort id from where a run.json sits relative to the runs root.
 *
 * The id is the first path segment of the run directory, or `<date>/<next segment>`
 * when the first segment looks like `YYYY-MM-DD` and a second segment exists.
 * A run.json directly in the root maps to ".".
 *
 * @param {string} root - Runs root directory.
 * @param {string} runPath - Path of the run.json file.
 * @returns {string} Cohort id.
 */
function cohortIdForRunPath(root, runPath) {
  const runDir = (0, import_path15.dirname)(runPath);
  const segments = (0, import_path15.relative)(root, runDir).replaceAll("\\", "/").split("/").filter(Boolean);
  if (segments.length === 0) return ".";
  const [first, second] = segments;
  const isDateFolder = /^\d{4}-\d{2}-\d{2}$/.test(first);
  return isDateFolder && second ? `${first}/${second}` : first;
}
39652
/**
 * Load and validate every run.json under `root`.
 *
 * @param {string} root - Runs root directory.
 * @param {string} cwd - Base directory used to report invalid-run paths.
 * @returns {{ records: object[], invalid_runs: object[] }} Valid runs (with path,
 *   cohort id and sort time) and invalid ones (cwd-relative path plus findings).
 */
function readRunRecords(root, cwd) {
  const records = [];
  const invalid_runs = [];
  for (const file2 of findRunFiles(root)) {
    const reject = (findings) => {
      invalid_runs.push({ path: (0, import_path15.relative)(cwd, file2).replaceAll("\\", "/"), findings });
    };
    try {
      const run = readJson(file2);
      const findings = validateExternalAgentRun(run);
      if (findings.length === 0) {
        records.push({
          path: file2,
          run,
          cohort_id: cohortIdForRunPath(root, file2),
          sort_time: runSortTime(run)
        });
      } else {
        reject(findings);
      }
    } catch (error2) {
      // Any failure while loading the file is reported as a parse failure for that run.
      const detail = error2 instanceof Error ? error2.message : String(error2);
      reject([{ id: "json_parse_failed", detail }]);
    }
  }
  return { records, invalid_runs };
}
39679
/**
 * Return the cohort id of the most recent run record.
 *
 * Records are ordered by descending `sort_time`, then by descending `path`.
 * The input array is not mutated.
 *
 * @param {{ sort_time: number, path: string, cohort_id: string }[]} records - Run records.
 * @returns {string|null} Cohort id of the newest record, or null when there are none.
 */
function latestCohortId(records) {
  const newestFirst = [...records].sort((left, right) => {
    return right.sort_time - left.sort_time || right.path.localeCompare(left.path);
  });
  const [newest] = newestFirst;
  return newest?.cohort_id ?? null;
}
39682
/**
 * Map a failure reason code to an owning subsystem by substring matching.
 *
 * Rules are evaluated top to bottom against the lower-cased code and the first
 * rule with a matching substring wins, so rule order is significant.
 *
 * @param {unknown} reasonCode - Failure reason code; falsy values are treated as empty.
 * @returns {string} Owner subsystem, "product_ux" when nothing matches.
 */
function ownerSubsystemFor(reasonCode) {
  const reason = String(reasonCode || "").toLowerCase();
  const rules = [
    ["dojo_certification", ["simulation", "certification", "scenario"]],
    ["voice_contact", ["contact_phone", "voice_contact", "provider_capacity", "byon"]],
    ["infra_runner", ["exec_policy", "policy_blocked", "sandbox", "runner", "codex"]],
    ["api_contract", ["api", "http_4", "http_5", "404", "500", "roundtrip"]],
    ["cli", ["cli", "command", "flag"]],
    ["docs", ["docs", "unclear", "not_found"]],
    ["infra_runner", ["auth", "org", "config"]],
    ["runtime", ["runtime", "widget", "proof"]]
  ];
  for (const [owner, needles] of rules) {
    if (needles.some((needle) => reason.includes(needle))) {
      return owner;
    }
  }
  return "product_ux";
}
39694
/**
 * Translate a failure reason code into a recommended fix category, based on
 * the owner subsystem reported by ownerSubsystemFor.
 *
 * @param {unknown} reasonCode - Failure reason code.
 * @returns {string} Fix category; "fix_config" for owners without a dedicated fix.
 */
function recommendedFixFor(reasonCode) {
  const fixByOwner = /* @__PURE__ */ new Map([
    ["api_contract", "fix_api"],
    ["cli", "fix_cli"],
    ["docs", "fix_docs"],
    ["runtime", "fix_runtime"],
    ["dojo_certification", "add_test"]
  ]);
  return fixByOwner.get(ownerSubsystemFor(reasonCode)) ?? "fix_config";
}
39703
/**
 * Collapse command log records to one record per command.
 *
 * Records are grouped by `command_id`, falling back to `<recorded_at>:<command>`.
 * Within a group the latest record whose phase is "completed" wins; otherwise the
 * first record seen is kept. Output follows the order in which ids first appeared.
 *
 * @param {object[]} records - Command log records.
 * @returns {object[]} One record per command id.
 */
function collapseCommandRecords(records) {
  // A Map keeps first-insertion order, which preserves the order ids were first seen.
  const chosenById = /* @__PURE__ */ new Map();
  for (const record2 of records) {
    const id = String(record2.command_id || `${record2.recorded_at || ""}:${record2.command || ""}`);
    const existing = chosenById.get(id);
    if (record2.phase === "completed" || !existing) {
      chosenById.set(id, record2);
    }
  }
  return [...chosenById.values()].filter((record2) => Boolean(record2));
}
39714
/**
 * Summarize the artifacts stored next to a run.json: command telemetry from
 * `commands.ndjson`, and Codex execution counts plus doc URLs from `codex-exec.jsonl`.
 *
 * @param {string} runPath - Path of the run.json file.
 * @param {object} run - Parsed run artifact (`run_id` and `docs_pages_used` are read).
 * @param {string} cwd - Base directory for the reported relative run path.
 * @returns {object} Per-run command, reason-code, slow-step, docs and Codex counters.
 */
function analyzeRunArtifacts(runPath, run, cwd) {
  const runDir = (0, import_path15.dirname)(runPath);
  const commandLogPath = (0, import_path15.join)(runDir, "commands.ndjson");
  const commands = collapseCommandRecords(readNdjson(commandLogPath));
  const reasonCounts = /* @__PURE__ */ new Map();
  const timedSteps = [];
  let completedCount = 0;
  let durationSum = 0;
  for (const command of commands) {
    const isCompleted = command.phase === "completed" || command.completed_at;
    if (isCompleted) completedCount += 1;
    if (typeof command.duration_ms === "number") {
      durationSum += command.duration_ms;
      timedSteps.push({
        run_id: run.run_id,
        run_path: (0, import_path15.relative)(cwd, runPath).replaceAll("\\", "/"),
        command: command.command || "",
        duration_ms: command.duration_ms,
        status: command.status || null,
        reason_code: command.reason_code || null,
        check_reason_codes: Array.isArray(command.check_reason_codes) ? command.check_reason_codes : []
      });
    }
    const reasons = [command.reason_code, ...toArray2(command.check_reason_codes)];
    for (const reasonCode of reasons) {
      if (reasonCode) increment(reasonCounts, reasonCode);
    }
  }
  // Every command with a numeric duration produced exactly one timed step.
  const timedStepCount = timedSteps.length;
  const observedDocs = new Set(toArray2(run.docs_pages_used).map(String));
  let codexCompleted = 0;
  let codexFailed = 0;
  for (const event of readNdjson((0, import_path15.join)(runDir, "codex-exec.jsonl"))) {
    // Events may wrap their payload in `item`; otherwise the event itself is the payload.
    const item = asObject(event.item) || event;
    const isCompletedExecution = item.type === "command_execution" && item.status === "completed";
    if (isCompletedExecution) {
      codexCompleted += 1;
      if (typeof item.exit_code === "number" && item.exit_code !== 0) codexFailed += 1;
    }
    for (const url2 of collectDocUrls(JSON.stringify(event))) {
      observedDocs.add(url2);
    }
  }
  return {
    command_log_present: (0, import_fs16.existsSync)(commandLogPath),
    command_count: commands.length,
    completed_command_count: completedCount,
    missing_completion_count: Math.max(0, commands.length - completedCount),
    commands_with_duration_count: timedStepCount,
    total_command_duration_ms: durationSum,
    command_reason_codes: ranked(reasonCounts),
    slow_steps: timedSteps.sort((a, b) => Number(b.duration_ms) - Number(a.duration_ms)).slice(0, 10),
    docs_pages_observed: Array.from(observedDocs).sort(),
    codex_command_execution_completed_count: codexCompleted,
    codex_failed_exit_code_count: codexFailed
  };
}
39772
/**
 * Aggregate all valid external-agent runs under a root into an
 * `external_agent_run_summary.v1` object.
 *
 * When `options.cohortId` is set, or `options.currentBaselineOnly` selects the
 * latest cohort, only runs of that cohort are counted and invalid runs are not reported.
 *
 * @param {{ root: string, cwd?: string, cohortId?: string, currentBaselineOnly?: boolean }} options
 * @returns {object} Summary with status/model counts, failures, telemetry and recommended fixes.
 */
function summarizeExternalAgentRuns(options) {
  const cwd = (0, import_path15.resolve)(options.cwd || process.cwd());
  const root = (0, import_path15.resolve)(cwd, options.root);
  const toDisplayPath = (target) => (0, import_path15.relative)(cwd, target).replaceAll("\\", "/");
  const loaded = readRunRecords(root, cwd);
  let selectedCohortId = options.cohortId;
  if (!selectedCohortId) {
    selectedCohortId = options.currentBaselineOnly ? latestCohortId(loaded.records) : null;
  }
  const records = selectedCohortId ? loaded.records.filter((record2) => record2.cohort_id === selectedCohortId) : loaded.records;
  const statusCounts = /* @__PURE__ */ new Map();
  const modelCounts = /* @__PURE__ */ new Map();
  const failureCounts = /* @__PURE__ */ new Map();
  const commandReasonCounts = /* @__PURE__ */ new Map();
  const docsCounts = /* @__PURE__ */ new Map();
  const slowSteps = [];
  const totals = {
    manualInterventions: 0,
    commands: 0,
    completedCommands: 0,
    missingCompletions: 0,
    commandsWithDuration: 0,
    commandDurationMs: 0,
    runsWithCommandLog: 0,
    codexExecutions: 0,
    codexFailedExits: 0
  };
  for (const { path: runPath, run } of records) {
    increment(statusCounts, run.status);
    increment(modelCounts, `${run.model_provider}/${run.model_name}`);
    totals.manualInterventions += Number(run.manual_intervention_count || 0);
    if (run.status !== "pass") increment(failureCounts, run.failure_reason_code || "unknown");
    const artifactSummary = analyzeRunArtifacts(runPath, run, cwd);
    if (artifactSummary.command_log_present) totals.runsWithCommandLog += 1;
    totals.commands += Number(artifactSummary.command_count || 0);
    totals.completedCommands += Number(artifactSummary.completed_command_count || 0);
    totals.missingCompletions += Number(artifactSummary.missing_completion_count || 0);
    totals.commandsWithDuration += Number(artifactSummary.commands_with_duration_count || 0);
    totals.commandDurationMs += Number(artifactSummary.total_command_duration_ms || 0);
    totals.codexExecutions += Number(artifactSummary.codex_command_execution_completed_count || 0);
    totals.codexFailedExits += Number(artifactSummary.codex_failed_exit_code_count || 0);
    slowSteps.push(...toArray2(artifactSummary.slow_steps));
    for (const row of toArray2(artifactSummary.command_reason_codes)) {
      const entry = asObject(row);
      if (entry) increment(commandReasonCounts, entry.key, Number(entry.count || 1));
    }
    for (const page of toArray2(artifactSummary.docs_pages_observed)) {
      increment(docsCounts, page);
    }
  }
  const topFailures = ranked(failureCounts);
  const recommendedFixes = topFailures.map(({ key, count }) => ({
    reason_code: key,
    count,
    recommended_fix: recommendedFixFor(key),
    owner_subsystem: ownerSubsystemFor(key)
  }));
  const nextRecommendedFix = recommendedFixes[0] || null;
  // Slowest first; ties are ordered by command text.
  const bySlowestThenCommand = (a, b) => Number(b.duration_ms || 0) - Number(a.duration_ms || 0) || String(a.command || "").localeCompare(String(b.command || ""));
  return {
    schema_version: "external_agent_run_summary.v1",
    generated_at: (/* @__PURE__ */ new Date()).toISOString(),
    root: toDisplayPath(root) || ".",
    cohort_id: selectedCohortId,
    current_baseline_only: Boolean(selectedCohortId),
    run_count: records.length,
    invalid_run_count: selectedCohortId ? 0 : loaded.invalid_runs.length,
    status_counts: Object.fromEntries(statusCounts),
    model_counts: ranked(modelCounts),
    manual_intervention_count: totals.manualInterventions,
    top_failure_reason_codes: topFailures,
    docs_pages_observed: ranked(docsCounts),
    command_telemetry: {
      run_count_with_command_log: totals.runsWithCommandLog,
      command_count: totals.commands,
      completed_command_count: totals.completedCommands,
      missing_completion_count: totals.missingCompletions,
      commands_with_duration_count: totals.commandsWithDuration,
      total_command_duration_ms: totals.commandDurationMs,
      command_reason_codes: ranked(commandReasonCounts),
      slow_steps: slowSteps.sort(bySlowestThenCommand).slice(0, 20)
    },
    codex_telemetry: {
      command_execution_completed_count: totals.codexExecutions,
      failed_exit_code_count: totals.codexFailedExits
    },
    recommended_fixes: recommendedFixes,
    next_recommended_fix: nextRecommendedFix,
    fix_selection_policy: {
      mode: "coherent_failure_cluster_first",
      rule: "Fix the highest-impact owner subsystem locally with focused proof, then rerun the same prompt once externally.",
      run_failure_weight: 3,
      command_reason_weight: 1
    },
    next_commands: nextRecommendedFix ? [`foh bug improve --from external-agent-run --file <run_dir>/run.json --json`] : [],
    invalid_runs: selectedCohortId ? [] : loaded.invalid_runs,
    run_paths: records.map((record2) => toDisplayPath(record2.path)).sort()
  };
}
39864
/**
 * Build the run summary plus a `script_report.v1` wrapper and optionally write them to disk.
 *
 * The report status is "failed" when the summary lists invalid runs. `options.out`
 * receives the summary and `options.report` the report; both are resolved against
 * `options.cwd` (or the process cwd) and parent directories are created as needed.
 *
 * @param {{ root: string, cwd?: string, out?: string, report?: string }} options
 * @returns {{ summary: object, report: object }}
 */
function runExternalAgentRunSummary(options) {
  const summary = summarizeExternalAgentRuns(options);
  const invalidRuns = toArray2(summary.invalid_runs);
  const report = {
    report_schema_version: "script_report.v1",
    script: "foh eval external-agent summary",
    checked_at: (/* @__PURE__ */ new Date()).toISOString(),
    status: invalidRuns.length > 0 ? "failed" : "passed",
    errors: invalidRuns.map((entry) => {
      const object3 = asObject(entry);
      return `${object3?.path || "unknown"}: ${JSON.stringify(object3?.findings || [])}`;
    }),
    warnings: Number(summary.run_count || 0) === 0 ? ["no external-agent run artifacts found"] : [],
    report: summary
  };
  const writeJsonFile = (target, payload) => {
    const absolute = (0, import_path15.resolve)(options.cwd || process.cwd(), target);
    (0, import_fs16.mkdirSync)((0, import_path15.dirname)(absolute), { recursive: true });
    (0, import_fs16.writeFileSync)(absolute, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
  };
  if (options.out) {
    writeJsonFile(options.out, summary);
  }
  if (options.report) {
    writeJsonFile(options.report, report);
  }
  return { summary, report };
}
39892
+
39893
+ // src/lib/external-agent-executor-classification.ts
39894
/**
 * Report whether `<runDir>/proof.json` exists and records a passing proof.
 *
 * A proof passes when its `ok` is true or its `status` is "pass" or "passed".
 * A missing, unreadable or malformed file counts as not passing.
 *
 * @param {string} runDir - Run directory that may contain proof.json.
 * @returns {boolean} True only for a readable, passing proof artifact.
 */
function proofArtifactPasses(runDir) {
  const proofPath = (0, import_path16.join)(runDir, "proof.json");
  if (!(0, import_fs17.existsSync)(proofPath)) return false;
  try {
    // JSON.parse rejects a leading byte-order mark, so strip it first (as readJson does).
    // Otherwise a BOM-prefixed proof would be misreported as not passing.
    const raw = (0, import_fs17.readFileSync)(proofPath, "utf8").replace(/^\uFEFF/, "");
    const parsed = JSON.parse(raw);
    return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
  } catch {
    // Unreadable or malformed proof (including a non-object JSON value) is not a pass.
    return false;
  }
}
39904
/**
 * Read a UTF-8 file, returning an empty string when the path does not exist.
 *
 * @param {string} path2 - File to read.
 * @returns {string} File contents, or "".
 */
function readIfExists(path2) {
  if (!(0, import_fs17.existsSync)(path2)) {
    return "";
  }
  return (0, import_fs17.readFileSync)(path2, "utf8");
}
39907
/**
 * Reduce an artifact path to its file name.
 *
 * @param {string} path2 - Artifact path.
 * @returns {string} Final path segment.
 */
function relativeArtifactName(path2) {
  const fileName = (0, import_path16.basename)(path2);
  return fileName;
}
39910
/**
 * Classify a finished external-agent run as pass, hold or fail with a reason code.
 *
 * Checks run in strict precedence order and the first match wins:
 *   1. runner timeout, then artifact-safety failure;
 *   2. CLI version drift among completed command records;
 *   3. known reason codes reported by completed commands;
 *   4. known patterns in the runner's last message and stderr;
 *   5. auth/approval wording without a passing proof artifact;
 *   6. non-zero runner exit code;
 *   7. presence of a passing proof artifact.
 *
 * @param {{ run: object, timedOut: boolean, artifactSafetyOk: boolean, exitCode: number|null }} input
 * @returns {{ status: "pass"|"hold"|"fail", reasonCode: string|null }}
 */
function classifyExternalAgentRun(input) {
  const { run } = input;
  const hold = (reasonCode) => ({ status: "hold", reasonCode });
  const fail = (reasonCode) => ({ status: "fail", reasonCode });
  if (input.timedOut) return hold(`${run.command}_runner_timeout`);
  if (!input.artifactSafetyOk) return fail("external_agent_artifact_safety_blocked");
  const completedCommands = readCommandRecords(run.run_dir).filter((record2) => record2.phase === "completed");
  // Only well-formed x.y.z versions take part in the drift check.
  const hasVersionDrift = completedCommands
    .map((record2) => String(record2.cli_version || "").trim())
    .filter((version2) => /^\d+\.\d+\.\d+$/.test(version2))
    .some((version2) => version2 !== CLI_VERSION);
  if (hasVersionDrift) return hold("external_agent_cli_version_drift");
  const commandReasonCodes = [];
  for (const record2 of completedCommands) {
    const codes = [record2.reason_code, ...(Array.isArray(record2.check_reason_codes) ? record2.check_reason_codes : [])];
    for (const code of codes) {
      const text = String(code || "");
      if (text) commandReasonCodes.push(text);
    }
  }
  const paidResourcePattern = new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i");
  // Ordered: earlier rules take precedence over later ones.
  const commandRules = [
    [paidResourcePattern, PAID_RESOURCE_BLOCKED_REASON_CODE],
    [/provider_capacity_blocked/i, "provider_capacity_blocked"],
    [/byon_voice_number_not_configured/i, "byon_voice_number_not_configured"],
    [/contact_phone_provisioning_failed/i, "voice_contact_phone_provisioning_failed"],
    [/voice_contact_expected_no_spend_hold/i, "voice_contact_expected_no_spend_hold"],
    [/contact_phone_missing/i, "voice_contact_phone_missing"],
    [/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i, "simulation_certification_failed"],
    [/proof_held/i, "external_agent_proof_held"],
    [/agent_limit_reached/i, "eval_org_agent_limit_reached"]
  ];
  for (const [pattern, reasonCode] of commandRules) {
    if (commandReasonCodes.some((reason) => pattern.test(reason))) return hold(reasonCode);
  }
  const lastMessage = readIfExists(run.outputs.last_message);
  const stderr = readIfExists(run.outputs.stderr);
  const combined = `${lastMessage}\n${stderr}`;
  // Ordered: environment and policy blockers are reported before product-level reason codes.
  const textRules = [
    [/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i, fail, "private_repo_assumption_detected"],
    [/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i, hold, "codex_exec_policy_blocked"],
    [/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i, hold, "codex_sandbox_exec_blocked"],
    [/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i, hold, "codex_network_dns_blocked"],
    [paidResourcePattern, hold, PAID_RESOURCE_BLOCKED_REASON_CODE],
    [/provider_capacity_blocked/i, hold, "provider_capacity_blocked"],
    [/byon_voice_number_not_configured/i, hold, "byon_voice_number_not_configured"],
    [/contact_phone_provisioning_failed/i, hold, "voice_contact_phone_provisioning_failed"],
    [/voice_contact_expected_no_spend_hold/i, hold, "voice_contact_expected_no_spend_hold"],
    [/contact_phone_missing/i, hold, "voice_contact_phone_missing"],
    [/simulation_certification_failed/i, hold, "simulation_certification_failed"],
    [/proof_held/i, hold, "external_agent_proof_held"],
    [/agent_limit_reached/i, hold, "eval_org_agent_limit_reached"]
  ];
  for (const [pattern, outcome, reasonCode] of textRules) {
    if (pattern.test(combined)) return outcome(reasonCode);
  }
  const mentionsAuth = /browser|approve|approval|login|auth|sign in/i.test(combined);
  if (mentionsAuth && !proofArtifactPasses(run.run_dir)) {
    return hold("auth_browser_approval_required");
  }
  if (input.exitCode !== 0) return hold(`${run.command}_runner_nonzero_exit`);
  if (proofArtifactPasses(run.run_dir)) return { status: "pass", reasonCode: null };
  return hold("external_agent_proof_artifact_missing");
}
40000
/**
 * Assemble the `external_agent_run.v1` artifact for an executed runner invocation.
 *
 * Optional artifacts (command log, proof, replay, knowledge, notes) are referenced
 * only when the file exists in the run directory. Non-passing runs reference an
 * improvement packet and get additional follow-up commands.
 *
 * @param {{ run: object, status: string, reasonCode: string|null, startedAt: string,
 *   endedAt: string, exitCode: number|null, timedOut: boolean, durationMs: number }} input
 * @returns {object} Run artifact object.
 */
function buildExecutedExternalAgentRunArtifact(input) {
  const { run, status, reasonCode } = input;
  const commands = readCommandRecords(run.run_dir);
  const agentMetadata = readExternalAgentMetadata(run.run_dir);
  const passed = status === "pass";
  const isCodex = run.command === "codex";
  // File name when the artifact exists in the run directory, otherwise null.
  const presentOrNull = (fileName) => (0, import_fs17.existsSync)((0, import_path16.join)(run.run_dir, fileName)) ? fileName : null;
  const lastMessageName = relativeArtifactName(run.outputs.last_message);
  const stderrName = relativeArtifactName(run.outputs.stderr);
  const summaryCommand = externalAgentSummaryCommand((0, import_path16.dirname)(run.run_dir));
  const followUpCommands = passed ? [summaryCommand] : [
    "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
    "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
    summaryCommand
  ];
  return {
    schema_version: "external_agent_run.v1",
    run_id: run.run_id,
    status,
    failure_reason_code: reasonCode,
    model_provider: run.model_provider,
    model_name: run.model_name,
    runner_model: run.runner_model,
    agent_shell: `${run.command}-exec`,
    workspace_type: "clean-no-repo-programmatic",
    prompt_version: run.prompt_version,
    prompt_path: "prompt.txt",
    started_at: input.startedAt,
    ended_at: input.endedAt,
    manual_intervention_count: 0,
    manual_interventions: [],
    environment: {
      os: process.platform,
      node_version: process.version,
      npm_version: null,
      foh_cli_version: CLI_VERSION,
      runner_exit_code: input.exitCode,
      runner_timed_out: input.timedOut,
      duration_ms: input.durationMs
    },
    public_entrypoints: [
      "https://frontofhouse.okii.uk",
      "https://frontofhouse.okii.uk/llms.txt",
      "https://frontofhouse.okii.uk/openapi.yaml",
      "npx --yes @f-o-h/cli@latest"
    ],
    commands_run: commands.map((command) => command.command),
    docs_pages_used: agentMetadata.docs_pages_used,
    eval_state: {
      lifecycle_strategy: "reuse_existing_eval_state",
      org_reuse_expected: true,
      agent_reuse_expected: true,
      widget_reuse_expected: true,
      fresh_org_expected: false,
      ephemeral_org_expected: false,
      fresh_agent_expected: false,
      phone_purchase_expected: false,
      paid_resource_creation_expected: false,
      spend_policy_expected: NO_SPEND_POLICY,
      cleanup_expected: false,
      cleanup_strategy: "no_cleanup_for_reused_eval_state",
      paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
      rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
    },
    artifacts: {
      terminal_transcript: relativeArtifactName(run.outputs.jsonl),
      command_log: presentOrNull("commands.ndjson"),
      proof_bundle: presentOrNull("proof.json"),
      replay_packet: presentOrNull("replay.json"),
      knowledge_packet: presentOrNull("knowledge.json"),
      improvement_packet: passed ? null : "improvement-packet.json",
      agent_metadata: agentMetadata.path,
      notes: presentOrNull("notes.md"),
      runner_last_message: lastMessageName,
      runner_stderr: stderrName,
      codex_last_message: isCodex ? lastMessageName : null,
      codex_stderr: isCodex ? stderrName : null,
      artifact_safety: relativeArtifactName(run.outputs.artifact_safety)
    },
    summary: passed ? `Controlled ${run.command} external-agent run produced passing proof evidence.` : `Controlled ${run.command} external-agent run ended as ${status} with reason ${reasonCode}.`,
    next_commands: followUpCommands
  };
}
40075
+
40076
+ // src/lib/external-agent-runner-execution.ts
40077
+ var import_child_process4 = require("child_process");
40078
+ var import_fs18 = require("fs");
40079
+ var import_path17 = require("path");
40080
/**
 * Decide how to launch a runner command.
 *
 * On Windows, when the command is a `.cmd` shim and the Codex or Gemini CLI
 * JavaScript entry point exists under the shim's `node_modules`, the entry point
 * is run with the current Node executable instead (Codex is checked first).
 * In every other case the command and args are returned unchanged.
 *
 * @param {string} command - Runner executable or shim path.
 * @param {string[]} args - Runner arguments.
 * @returns {{ command: string, args: string[] }} Invocation to spawn.
 */
function buildCommandInvocation(command, args) {
  const isWindowsCmdShim = process.platform === "win32" && command.toLowerCase().endsWith(".cmd");
  if (!isWindowsCmdShim) {
    return { command, args };
  }
  const binDir = (0, import_path17.dirname)(command);
  const codexEntrypoint = (0, import_path17.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
  if ((0, import_fs18.existsSync)(codexEntrypoint)) {
    return { command: process.execPath, args: [codexEntrypoint, ...args] };
  }
  const geminiEntrypoint = (0, import_path17.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
  if ((0, import_fs18.existsSync)(geminiEntrypoint)) {
    return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
  }
  return { command, args };
}
40090
/**
 * Spawn an external agent runner, feed it the prompt on stdin, and capture its
 * stdout/stderr to files.
 *
 * The returned promise never rejects. It resolves once the child has closed (or
 * failed to spawn) AND both capture files have been flushed, so callers can read
 * them immediately. On timeout the process is force-killed (`taskkill /t /f` on
 * Windows, SIGKILL elsewhere) and `timedOut` is true.
 *
 * @param {{ command: string, args: string[], cwd: string, env: object, prompt: string,
 *   stdoutPath: string, stderrPath: string, timeoutMs: number }} input
 * @returns {Promise<{ exitCode: number|null, timedOut: boolean, durationMs: number }>}
 *   `exitCode` is null when the process errored or was terminated by a signal.
 */
function spawnExternalAgentRunner(input) {
  return new Promise((resolveRun) => {
    const started = Date.now();
    const commandInvocation = buildCommandInvocation(input.command, input.args);
    const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
      cwd: input.cwd,
      env: input.env,
      shell: false,
      stdio: ["pipe", "pipe", "pipe"],
      windowsHide: true
    });
    const stdout = (0, import_fs18.createWriteStream)(input.stdoutPath, { flags: "w" });
    const stderr = (0, import_fs18.createWriteStream)(input.stderrPath, { flags: "w" });
    child.stdout.pipe(stdout);
    child.stderr.pipe(stderr);
    // A runner that fails to spawn, or exits before draining stdin, surfaces an
    // EPIPE/destroyed-stream error on stdin. Without a listener that would be an
    // uncaught exception; the outcome is already reported via exitCode below.
    child.stdin.on("error", () => {
    });
    child.stdin.end(input.prompt);
    let timedOut = false;
    let settled = false;
    const timer = setTimeout(() => {
      timedOut = true;
      if (child.pid && process.platform === "win32") {
        // taskkill /t terminates the whole process tree, not only the direct child.
        (0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
      } else {
        child.kill("SIGKILL");
      }
    }, input.timeoutMs);
    // Ends a capture stream and resolves once it has finished (or failed) writing.
    const flushed = (stream) => new Promise((done) => {
      stream.end(() => done());
    });
    // Both "error" and "close" may fire for one child; only the first one settles.
    const settle = (exitCode) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      const durationMs = Date.now() - started;
      Promise.all([flushed(stdout), flushed(stderr)]).then(() => {
        resolveRun({ exitCode, timedOut, durationMs });
      });
    };
    child.on("close", (exitCode) => settle(exitCode));
    child.on("error", () => settle(null));
  });
}
40137
+
40138
+ // src/lib/external-agent-executor.ts
40139
// Timeout in milliseconds passed to spawnSync by defaultRunnerProbe for the Gemini headless smoke probe.
var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
// NOTE(review): presumably the default FOH API base URL when none is configured; its use is outside this view.
var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
40141
/**
 * Error raised by the external-agent executor, carrying a machine-readable
 * `reasonCode` alongside the human-readable message.
 */
var ExternalAgentExecutorError = class extends Error {
  reasonCode;
  /**
   * @param {string} reasonCode - Machine-readable reason for the failure.
   * @param {string} message - Human-readable description.
   */
  constructor(reasonCode, message) {
    super(message);
    Object.assign(this, { name: "ExternalAgentExecutorError", reasonCode });
  }
};
39445
40149
  function readExternalAgentEvalAuthEnv(env = process.env) {
39446
40150
  return {
39447
40151
  token: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || "").trim(),
@@ -39507,14 +40211,14 @@ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}
39507
40211
  };
39508
40212
  }
39509
40213
  function normalizeForCompare(path2) {
39510
- const resolved = (0, import_path13.resolve)(path2);
40214
+ const resolved = (0, import_path18.resolve)(path2);
39511
40215
  return process.platform === "win32" ? resolved.toLowerCase() : resolved;
39512
40216
  }
39513
40217
  function isPathInside(childPath, parentPath) {
39514
40218
  const child = normalizeForCompare(childPath);
39515
40219
  const parent = normalizeForCompare(parentPath);
39516
- const rel = (0, import_path13.relative)(parent, child);
39517
- return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path13.isAbsolute)(rel);
40220
+ const rel = (0, import_path18.relative)(parent, child);
40221
+ return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path18.isAbsolute)(rel);
39518
40222
  }
39519
40223
  function requireString(value, field) {
39520
40224
  if (typeof value !== "string" || value.trim() === "") {
@@ -39523,10 +40227,10 @@ function requireString(value, field) {
39523
40227
  return value;
39524
40228
  }
39525
40229
  function readBatch(batchPath) {
39526
- if (!(0, import_fs15.existsSync)(batchPath)) {
40230
+ if (!(0, import_fs19.existsSync)(batchPath)) {
39527
40231
  throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
39528
40232
  }
39529
- const parsed = JSON.parse((0, import_fs15.readFileSync)(batchPath, "utf8"));
40233
+ const parsed = JSON.parse((0, import_fs19.readFileSync)(batchPath, "utf8"));
39530
40234
  if (parsed.schema_version !== "external_agent_batch_plan.v1") {
39531
40235
  throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
39532
40236
  }
@@ -39541,11 +40245,11 @@ function defaultRunnerProbe(command, args) {
39541
40245
  encoding: "utf8",
39542
40246
  timeout: isGeminiHeadlessSmoke ? GEMINI_HEADLESS_PROBE_TIMEOUT_MS : void 0
39543
40247
  };
39544
- const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process4.spawnSync)(
40248
+ const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process5.spawnSync)(
39545
40249
  "powershell.exe",
39546
40250
  ["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${[command, ...args].map(quotePowerShellArg).join(" ")}`],
39547
40251
  spawnOptions
39548
- ) : (0, import_child_process4.spawnSync)(command, args, spawnOptions);
40252
+ ) : (0, import_child_process5.spawnSync)(command, args, spawnOptions);
39549
40253
  return {
39550
40254
  status: typeof result.status === "number" ? result.status : null,
39551
40255
  stdout: String(result.stdout || ""),
@@ -39559,29 +40263,12 @@ function geminiCapacityUnavailable(text) {
39559
40263
  function quotePowerShellArg(value) {
39560
40264
  return `'${value.replace(/'/g, "''")}'`;
39561
40265
  }
39562
- function quoteShellArg(value) {
39563
- const text = String(value);
39564
- if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
39565
- return `"${text.replace(/(["$`])/g, "\\$1")}"`;
39566
- }
39567
- function externalAgentSummaryCommand(root) {
39568
- return [
39569
- "node",
39570
- "scripts/summarize-external-agent-runs.mjs",
39571
- "--root",
39572
- quoteShellArg(root),
39573
- "--out",
39574
- quoteShellArg((0, import_path13.join)(root, "latest-summary.json")),
39575
- "--report",
39576
- quoteShellArg((0, import_path13.join)(root, "summary.report.json"))
39577
- ].join(" ");
39578
- }
39579
40266
  function resolveCodexProbeCommand() {
39580
40267
  if (process.platform !== "win32") return "codex";
39581
40268
  const appData = process.env.APPDATA;
39582
40269
  if (appData) {
39583
- const appDataShim = (0, import_path13.join)(appData, "npm", "codex.cmd");
39584
- if ((0, import_fs15.existsSync)(appDataShim)) return appDataShim;
40270
+ const appDataShim = (0, import_path18.join)(appData, "npm", "codex.cmd");
40271
+ if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
39585
40272
  }
39586
40273
  return "codex.cmd";
39587
40274
  }
@@ -39592,8 +40279,8 @@ function resolveGeminiProbeCommand() {
39592
40279
  if (process.platform !== "win32") return "gemini";
39593
40280
  const appData = process.env.APPDATA;
39594
40281
  if (appData) {
39595
- const appDataShim = (0, import_path13.join)(appData, "npm", "gemini.cmd");
39596
- if ((0, import_fs15.existsSync)(appDataShim)) return appDataShim;
40282
+ const appDataShim = (0, import_path18.join)(appData, "npm", "gemini.cmd");
40283
+ if ((0, import_fs19.existsSync)(appDataShim)) return appDataShim;
39597
40284
  }
39598
40285
  return "gemini.cmd";
39599
40286
  }
@@ -39864,34 +40551,34 @@ function safeRunId(value) {
39864
40551
  return value.toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "run";
39865
40552
  }
39866
40553
  function resolveWorkspaceRoot(input) {
39867
- if (input.workspaceRoot) return (0, import_path13.resolve)(input.workspaceRoot);
39868
- const batchStem = (0, import_path13.basename)((0, import_path13.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
39869
- const repoStem = (0, import_path13.basename)((0, import_path13.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
39870
- return (0, import_path13.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
40554
+ if (input.workspaceRoot) return (0, import_path18.resolve)(input.workspaceRoot);
40555
+ const batchStem = (0, import_path18.basename)((0, import_path18.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40556
+ const repoStem = (0, import_path18.basename)((0, import_path18.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40557
+ return (0, import_path18.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
39871
40558
  }
39872
40559
  function findNearestGitRoot(startPath) {
39873
- let current = (0, import_path13.resolve)(startPath);
40560
+ let current = (0, import_path18.resolve)(startPath);
39874
40561
  while (true) {
39875
- if ((0, import_fs15.existsSync)((0, import_path13.join)(current, ".git"))) return current;
39876
- const parent = (0, import_path13.dirname)(current);
40562
+ if ((0, import_fs19.existsSync)((0, import_path18.join)(current, ".git"))) return current;
40563
+ const parent = (0, import_path18.dirname)(current);
39877
40564
  if (parent === current) return null;
39878
40565
  current = parent;
39879
40566
  }
39880
40567
  }
39881
40568
  function resolvePrivateRepoRoot(input) {
39882
40569
  if (input.explicitPrivateRepoRoot) {
39883
- return { root: (0, import_path13.resolve)(input.explicitPrivateRepoRoot), explicit: true };
40570
+ return { root: (0, import_path18.resolve)(input.explicitPrivateRepoRoot), explicit: true };
39884
40571
  }
39885
- const cwd = (0, import_path13.resolve)(input.cwd || process.cwd());
40572
+ const cwd = (0, import_path18.resolve)(input.cwd || process.cwd());
39886
40573
  const gitRoot = findNearestGitRoot(cwd);
39887
40574
  if (gitRoot) return { root: gitRoot, explicit: false };
39888
40575
  return {
39889
- root: (0, import_path13.join)(cwd, ".foh-no-private-repo-root-sentinel"),
40576
+ root: (0, import_path18.join)(cwd, ".foh-no-private-repo-root-sentinel"),
39890
40577
  explicit: false
39891
40578
  };
39892
40579
  }
39893
40580
  function promptVersionFromPath(promptPath) {
39894
- const raw = (0, import_fs15.readFileSync)(promptPath, "utf8");
40581
+ const raw = (0, import_fs19.readFileSync)(promptPath, "utf8");
39895
40582
  if (raw.includes("Do not assume access to the private source repository")) return "blank-setup.v1";
39896
40583
  return "unknown";
39897
40584
  }
@@ -39900,7 +40587,7 @@ function createExternalAgentExecutorPlan(options) {
39900
40587
  if (runner !== "codex" && runner !== "gemini") {
39901
40588
  throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
39902
40589
  }
39903
- const batchPath = (0, import_path13.resolve)(options.batchPath);
40590
+ const batchPath = (0, import_path18.resolve)(options.batchPath);
39904
40591
  const batch = readBatch(batchPath);
39905
40592
  const runnerProbe = validateRunner(options, runner);
39906
40593
  const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
@@ -39919,17 +40606,17 @@ function createExternalAgentExecutorPlan(options) {
39919
40606
  `Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
39920
40607
  );
39921
40608
  }
39922
- (0, import_fs15.mkdirSync)(workspaceRoot, { recursive: true });
39923
- const batchDir = (0, import_path13.resolve)(String(batch.batch_dir || (0, import_path13.resolve)(batchPath, "..")));
40609
+ (0, import_fs19.mkdirSync)(workspaceRoot, { recursive: true });
40610
+ const batchDir = (0, import_path18.resolve)(String(batch.batch_dir || (0, import_path18.resolve)(batchPath, "..")));
39924
40611
  const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
39925
40612
  const runs = batch.runs.map((run) => {
39926
40613
  const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
39927
- const runDir = (0, import_path13.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
39928
- const promptPath = (0, import_path13.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
39929
- const workspaceDir = (0, import_path13.join)(workspaceRoot, runId);
39930
- (0, import_fs15.mkdirSync)(workspaceDir, { recursive: true });
39931
- (0, import_fs15.writeFileSync)(
39932
- (0, import_path13.join)(workspaceDir, "README.md"),
40614
+ const runDir = (0, import_path18.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
40615
+ const promptPath = (0, import_path18.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
40616
+ const workspaceDir = (0, import_path18.join)(workspaceRoot, runId);
40617
+ (0, import_fs19.mkdirSync)(workspaceDir, { recursive: true });
40618
+ (0, import_fs19.writeFileSync)(
40619
+ (0, import_path18.join)(workspaceDir, "README.md"),
39933
40620
  [
39934
40621
  "# FOH External-Agent Workspace",
39935
40622
  "",
@@ -39947,11 +40634,11 @@ function createExternalAgentExecutorPlan(options) {
39947
40634
  });
39948
40635
  const promptVersion = String(env[EXTERNAL_AGENT_PROMPT_VERSION_ENV] || "unknown");
39949
40636
  const outputStem = runner === "gemini" ? "gemini" : "codex";
39950
- const jsonlPath = (0, import_path13.join)(runDir, `${outputStem}-exec.jsonl`);
39951
- const lastMessagePath = (0, import_path13.join)(runDir, `${outputStem}-last-message.md`);
39952
- const stderrPath = (0, import_path13.join)(runDir, `${outputStem}-stderr.txt`);
39953
- const runPath = (0, import_path13.join)(runDir, "run.json");
39954
- const artifactSafetyPath = (0, import_path13.join)(runDir, "artifact-safety.json");
40637
+ const jsonlPath = (0, import_path18.join)(runDir, `${outputStem}-exec.jsonl`);
40638
+ const lastMessagePath = (0, import_path18.join)(runDir, `${outputStem}-last-message.md`);
40639
+ const stderrPath = (0, import_path18.join)(runDir, `${outputStem}-stderr.txt`);
40640
+ const runPath = (0, import_path18.join)(runDir, "run.json");
40641
+ const artifactSafetyPath = (0, import_path18.join)(runDir, "artifact-safety.json");
39955
40642
  const args = runner === "gemini" ? [
39956
40643
  ...runnerProbe.globalArgs,
39957
40644
  ...runnerProbe.execArgs
@@ -40042,281 +40729,12 @@ function createExternalAgentExecutorPlan(options) {
40042
40729
  };
40043
40730
  }
40044
40731
  function writeExternalAgentExecutorPlan(plan) {
40045
- const path2 = (0, import_path13.join)(plan.batch_dir, "executor-plan.json");
40046
- (0, import_fs15.mkdirSync)(plan.batch_dir, { recursive: true });
40047
- (0, import_fs15.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40732
+ const path2 = (0, import_path18.join)(plan.batch_dir, "executor-plan.json");
40733
+ (0, import_fs19.mkdirSync)(plan.batch_dir, { recursive: true });
40734
+ (0, import_fs19.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40048
40735
  `, "utf8");
40049
40736
  return path2;
40050
40737
  }
40051
- function proofArtifactPasses(runDir) {
40052
- const proofPath = (0, import_path13.join)(runDir, "proof.json");
40053
- if (!(0, import_fs15.existsSync)(proofPath)) return false;
40054
- try {
40055
- const parsed = JSON.parse((0, import_fs15.readFileSync)(proofPath, "utf8"));
40056
- return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
40057
- } catch {
40058
- return false;
40059
- }
40060
- }
40061
- function readIfExists(path2) {
40062
- return (0, import_fs15.existsSync)(path2) ? (0, import_fs15.readFileSync)(path2, "utf8") : "";
40063
- }
40064
- function redactArtifactFile(path2, input = {}) {
40065
- if (!(0, import_fs15.existsSync)(path2)) return;
40066
- const original = (0, import_fs15.readFileSync)(path2, "utf8");
40067
- const redacted = redactExternalAgentArtifactText(original, input);
40068
- if (redacted !== original) (0, import_fs15.writeFileSync)(path2, redacted, "utf8");
40069
- }
40070
- function redactOutputArtifacts(run, input = {}) {
40071
- redactArtifactFile(run.outputs.jsonl, input);
40072
- redactArtifactFile(run.outputs.last_message, input);
40073
- redactArtifactFile(run.outputs.stderr, input);
40074
- redactArtifactFile((0, import_path13.join)(run.run_dir, "commands.ndjson"), input);
40075
- if (!(0, import_fs15.existsSync)(run.run_dir)) return;
40076
- for (const name of (0, import_fs15.readdirSync)(run.run_dir)) {
40077
- if (name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")) {
40078
- redactArtifactFile((0, import_path13.join)(run.run_dir, name), input);
40079
- }
40080
- }
40081
- }
40082
- function copyCommandCaptureArtifacts(input) {
40083
- const commandLog = (0, import_path13.join)(input.captureDir, "commands.ndjson");
40084
- if ((0, import_fs15.existsSync)(commandLog)) {
40085
- (0, import_fs15.writeFileSync)((0, import_path13.join)(input.runDir, "commands.ndjson"), (0, import_fs15.readFileSync)(commandLog, "utf8"), "utf8");
40086
- }
40087
- for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
40088
- if (name.startsWith("command-output-cmd_")) {
40089
- (0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
40090
- } else if (EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
40091
- (0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
40092
- }
40093
- }
40094
- }
40095
- function relativeArtifactName(path2) {
40096
- return (0, import_path13.basename)(path2);
40097
- }
40098
- function classifyRun(input) {
40099
- if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
40100
- if (!input.artifactSafetyOk) return { status: "fail", reasonCode: "external_agent_artifact_safety_blocked" };
40101
- const completedCommands = readCommandRecords(input.run.run_dir).filter((record2) => record2.phase === "completed");
40102
- const observedVersions = completedCommands.map((record2) => String(record2.cli_version || "").trim()).filter((version2) => /^\d+\.\d+\.\d+$/.test(version2));
40103
- if (observedVersions.some((version2) => version2 !== CLI_VERSION)) {
40104
- return { status: "hold", reasonCode: "external_agent_cli_version_drift" };
40105
- }
40106
- const commandReasonCodes = completedCommands.flatMap((record2) => [
40107
- String(record2.reason_code || ""),
40108
- ...Array.isArray(record2.check_reason_codes) ? record2.check_reason_codes.map((code) => String(code || "")) : []
40109
- ]).filter(Boolean);
40110
- const hasCommandReason = (pattern) => commandReasonCodes.some((reason) => pattern.test(reason));
40111
- if (hasCommandReason(new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i"))) {
40112
- return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
40113
- }
40114
- if (hasCommandReason(/provider_capacity_blocked/i)) {
40115
- return { status: "hold", reasonCode: "provider_capacity_blocked" };
40116
- }
40117
- if (hasCommandReason(/byon_voice_number_not_configured/i)) {
40118
- return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
40119
- }
40120
- if (hasCommandReason(/contact_phone_provisioning_failed/i)) {
40121
- return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
40122
- }
40123
- if (hasCommandReason(/voice_contact_expected_no_spend_hold/i)) {
40124
- return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
40125
- }
40126
- if (hasCommandReason(/contact_phone_missing/i)) {
40127
- return { status: "hold", reasonCode: "voice_contact_phone_missing" };
40128
- }
40129
- if (hasCommandReason(/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i)) {
40130
- return { status: "hold", reasonCode: "simulation_certification_failed" };
40131
- }
40132
- if (hasCommandReason(/proof_held/i)) {
40133
- return { status: "hold", reasonCode: "external_agent_proof_held" };
40134
- }
40135
- if (hasCommandReason(/agent_limit_reached/i)) {
40136
- return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
40137
- }
40138
- const lastMessage = readIfExists(input.run.outputs.last_message);
40139
- const stderr = readIfExists(input.run.outputs.stderr);
40140
- const combined = `${lastMessage}
40141
- ${stderr}`;
40142
- if (/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i.test(combined)) {
40143
- return { status: "fail", reasonCode: "private_repo_assumption_detected" };
40144
- }
40145
- if (/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i.test(combined)) {
40146
- return { status: "hold", reasonCode: "codex_exec_policy_blocked" };
40147
- }
40148
- if (/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i.test(combined)) {
40149
- return { status: "hold", reasonCode: "codex_sandbox_exec_blocked" };
40150
- }
40151
- if (/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i.test(combined)) {
40152
- return { status: "hold", reasonCode: "codex_network_dns_blocked" };
40153
- }
40154
- if (new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i").test(combined)) {
40155
- return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
40156
- }
40157
- if (/provider_capacity_blocked/i.test(combined)) {
40158
- return { status: "hold", reasonCode: "provider_capacity_blocked" };
40159
- }
40160
- if (/byon_voice_number_not_configured/i.test(combined)) {
40161
- return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
40162
- }
40163
- if (/contact_phone_provisioning_failed/i.test(combined)) {
40164
- return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
40165
- }
40166
- if (/voice_contact_expected_no_spend_hold/i.test(combined)) {
40167
- return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
40168
- }
40169
- if (/contact_phone_missing/i.test(combined)) {
40170
- return { status: "hold", reasonCode: "voice_contact_phone_missing" };
40171
- }
40172
- if (/simulation_certification_failed/i.test(combined)) {
40173
- return { status: "hold", reasonCode: "simulation_certification_failed" };
40174
- }
40175
- if (/proof_held/i.test(combined)) {
40176
- return { status: "hold", reasonCode: "external_agent_proof_held" };
40177
- }
40178
- if (/agent_limit_reached/i.test(combined)) {
40179
- return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
40180
- }
40181
- if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(input.run.run_dir)) {
40182
- return { status: "hold", reasonCode: "auth_browser_approval_required" };
40183
- }
40184
- if (input.exitCode !== 0) return { status: "hold", reasonCode: `${input.run.command}_runner_nonzero_exit` };
40185
- if (proofArtifactPasses(input.run.run_dir)) return { status: "pass", reasonCode: null };
40186
- return { status: "hold", reasonCode: "external_agent_proof_artifact_missing" };
40187
- }
40188
- function buildExecutedRunArtifact(input) {
40189
- const commands = readCommandRecords(input.run.run_dir);
40190
- const agentMetadata = readExternalAgentMetadata(input.run.run_dir);
40191
- return {
40192
- schema_version: "external_agent_run.v1",
40193
- run_id: input.run.run_id,
40194
- status: input.status,
40195
- failure_reason_code: input.reasonCode,
40196
- model_provider: input.run.model_provider,
40197
- model_name: input.run.model_name,
40198
- runner_model: input.run.runner_model,
40199
- agent_shell: `${input.run.command}-exec`,
40200
- workspace_type: "clean-no-repo-programmatic",
40201
- prompt_version: input.run.prompt_version,
40202
- prompt_path: "prompt.txt",
40203
- started_at: input.startedAt,
40204
- ended_at: input.endedAt,
40205
- manual_intervention_count: 0,
40206
- manual_interventions: [],
40207
- environment: {
40208
- os: process.platform,
40209
- node_version: process.version,
40210
- npm_version: null,
40211
- foh_cli_version: CLI_VERSION,
40212
- runner_exit_code: input.exitCode,
40213
- runner_timed_out: input.timedOut,
40214
- duration_ms: input.durationMs
40215
- },
40216
- public_entrypoints: [
40217
- "https://frontofhouse.okii.uk",
40218
- "https://frontofhouse.okii.uk/llms.txt",
40219
- "https://frontofhouse.okii.uk/openapi.yaml",
40220
- "npx --yes @f-o-h/cli@latest"
40221
- ],
40222
- commands_run: commands.map((command) => command.command),
40223
- docs_pages_used: agentMetadata.docs_pages_used,
40224
- eval_state: {
40225
- lifecycle_strategy: "reuse_existing_eval_state",
40226
- org_reuse_expected: true,
40227
- agent_reuse_expected: true,
40228
- widget_reuse_expected: true,
40229
- fresh_org_expected: false,
40230
- ephemeral_org_expected: false,
40231
- fresh_agent_expected: false,
40232
- phone_purchase_expected: false,
40233
- paid_resource_creation_expected: false,
40234
- spend_policy_expected: NO_SPEND_POLICY,
40235
- cleanup_expected: false,
40236
- cleanup_strategy: "no_cleanup_for_reused_eval_state",
40237
- paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
40238
- rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
40239
- },
40240
- artifacts: {
40241
- terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
40242
- command_log: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
40243
- proof_bundle: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
40244
- replay_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
40245
- knowledge_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
40246
- improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
40247
- agent_metadata: agentMetadata.path,
40248
- notes: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
40249
- runner_last_message: relativeArtifactName(input.run.outputs.last_message),
40250
- runner_stderr: relativeArtifactName(input.run.outputs.stderr),
40251
- codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
40252
- codex_stderr: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.stderr) : null,
40253
- artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
40254
- },
40255
- summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
40256
- next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))] : [
40257
- "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
40258
- "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
40259
- externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))
40260
- ]
40261
- };
40262
- }
40263
- function spawnRunner(input) {
40264
- return new Promise((resolveRun) => {
40265
- const started = Date.now();
40266
- const commandInvocation = buildCommandInvocation(input.command, input.args);
40267
- const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
40268
- cwd: input.cwd,
40269
- env: input.env,
40270
- shell: false,
40271
- stdio: ["pipe", "pipe", "pipe"],
40272
- windowsHide: true
40273
- });
40274
- const stdout = (0, import_fs15.createWriteStream)(input.stdoutPath, { flags: "w" });
40275
- const stderr = (0, import_fs15.createWriteStream)(input.stderrPath, { flags: "w" });
40276
- child.stdout.pipe(stdout);
40277
- child.stderr.pipe(stderr);
40278
- child.stdin.end(input.prompt);
40279
- let timedOut = false;
40280
- const timer = setTimeout(() => {
40281
- timedOut = true;
40282
- if (child.pid && process.platform === "win32") {
40283
- (0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
40284
- } else {
40285
- child.kill("SIGKILL");
40286
- }
40287
- }, input.timeoutMs);
40288
- child.on("close", (exitCode) => {
40289
- clearTimeout(timer);
40290
- stdout.end();
40291
- stderr.end();
40292
- resolveRun({
40293
- exitCode,
40294
- timedOut,
40295
- durationMs: Date.now() - started
40296
- });
40297
- });
40298
- child.on("error", () => {
40299
- clearTimeout(timer);
40300
- stdout.end();
40301
- stderr.end();
40302
- resolveRun({
40303
- exitCode: null,
40304
- timedOut,
40305
- durationMs: Date.now() - started
40306
- });
40307
- });
40308
- });
40309
- }
40310
- function buildCommandInvocation(command, args) {
40311
- if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
40312
- const binDir = (0, import_path13.dirname)(command);
40313
- const codexEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
40314
- if ((0, import_fs15.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
40315
- const geminiEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
40316
- if ((0, import_fs15.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
40317
- }
40318
- return { command, args };
40319
- }
40320
40738
  async function executeExternalAgentExecutorPlan(plan, options = {}) {
40321
40739
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
40322
40740
  const results = [];
@@ -40328,8 +40746,8 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40328
40746
  if (authPreflight && !authPreflight.ok) {
40329
40747
  const endedAt2 = (/* @__PURE__ */ new Date()).toISOString();
40330
40748
  const blockedResults = plan.runs.map((run) => {
40331
- (0, import_fs15.mkdirSync)(run.run_dir, { recursive: true });
40332
- const runArtifact = buildExecutedRunArtifact({
40749
+ (0, import_fs19.mkdirSync)(run.run_dir, { recursive: true });
40750
+ const runArtifact = buildExecutedExternalAgentRunArtifact({
40333
40751
  run,
40334
40752
  startedAt,
40335
40753
  endedAt: endedAt2,
@@ -40339,7 +40757,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40339
40757
  timedOut: false,
40340
40758
  durationMs: 0
40341
40759
  });
40342
- (0, import_fs15.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40760
+ (0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40343
40761
  `, "utf8");
40344
40762
  return {
40345
40763
  run_id: run.run_id,
@@ -40366,41 +40784,41 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40366
40784
  }
40367
40785
  for (const run of plan.runs) {
40368
40786
  const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
40369
- const commandCaptureDir = (0, import_path13.join)(run.workspace_dir, ".foh-capture");
40370
- (0, import_fs15.mkdirSync)(commandCaptureDir, { recursive: true });
40787
+ const commandCaptureDir = (0, import_path18.join)(run.workspace_dir, ".foh-capture");
40788
+ (0, import_fs19.mkdirSync)(commandCaptureDir, { recursive: true });
40371
40789
  const env = buildCodexExecutorEnv({
40372
40790
  sourceEnv: options.env,
40373
40791
  runDir: commandCaptureDir,
40374
40792
  promptVersion: run.prompt_version
40375
40793
  });
40376
- const spawned = await spawnRunner({
40794
+ const spawned = await spawnExternalAgentRunner({
40377
40795
  command: runnerCommand,
40378
40796
  args: run.args,
40379
40797
  cwd: run.workspace_dir,
40380
40798
  env,
40381
- prompt: (0, import_fs15.readFileSync)(run.prompt_path, "utf8"),
40799
+ prompt: (0, import_fs19.readFileSync)(run.prompt_path, "utf8"),
40382
40800
  stdoutPath: run.outputs.jsonl,
40383
40801
  stderrPath: run.outputs.stderr,
40384
40802
  timeoutMs: plan.timeout_minutes * 60 * 1e3
40385
40803
  });
40386
- copyCommandCaptureArtifacts({ captureDir: commandCaptureDir, runDir: run.run_dir });
40804
+ copyExternalAgentCommandCaptureArtifacts({ captureDir: commandCaptureDir, runDir: run.run_dir });
40387
40805
  const privateRepoRoot = options.privateRepoRoot || plan.private_repo_root;
40388
- redactOutputArtifacts(run, { privateRepoRoot });
40806
+ redactExternalAgentOutputArtifacts(run, { privateRepoRoot });
40389
40807
  const artifactSafety = scanExternalAgentArtifacts({
40390
40808
  runDir: run.run_dir,
40391
40809
  privateRepoRoot,
40392
40810
  writeRedacted: true
40393
40811
  });
40394
- (0, import_fs15.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40812
+ (0, import_fs19.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40395
40813
  `, "utf8");
40396
40814
  const runEndedAt = (/* @__PURE__ */ new Date()).toISOString();
40397
- const classification = classifyRun({
40815
+ const classification = classifyExternalAgentRun({
40398
40816
  run,
40399
40817
  exitCode: spawned.exitCode,
40400
40818
  timedOut: spawned.timedOut,
40401
40819
  artifactSafetyOk: artifactSafety.ok
40402
40820
  });
40403
- const runArtifact = buildExecutedRunArtifact({
40821
+ const runArtifact = buildExecutedExternalAgentRunArtifact({
40404
40822
  run,
40405
40823
  startedAt: runStartedAt,
40406
40824
  endedAt: runEndedAt,
@@ -40410,7 +40828,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40410
40828
  timedOut: spawned.timedOut,
40411
40829
  durationMs: spawned.durationMs
40412
40830
  });
40413
- (0, import_fs15.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40831
+ (0, import_fs19.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40414
40832
  `, "utf8");
40415
40833
  results.push({
40416
40834
  run_id: run.run_id,
@@ -40459,13 +40877,13 @@ function defaultRunDir(modelName, promptVersion) {
40459
40877
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40460
40878
  const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
40461
40879
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40462
- return (0, import_path14.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40880
+ return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40463
40881
  }
40464
40882
  function defaultBatchDir(promptVersion) {
40465
40883
  const date4 = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
40466
40884
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40467
40885
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40468
- return (0, import_path14.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40886
+ return (0, import_path19.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40469
40887
  }
40470
40888
  function safeSlug(value) {
40471
40889
  return String(value || "unknown").toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "unknown";
@@ -40479,20 +40897,6 @@ function scanArtifactsCommand(runDir, privateRepoRoot) {
40479
40897
  const privateRootArg = privateRepoRoot ? ` --private-repo-root ${quoteArg(privateRepoRoot)}` : "";
40480
40898
  return `foh eval external-agent scan-artifacts --run-dir ${quoteArg(runDir)}${privateRootArg} --write-redacted --json`;
40481
40899
  }
40482
- function externalAgentSummaryCommand2(root) {
40483
- const summaryPath = (0, import_path14.join)(root, "latest-summary.json");
40484
- const reportPath = (0, import_path14.join)(root, "summary.report.json");
40485
- return [
40486
- "node",
40487
- "scripts/summarize-external-agent-runs.mjs",
40488
- "--root",
40489
- quoteArg(root),
40490
- "--out",
40491
- quoteArg(summaryPath),
40492
- "--report",
40493
- quoteArg(reportPath)
40494
- ].join(" ");
40495
- }
40496
40900
  function executorRecoveryCommands(reasonCode, runner) {
40497
40901
  const normalizedRunner = String(runner || "codex").trim().toLowerCase();
40498
40902
  if (reasonCode === "external_agent_runner_binary_missing") {
@@ -40595,14 +40999,14 @@ function writePrompt(runDir, promptVersion, context = {}) {
40595
40999
  replayPromptContext(context.replayFile),
40596
41000
  knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
40597
41001
  ].join("");
40598
- const path2 = (0, import_path14.join)(runDir, "prompt.txt");
40599
- (0, import_fs16.writeFileSync)(path2, `${prompt}
41002
+ const path2 = (0, import_path19.join)(runDir, "prompt.txt");
41003
+ (0, import_fs20.writeFileSync)(path2, `${prompt}
40600
41004
  `, "utf8");
40601
41005
  return path2;
40602
41006
  }
40603
41007
  function writeSession(runDir, session) {
40604
- const path2 = (0, import_path14.join)(runDir, "session.json");
40605
- (0, import_fs16.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
41008
+ const path2 = (0, import_path19.join)(runDir, "session.json");
41009
+ (0, import_fs20.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
40606
41010
  `, "utf8");
40607
41011
  return path2;
40608
41012
  }
@@ -40678,9 +41082,9 @@ function buildRunArtifact(input) {
40678
41082
  notes: "notes.md"
40679
41083
  },
40680
41084
  summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
40681
- next_commands: status === "pass" ? [externalAgentSummaryCommand2((0, import_path14.dirname)(input.runDir))] : [
40682
- `foh bug improve --from external-agent-run --file ${(0, import_path14.join)(input.runDir, "run.json")} --out ${(0, import_path14.join)(input.runDir, "improvement-packet.json")} --json`,
40683
- externalAgentSummaryCommand2((0, import_path14.dirname)(input.runDir))
41085
+ next_commands: status === "pass" ? [externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))] : [
41086
+ `foh bug improve --from external-agent-run --file ${(0, import_path19.join)(input.runDir, "run.json")} --out ${(0, import_path19.join)(input.runDir, "improvement-packet.json")} --json`,
41087
+ externalAgentSummaryCommand((0, import_path19.dirname)(input.runDir))
40684
41088
  ]
40685
41089
  };
40686
41090
  }
@@ -40689,16 +41093,16 @@ function registerEval(program3) {
40689
41093
  const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
40690
41094
  external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
40691
41095
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40692
- const batchDir = (0, import_path14.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
40693
- const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
41096
+ const batchDir = (0, import_path19.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
41097
+ const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
40694
41098
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40695
41099
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40696
41100
  const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
40697
- (0, import_fs16.mkdirSync)(batchDir, { recursive: true });
40698
- const runs = models.map((model, index) => {
41101
+ (0, import_fs20.mkdirSync)(batchDir, { recursive: true });
41102
+ const runs2 = models.map((model, index) => {
40699
41103
  const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
40700
- const runDir = (0, import_path14.join)(batchDir, runId);
40701
- (0, import_fs16.mkdirSync)(runDir, { recursive: true });
41104
+ const runDir = (0, import_path19.join)(batchDir, runId);
41105
+ (0, import_fs20.mkdirSync)(runDir, { recursive: true });
40702
41106
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40703
41107
  const commandArgs = [
40704
41108
  "eval",
@@ -40741,23 +41145,23 @@ function registerEval(program3) {
40741
41145
  expected_answer: expectedAnswer ?? null,
40742
41146
  workspace_type: String(opts.workspaceType || "clean-no-repo"),
40743
41147
  agent_shell: String(opts.agentShell || "vscode-terminal"),
40744
- run_count: runs.length,
40745
- runs,
40746
- summary_command: externalAgentSummaryCommand2(batchDir)
41148
+ run_count: runs2.length,
41149
+ runs: runs2,
41150
+ summary_command: externalAgentSummaryCommand(batchDir)
40747
41151
  };
40748
- const batchPath = (0, import_path14.join)(batchDir, "batch.json");
40749
- (0, import_fs16.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
41152
+ const batchPath = (0, import_path19.join)(batchDir, "batch.json");
41153
+ (0, import_fs20.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
40750
41154
  `, "utf8");
40751
41155
  format(cliEnvelope({
40752
41156
  schemaVersion: "external_agent_batch_plan_result.v1",
40753
41157
  status: "exported",
40754
41158
  reasonCode: "external_agent_batch_plan_created",
40755
- summary: `External-agent batch plan created for ${runs.length} model(s).`,
41159
+ summary: `External-agent batch plan created for ${runs2.length} model(s).`,
40756
41160
  artifacts: {
40757
41161
  batch: batchPath
40758
41162
  },
40759
41163
  nextCommands: [
40760
- ...runs.map((run) => run.launch_command),
41164
+ ...runs2.map((run) => run.launch_command),
40761
41165
  batch.summary_command
40762
41166
  ],
40763
41167
  extra: { batch }
@@ -40766,11 +41170,11 @@ function registerEval(program3) {
40766
41170
  external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
40767
41171
  const status = normalizeStatus(opts.status);
40768
41172
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40769
- const runDir = (0, import_path14.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
40770
- const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
41173
+ const runDir = (0, import_path19.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
41174
+ const replayFile = opts.replayFile ? (0, import_path19.resolve)(String(opts.replayFile)) : void 0;
40771
41175
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40772
41176
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40773
- (0, import_fs16.mkdirSync)(runDir, { recursive: true });
41177
+ (0, import_fs20.mkdirSync)(runDir, { recursive: true });
40774
41178
  const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
40775
41179
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40776
41180
  const shell = inferShell(opts.shell);
@@ -40796,7 +41200,7 @@ function registerEval(program3) {
40796
41200
  }
40797
41201
  };
40798
41202
  writeSession(runDir, session);
40799
- (0, import_fs16.writeFileSync)((0, import_path14.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
41203
+ (0, import_fs20.writeFileSync)((0, import_path19.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
40800
41204
  let shellExitCode = null;
40801
41205
  if (opts.shell !== false) {
40802
41206
  process.stdout.write(`
@@ -40806,7 +41210,7 @@ Prompt: ${promptPath}
40806
41210
  Exit the shell to finalize run.json.
40807
41211
 
40808
41212
  `);
40809
- const result = (0, import_child_process5.spawnSync)(shell.command, shell.args, {
41213
+ const result = (0, import_child_process6.spawnSync)(shell.command, shell.args, {
40810
41214
  stdio: "inherit",
40811
41215
  env: {
40812
41216
  ...process.env,
@@ -40818,8 +41222,8 @@ Exit the shell to finalize run.json.
40818
41222
  shellExitCode = typeof result.status === "number" ? result.status : null;
40819
41223
  }
40820
41224
  const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
40821
- const runPath = (0, import_path14.join)(runDir, "run.json");
40822
- (0, import_fs16.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
41225
+ const runPath = (0, import_path19.join)(runDir, "run.json");
41226
+ (0, import_fs20.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
40823
41227
  `, "utf8");
40824
41228
  format(cliEnvelope({
40825
41229
  schemaVersion: "external_agent_capture_result.v1",
@@ -40829,12 +41233,57 @@ Exit the shell to finalize run.json.
40829
41233
  artifacts: {
40830
41234
  run: runPath,
40831
41235
  prompt: promptPath,
40832
- commands: (0, import_path14.join)(runDir, "commands.ndjson")
41236
+ commands: (0, import_path19.join)(runDir, "commands.ndjson")
40833
41237
  },
40834
41238
  nextCommands: artifact.next_commands,
40835
41239
  extra: { run: artifact }
40836
41240
  }), { json: Boolean(opts.json) });
40837
41241
  });
41242
+ external.command("summary").description("Summarize external_agent_run.v1 artifacts from a clean external-agent run root").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
41243
+ const { summary, report } = runExternalAgentRunSummary({
41244
+ root: String(opts.root),
41245
+ out: opts.out ? String(opts.out) : void 0,
41246
+ report: opts.report ? String(opts.report) : void 0,
41247
+ currentBaselineOnly: Boolean(opts.currentBaselineOnly),
41248
+ cohortId: opts.cohort ? String(opts.cohort) : null
41249
+ });
41250
+ format(cliEnvelope({
41251
+ schemaVersion: "external_agent_run_summary_result.v1",
41252
+ status: report.status === "passed" ? "pass" : "fail",
41253
+ reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
41254
+ summary: `External-agent summary covers ${summary.run_count} run(s).`,
41255
+ artifacts: {
41256
+ summary: opts.out ? String(opts.out) : null,
41257
+ report: opts.report ? String(opts.report) : null
41258
+ },
41259
+ nextCommands: summary.next_commands,
41260
+ extra: { external_agent_summary: summary, report }
41261
+ }), { json: Boolean(opts.json) });
41262
+ if (report.status !== "passed") process.exitCode = 1;
41263
+ });
41264
+ const runs = external.command("runs").description("Compatibility namespace for external-agent run artifact utilities");
41265
+ runs.command("summary").description("Compatibility alias for `foh eval external-agent summary`").requiredOption("--root <dir>", "Root containing external-agent run directories with run.json files").option("--out <path>", "Write summary JSON to this path").option("--report <path>", "Write script-style report JSON to this path").option("--current-baseline-only", "Summarize only the latest detected run cohort").option("--cohort <id>", "Summarize one explicit cohort id").option("--json", "Output as JSON").action(async (opts) => {
41266
+ const { summary, report } = runExternalAgentRunSummary({
41267
+ root: String(opts.root),
41268
+ out: opts.out ? String(opts.out) : void 0,
41269
+ report: opts.report ? String(opts.report) : void 0,
41270
+ currentBaselineOnly: Boolean(opts.currentBaselineOnly),
41271
+ cohortId: opts.cohort ? String(opts.cohort) : null
41272
+ });
41273
+ format(cliEnvelope({
41274
+ schemaVersion: "external_agent_run_summary_result.v1",
41275
+ status: report.status === "passed" ? "pass" : "fail",
41276
+ reasonCode: report.status === "passed" ? "external_agent_run_summary_created" : "external_agent_run_summary_invalid_runs",
41277
+ summary: `External-agent summary covers ${summary.run_count} run(s).`,
41278
+ artifacts: {
41279
+ summary: opts.out ? String(opts.out) : null,
41280
+ report: opts.report ? String(opts.report) : null
41281
+ },
41282
+ nextCommands: summary.next_commands,
41283
+ extra: { external_agent_summary: summary, report }
41284
+ }), { json: Boolean(opts.json) });
41285
+ if (report.status !== "passed") process.exitCode = 1;
41286
+ });
40838
41287
  external.command("scan-artifacts").description("Scan and redact external-agent run artifacts before they are promoted into improvement loops").requiredOption("--run-dir <path>", "External-agent run artifact directory").option("--private-repo-root <path>", "Private repository root that must not appear in artifacts").option("--write-redacted", "Write .redacted copies next to scanned artifacts").option("--json", "Output as JSON").action(async (opts) => {
40839
41288
  const report = scanExternalAgentArtifacts({
40840
41289
  runDir: String(opts.runDir),
@@ -40892,8 +41341,8 @@ Exit the shell to finalize run.json.
40892
41341
  requireExplicitEvalAuth: true,
40893
41342
  minimumEvalAuthTtlMs: (plan.timeout_minutes + 5) * 60 * 1e3
40894
41343
  });
40895
- const resultPath = (0, import_path14.join)(plan.batch_dir, "execution-result.json");
40896
- (0, import_fs16.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
41344
+ const resultPath = (0, import_path19.join)(plan.batch_dir, "execution-result.json");
41345
+ (0, import_fs20.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
40897
41346
  `, "utf8");
40898
41347
  format(cliEnvelope({
40899
41348
  schemaVersion: "external_agent_execution_result.v1",
@@ -40910,7 +41359,7 @@ Exit the shell to finalize run.json.
40910
41359
  plan.runs.find((item) => item.run_id === run.run_id)?.run_dir || ".",
40911
41360
  plan.private_repo_root_explicit ? plan.private_repo_root : void 0
40912
41361
  )),
40913
- externalAgentSummaryCommand2(plan.batch_dir)
41362
+ externalAgentSummaryCommand(plan.batch_dir)
40914
41363
  ],
40915
41364
  extra: { result }
40916
41365
  }), { json: Boolean(opts.json) });