@f-o-h/cli 0.1.69 → 0.1.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/foh.js +725 -661
  2. package/package.json +1 -1
package/dist/foh.js CHANGED
@@ -14172,8 +14172,8 @@ function registerAgentGuardrailCommands(agent) {
14172
14172
  try {
14173
14173
  rule = JSON.parse(opts.rule);
14174
14174
  } catch {
14175
- const { readFileSync: readFileSync14 } = await import("fs");
14176
- rule = JSON.parse(readFileSync14(opts.rule, "utf-8"));
14175
+ const { readFileSync: readFileSync16 } = await import("fs");
14176
+ rule = JSON.parse(readFileSync16(opts.rule, "utf-8"));
14177
14177
  }
14178
14178
  const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
14179
14179
  method: "POST",
@@ -14323,7 +14323,7 @@ async function publishAgentFromCurrentDraft(agentId, options) {
14323
14323
  orgId
14324
14324
  });
14325
14325
  try {
14326
- await apiFetch(`/v1/console/agents/${agentId}/publish`, {
14326
+ return await apiFetch(`/v1/console/agents/${agentId}/publish`, {
14327
14327
  method: "POST",
14328
14328
  body: JSON.stringify({ flowDraft }),
14329
14329
  apiUrlOverride,
@@ -14399,7 +14399,7 @@ async function validateCertifyAndPublishAgent(opts) {
14399
14399
  remediation: `Run: foh agent validate --agent ${opts.agentId} to see details.`
14400
14400
  });
14401
14401
  }
14402
- await publishAgentFromCurrentDraft(opts.agentId, {
14402
+ const publish = await publishAgentFromCurrentDraft(opts.agentId, {
14403
14403
  apiUrlOverride: opts.apiUrlOverride,
14404
14404
  orgId: opts.orgId
14405
14405
  });
@@ -14409,7 +14409,7 @@ async function validateCertifyAndPublishAgent(opts) {
14409
14409
  status: "not_run",
14410
14410
  reason_code: "publish_consumes_existing_certification_evidence"
14411
14411
  },
14412
- publish: { ok: true }
14412
+ publish
14413
14413
  };
14414
14414
  }
14415
14415
 
@@ -14719,9 +14719,9 @@ function registerAgent(program3) {
14719
14719
  process.stdout.write(yaml);
14720
14720
  return;
14721
14721
  }
14722
- const { writeFileSync: writeFileSync12 } = await import("fs");
14722
+ const { writeFileSync: writeFileSync13 } = await import("fs");
14723
14723
  const outputPath = opts.output ?? "tenant.yaml";
14724
- writeFileSync12(
14724
+ writeFileSync13(
14725
14725
  outputPath,
14726
14726
  `# tenant.yaml - Front Of House agent manifest
14727
14727
  # Edit this file and run: foh plan tenant.yaml
@@ -15084,11 +15084,6 @@ function registerInstagramChannelCommands(instagram, addCommonOptions) {
15084
15084
  }));
15085
15085
  }
15086
15086
 
15087
- // src/commands/channel-whatsapp.ts
15088
- var import_node_crypto = require("node:crypto");
15089
- var import_node_fs = require("node:fs");
15090
- var path = __toESM(require("node:path"));
15091
-
15092
15087
  // src/commands/channel-whatsapp-helpers.ts
15093
15088
  function parsePositiveNumber(value, fallback) {
15094
15089
  if (value === void 0 || value === null || String(value).trim() === "") return fallback;
@@ -15215,7 +15210,11 @@ function buildReasonedNextSteps({
15215
15210
  return dedupeSteps(steps);
15216
15211
  }
15217
15212
 
15218
- // ../../scripts/lib/channel-live-proof-evaluator.mjs
15213
+ // src/commands/channel-whatsapp-live-proof.ts
15214
+ var import_node_fs = require("node:fs");
15215
+ var path = __toESM(require("node:path"));
15216
+
15217
+ // src/lib/channel-live-proof-evaluator.mjs
15219
15218
  function normalizeStatusValue(value) {
15220
15219
  return String(value || "").trim().toLowerCase();
15221
15220
  }
@@ -15282,55 +15281,7 @@ function evaluateChannelLiveProofArtifact({
15282
15281
  };
15283
15282
  }
15284
15283
 
15285
- // src/commands/channel-whatsapp.ts
15286
- var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
15287
- var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
15288
- var WHATSAPP_SENDER_MODEL = {
15289
- test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
15290
- production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
15291
- runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
15292
- };
15293
- function parseBooleanOption({
15294
- value,
15295
- fallback,
15296
- optionName,
15297
- step
15298
- }) {
15299
- if (typeof value === "boolean") return value;
15300
- const normalized = String(value ?? "").trim().toLowerCase();
15301
- if (!normalized) return fallback;
15302
- if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
15303
- if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
15304
- throw new FohError({
15305
- step,
15306
- error: `Invalid boolean value for ${optionName}: ${String(value)}`,
15307
- remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
15308
- });
15309
- }
15310
- async function runWhatsAppReadinessChecks({
15311
- orgId,
15312
- apiUrlOverride,
15313
- verifyToken
15314
- }) {
15315
- const status = await apiFetch("/v1/console/channels/whatsapp/status", {
15316
- orgId,
15317
- apiUrlOverride
15318
- });
15319
- const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
15320
- const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
15321
- method: "POST",
15322
- body: JSON.stringify(verifyPayload),
15323
- orgId,
15324
- apiUrlOverride
15325
- });
15326
- const test = await apiFetch("/v1/console/channels/whatsapp/test", {
15327
- method: "POST",
15328
- body: JSON.stringify({ dryRun: true }),
15329
- orgId,
15330
- apiUrlOverride
15331
- });
15332
- return { status, verify, test };
15333
- }
15284
+ // src/commands/channel-whatsapp-live-proof.ts
15334
15285
  function resolveLiveProof({
15335
15286
  enabled,
15336
15287
  artifactPathRaw,
@@ -15386,6 +15337,57 @@ function resolveLiveProof({
15386
15337
  freshness: evaluated.freshness
15387
15338
  };
15388
15339
  }
15340
+
15341
+ // src/commands/channel-whatsapp-setup.ts
15342
+ var import_node_crypto = require("node:crypto");
15343
+ var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
15344
+ var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
15345
+ var WHATSAPP_SENDER_MODEL = {
15346
+ test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
15347
+ production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
15348
+ runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
15349
+ };
15350
+ function parseBooleanOption({
15351
+ value,
15352
+ fallback,
15353
+ optionName,
15354
+ step
15355
+ }) {
15356
+ if (typeof value === "boolean") return value;
15357
+ const normalized = String(value ?? "").trim().toLowerCase();
15358
+ if (!normalized) return fallback;
15359
+ if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
15360
+ if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
15361
+ throw new FohError({
15362
+ step,
15363
+ error: `Invalid boolean value for ${optionName}: ${String(value)}`,
15364
+ remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
15365
+ });
15366
+ }
15367
+ async function runWhatsAppReadinessChecks({
15368
+ orgId,
15369
+ apiUrlOverride,
15370
+ verifyToken
15371
+ }) {
15372
+ const status = await apiFetch("/v1/console/channels/whatsapp/status", {
15373
+ orgId,
15374
+ apiUrlOverride
15375
+ });
15376
+ const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
15377
+ const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
15378
+ method: "POST",
15379
+ body: JSON.stringify(verifyPayload),
15380
+ orgId,
15381
+ apiUrlOverride
15382
+ });
15383
+ const test = await apiFetch("/v1/console/channels/whatsapp/test", {
15384
+ method: "POST",
15385
+ body: JSON.stringify({ dryRun: true }),
15386
+ orgId,
15387
+ apiUrlOverride
15388
+ });
15389
+ return { status, verify, test };
15390
+ }
15389
15391
  function buildWebhookUrl(apiBaseUrl) {
15390
15392
  return `${apiBaseUrl.replace(/\/$/, "")}/v1/whatsapp/webhook`;
15391
15393
  }
@@ -15522,6 +15524,8 @@ function assertProofPass(strict, reasons) {
15522
15524
  markCommandFailed(1);
15523
15525
  }
15524
15526
  }
15527
+
15528
+ // src/commands/channel-whatsapp.ts
15525
15529
  function registerWhatsAppChannelCommands(whatsapp, addCommonOptions) {
15526
15530
  addCommonOptions(
15527
15531
  whatsapp.command("start").description("Assess WhatsApp onboarding readiness and print fastest setup path")
@@ -16169,11 +16173,11 @@ function registerVoice(program3) {
16169
16173
  }
16170
16174
  const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
16171
16175
  const audio = Buffer.from(await res.arrayBuffer());
16172
- const { mkdirSync: mkdirSync8, writeFileSync: writeFileSync12 } = await import("fs");
16173
- const { dirname: dirname8, resolve: resolve13 } = await import("path");
16176
+ const { mkdirSync: mkdirSync8, writeFileSync: writeFileSync13 } = await import("fs");
16177
+ const { dirname: dirname11, resolve: resolve13 } = await import("path");
16174
16178
  const absolutePath = resolve13(outputPath);
16175
- mkdirSync8(dirname8(absolutePath), { recursive: true });
16176
- writeFileSync12(absolutePath, audio);
16179
+ mkdirSync8(dirname11(absolutePath), { recursive: true });
16180
+ writeFileSync13(absolutePath, audio);
16177
16181
  format({
16178
16182
  status: "ok",
16179
16183
  provider,
@@ -32786,7 +32790,7 @@ var StdioServerTransport = class {
32786
32790
  };
32787
32791
 
32788
32792
  // src/lib/cli-version.ts
32789
- var CLI_VERSION = "0.1.69";
32793
+ var CLI_VERSION = "0.1.70";
32790
32794
 
32791
32795
  // src/commands/mcp-serve.ts
32792
32796
  var DEFAULT_TIMEOUT_MS = 12e4;
@@ -33806,6 +33810,35 @@ function readDraftKnowledgeText(draft) {
33806
33810
  const fromLegacy = typeof draft.knowledge_base === "string" ? draft.knowledge_base : "";
33807
33811
  return fromLegacy;
33808
33812
  }
33813
+ function normalizeKnowledgeText(value) {
33814
+ return value.replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n").trim();
33815
+ }
33816
+ function splitDraftKnowledgeSegments(value) {
33817
+ return value.replace(/\r\n?/g, "\n").split(/\n\s*---+\s*\n/g).map((segment) => normalizeKnowledgeText(segment)).filter(Boolean);
33818
+ }
33819
+ function buildDraftKnowledgeUpdate(existing, fileContent) {
33820
+ const normalizedContent = normalizeKnowledgeText(fileContent);
33821
+ if (normalizedContent.length === 0) {
33822
+ throw new FohError({
33823
+ step: "knowledge.ingest-file",
33824
+ error: "Knowledge file is empty after normalization",
33825
+ remediation: "Pass a file with non-empty text content.",
33826
+ statusCode: 400
33827
+ });
33828
+ }
33829
+ const existingSegments = splitDraftKnowledgeSegments(existing);
33830
+ const duplicate = existingSegments.includes(normalizedContent);
33831
+ const nextSegments = duplicate ? existingSegments : [...existingSegments, normalizedContent];
33832
+ const nextKnowledge = nextSegments.join("\n\n---\n\n");
33833
+ const normalizedExisting = existingSegments.join("\n\n---\n\n");
33834
+ return {
33835
+ nextKnowledge,
33836
+ normalizedContent,
33837
+ duplicate,
33838
+ shouldPatch: nextKnowledge !== normalizedExisting || normalizeKnowledgeText(existing) !== normalizedExisting,
33839
+ segmentCount: nextSegments.length
33840
+ };
33841
+ }
33809
33842
  function tokenize(value) {
33810
33843
  return value.toLowerCase().split(/[^a-z0-9]+/g).map((token) => token.trim()).filter((token) => token.length >= 3);
33811
33844
  }
@@ -33929,23 +33962,25 @@ function registerKnowledge(program3) {
33929
33962
  apiUrlOverride: opts.apiUrl
33930
33963
  });
33931
33964
  const existing = readDraftKnowledgeText(draft);
33932
- const nextKnowledge = existing.trim().length > 0 ? `${existing}
33933
-
33934
- ---
33935
- ${content}` : content;
33936
- await apiFetch(`/v1/console/agents/${opts.agent}/draft`, {
33937
- method: "PATCH",
33938
- body: JSON.stringify({
33939
- knowledge_base_raw: nextKnowledge,
33940
- knowledge_base: nextKnowledge
33941
- }),
33942
- orgId: opts.org,
33943
- apiUrlOverride: opts.apiUrl
33944
- });
33965
+ const update = buildDraftKnowledgeUpdate(existing, content);
33966
+ if (update.shouldPatch) {
33967
+ await apiFetch(`/v1/console/agents/${opts.agent}/draft`, {
33968
+ method: "PATCH",
33969
+ body: JSON.stringify({
33970
+ knowledge_base_raw: update.nextKnowledge,
33971
+ knowledge_base: update.nextKnowledge
33972
+ }),
33973
+ orgId: opts.org,
33974
+ apiUrlOverride: opts.apiUrl
33975
+ });
33976
+ }
33945
33977
  data = {
33946
33978
  ok: true,
33947
33979
  source: "agent_draft_direct",
33948
- length: nextKnowledge.length
33980
+ length: update.nextKnowledge.length,
33981
+ draft_knowledge_updated: update.shouldPatch,
33982
+ draft_knowledge_deduped: update.duplicate,
33983
+ segment_count: update.segmentCount
33949
33984
  };
33950
33985
  } else {
33951
33986
  data = await apiFetch("/v1/knowledge/documents", {
@@ -33953,7 +33988,7 @@ ${content}` : content;
33953
33988
  body: JSON.stringify({
33954
33989
  name: (0, import_path2.basename)(opts.file),
33955
33990
  source_type: "text",
33956
- source_value: content,
33991
+ source_value: normalizeKnowledgeText(content),
33957
33992
  agent_id: opts.agent
33958
33993
  }),
33959
33994
  orgId: opts.org,
@@ -34078,47 +34113,8 @@ function registerLeads(program3) {
34078
34113
  // src/commands/setup.ts
34079
34114
  var import_crypto3 = require("crypto");
34080
34115
 
34081
- // src/lib/signed-report.ts
34082
- var import_crypto2 = require("crypto");
34083
- var import_fs4 = require("fs");
34084
- var import_path3 = require("path");
34085
- function canonicalize(value) {
34086
- if (value === null || value === void 0) return null;
34087
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
34088
- if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
34089
- if (typeof value === "object") {
34090
- const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
34091
- return Object.fromEntries(sortedEntries);
34092
- }
34093
- return String(value);
34094
- }
34095
- function stableStringify(value) {
34096
- return JSON.stringify(canonicalize(value), null, 2) + "\n";
34097
- }
34098
- function sha256Hex(input) {
34099
- return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
34100
- }
34101
- function signReport(reportPayload) {
34102
- const canonical = stableStringify(reportPayload);
34103
- return {
34104
- ...reportPayload,
34105
- report_hash: {
34106
- algorithm: "sha256",
34107
- digest_hex: sha256Hex(canonical),
34108
- canonicalization: "sorted-json-v1",
34109
- verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
34110
- }
34111
- };
34112
- }
34113
- function writeSignedJsonArtifact(path2, value) {
34114
- const absolutePath = (0, import_path3.resolve)(path2);
34115
- (0, import_fs4.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
34116
- (0, import_fs4.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
34117
- return absolutePath;
34118
- }
34119
-
34120
34116
  // src/commands/manifest.ts
34121
- var import_fs5 = require("fs");
34117
+ var import_fs4 = require("fs");
34122
34118
  var import_picocolors3 = __toESM(require_picocolors());
34123
34119
  function formatDiff(diffs) {
34124
34120
  if (diffs.length === 0) return "No changes";
@@ -34154,7 +34150,7 @@ function formatDiff(diffs) {
34154
34150
  function loadManifestFile(filePath) {
34155
34151
  let raw;
34156
34152
  try {
34157
- raw = (0, import_fs5.readFileSync)(filePath, "utf-8");
34153
+ raw = (0, import_fs4.readFileSync)(filePath, "utf-8");
34158
34154
  } catch {
34159
34155
  throw new FohError({
34160
34156
  step: "manifest.load",
@@ -34379,80 +34375,13 @@ function normalizeAgentCertMode(value) {
34379
34375
  return agentCertModeValues.includes(value) ? value : "quick";
34380
34376
  }
34381
34377
 
34382
- // src/commands/setup.ts
34383
- var SETUP_STEP_ORDER = [
34384
- "check_credentials",
34385
- "check_org_access",
34386
- "submit_compliance",
34387
- "wait_compliance",
34388
- "provision_phone",
34389
- "create_agent",
34390
- "validate_agent",
34391
- "seed_guardrails",
34392
- "ensure_widget",
34393
- "set_widget_domains",
34394
- "configure_voice",
34395
- "run_smoke_test",
34396
- "sim_certify_loop",
34397
- "widget_smoke",
34398
- "publish_agent",
34399
- "emit_summary"
34400
- ];
34378
+ // src/commands/setup-apply.ts
34401
34379
  function extractGuardrailsList(response) {
34402
34380
  if (Array.isArray(response)) return response;
34403
34381
  if (Array.isArray(response.guardrails)) return response.guardrails;
34404
34382
  if (Array.isArray(response.rules)) return response.rules;
34405
34383
  return [];
34406
34384
  }
34407
- function resolveResumeIndex(resumeFromRaw) {
34408
- if (!resumeFromRaw) {
34409
- return { resumeFrom: null, resumeIndex: 0 };
34410
- }
34411
- const resumeFrom = String(resumeFromRaw).trim();
34412
- const resumeIndex = SETUP_STEP_ORDER.indexOf(resumeFrom);
34413
- if (resumeIndex < 0) {
34414
- throw new FohError({
34415
- step: "setup.resume",
34416
- error: `Invalid --resume-from step: ${String(resumeFromRaw)}`,
34417
- remediation: `Use one of: ${SETUP_STEP_ORDER.join(", ")}`
34418
- });
34419
- }
34420
- return { resumeFrom, resumeIndex };
34421
- }
34422
- function nowIso() {
34423
- return (/* @__PURE__ */ new Date()).toISOString();
34424
- }
34425
- function timedStepResult(result, startedAtIso, startedAtMs) {
34426
- return {
34427
- ...result,
34428
- started_at: startedAtIso,
34429
- completed_at: nowIso(),
34430
- duration_ms: Math.max(0, Date.now() - startedAtMs)
34431
- };
34432
- }
34433
- function optionNameToFlag(key) {
34434
- return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
34435
- }
34436
- function normalizeSetupPhoneMode(raw) {
34437
- const value = String(raw || "purchase").trim().toLowerCase();
34438
- if (value === "observe" || value === "skip" || value === "purchase") return value;
34439
- throw new FohError({
34440
- step: "setup.phone_mode",
34441
- error: `Invalid --phone-mode "${String(raw)}"`,
34442
- remediation: "Use one of: observe, skip, purchase.",
34443
- reasonCode: "setup_invalid_phone_mode"
34444
- });
34445
- }
34446
- function complianceSkipDetail(phoneMode) {
34447
- return {
34448
- reason_code: `compliance_skipped_phone_mode_${phoneMode}`,
34449
- phone_mode: phoneMode,
34450
- spend_policy: resolveCliSpendPolicy(),
34451
- spend_class: "free",
34452
- safe_to_retry: true,
34453
- operator_note: "Compliance is only required before paid FOH-owned phone purchase."
34454
- };
34455
- }
34456
34385
  function isMissingAgentTestsError(error2) {
34457
34386
  if (!(error2 instanceof FohError)) return false;
34458
34387
  if (error2.statusCode !== 404) return false;
@@ -34510,6 +34439,11 @@ async function rebaseEvalAgentDraftFromTemplate(params) {
34510
34439
  draft_keys: Object.keys(draft).sort()
34511
34440
  };
34512
34441
  }
34442
+
34443
+ // src/commands/setup-missing-options.ts
34444
+ function optionNameToFlag(key) {
34445
+ return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
34446
+ }
34513
34447
  function buildMissingOptionsPlan(missing, opts) {
34514
34448
  const missingFlags = missing.map(optionNameToFlag);
34515
34449
  const signInUrl = buildConsoleSignInUrl(resolveConsoleBaseUrl(opts.consoleUrl));
@@ -34559,20 +34493,137 @@ function emitMissingOptionsPlan(missing, opts) {
34559
34493
  `);
34560
34494
  }
34561
34495
  }
34562
- function registerSetup(program3) {
34563
- program3.command("setup").description("Fully provision a new agency customer in one command").option("--org <id>", "Org ID (default: stored org from foh org use)").option("--agent-template <id>", "Agent template ID (e.g. viewing-request)").option("--agent-name <name>", "Name for the new agent").option("--phone-country <cc>", "Phone number country code", "GB").option("--phone-area-code <code>", "Phone area code preference").option("--phone-mode <mode>", "Phone path: observe, skip, or purchase", "purchase").option("--widget-domains <domains>", "Comma-separated widget domain allowlist").option("--voice-provider <p>", "TTS provider: openai, azure, twilio").option("--voice-id <id>", "Voice ID").option("--skip-compliance", "Skip compliance submission and wait").option("--skip-voice", "Skip voice configuration").option("--skip-tests", "Skip smoke tests").option("--cert-mode <m>", "Simulation cert mode: quick, full, stress", "quick").option("--cert-adaptive-runs <n>", "Adaptive run count for certification loop", "30").option("--cert-max-improvement-rounds <n>", "Max instruction improvement rounds in cert loop (0-5)", "1").option("--resume-from <step>", `Resume from a setup step (${SETUP_STEP_ORDER.join(", ")})`).option("--report-out <path>", "Optional path to write signed setup run report JSON").option("--dry-run", "Print all steps that would run without making any API calls").option("--api-url <url>", "API base URL override").option("--console-url <url>", "Console sign-in URL override").option("--json", "Output as JSON").action(async (opts) => {
34564
- if (!opts.org) {
34565
- try {
34566
- opts.org = loadCredentials(opts.apiUrl).orgId;
34567
- } catch {
34568
- }
34569
- }
34570
- const missing = ["org", "agentTemplate", "agentName"].filter((key) => !opts[key]);
34571
- if (missing.length) {
34572
- emitMissingOptionsPlan(missing, { json: opts.json, consoleUrl: opts.consoleUrl });
34573
- markCommandFailed(1);
34574
- return;
34575
- }
34496
+
34497
+ // src/commands/setup-plan.ts
34498
+ var SETUP_STEP_ORDER = [
34499
+ "check_credentials",
34500
+ "check_org_access",
34501
+ "submit_compliance",
34502
+ "wait_compliance",
34503
+ "provision_phone",
34504
+ "create_agent",
34505
+ "validate_agent",
34506
+ "seed_guardrails",
34507
+ "ensure_widget",
34508
+ "set_widget_domains",
34509
+ "configure_voice",
34510
+ "run_smoke_test",
34511
+ "sim_certify_loop",
34512
+ "widget_smoke",
34513
+ "publish_agent",
34514
+ "emit_summary"
34515
+ ];
34516
+ function resolveResumeIndex(resumeFromRaw) {
34517
+ if (!resumeFromRaw) {
34518
+ return { resumeFrom: null, resumeIndex: 0 };
34519
+ }
34520
+ const resumeFrom = String(resumeFromRaw).trim();
34521
+ const resumeIndex = SETUP_STEP_ORDER.indexOf(resumeFrom);
34522
+ if (resumeIndex < 0) {
34523
+ throw new FohError({
34524
+ step: "setup.resume",
34525
+ error: `Invalid --resume-from step: ${String(resumeFromRaw)}`,
34526
+ remediation: `Use one of: ${SETUP_STEP_ORDER.join(", ")}`
34527
+ });
34528
+ }
34529
+ return { resumeFrom, resumeIndex };
34530
+ }
34531
+ function nowIso() {
34532
+ return (/* @__PURE__ */ new Date()).toISOString();
34533
+ }
34534
+ function timedStepResult(result, startedAtIso, startedAtMs) {
34535
+ return {
34536
+ ...result,
34537
+ started_at: startedAtIso,
34538
+ completed_at: nowIso(),
34539
+ duration_ms: Math.max(0, Date.now() - startedAtMs)
34540
+ };
34541
+ }
34542
+ function normalizeSetupPhoneMode(raw) {
34543
+ const value = String(raw || "purchase").trim().toLowerCase();
34544
+ if (value === "observe" || value === "skip" || value === "purchase") return value;
34545
+ throw new FohError({
34546
+ step: "setup.phone_mode",
34547
+ error: `Invalid --phone-mode "${String(raw)}"`,
34548
+ remediation: "Use one of: observe, skip, purchase.",
34549
+ reasonCode: "setup_invalid_phone_mode"
34550
+ });
34551
+ }
34552
+ function complianceSkipDetail(phoneMode) {
34553
+ return {
34554
+ reason_code: `compliance_skipped_phone_mode_${phoneMode}`,
34555
+ phone_mode: phoneMode,
34556
+ spend_policy: resolveCliSpendPolicy(),
34557
+ spend_class: "free",
34558
+ safe_to_retry: true,
34559
+ operator_note: "Compliance is only required before paid FOH-owned phone purchase."
34560
+ };
34561
+ }
34562
+
34563
+ // src/lib/signed-report.ts
34564
+ var import_crypto2 = require("crypto");
34565
+ var import_fs5 = require("fs");
34566
+ var import_path3 = require("path");
34567
+ function canonicalize(value) {
34568
+ if (value === null || value === void 0) return null;
34569
+ if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
34570
+ if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
34571
+ if (typeof value === "object") {
34572
+ const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
34573
+ return Object.fromEntries(sortedEntries);
34574
+ }
34575
+ return String(value);
34576
+ }
34577
+ function stableStringify(value) {
34578
+ return JSON.stringify(canonicalize(value), null, 2) + "\n";
34579
+ }
34580
+ function sha256Hex(input) {
34581
+ return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
34582
+ }
34583
+ function signReport(reportPayload) {
34584
+ const canonical = stableStringify(reportPayload);
34585
+ return {
34586
+ ...reportPayload,
34587
+ report_hash: {
34588
+ algorithm: "sha256",
34589
+ digest_hex: sha256Hex(canonical),
34590
+ canonicalization: "sorted-json-v1",
34591
+ verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
34592
+ }
34593
+ };
34594
+ }
34595
+ function writeSignedJsonArtifact(path2, value) {
34596
+ const absolutePath = (0, import_path3.resolve)(path2);
34597
+ (0, import_fs5.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
34598
+ (0, import_fs5.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
34599
+ return absolutePath;
34600
+ }
34601
+
34602
+ // src/commands/setup-report.ts
34603
+ function writeSetupRunReport(reportPayload, reportOut) {
34604
+ const signed = signReport(reportPayload);
34605
+ const reportPath = reportOut ? writeSignedJsonArtifact(String(reportOut), signed) : null;
34606
+ return {
34607
+ reportHash: signed.report_hash.digest_hex,
34608
+ reportPath
34609
+ };
34610
+ }
34611
+
34612
+ // src/commands/setup.ts
34613
+ function registerSetup(program3) {
34614
+ program3.command("setup").description("Fully provision a new agency customer in one command").option("--org <id>", "Org ID (default: stored org from foh org use)").option("--agent-template <id>", "Agent template ID (e.g. viewing-request)").option("--agent-name <name>", "Name for the new agent").option("--phone-country <cc>", "Phone number country code", "GB").option("--phone-area-code <code>", "Phone area code preference").option("--phone-mode <mode>", "Phone path: observe, skip, or purchase", "purchase").option("--widget-domains <domains>", "Comma-separated widget domain allowlist").option("--voice-provider <p>", "TTS provider: openai, azure, twilio").option("--voice-id <id>", "Voice ID").option("--skip-compliance", "Skip compliance submission and wait").option("--skip-voice", "Skip voice configuration").option("--skip-tests", "Skip smoke tests").option("--cert-mode <m>", "Simulation cert mode: quick, full, stress", "quick").option("--cert-adaptive-runs <n>", "Adaptive run count for certification loop", "30").option("--cert-max-improvement-rounds <n>", "Max instruction improvement rounds in cert loop (0-5)", "1").option("--resume-from <step>", `Resume from a setup step (${SETUP_STEP_ORDER.join(", ")})`).option("--report-out <path>", "Optional path to write signed setup run report JSON").option("--dry-run", "Print all steps that would run without making any API calls").option("--api-url <url>", "API base URL override").option("--console-url <url>", "Console sign-in URL override").option("--json", "Output as JSON").action(async (opts) => {
34615
+ if (!opts.org) {
34616
+ try {
34617
+ opts.org = loadCredentials(opts.apiUrl).orgId;
34618
+ } catch {
34619
+ }
34620
+ }
34621
+ const missing = ["org", "agentTemplate", "agentName"].filter((key) => !opts[key]);
34622
+ if (missing.length) {
34623
+ emitMissingOptionsPlan(missing, { json: opts.json, consoleUrl: opts.consoleUrl });
34624
+ markCommandFailed(1);
34625
+ return;
34626
+ }
34576
34627
  let resumeState;
34577
34628
  try {
34578
34629
  resumeState = resolveResumeIndex(opts.resumeFrom);
@@ -34644,12 +34695,7 @@ function registerSetup(program3) {
34644
34695
  steps: completed,
34645
34696
  failure: failure ?? null
34646
34697
  };
34647
- const signed = signReport(reportPayload);
34648
- const reportPath = opts.reportOut ? writeSignedJsonArtifact(String(opts.reportOut), signed) : null;
34649
- return {
34650
- reportHash: signed.report_hash.digest_hex,
34651
- reportPath
34652
- };
34698
+ return writeSetupRunReport(reportPayload, opts.reportOut);
34653
34699
  };
34654
34700
  const shouldResumeSkip = (stepName) => {
34655
34701
  if (!resumeState.resumeFrom) return false;
@@ -35140,8 +35186,8 @@ function registerSetup(program3) {
35140
35186
  }
35141
35187
  try {
35142
35188
  const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
35143
- const { writeFileSync: writeFileSync12 } = await import("fs");
35144
- writeFileSync12(
35189
+ const { writeFileSync: writeFileSync13 } = await import("fs");
35190
+ writeFileSync13(
35145
35191
  "tenant.yaml",
35146
35192
  `# tenant.yaml - Front Of House agent manifest
35147
35193
  # Edit this file and run: foh plan tenant.yaml
@@ -35311,8 +35357,8 @@ function registerSim(program3) {
35311
35357
  }
35312
35358
  const cert = response.certificate;
35313
35359
  if (opts.out) {
35314
- const { writeFileSync: writeFileSync12 } = await import("fs");
35315
- writeFileSync12(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35360
+ const { writeFileSync: writeFileSync13 } = await import("fs");
35361
+ writeFileSync13(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
35316
35362
  process.stderr.write(` Certificate written to ${opts.out}
35317
35363
  `);
35318
35364
  }
@@ -35362,8 +35408,8 @@ function registerSim(program3) {
35362
35408
  });
35363
35409
  }
35364
35410
  if (opts.out) {
35365
- const { writeFileSync: writeFileSync12 } = await import("fs");
35366
- writeFileSync12(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35411
+ const { writeFileSync: writeFileSync13 } = await import("fs");
35412
+ writeFileSync13(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
35367
35413
  process.stderr.write(` Final certificate written to ${opts.out}
35368
35414
  `);
35369
35415
  }
@@ -38967,9 +39013,9 @@ function registerUpdate(program3) {
38967
39013
  }
38968
39014
 
38969
39015
  // src/commands/eval.ts
38970
- var import_fs16 = require("fs");
38971
- var import_path14 = require("path");
38972
- var import_child_process5 = require("child_process");
39016
+ var import_fs19 = require("fs");
39017
+ var import_path18 = require("path");
39018
+ var import_child_process6 = require("child_process");
38973
39019
 
38974
39020
  // src/lib/external-agent-artifact-safety.ts
38975
39021
  var import_fs12 = require("fs");
@@ -39303,63 +39349,13 @@ function readCommandRecords(runDir) {
39303
39349
  }
39304
39350
 
39305
39351
  // src/lib/external-agent-executor.ts
39306
- var import_fs15 = require("fs");
39352
+ var import_fs18 = require("fs");
39307
39353
  var import_os2 = require("os");
39308
- var import_path13 = require("path");
39309
- var import_child_process4 = require("child_process");
39354
+ var import_path17 = require("path");
39355
+ var import_child_process5 = require("child_process");
39310
39356
 
39311
- // src/lib/external-agent-metadata.ts
39312
- var import_fs14 = require("fs");
39357
+ // src/lib/external-agent-executor-env.ts
39313
39358
  var import_path12 = require("path");
39314
- var EXTERNAL_AGENT_METADATA_FILENAMES = [
39315
- "external-agent-metadata.json",
39316
- "agent-metadata.json"
39317
- ];
39318
- var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
39319
- function normalizeDocUrl(value) {
39320
- const raw = typeof value === "string" ? value : value && typeof value === "object" && typeof value.url === "string" ? String(value.url) : "";
39321
- const url2 = raw.trim().replace(/[.?!:]+$/g, "");
39322
- if (!PUBLIC_DOC_URL_RE.test(url2)) return null;
39323
- return url2;
39324
- }
39325
- function collectDocsFrom(value, docs) {
39326
- if (Array.isArray(value)) {
39327
- for (const entry of value) {
39328
- const url2 = normalizeDocUrl(entry);
39329
- if (url2) docs.add(url2);
39330
- }
39331
- }
39332
- }
39333
- function readExternalAgentMetadata(runDir) {
39334
- for (const filename of EXTERNAL_AGENT_METADATA_FILENAMES) {
39335
- const path2 = (0, import_path12.join)(runDir, filename);
39336
- if (!(0, import_fs14.existsSync)(path2)) continue;
39337
- try {
39338
- const parsed = JSON.parse((0, import_fs14.readFileSync)(path2, "utf8"));
39339
- const docs = /* @__PURE__ */ new Set();
39340
- collectDocsFrom(parsed.docs_pages_used, docs);
39341
- collectDocsFrom(parsed.docs_pages_observed, docs);
39342
- collectDocsFrom(parsed.docs_used, docs);
39343
- collectDocsFrom(parsed.public_docs_used, docs);
39344
- return {
39345
- path: filename,
39346
- docs_pages_used: Array.from(docs).sort()
39347
- };
39348
- } catch {
39349
- return {
39350
- path: filename,
39351
- docs_pages_used: []
39352
- };
39353
- }
39354
- }
39355
- return {
39356
- path: null,
39357
- docs_pages_used: []
39358
- };
39359
- }
39360
-
39361
- // src/lib/external-agent-executor.ts
39362
- var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
39363
39359
  var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
39364
39360
  "SUPABASE_",
39365
39361
  "DATABASE_",
@@ -39403,15 +39399,6 @@ var EXTERNAL_AGENT_EVAL_AUTH_ENV_MAP = {
39403
39399
  FOH_EXTERNAL_AGENT_EVAL_API_URL: "FOH_API_URL",
39404
39400
  FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT: "FOH_TOKEN_EXPIRES_AT"
39405
39401
  };
39406
- var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
39407
- var ExternalAgentExecutorError = class extends Error {
39408
- reasonCode;
39409
- constructor(reasonCode, message) {
39410
- super(message);
39411
- this.name = "ExternalAgentExecutorError";
39412
- this.reasonCode = reasonCode;
39413
- }
39414
- };
39415
39402
  function isDeniedEnvKey(key) {
39416
39403
  const upper = key.toUpperCase();
39417
39404
  if (CODEX_EXECUTOR_DENIED_ENV_NAMES.some((name) => upper === name)) return true;
@@ -39432,7 +39419,7 @@ function buildCodexExecutorEnv(input) {
39432
39419
  env[childKey] = value;
39433
39420
  }
39434
39421
  }
39435
- env.npm_config_cache = (0, import_path13.join)((0, import_path13.dirname)(input.runDir), ".npm-cache");
39422
+ env.npm_config_cache = (0, import_path12.join)((0, import_path12.dirname)(input.runDir), ".npm-cache");
39436
39423
  env.npm_config_prefer_online = "true";
39437
39424
  env.npm_config_update_notifier = "false";
39438
39425
  env.npm_config_yes = "true";
@@ -39442,21 +39429,384 @@ function buildCodexExecutorEnv(input) {
39442
39429
  env.FOH_CLI_SUPPRESS_BANNER = "1";
39443
39430
  return env;
39444
39431
  }
39445
- function readExternalAgentEvalAuthEnv(env = process.env) {
39432
+
39433
+ // src/lib/external-agent-executor-artifacts.ts
39434
+ var import_fs15 = require("fs");
39435
+ var import_path14 = require("path");
39436
+
39437
+ // src/lib/external-agent-metadata.ts
39438
+ var import_fs14 = require("fs");
39439
+ var import_path13 = require("path");
39440
+ var EXTERNAL_AGENT_METADATA_FILENAMES = [
39441
+ "external-agent-metadata.json",
39442
+ "agent-metadata.json"
39443
+ ];
39444
+ var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
39445
+ function normalizeDocUrl(value) {
39446
+ const raw = typeof value === "string" ? value : value && typeof value === "object" && typeof value.url === "string" ? String(value.url) : "";
39447
+ const url2 = raw.trim().replace(/[.?!:]+$/g, "");
39448
+ if (!PUBLIC_DOC_URL_RE.test(url2)) return null;
39449
+ return url2;
39450
+ }
39451
+ function collectDocsFrom(value, docs) {
39452
+ if (Array.isArray(value)) {
39453
+ for (const entry of value) {
39454
+ const url2 = normalizeDocUrl(entry);
39455
+ if (url2) docs.add(url2);
39456
+ }
39457
+ }
39458
+ }
39459
+ function readExternalAgentMetadata(runDir) {
39460
+ for (const filename of EXTERNAL_AGENT_METADATA_FILENAMES) {
39461
+ const path2 = (0, import_path13.join)(runDir, filename);
39462
+ if (!(0, import_fs14.existsSync)(path2)) continue;
39463
+ try {
39464
+ const parsed = JSON.parse((0, import_fs14.readFileSync)(path2, "utf8"));
39465
+ const docs = /* @__PURE__ */ new Set();
39466
+ collectDocsFrom(parsed.docs_pages_used, docs);
39467
+ collectDocsFrom(parsed.docs_pages_observed, docs);
39468
+ collectDocsFrom(parsed.docs_used, docs);
39469
+ collectDocsFrom(parsed.public_docs_used, docs);
39470
+ return {
39471
+ path: filename,
39472
+ docs_pages_used: Array.from(docs).sort()
39473
+ };
39474
+ } catch {
39475
+ return {
39476
+ path: filename,
39477
+ docs_pages_used: []
39478
+ };
39479
+ }
39480
+ }
39446
39481
  return {
39447
- token: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || "").trim(),
39448
- orgId: String(env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || "").trim(),
39449
- apiUrl: String(env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_API_URL || DEFAULT_FOH_API_URL2).trim() || DEFAULT_FOH_API_URL2,
39450
- expiresAt: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT || "").trim()
39482
+ path: null,
39483
+ docs_pages_used: []
39451
39484
  };
39452
39485
  }
39453
- function shouldRunExternalAgentEvalAuthPreflight(env = process.env) {
39454
- return Boolean(
39455
- env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT || env.FOH_SERVICE_TOKEN || env.FOH_ORG_ID || env.FOH_API_URL || env.FOH_TOKEN_EXPIRES_AT
39456
- );
39486
+
39487
+ // src/lib/external-agent-executor-artifacts.ts
39488
+ function redactArtifactFile(path2, input = {}) {
39489
+ if (!(0, import_fs15.existsSync)(path2)) return;
39490
+ const original = (0, import_fs15.readFileSync)(path2, "utf8");
39491
+ const redacted = redactExternalAgentArtifactText(original, input);
39492
+ if (redacted !== original) (0, import_fs15.writeFileSync)(path2, redacted, "utf8");
39457
39493
  }
39458
- async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}) {
39459
- const hasExplicitEvalAuth = Boolean(
39494
+ function redactExternalAgentOutputArtifacts(run, input = {}) {
39495
+ redactArtifactFile(run.outputs.jsonl, input);
39496
+ redactArtifactFile(run.outputs.last_message, input);
39497
+ redactArtifactFile(run.outputs.stderr, input);
39498
+ redactArtifactFile((0, import_path14.join)(run.run_dir, "commands.ndjson"), input);
39499
+ if (!(0, import_fs15.existsSync)(run.run_dir)) return;
39500
+ for (const name of (0, import_fs15.readdirSync)(run.run_dir)) {
39501
+ if (name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")) {
39502
+ redactArtifactFile((0, import_path14.join)(run.run_dir, name), input);
39503
+ }
39504
+ }
39505
+ }
39506
+ function copyExternalAgentCommandCaptureArtifacts(input) {
39507
+ const commandLog = (0, import_path14.join)(input.captureDir, "commands.ndjson");
39508
+ if ((0, import_fs15.existsSync)(commandLog)) {
39509
+ (0, import_fs15.writeFileSync)((0, import_path14.join)(input.runDir, "commands.ndjson"), (0, import_fs15.readFileSync)(commandLog, "utf8"), "utf8");
39510
+ }
39511
+ for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
39512
+ if (name.startsWith("command-output-cmd_")) {
39513
+ (0, import_fs15.copyFileSync)((0, import_path14.join)(input.captureDir, name), (0, import_path14.join)(input.runDir, name));
39514
+ } else if (EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
39515
+ (0, import_fs15.copyFileSync)((0, import_path14.join)(input.captureDir, name), (0, import_path14.join)(input.runDir, name));
39516
+ }
39517
+ }
39518
+ }
39519
+
39520
+ // src/lib/external-agent-executor-classification.ts
39521
+ var import_fs16 = require("fs");
39522
+ var import_path15 = require("path");
39523
+ function proofArtifactPasses(runDir) {
39524
+ const proofPath = (0, import_path15.join)(runDir, "proof.json");
39525
+ if (!(0, import_fs16.existsSync)(proofPath)) return false;
39526
+ try {
39527
+ const parsed = JSON.parse((0, import_fs16.readFileSync)(proofPath, "utf8"));
39528
+ return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
39529
+ } catch {
39530
+ return false;
39531
+ }
39532
+ }
39533
+ function readIfExists(path2) {
39534
+ return (0, import_fs16.existsSync)(path2) ? (0, import_fs16.readFileSync)(path2, "utf8") : "";
39535
+ }
39536
+ function relativeArtifactName(path2) {
39537
+ return (0, import_path15.basename)(path2);
39538
+ }
39539
+ function externalAgentSummaryCommand(root) {
39540
+ return [
39541
+ "node",
39542
+ "scripts/summarize-external-agent-runs.mjs",
39543
+ "--root",
39544
+ quoteShellArg(root),
39545
+ "--out",
39546
+ quoteShellArg((0, import_path15.join)(root, "latest-summary.json")),
39547
+ "--report",
39548
+ quoteShellArg((0, import_path15.join)(root, "summary.report.json"))
39549
+ ].join(" ");
39550
+ }
39551
+ function quoteShellArg(value) {
39552
+ const text = String(value);
39553
+ if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
39554
+ return `"${text.replace(/(["$`])/g, "\\$1")}"`;
39555
+ }
39556
+ function classifyExternalAgentRun(input) {
39557
+ if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
39558
+ if (!input.artifactSafetyOk) return { status: "fail", reasonCode: "external_agent_artifact_safety_blocked" };
39559
+ const completedCommands = readCommandRecords(input.run.run_dir).filter((record2) => record2.phase === "completed");
39560
+ const observedVersions = completedCommands.map((record2) => String(record2.cli_version || "").trim()).filter((version2) => /^\d+\.\d+\.\d+$/.test(version2));
39561
+ if (observedVersions.some((version2) => version2 !== CLI_VERSION)) {
39562
+ return { status: "hold", reasonCode: "external_agent_cli_version_drift" };
39563
+ }
39564
+ const commandReasonCodes = completedCommands.flatMap((record2) => [
39565
+ String(record2.reason_code || ""),
39566
+ ...Array.isArray(record2.check_reason_codes) ? record2.check_reason_codes.map((code) => String(code || "")) : []
39567
+ ]).filter(Boolean);
39568
+ const hasCommandReason = (pattern) => commandReasonCodes.some((reason) => pattern.test(reason));
39569
+ if (hasCommandReason(new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i"))) {
39570
+ return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
39571
+ }
39572
+ if (hasCommandReason(/provider_capacity_blocked/i)) {
39573
+ return { status: "hold", reasonCode: "provider_capacity_blocked" };
39574
+ }
39575
+ if (hasCommandReason(/byon_voice_number_not_configured/i)) {
39576
+ return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
39577
+ }
39578
+ if (hasCommandReason(/contact_phone_provisioning_failed/i)) {
39579
+ return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
39580
+ }
39581
+ if (hasCommandReason(/voice_contact_expected_no_spend_hold/i)) {
39582
+ return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
39583
+ }
39584
+ if (hasCommandReason(/contact_phone_missing/i)) {
39585
+ return { status: "hold", reasonCode: "voice_contact_phone_missing" };
39586
+ }
39587
+ if (hasCommandReason(/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i)) {
39588
+ return { status: "hold", reasonCode: "simulation_certification_failed" };
39589
+ }
39590
+ if (hasCommandReason(/proof_held/i)) {
39591
+ return { status: "hold", reasonCode: "external_agent_proof_held" };
39592
+ }
39593
+ if (hasCommandReason(/agent_limit_reached/i)) {
39594
+ return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
39595
+ }
39596
+ const lastMessage = readIfExists(input.run.outputs.last_message);
39597
+ const stderr = readIfExists(input.run.outputs.stderr);
39598
+ const combined = `${lastMessage}
39599
+ ${stderr}`;
39600
+ if (/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i.test(combined)) {
39601
+ return { status: "fail", reasonCode: "private_repo_assumption_detected" };
39602
+ }
39603
+ if (/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i.test(combined)) {
39604
+ return { status: "hold", reasonCode: "codex_exec_policy_blocked" };
39605
+ }
39606
+ if (/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i.test(combined)) {
39607
+ return { status: "hold", reasonCode: "codex_sandbox_exec_blocked" };
39608
+ }
39609
+ if (/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i.test(combined)) {
39610
+ return { status: "hold", reasonCode: "codex_network_dns_blocked" };
39611
+ }
39612
+ if (new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i").test(combined)) {
39613
+ return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
39614
+ }
39615
+ if (/provider_capacity_blocked/i.test(combined)) {
39616
+ return { status: "hold", reasonCode: "provider_capacity_blocked" };
39617
+ }
39618
+ if (/byon_voice_number_not_configured/i.test(combined)) {
39619
+ return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
39620
+ }
39621
+ if (/contact_phone_provisioning_failed/i.test(combined)) {
39622
+ return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
39623
+ }
39624
+ if (/voice_contact_expected_no_spend_hold/i.test(combined)) {
39625
+ return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
39626
+ }
39627
+ if (/contact_phone_missing/i.test(combined)) {
39628
+ return { status: "hold", reasonCode: "voice_contact_phone_missing" };
39629
+ }
39630
+ if (/simulation_certification_failed/i.test(combined)) {
39631
+ return { status: "hold", reasonCode: "simulation_certification_failed" };
39632
+ }
39633
+ if (/proof_held/i.test(combined)) {
39634
+ return { status: "hold", reasonCode: "external_agent_proof_held" };
39635
+ }
39636
+ if (/agent_limit_reached/i.test(combined)) {
39637
+ return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
39638
+ }
39639
+ if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(input.run.run_dir)) {
39640
+ return { status: "hold", reasonCode: "auth_browser_approval_required" };
39641
+ }
39642
+ if (input.exitCode !== 0) return { status: "hold", reasonCode: `${input.run.command}_runner_nonzero_exit` };
39643
+ if (proofArtifactPasses(input.run.run_dir)) return { status: "pass", reasonCode: null };
39644
+ return { status: "hold", reasonCode: "external_agent_proof_artifact_missing" };
39645
+ }
39646
+ function buildExecutedExternalAgentRunArtifact(input) {
39647
+ const commands = readCommandRecords(input.run.run_dir);
39648
+ const agentMetadata = readExternalAgentMetadata(input.run.run_dir);
39649
+ return {
39650
+ schema_version: "external_agent_run.v1",
39651
+ run_id: input.run.run_id,
39652
+ status: input.status,
39653
+ failure_reason_code: input.reasonCode,
39654
+ model_provider: input.run.model_provider,
39655
+ model_name: input.run.model_name,
39656
+ runner_model: input.run.runner_model,
39657
+ agent_shell: `${input.run.command}-exec`,
39658
+ workspace_type: "clean-no-repo-programmatic",
39659
+ prompt_version: input.run.prompt_version,
39660
+ prompt_path: "prompt.txt",
39661
+ started_at: input.startedAt,
39662
+ ended_at: input.endedAt,
39663
+ manual_intervention_count: 0,
39664
+ manual_interventions: [],
39665
+ environment: {
39666
+ os: process.platform,
39667
+ node_version: process.version,
39668
+ npm_version: null,
39669
+ foh_cli_version: CLI_VERSION,
39670
+ runner_exit_code: input.exitCode,
39671
+ runner_timed_out: input.timedOut,
39672
+ duration_ms: input.durationMs
39673
+ },
39674
+ public_entrypoints: [
39675
+ "https://frontofhouse.okii.uk",
39676
+ "https://frontofhouse.okii.uk/llms.txt",
39677
+ "https://frontofhouse.okii.uk/openapi.yaml",
39678
+ "npx --yes @f-o-h/cli@latest"
39679
+ ],
39680
+ commands_run: commands.map((command) => command.command),
39681
+ docs_pages_used: agentMetadata.docs_pages_used,
39682
+ eval_state: {
39683
+ lifecycle_strategy: "reuse_existing_eval_state",
39684
+ org_reuse_expected: true,
39685
+ agent_reuse_expected: true,
39686
+ widget_reuse_expected: true,
39687
+ fresh_org_expected: false,
39688
+ ephemeral_org_expected: false,
39689
+ fresh_agent_expected: false,
39690
+ phone_purchase_expected: false,
39691
+ paid_resource_creation_expected: false,
39692
+ spend_policy_expected: NO_SPEND_POLICY,
39693
+ cleanup_expected: false,
39694
+ cleanup_strategy: "no_cleanup_for_reused_eval_state",
39695
+ paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
39696
+ rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
39697
+ },
39698
+ artifacts: {
39699
+ terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
39700
+ command_log: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
39701
+ proof_bundle: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
39702
+ replay_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
39703
+ knowledge_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
39704
+ improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
39705
+ agent_metadata: agentMetadata.path,
39706
+ notes: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
39707
+ runner_last_message: relativeArtifactName(input.run.outputs.last_message),
39708
+ runner_stderr: relativeArtifactName(input.run.outputs.stderr),
39709
+ codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
39710
+ codex_stderr: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.stderr) : null,
39711
+ artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
39712
+ },
39713
+ summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
39714
+ next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))] : [
39715
+ "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
39716
+ "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
39717
+ externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))
39718
+ ]
39719
+ };
39720
+ }
39721
+
39722
+ // src/lib/external-agent-runner-execution.ts
39723
+ var import_child_process4 = require("child_process");
39724
+ var import_fs17 = require("fs");
39725
+ var import_path16 = require("path");
39726
+ function buildCommandInvocation(command, args) {
39727
+ if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
39728
+ const binDir = (0, import_path16.dirname)(command);
39729
+ const codexEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
39730
+ if ((0, import_fs17.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
39731
+ const geminiEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
39732
+ if ((0, import_fs17.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
39733
+ }
39734
+ return { command, args };
39735
+ }
39736
+ function spawnExternalAgentRunner(input) {
39737
+ return new Promise((resolveRun) => {
39738
+ const started = Date.now();
39739
+ const commandInvocation = buildCommandInvocation(input.command, input.args);
39740
+ const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
39741
+ cwd: input.cwd,
39742
+ env: input.env,
39743
+ shell: false,
39744
+ stdio: ["pipe", "pipe", "pipe"],
39745
+ windowsHide: true
39746
+ });
39747
+ const stdout = (0, import_fs17.createWriteStream)(input.stdoutPath, { flags: "w" });
39748
+ const stderr = (0, import_fs17.createWriteStream)(input.stderrPath, { flags: "w" });
39749
+ child.stdout.pipe(stdout);
39750
+ child.stderr.pipe(stderr);
39751
+ child.stdin.end(input.prompt);
39752
+ let timedOut = false;
39753
+ const timer = setTimeout(() => {
39754
+ timedOut = true;
39755
+ if (child.pid && process.platform === "win32") {
39756
+ (0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
39757
+ } else {
39758
+ child.kill("SIGKILL");
39759
+ }
39760
+ }, input.timeoutMs);
39761
+ child.on("close", (exitCode) => {
39762
+ clearTimeout(timer);
39763
+ stdout.end();
39764
+ stderr.end();
39765
+ resolveRun({
39766
+ exitCode,
39767
+ timedOut,
39768
+ durationMs: Date.now() - started
39769
+ });
39770
+ });
39771
+ child.on("error", () => {
39772
+ clearTimeout(timer);
39773
+ stdout.end();
39774
+ stderr.end();
39775
+ resolveRun({
39776
+ exitCode: null,
39777
+ timedOut,
39778
+ durationMs: Date.now() - started
39779
+ });
39780
+ });
39781
+ });
39782
+ }
39783
+
39784
+ // src/lib/external-agent-executor.ts
39785
+ var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
39786
+ var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
39787
+ var ExternalAgentExecutorError = class extends Error {
39788
+ reasonCode;
39789
+ constructor(reasonCode, message) {
39790
+ super(message);
39791
+ this.name = "ExternalAgentExecutorError";
39792
+ this.reasonCode = reasonCode;
39793
+ }
39794
+ };
39795
+ function readExternalAgentEvalAuthEnv(env = process.env) {
39796
+ return {
39797
+ token: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || "").trim(),
39798
+ orgId: String(env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || "").trim(),
39799
+ apiUrl: String(env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_API_URL || DEFAULT_FOH_API_URL2).trim() || DEFAULT_FOH_API_URL2,
39800
+ expiresAt: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT || "").trim()
39801
+ };
39802
+ }
39803
+ function shouldRunExternalAgentEvalAuthPreflight(env = process.env) {
39804
+ return Boolean(
39805
+ env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT || env.FOH_SERVICE_TOKEN || env.FOH_ORG_ID || env.FOH_API_URL || env.FOH_TOKEN_EXPIRES_AT
39806
+ );
39807
+ }
39808
+ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}) {
39809
+ const hasExplicitEvalAuth = Boolean(
39460
39810
  env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT
39461
39811
  );
39462
39812
  if (!shouldRunExternalAgentEvalAuthPreflight(env) && !options.requireExplicitEvalAuth) return null;
@@ -39507,14 +39857,14 @@ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}
39507
39857
  };
39508
39858
  }
39509
39859
  function normalizeForCompare(path2) {
39510
- const resolved = (0, import_path13.resolve)(path2);
39860
+ const resolved = (0, import_path17.resolve)(path2);
39511
39861
  return process.platform === "win32" ? resolved.toLowerCase() : resolved;
39512
39862
  }
39513
39863
  function isPathInside(childPath, parentPath) {
39514
39864
  const child = normalizeForCompare(childPath);
39515
39865
  const parent = normalizeForCompare(parentPath);
39516
- const rel = (0, import_path13.relative)(parent, child);
39517
- return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path13.isAbsolute)(rel);
39866
+ const rel = (0, import_path17.relative)(parent, child);
39867
+ return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path17.isAbsolute)(rel);
39518
39868
  }
39519
39869
  function requireString(value, field) {
39520
39870
  if (typeof value !== "string" || value.trim() === "") {
@@ -39523,10 +39873,10 @@ function requireString(value, field) {
39523
39873
  return value;
39524
39874
  }
39525
39875
  function readBatch(batchPath) {
39526
- if (!(0, import_fs15.existsSync)(batchPath)) {
39876
+ if (!(0, import_fs18.existsSync)(batchPath)) {
39527
39877
  throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
39528
39878
  }
39529
- const parsed = JSON.parse((0, import_fs15.readFileSync)(batchPath, "utf8"));
39879
+ const parsed = JSON.parse((0, import_fs18.readFileSync)(batchPath, "utf8"));
39530
39880
  if (parsed.schema_version !== "external_agent_batch_plan.v1") {
39531
39881
  throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
39532
39882
  }
@@ -39541,11 +39891,11 @@ function defaultRunnerProbe(command, args) {
39541
39891
  encoding: "utf8",
39542
39892
  timeout: isGeminiHeadlessSmoke ? GEMINI_HEADLESS_PROBE_TIMEOUT_MS : void 0
39543
39893
  };
39544
- const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process4.spawnSync)(
39894
+ const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process5.spawnSync)(
39545
39895
  "powershell.exe",
39546
39896
  ["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${[command, ...args].map(quotePowerShellArg).join(" ")}`],
39547
39897
  spawnOptions
39548
- ) : (0, import_child_process4.spawnSync)(command, args, spawnOptions);
39898
+ ) : (0, import_child_process5.spawnSync)(command, args, spawnOptions);
39549
39899
  return {
39550
39900
  status: typeof result.status === "number" ? result.status : null,
39551
39901
  stdout: String(result.stdout || ""),
@@ -39559,29 +39909,12 @@ function geminiCapacityUnavailable(text) {
39559
39909
  function quotePowerShellArg(value) {
39560
39910
  return `'${value.replace(/'/g, "''")}'`;
39561
39911
  }
39562
- function quoteShellArg(value) {
39563
- const text = String(value);
39564
- if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
39565
- return `"${text.replace(/(["$`])/g, "\\$1")}"`;
39566
- }
39567
- function externalAgentSummaryCommand(root) {
39568
- return [
39569
- "node",
39570
- "scripts/summarize-external-agent-runs.mjs",
39571
- "--root",
39572
- quoteShellArg(root),
39573
- "--out",
39574
- quoteShellArg((0, import_path13.join)(root, "latest-summary.json")),
39575
- "--report",
39576
- quoteShellArg((0, import_path13.join)(root, "summary.report.json"))
39577
- ].join(" ");
39578
- }
39579
39912
  function resolveCodexProbeCommand() {
39580
39913
  if (process.platform !== "win32") return "codex";
39581
39914
  const appData = process.env.APPDATA;
39582
39915
  if (appData) {
39583
- const appDataShim = (0, import_path13.join)(appData, "npm", "codex.cmd");
39584
- if ((0, import_fs15.existsSync)(appDataShim)) return appDataShim;
39916
+ const appDataShim = (0, import_path17.join)(appData, "npm", "codex.cmd");
39917
+ if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
39585
39918
  }
39586
39919
  return "codex.cmd";
39587
39920
  }
@@ -39592,8 +39925,8 @@ function resolveGeminiProbeCommand() {
39592
39925
  if (process.platform !== "win32") return "gemini";
39593
39926
  const appData = process.env.APPDATA;
39594
39927
  if (appData) {
39595
- const appDataShim = (0, import_path13.join)(appData, "npm", "gemini.cmd");
39596
- if ((0, import_fs15.existsSync)(appDataShim)) return appDataShim;
39928
+ const appDataShim = (0, import_path17.join)(appData, "npm", "gemini.cmd");
39929
+ if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
39597
39930
  }
39598
39931
  return "gemini.cmd";
39599
39932
  }
@@ -39864,34 +40197,34 @@ function safeRunId(value) {
39864
40197
  return value.toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "run";
39865
40198
  }
39866
40199
  function resolveWorkspaceRoot(input) {
39867
- if (input.workspaceRoot) return (0, import_path13.resolve)(input.workspaceRoot);
39868
- const batchStem = (0, import_path13.basename)((0, import_path13.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
39869
- const repoStem = (0, import_path13.basename)((0, import_path13.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
39870
- return (0, import_path13.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
40200
+ if (input.workspaceRoot) return (0, import_path17.resolve)(input.workspaceRoot);
40201
+ const batchStem = (0, import_path17.basename)((0, import_path17.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40202
+ const repoStem = (0, import_path17.basename)((0, import_path17.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
40203
+ return (0, import_path17.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
39871
40204
  }
39872
40205
  function findNearestGitRoot(startPath) {
39873
- let current = (0, import_path13.resolve)(startPath);
40206
+ let current = (0, import_path17.resolve)(startPath);
39874
40207
  while (true) {
39875
- if ((0, import_fs15.existsSync)((0, import_path13.join)(current, ".git"))) return current;
39876
- const parent = (0, import_path13.dirname)(current);
40208
+ if ((0, import_fs18.existsSync)((0, import_path17.join)(current, ".git"))) return current;
40209
+ const parent = (0, import_path17.dirname)(current);
39877
40210
  if (parent === current) return null;
39878
40211
  current = parent;
39879
40212
  }
39880
40213
  }
39881
40214
  function resolvePrivateRepoRoot(input) {
39882
40215
  if (input.explicitPrivateRepoRoot) {
39883
- return { root: (0, import_path13.resolve)(input.explicitPrivateRepoRoot), explicit: true };
40216
+ return { root: (0, import_path17.resolve)(input.explicitPrivateRepoRoot), explicit: true };
39884
40217
  }
39885
- const cwd = (0, import_path13.resolve)(input.cwd || process.cwd());
40218
+ const cwd = (0, import_path17.resolve)(input.cwd || process.cwd());
39886
40219
  const gitRoot = findNearestGitRoot(cwd);
39887
40220
  if (gitRoot) return { root: gitRoot, explicit: false };
39888
40221
  return {
39889
- root: (0, import_path13.join)(cwd, ".foh-no-private-repo-root-sentinel"),
40222
+ root: (0, import_path17.join)(cwd, ".foh-no-private-repo-root-sentinel"),
39890
40223
  explicit: false
39891
40224
  };
39892
40225
  }
39893
40226
  function promptVersionFromPath(promptPath) {
39894
- const raw = (0, import_fs15.readFileSync)(promptPath, "utf8");
40227
+ const raw = (0, import_fs18.readFileSync)(promptPath, "utf8");
39895
40228
  if (raw.includes("Do not assume access to the private source repository")) return "blank-setup.v1";
39896
40229
  return "unknown";
39897
40230
  }
@@ -39900,7 +40233,7 @@ function createExternalAgentExecutorPlan(options) {
39900
40233
  if (runner !== "codex" && runner !== "gemini") {
39901
40234
  throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
39902
40235
  }
39903
- const batchPath = (0, import_path13.resolve)(options.batchPath);
40236
+ const batchPath = (0, import_path17.resolve)(options.batchPath);
39904
40237
  const batch = readBatch(batchPath);
39905
40238
  const runnerProbe = validateRunner(options, runner);
39906
40239
  const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
@@ -39919,17 +40252,17 @@ function createExternalAgentExecutorPlan(options) {
39919
40252
  `Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
39920
40253
  );
39921
40254
  }
39922
- (0, import_fs15.mkdirSync)(workspaceRoot, { recursive: true });
39923
- const batchDir = (0, import_path13.resolve)(String(batch.batch_dir || (0, import_path13.resolve)(batchPath, "..")));
40255
+ (0, import_fs18.mkdirSync)(workspaceRoot, { recursive: true });
40256
+ const batchDir = (0, import_path17.resolve)(String(batch.batch_dir || (0, import_path17.resolve)(batchPath, "..")));
39924
40257
  const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
39925
40258
  const runs = batch.runs.map((run) => {
39926
40259
  const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
39927
- const runDir = (0, import_path13.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
39928
- const promptPath = (0, import_path13.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
39929
- const workspaceDir = (0, import_path13.join)(workspaceRoot, runId);
39930
- (0, import_fs15.mkdirSync)(workspaceDir, { recursive: true });
39931
- (0, import_fs15.writeFileSync)(
39932
- (0, import_path13.join)(workspaceDir, "README.md"),
40260
+ const runDir = (0, import_path17.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
40261
+ const promptPath = (0, import_path17.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
40262
+ const workspaceDir = (0, import_path17.join)(workspaceRoot, runId);
40263
+ (0, import_fs18.mkdirSync)(workspaceDir, { recursive: true });
40264
+ (0, import_fs18.writeFileSync)(
40265
+ (0, import_path17.join)(workspaceDir, "README.md"),
39933
40266
  [
39934
40267
  "# FOH External-Agent Workspace",
39935
40268
  "",
@@ -39947,11 +40280,11 @@ function createExternalAgentExecutorPlan(options) {
39947
40280
  });
39948
40281
  const promptVersion = String(env[EXTERNAL_AGENT_PROMPT_VERSION_ENV] || "unknown");
39949
40282
  const outputStem = runner === "gemini" ? "gemini" : "codex";
39950
- const jsonlPath = (0, import_path13.join)(runDir, `${outputStem}-exec.jsonl`);
39951
- const lastMessagePath = (0, import_path13.join)(runDir, `${outputStem}-last-message.md`);
39952
- const stderrPath = (0, import_path13.join)(runDir, `${outputStem}-stderr.txt`);
39953
- const runPath = (0, import_path13.join)(runDir, "run.json");
39954
- const artifactSafetyPath = (0, import_path13.join)(runDir, "artifact-safety.json");
40283
+ const jsonlPath = (0, import_path17.join)(runDir, `${outputStem}-exec.jsonl`);
40284
+ const lastMessagePath = (0, import_path17.join)(runDir, `${outputStem}-last-message.md`);
40285
+ const stderrPath = (0, import_path17.join)(runDir, `${outputStem}-stderr.txt`);
40286
+ const runPath = (0, import_path17.join)(runDir, "run.json");
40287
+ const artifactSafetyPath = (0, import_path17.join)(runDir, "artifact-safety.json");
39955
40288
  const args = runner === "gemini" ? [
39956
40289
  ...runnerProbe.globalArgs,
39957
40290
  ...runnerProbe.execArgs
@@ -40042,281 +40375,12 @@ function createExternalAgentExecutorPlan(options) {
40042
40375
  };
40043
40376
  }
40044
40377
  function writeExternalAgentExecutorPlan(plan) {
40045
- const path2 = (0, import_path13.join)(plan.batch_dir, "executor-plan.json");
40046
- (0, import_fs15.mkdirSync)(plan.batch_dir, { recursive: true });
40047
- (0, import_fs15.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40378
+ const path2 = (0, import_path17.join)(plan.batch_dir, "executor-plan.json");
40379
+ (0, import_fs18.mkdirSync)(plan.batch_dir, { recursive: true });
40380
+ (0, import_fs18.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
40048
40381
  `, "utf8");
40049
40382
  return path2;
40050
40383
  }
40051
- function proofArtifactPasses(runDir) {
40052
- const proofPath = (0, import_path13.join)(runDir, "proof.json");
40053
- if (!(0, import_fs15.existsSync)(proofPath)) return false;
40054
- try {
40055
- const parsed = JSON.parse((0, import_fs15.readFileSync)(proofPath, "utf8"));
40056
- return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
40057
- } catch {
40058
- return false;
40059
- }
40060
- }
40061
- function readIfExists(path2) {
40062
- return (0, import_fs15.existsSync)(path2) ? (0, import_fs15.readFileSync)(path2, "utf8") : "";
40063
- }
40064
- function redactArtifactFile(path2, input = {}) {
40065
- if (!(0, import_fs15.existsSync)(path2)) return;
40066
- const original = (0, import_fs15.readFileSync)(path2, "utf8");
40067
- const redacted = redactExternalAgentArtifactText(original, input);
40068
- if (redacted !== original) (0, import_fs15.writeFileSync)(path2, redacted, "utf8");
40069
- }
40070
- function redactOutputArtifacts(run, input = {}) {
40071
- redactArtifactFile(run.outputs.jsonl, input);
40072
- redactArtifactFile(run.outputs.last_message, input);
40073
- redactArtifactFile(run.outputs.stderr, input);
40074
- redactArtifactFile((0, import_path13.join)(run.run_dir, "commands.ndjson"), input);
40075
- if (!(0, import_fs15.existsSync)(run.run_dir)) return;
40076
- for (const name of (0, import_fs15.readdirSync)(run.run_dir)) {
40077
- if (name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")) {
40078
- redactArtifactFile((0, import_path13.join)(run.run_dir, name), input);
40079
- }
40080
- }
40081
- }
40082
- function copyCommandCaptureArtifacts(input) {
40083
- const commandLog = (0, import_path13.join)(input.captureDir, "commands.ndjson");
40084
- if ((0, import_fs15.existsSync)(commandLog)) {
40085
- (0, import_fs15.writeFileSync)((0, import_path13.join)(input.runDir, "commands.ndjson"), (0, import_fs15.readFileSync)(commandLog, "utf8"), "utf8");
40086
- }
40087
- for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
40088
- if (name.startsWith("command-output-cmd_")) {
40089
- (0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
40090
- } else if (EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
40091
- (0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
40092
- }
40093
- }
40094
- }
40095
- function relativeArtifactName(path2) {
40096
- return (0, import_path13.basename)(path2);
40097
- }
40098
- function classifyRun(input) {
40099
- if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
40100
- if (!input.artifactSafetyOk) return { status: "fail", reasonCode: "external_agent_artifact_safety_blocked" };
40101
- const completedCommands = readCommandRecords(input.run.run_dir).filter((record2) => record2.phase === "completed");
40102
- const observedVersions = completedCommands.map((record2) => String(record2.cli_version || "").trim()).filter((version2) => /^\d+\.\d+\.\d+$/.test(version2));
40103
- if (observedVersions.some((version2) => version2 !== CLI_VERSION)) {
40104
- return { status: "hold", reasonCode: "external_agent_cli_version_drift" };
40105
- }
40106
- const commandReasonCodes = completedCommands.flatMap((record2) => [
40107
- String(record2.reason_code || ""),
40108
- ...Array.isArray(record2.check_reason_codes) ? record2.check_reason_codes.map((code) => String(code || "")) : []
40109
- ]).filter(Boolean);
40110
- const hasCommandReason = (pattern) => commandReasonCodes.some((reason) => pattern.test(reason));
40111
- if (hasCommandReason(new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i"))) {
40112
- return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
40113
- }
40114
- if (hasCommandReason(/provider_capacity_blocked/i)) {
40115
- return { status: "hold", reasonCode: "provider_capacity_blocked" };
40116
- }
40117
- if (hasCommandReason(/byon_voice_number_not_configured/i)) {
40118
- return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
40119
- }
40120
- if (hasCommandReason(/contact_phone_provisioning_failed/i)) {
40121
- return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
40122
- }
40123
- if (hasCommandReason(/voice_contact_expected_no_spend_hold/i)) {
40124
- return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
40125
- }
40126
- if (hasCommandReason(/contact_phone_missing/i)) {
40127
- return { status: "hold", reasonCode: "voice_contact_phone_missing" };
40128
- }
40129
- if (hasCommandReason(/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i)) {
40130
- return { status: "hold", reasonCode: "simulation_certification_failed" };
40131
- }
40132
- if (hasCommandReason(/proof_held/i)) {
40133
- return { status: "hold", reasonCode: "external_agent_proof_held" };
40134
- }
40135
- if (hasCommandReason(/agent_limit_reached/i)) {
40136
- return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
40137
- }
40138
- const lastMessage = readIfExists(input.run.outputs.last_message);
40139
- const stderr = readIfExists(input.run.outputs.stderr);
40140
- const combined = `${lastMessage}
40141
- ${stderr}`;
40142
- if (/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i.test(combined)) {
40143
- return { status: "fail", reasonCode: "private_repo_assumption_detected" };
40144
- }
40145
- if (/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i.test(combined)) {
40146
- return { status: "hold", reasonCode: "codex_exec_policy_blocked" };
40147
- }
40148
- if (/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i.test(combined)) {
40149
- return { status: "hold", reasonCode: "codex_sandbox_exec_blocked" };
40150
- }
40151
- if (/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i.test(combined)) {
40152
- return { status: "hold", reasonCode: "codex_network_dns_blocked" };
40153
- }
40154
- if (new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i").test(combined)) {
40155
- return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
40156
- }
40157
- if (/provider_capacity_blocked/i.test(combined)) {
40158
- return { status: "hold", reasonCode: "provider_capacity_blocked" };
40159
- }
40160
- if (/byon_voice_number_not_configured/i.test(combined)) {
40161
- return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
40162
- }
40163
- if (/contact_phone_provisioning_failed/i.test(combined)) {
40164
- return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
40165
- }
40166
- if (/voice_contact_expected_no_spend_hold/i.test(combined)) {
40167
- return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
40168
- }
40169
- if (/contact_phone_missing/i.test(combined)) {
40170
- return { status: "hold", reasonCode: "voice_contact_phone_missing" };
40171
- }
40172
- if (/simulation_certification_failed/i.test(combined)) {
40173
- return { status: "hold", reasonCode: "simulation_certification_failed" };
40174
- }
40175
- if (/proof_held/i.test(combined)) {
40176
- return { status: "hold", reasonCode: "external_agent_proof_held" };
40177
- }
40178
- if (/agent_limit_reached/i.test(combined)) {
40179
- return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
40180
- }
40181
- if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(input.run.run_dir)) {
40182
- return { status: "hold", reasonCode: "auth_browser_approval_required" };
40183
- }
40184
- if (input.exitCode !== 0) return { status: "hold", reasonCode: `${input.run.command}_runner_nonzero_exit` };
40185
- if (proofArtifactPasses(input.run.run_dir)) return { status: "pass", reasonCode: null };
40186
- return { status: "hold", reasonCode: "external_agent_proof_artifact_missing" };
40187
- }
40188
- function buildExecutedRunArtifact(input) {
40189
- const commands = readCommandRecords(input.run.run_dir);
40190
- const agentMetadata = readExternalAgentMetadata(input.run.run_dir);
40191
- return {
40192
- schema_version: "external_agent_run.v1",
40193
- run_id: input.run.run_id,
40194
- status: input.status,
40195
- failure_reason_code: input.reasonCode,
40196
- model_provider: input.run.model_provider,
40197
- model_name: input.run.model_name,
40198
- runner_model: input.run.runner_model,
40199
- agent_shell: `${input.run.command}-exec`,
40200
- workspace_type: "clean-no-repo-programmatic",
40201
- prompt_version: input.run.prompt_version,
40202
- prompt_path: "prompt.txt",
40203
- started_at: input.startedAt,
40204
- ended_at: input.endedAt,
40205
- manual_intervention_count: 0,
40206
- manual_interventions: [],
40207
- environment: {
40208
- os: process.platform,
40209
- node_version: process.version,
40210
- npm_version: null,
40211
- foh_cli_version: CLI_VERSION,
40212
- runner_exit_code: input.exitCode,
40213
- runner_timed_out: input.timedOut,
40214
- duration_ms: input.durationMs
40215
- },
40216
- public_entrypoints: [
40217
- "https://frontofhouse.okii.uk",
40218
- "https://frontofhouse.okii.uk/llms.txt",
40219
- "https://frontofhouse.okii.uk/openapi.yaml",
40220
- "npx --yes @f-o-h/cli@latest"
40221
- ],
40222
- commands_run: commands.map((command) => command.command),
40223
- docs_pages_used: agentMetadata.docs_pages_used,
40224
- eval_state: {
40225
- lifecycle_strategy: "reuse_existing_eval_state",
40226
- org_reuse_expected: true,
40227
- agent_reuse_expected: true,
40228
- widget_reuse_expected: true,
40229
- fresh_org_expected: false,
40230
- ephemeral_org_expected: false,
40231
- fresh_agent_expected: false,
40232
- phone_purchase_expected: false,
40233
- paid_resource_creation_expected: false,
40234
- spend_policy_expected: NO_SPEND_POLICY,
40235
- cleanup_expected: false,
40236
- cleanup_strategy: "no_cleanup_for_reused_eval_state",
40237
- paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
40238
- rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
40239
- },
40240
- artifacts: {
40241
- terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
40242
- command_log: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
40243
- proof_bundle: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
40244
- replay_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
40245
- knowledge_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
40246
- improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
40247
- agent_metadata: agentMetadata.path,
40248
- notes: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
40249
- runner_last_message: relativeArtifactName(input.run.outputs.last_message),
40250
- runner_stderr: relativeArtifactName(input.run.outputs.stderr),
40251
- codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
40252
- codex_stderr: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.stderr) : null,
40253
- artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
40254
- },
40255
- summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
40256
- next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))] : [
40257
- "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
40258
- "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
40259
- externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))
40260
- ]
40261
- };
40262
- }
40263
- function spawnRunner(input) {
40264
- return new Promise((resolveRun) => {
40265
- const started = Date.now();
40266
- const commandInvocation = buildCommandInvocation(input.command, input.args);
40267
- const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
40268
- cwd: input.cwd,
40269
- env: input.env,
40270
- shell: false,
40271
- stdio: ["pipe", "pipe", "pipe"],
40272
- windowsHide: true
40273
- });
40274
- const stdout = (0, import_fs15.createWriteStream)(input.stdoutPath, { flags: "w" });
40275
- const stderr = (0, import_fs15.createWriteStream)(input.stderrPath, { flags: "w" });
40276
- child.stdout.pipe(stdout);
40277
- child.stderr.pipe(stderr);
40278
- child.stdin.end(input.prompt);
40279
- let timedOut = false;
40280
- const timer = setTimeout(() => {
40281
- timedOut = true;
40282
- if (child.pid && process.platform === "win32") {
40283
- (0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
40284
- } else {
40285
- child.kill("SIGKILL");
40286
- }
40287
- }, input.timeoutMs);
40288
- child.on("close", (exitCode) => {
40289
- clearTimeout(timer);
40290
- stdout.end();
40291
- stderr.end();
40292
- resolveRun({
40293
- exitCode,
40294
- timedOut,
40295
- durationMs: Date.now() - started
40296
- });
40297
- });
40298
- child.on("error", () => {
40299
- clearTimeout(timer);
40300
- stdout.end();
40301
- stderr.end();
40302
- resolveRun({
40303
- exitCode: null,
40304
- timedOut,
40305
- durationMs: Date.now() - started
40306
- });
40307
- });
40308
- });
40309
- }
40310
- function buildCommandInvocation(command, args) {
40311
- if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
40312
- const binDir = (0, import_path13.dirname)(command);
40313
- const codexEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
40314
- if ((0, import_fs15.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
40315
- const geminiEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
40316
- if ((0, import_fs15.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
40317
- }
40318
- return { command, args };
40319
- }
40320
40384
  async function executeExternalAgentExecutorPlan(plan, options = {}) {
40321
40385
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
40322
40386
  const results = [];
@@ -40328,8 +40392,8 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40328
40392
  if (authPreflight && !authPreflight.ok) {
40329
40393
  const endedAt2 = (/* @__PURE__ */ new Date()).toISOString();
40330
40394
  const blockedResults = plan.runs.map((run) => {
40331
- (0, import_fs15.mkdirSync)(run.run_dir, { recursive: true });
40332
- const runArtifact = buildExecutedRunArtifact({
40395
+ (0, import_fs18.mkdirSync)(run.run_dir, { recursive: true });
40396
+ const runArtifact = buildExecutedExternalAgentRunArtifact({
40333
40397
  run,
40334
40398
  startedAt,
40335
40399
  endedAt: endedAt2,
@@ -40339,7 +40403,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40339
40403
  timedOut: false,
40340
40404
  durationMs: 0
40341
40405
  });
40342
- (0, import_fs15.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40406
+ (0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40343
40407
  `, "utf8");
40344
40408
  return {
40345
40409
  run_id: run.run_id,
@@ -40366,41 +40430,41 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40366
40430
  }
40367
40431
  for (const run of plan.runs) {
40368
40432
  const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
40369
- const commandCaptureDir = (0, import_path13.join)(run.workspace_dir, ".foh-capture");
40370
- (0, import_fs15.mkdirSync)(commandCaptureDir, { recursive: true });
40433
+ const commandCaptureDir = (0, import_path17.join)(run.workspace_dir, ".foh-capture");
40434
+ (0, import_fs18.mkdirSync)(commandCaptureDir, { recursive: true });
40371
40435
  const env = buildCodexExecutorEnv({
40372
40436
  sourceEnv: options.env,
40373
40437
  runDir: commandCaptureDir,
40374
40438
  promptVersion: run.prompt_version
40375
40439
  });
40376
- const spawned = await spawnRunner({
40440
+ const spawned = await spawnExternalAgentRunner({
40377
40441
  command: runnerCommand,
40378
40442
  args: run.args,
40379
40443
  cwd: run.workspace_dir,
40380
40444
  env,
40381
- prompt: (0, import_fs15.readFileSync)(run.prompt_path, "utf8"),
40445
+ prompt: (0, import_fs18.readFileSync)(run.prompt_path, "utf8"),
40382
40446
  stdoutPath: run.outputs.jsonl,
40383
40447
  stderrPath: run.outputs.stderr,
40384
40448
  timeoutMs: plan.timeout_minutes * 60 * 1e3
40385
40449
  });
40386
- copyCommandCaptureArtifacts({ captureDir: commandCaptureDir, runDir: run.run_dir });
40450
+ copyExternalAgentCommandCaptureArtifacts({ captureDir: commandCaptureDir, runDir: run.run_dir });
40387
40451
  const privateRepoRoot = options.privateRepoRoot || plan.private_repo_root;
40388
- redactOutputArtifacts(run, { privateRepoRoot });
40452
+ redactExternalAgentOutputArtifacts(run, { privateRepoRoot });
40389
40453
  const artifactSafety = scanExternalAgentArtifacts({
40390
40454
  runDir: run.run_dir,
40391
40455
  privateRepoRoot,
40392
40456
  writeRedacted: true
40393
40457
  });
40394
- (0, import_fs15.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40458
+ (0, import_fs18.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
40395
40459
  `, "utf8");
40396
40460
  const runEndedAt = (/* @__PURE__ */ new Date()).toISOString();
40397
- const classification = classifyRun({
40461
+ const classification = classifyExternalAgentRun({
40398
40462
  run,
40399
40463
  exitCode: spawned.exitCode,
40400
40464
  timedOut: spawned.timedOut,
40401
40465
  artifactSafetyOk: artifactSafety.ok
40402
40466
  });
40403
- const runArtifact = buildExecutedRunArtifact({
40467
+ const runArtifact = buildExecutedExternalAgentRunArtifact({
40404
40468
  run,
40405
40469
  startedAt: runStartedAt,
40406
40470
  endedAt: runEndedAt,
@@ -40410,7 +40474,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
40410
40474
  timedOut: spawned.timedOut,
40411
40475
  durationMs: spawned.durationMs
40412
40476
  });
40413
- (0, import_fs15.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40477
+ (0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
40414
40478
  `, "utf8");
40415
40479
  results.push({
40416
40480
  run_id: run.run_id,
@@ -40459,13 +40523,13 @@ function defaultRunDir(modelName, promptVersion) {
40459
40523
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40460
40524
  const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
40461
40525
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40462
- return (0, import_path14.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40526
+ return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
40463
40527
  }
40464
40528
  function defaultBatchDir(promptVersion) {
40465
40529
  const date4 = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
40466
40530
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
40467
40531
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
40468
- return (0, import_path14.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40532
+ return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
40469
40533
  }
40470
40534
  function safeSlug(value) {
40471
40535
  return String(value || "unknown").toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "unknown";
@@ -40480,8 +40544,8 @@ function scanArtifactsCommand(runDir, privateRepoRoot) {
40480
40544
  return `foh eval external-agent scan-artifacts --run-dir ${quoteArg(runDir)}${privateRootArg} --write-redacted --json`;
40481
40545
  }
40482
40546
  function externalAgentSummaryCommand2(root) {
40483
- const summaryPath = (0, import_path14.join)(root, "latest-summary.json");
40484
- const reportPath = (0, import_path14.join)(root, "summary.report.json");
40547
+ const summaryPath = (0, import_path18.join)(root, "latest-summary.json");
40548
+ const reportPath = (0, import_path18.join)(root, "summary.report.json");
40485
40549
  return [
40486
40550
  "node",
40487
40551
  "scripts/summarize-external-agent-runs.mjs",
@@ -40595,14 +40659,14 @@ function writePrompt(runDir, promptVersion, context = {}) {
40595
40659
  replayPromptContext(context.replayFile),
40596
40660
  knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
40597
40661
  ].join("");
40598
- const path2 = (0, import_path14.join)(runDir, "prompt.txt");
40599
- (0, import_fs16.writeFileSync)(path2, `${prompt}
40662
+ const path2 = (0, import_path18.join)(runDir, "prompt.txt");
40663
+ (0, import_fs19.writeFileSync)(path2, `${prompt}
40600
40664
  `, "utf8");
40601
40665
  return path2;
40602
40666
  }
40603
40667
  function writeSession(runDir, session) {
40604
- const path2 = (0, import_path14.join)(runDir, "session.json");
40605
- (0, import_fs16.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
40668
+ const path2 = (0, import_path18.join)(runDir, "session.json");
40669
+ (0, import_fs19.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
40606
40670
  `, "utf8");
40607
40671
  return path2;
40608
40672
  }
@@ -40678,9 +40742,9 @@ function buildRunArtifact(input) {
40678
40742
  notes: "notes.md"
40679
40743
  },
40680
40744
  summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
40681
- next_commands: status === "pass" ? [externalAgentSummaryCommand2((0, import_path14.dirname)(input.runDir))] : [
40682
- `foh bug improve --from external-agent-run --file ${(0, import_path14.join)(input.runDir, "run.json")} --out ${(0, import_path14.join)(input.runDir, "improvement-packet.json")} --json`,
40683
- externalAgentSummaryCommand2((0, import_path14.dirname)(input.runDir))
40745
+ next_commands: status === "pass" ? [externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))] : [
40746
+ `foh bug improve --from external-agent-run --file ${(0, import_path18.join)(input.runDir, "run.json")} --out ${(0, import_path18.join)(input.runDir, "improvement-packet.json")} --json`,
40747
+ externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))
40684
40748
  ]
40685
40749
  };
40686
40750
  }
@@ -40689,16 +40753,16 @@ function registerEval(program3) {
40689
40753
  const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
40690
40754
  external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
40691
40755
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40692
- const batchDir = (0, import_path14.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
40693
- const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
40756
+ const batchDir = (0, import_path18.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
40757
+ const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
40694
40758
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40695
40759
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40696
40760
  const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
40697
- (0, import_fs16.mkdirSync)(batchDir, { recursive: true });
40761
+ (0, import_fs19.mkdirSync)(batchDir, { recursive: true });
40698
40762
  const runs = models.map((model, index) => {
40699
40763
  const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
40700
- const runDir = (0, import_path14.join)(batchDir, runId);
40701
- (0, import_fs16.mkdirSync)(runDir, { recursive: true });
40764
+ const runDir = (0, import_path18.join)(batchDir, runId);
40765
+ (0, import_fs19.mkdirSync)(runDir, { recursive: true });
40702
40766
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40703
40767
  const commandArgs = [
40704
40768
  "eval",
@@ -40745,8 +40809,8 @@ function registerEval(program3) {
40745
40809
  runs,
40746
40810
  summary_command: externalAgentSummaryCommand2(batchDir)
40747
40811
  };
40748
- const batchPath = (0, import_path14.join)(batchDir, "batch.json");
40749
- (0, import_fs16.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
40812
+ const batchPath = (0, import_path18.join)(batchDir, "batch.json");
40813
+ (0, import_fs19.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
40750
40814
  `, "utf8");
40751
40815
  format(cliEnvelope({
40752
40816
  schemaVersion: "external_agent_batch_plan_result.v1",
@@ -40766,11 +40830,11 @@ function registerEval(program3) {
40766
40830
  external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
40767
40831
  const status = normalizeStatus(opts.status);
40768
40832
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40769
- const runDir = (0, import_path14.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
40770
- const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
40833
+ const runDir = (0, import_path18.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
40834
+ const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
40771
40835
  const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40772
40836
  const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40773
- (0, import_fs16.mkdirSync)(runDir, { recursive: true });
40837
+ (0, import_fs19.mkdirSync)(runDir, { recursive: true });
40774
40838
  const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
40775
40839
  const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40776
40840
  const shell = inferShell(opts.shell);
@@ -40796,7 +40860,7 @@ function registerEval(program3) {
40796
40860
  }
40797
40861
  };
40798
40862
  writeSession(runDir, session);
40799
- (0, import_fs16.writeFileSync)((0, import_path14.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
40863
+ (0, import_fs19.writeFileSync)((0, import_path18.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
40800
40864
  let shellExitCode = null;
40801
40865
  if (opts.shell !== false) {
40802
40866
  process.stdout.write(`
@@ -40806,7 +40870,7 @@ Prompt: ${promptPath}
40806
40870
  Exit the shell to finalize run.json.
40807
40871
 
40808
40872
  `);
40809
- const result = (0, import_child_process5.spawnSync)(shell.command, shell.args, {
40873
+ const result = (0, import_child_process6.spawnSync)(shell.command, shell.args, {
40810
40874
  stdio: "inherit",
40811
40875
  env: {
40812
40876
  ...process.env,
@@ -40818,8 +40882,8 @@ Exit the shell to finalize run.json.
40818
40882
  shellExitCode = typeof result.status === "number" ? result.status : null;
40819
40883
  }
40820
40884
  const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
40821
- const runPath = (0, import_path14.join)(runDir, "run.json");
40822
- (0, import_fs16.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
40885
+ const runPath = (0, import_path18.join)(runDir, "run.json");
40886
+ (0, import_fs19.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
40823
40887
  `, "utf8");
40824
40888
  format(cliEnvelope({
40825
40889
  schemaVersion: "external_agent_capture_result.v1",
@@ -40829,7 +40893,7 @@ Exit the shell to finalize run.json.
40829
40893
  artifacts: {
40830
40894
  run: runPath,
40831
40895
  prompt: promptPath,
40832
- commands: (0, import_path14.join)(runDir, "commands.ndjson")
40896
+ commands: (0, import_path18.join)(runDir, "commands.ndjson")
40833
40897
  },
40834
40898
  nextCommands: artifact.next_commands,
40835
40899
  extra: { run: artifact }
@@ -40892,8 +40956,8 @@ Exit the shell to finalize run.json.
40892
40956
  requireExplicitEvalAuth: true,
40893
40957
  minimumEvalAuthTtlMs: (plan.timeout_minutes + 5) * 60 * 1e3
40894
40958
  });
40895
- const resultPath = (0, import_path14.join)(plan.batch_dir, "execution-result.json");
40896
- (0, import_fs16.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
40959
+ const resultPath = (0, import_path18.join)(plan.batch_dir, "execution-result.json");
40960
+ (0, import_fs19.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
40897
40961
  `, "utf8");
40898
40962
  format(cliEnvelope({
40899
40963
  schemaVersion: "external_agent_execution_result.v1",