@f-o-h/cli 0.1.68 → 0.1.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/foh.js +754 -665
- package/package.json +41 -41
package/dist/foh.js
CHANGED
|
@@ -14172,8 +14172,8 @@ function registerAgentGuardrailCommands(agent) {
|
|
|
14172
14172
|
try {
|
|
14173
14173
|
rule = JSON.parse(opts.rule);
|
|
14174
14174
|
} catch {
|
|
14175
|
-
const { readFileSync:
|
|
14176
|
-
rule = JSON.parse(
|
|
14175
|
+
const { readFileSync: readFileSync16 } = await import("fs");
|
|
14176
|
+
rule = JSON.parse(readFileSync16(opts.rule, "utf-8"));
|
|
14177
14177
|
}
|
|
14178
14178
|
const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
|
|
14179
14179
|
method: "POST",
|
|
@@ -14323,7 +14323,7 @@ async function publishAgentFromCurrentDraft(agentId, options) {
|
|
|
14323
14323
|
orgId
|
|
14324
14324
|
});
|
|
14325
14325
|
try {
|
|
14326
|
-
await apiFetch(`/v1/console/agents/${agentId}/publish`, {
|
|
14326
|
+
return await apiFetch(`/v1/console/agents/${agentId}/publish`, {
|
|
14327
14327
|
method: "POST",
|
|
14328
14328
|
body: JSON.stringify({ flowDraft }),
|
|
14329
14329
|
apiUrlOverride,
|
|
@@ -14399,7 +14399,7 @@ async function validateCertifyAndPublishAgent(opts) {
|
|
|
14399
14399
|
remediation: `Run: foh agent validate --agent ${opts.agentId} to see details.`
|
|
14400
14400
|
});
|
|
14401
14401
|
}
|
|
14402
|
-
await publishAgentFromCurrentDraft(opts.agentId, {
|
|
14402
|
+
const publish = await publishAgentFromCurrentDraft(opts.agentId, {
|
|
14403
14403
|
apiUrlOverride: opts.apiUrlOverride,
|
|
14404
14404
|
orgId: opts.orgId
|
|
14405
14405
|
});
|
|
@@ -14409,7 +14409,7 @@ async function validateCertifyAndPublishAgent(opts) {
|
|
|
14409
14409
|
status: "not_run",
|
|
14410
14410
|
reason_code: "publish_consumes_existing_certification_evidence"
|
|
14411
14411
|
},
|
|
14412
|
-
publish
|
|
14412
|
+
publish
|
|
14413
14413
|
};
|
|
14414
14414
|
}
|
|
14415
14415
|
|
|
@@ -14719,9 +14719,9 @@ function registerAgent(program3) {
|
|
|
14719
14719
|
process.stdout.write(yaml);
|
|
14720
14720
|
return;
|
|
14721
14721
|
}
|
|
14722
|
-
const { writeFileSync:
|
|
14722
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
14723
14723
|
const outputPath = opts.output ?? "tenant.yaml";
|
|
14724
|
-
|
|
14724
|
+
writeFileSync13(
|
|
14725
14725
|
outputPath,
|
|
14726
14726
|
`# tenant.yaml - Front Of House agent manifest
|
|
14727
14727
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -15084,11 +15084,6 @@ function registerInstagramChannelCommands(instagram, addCommonOptions) {
|
|
|
15084
15084
|
}));
|
|
15085
15085
|
}
|
|
15086
15086
|
|
|
15087
|
-
// src/commands/channel-whatsapp.ts
|
|
15088
|
-
var import_node_crypto = require("node:crypto");
|
|
15089
|
-
var import_node_fs = require("node:fs");
|
|
15090
|
-
var path = __toESM(require("node:path"));
|
|
15091
|
-
|
|
15092
15087
|
// src/commands/channel-whatsapp-helpers.ts
|
|
15093
15088
|
function parsePositiveNumber(value, fallback) {
|
|
15094
15089
|
if (value === void 0 || value === null || String(value).trim() === "") return fallback;
|
|
@@ -15215,7 +15210,11 @@ function buildReasonedNextSteps({
|
|
|
15215
15210
|
return dedupeSteps(steps);
|
|
15216
15211
|
}
|
|
15217
15212
|
|
|
15218
|
-
//
|
|
15213
|
+
// src/commands/channel-whatsapp-live-proof.ts
|
|
15214
|
+
var import_node_fs = require("node:fs");
|
|
15215
|
+
var path = __toESM(require("node:path"));
|
|
15216
|
+
|
|
15217
|
+
// src/lib/channel-live-proof-evaluator.mjs
|
|
15219
15218
|
function normalizeStatusValue(value) {
|
|
15220
15219
|
return String(value || "").trim().toLowerCase();
|
|
15221
15220
|
}
|
|
@@ -15282,55 +15281,7 @@ function evaluateChannelLiveProofArtifact({
|
|
|
15282
15281
|
};
|
|
15283
15282
|
}
|
|
15284
15283
|
|
|
15285
|
-
// src/commands/channel-whatsapp.ts
|
|
15286
|
-
var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
|
|
15287
|
-
var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
|
|
15288
|
-
var WHATSAPP_SENDER_MODEL = {
|
|
15289
|
-
test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
|
|
15290
|
-
production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
|
|
15291
|
-
runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
|
|
15292
|
-
};
|
|
15293
|
-
function parseBooleanOption({
|
|
15294
|
-
value,
|
|
15295
|
-
fallback,
|
|
15296
|
-
optionName,
|
|
15297
|
-
step
|
|
15298
|
-
}) {
|
|
15299
|
-
if (typeof value === "boolean") return value;
|
|
15300
|
-
const normalized = String(value ?? "").trim().toLowerCase();
|
|
15301
|
-
if (!normalized) return fallback;
|
|
15302
|
-
if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
|
|
15303
|
-
if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
|
|
15304
|
-
throw new FohError({
|
|
15305
|
-
step,
|
|
15306
|
-
error: `Invalid boolean value for ${optionName}: ${String(value)}`,
|
|
15307
|
-
remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
|
|
15308
|
-
});
|
|
15309
|
-
}
|
|
15310
|
-
async function runWhatsAppReadinessChecks({
|
|
15311
|
-
orgId,
|
|
15312
|
-
apiUrlOverride,
|
|
15313
|
-
verifyToken
|
|
15314
|
-
}) {
|
|
15315
|
-
const status = await apiFetch("/v1/console/channels/whatsapp/status", {
|
|
15316
|
-
orgId,
|
|
15317
|
-
apiUrlOverride
|
|
15318
|
-
});
|
|
15319
|
-
const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
|
|
15320
|
-
const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
|
|
15321
|
-
method: "POST",
|
|
15322
|
-
body: JSON.stringify(verifyPayload),
|
|
15323
|
-
orgId,
|
|
15324
|
-
apiUrlOverride
|
|
15325
|
-
});
|
|
15326
|
-
const test = await apiFetch("/v1/console/channels/whatsapp/test", {
|
|
15327
|
-
method: "POST",
|
|
15328
|
-
body: JSON.stringify({ dryRun: true }),
|
|
15329
|
-
orgId,
|
|
15330
|
-
apiUrlOverride
|
|
15331
|
-
});
|
|
15332
|
-
return { status, verify, test };
|
|
15333
|
-
}
|
|
15284
|
+
// src/commands/channel-whatsapp-live-proof.ts
|
|
15334
15285
|
function resolveLiveProof({
|
|
15335
15286
|
enabled,
|
|
15336
15287
|
artifactPathRaw,
|
|
@@ -15386,6 +15337,57 @@ function resolveLiveProof({
|
|
|
15386
15337
|
freshness: evaluated.freshness
|
|
15387
15338
|
};
|
|
15388
15339
|
}
|
|
15340
|
+
|
|
15341
|
+
// src/commands/channel-whatsapp-setup.ts
|
|
15342
|
+
var import_node_crypto = require("node:crypto");
|
|
15343
|
+
var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
|
|
15344
|
+
var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
|
|
15345
|
+
var WHATSAPP_SENDER_MODEL = {
|
|
15346
|
+
test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
|
|
15347
|
+
production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
|
|
15348
|
+
runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
|
|
15349
|
+
};
|
|
15350
|
+
function parseBooleanOption({
|
|
15351
|
+
value,
|
|
15352
|
+
fallback,
|
|
15353
|
+
optionName,
|
|
15354
|
+
step
|
|
15355
|
+
}) {
|
|
15356
|
+
if (typeof value === "boolean") return value;
|
|
15357
|
+
const normalized = String(value ?? "").trim().toLowerCase();
|
|
15358
|
+
if (!normalized) return fallback;
|
|
15359
|
+
if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
|
|
15360
|
+
if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
|
|
15361
|
+
throw new FohError({
|
|
15362
|
+
step,
|
|
15363
|
+
error: `Invalid boolean value for ${optionName}: ${String(value)}`,
|
|
15364
|
+
remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
|
|
15365
|
+
});
|
|
15366
|
+
}
|
|
15367
|
+
async function runWhatsAppReadinessChecks({
|
|
15368
|
+
orgId,
|
|
15369
|
+
apiUrlOverride,
|
|
15370
|
+
verifyToken
|
|
15371
|
+
}) {
|
|
15372
|
+
const status = await apiFetch("/v1/console/channels/whatsapp/status", {
|
|
15373
|
+
orgId,
|
|
15374
|
+
apiUrlOverride
|
|
15375
|
+
});
|
|
15376
|
+
const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
|
|
15377
|
+
const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
|
|
15378
|
+
method: "POST",
|
|
15379
|
+
body: JSON.stringify(verifyPayload),
|
|
15380
|
+
orgId,
|
|
15381
|
+
apiUrlOverride
|
|
15382
|
+
});
|
|
15383
|
+
const test = await apiFetch("/v1/console/channels/whatsapp/test", {
|
|
15384
|
+
method: "POST",
|
|
15385
|
+
body: JSON.stringify({ dryRun: true }),
|
|
15386
|
+
orgId,
|
|
15387
|
+
apiUrlOverride
|
|
15388
|
+
});
|
|
15389
|
+
return { status, verify, test };
|
|
15390
|
+
}
|
|
15389
15391
|
function buildWebhookUrl(apiBaseUrl) {
|
|
15390
15392
|
return `${apiBaseUrl.replace(/\/$/, "")}/v1/whatsapp/webhook`;
|
|
15391
15393
|
}
|
|
@@ -15522,6 +15524,8 @@ function assertProofPass(strict, reasons) {
|
|
|
15522
15524
|
markCommandFailed(1);
|
|
15523
15525
|
}
|
|
15524
15526
|
}
|
|
15527
|
+
|
|
15528
|
+
// src/commands/channel-whatsapp.ts
|
|
15525
15529
|
function registerWhatsAppChannelCommands(whatsapp, addCommonOptions) {
|
|
15526
15530
|
addCommonOptions(
|
|
15527
15531
|
whatsapp.command("start").description("Assess WhatsApp onboarding readiness and print fastest setup path")
|
|
@@ -16169,11 +16173,11 @@ function registerVoice(program3) {
|
|
|
16169
16173
|
}
|
|
16170
16174
|
const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
|
|
16171
16175
|
const audio = Buffer.from(await res.arrayBuffer());
|
|
16172
|
-
const { mkdirSync: mkdirSync8, writeFileSync:
|
|
16173
|
-
const { dirname:
|
|
16176
|
+
const { mkdirSync: mkdirSync8, writeFileSync: writeFileSync13 } = await import("fs");
|
|
16177
|
+
const { dirname: dirname11, resolve: resolve13 } = await import("path");
|
|
16174
16178
|
const absolutePath = resolve13(outputPath);
|
|
16175
|
-
mkdirSync8(
|
|
16176
|
-
|
|
16179
|
+
mkdirSync8(dirname11(absolutePath), { recursive: true });
|
|
16180
|
+
writeFileSync13(absolutePath, audio);
|
|
16177
16181
|
format({
|
|
16178
16182
|
status: "ok",
|
|
16179
16183
|
provider,
|
|
@@ -32786,7 +32790,7 @@ var StdioServerTransport = class {
|
|
|
32786
32790
|
};
|
|
32787
32791
|
|
|
32788
32792
|
// src/lib/cli-version.ts
|
|
32789
|
-
var CLI_VERSION = "0.1.68";
|
|
32793
|
+
var CLI_VERSION = "0.1.70";
|
|
32790
32794
|
|
|
32791
32795
|
// src/commands/mcp-serve.ts
|
|
32792
32796
|
var DEFAULT_TIMEOUT_MS = 12e4;
|
|
@@ -33806,6 +33810,35 @@ function readDraftKnowledgeText(draft) {
|
|
|
33806
33810
|
const fromLegacy = typeof draft.knowledge_base === "string" ? draft.knowledge_base : "";
|
|
33807
33811
|
return fromLegacy;
|
|
33808
33812
|
}
|
|
33813
|
+
function normalizeKnowledgeText(value) {
|
|
33814
|
+
return value.replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n").trim();
|
|
33815
|
+
}
|
|
33816
|
+
function splitDraftKnowledgeSegments(value) {
|
|
33817
|
+
return value.replace(/\r\n?/g, "\n").split(/\n\s*---+\s*\n/g).map((segment) => normalizeKnowledgeText(segment)).filter(Boolean);
|
|
33818
|
+
}
|
|
33819
|
+
function buildDraftKnowledgeUpdate(existing, fileContent) {
|
|
33820
|
+
const normalizedContent = normalizeKnowledgeText(fileContent);
|
|
33821
|
+
if (normalizedContent.length === 0) {
|
|
33822
|
+
throw new FohError({
|
|
33823
|
+
step: "knowledge.ingest-file",
|
|
33824
|
+
error: "Knowledge file is empty after normalization",
|
|
33825
|
+
remediation: "Pass a file with non-empty text content.",
|
|
33826
|
+
statusCode: 400
|
|
33827
|
+
});
|
|
33828
|
+
}
|
|
33829
|
+
const existingSegments = splitDraftKnowledgeSegments(existing);
|
|
33830
|
+
const duplicate = existingSegments.includes(normalizedContent);
|
|
33831
|
+
const nextSegments = duplicate ? existingSegments : [...existingSegments, normalizedContent];
|
|
33832
|
+
const nextKnowledge = nextSegments.join("\n\n---\n\n");
|
|
33833
|
+
const normalizedExisting = existingSegments.join("\n\n---\n\n");
|
|
33834
|
+
return {
|
|
33835
|
+
nextKnowledge,
|
|
33836
|
+
normalizedContent,
|
|
33837
|
+
duplicate,
|
|
33838
|
+
shouldPatch: nextKnowledge !== normalizedExisting || normalizeKnowledgeText(existing) !== normalizedExisting,
|
|
33839
|
+
segmentCount: nextSegments.length
|
|
33840
|
+
};
|
|
33841
|
+
}
|
|
33809
33842
|
function tokenize(value) {
|
|
33810
33843
|
return value.toLowerCase().split(/[^a-z0-9]+/g).map((token) => token.trim()).filter((token) => token.length >= 3);
|
|
33811
33844
|
}
|
|
@@ -33929,23 +33962,25 @@ function registerKnowledge(program3) {
|
|
|
33929
33962
|
apiUrlOverride: opts.apiUrl
|
|
33930
33963
|
});
|
|
33931
33964
|
const existing = readDraftKnowledgeText(draft);
|
|
33932
|
-
const
|
|
33933
|
-
|
|
33934
|
-
|
|
33935
|
-
|
|
33936
|
-
|
|
33937
|
-
|
|
33938
|
-
|
|
33939
|
-
|
|
33940
|
-
|
|
33941
|
-
|
|
33942
|
-
|
|
33943
|
-
|
|
33944
|
-
});
|
|
33965
|
+
const update = buildDraftKnowledgeUpdate(existing, content);
|
|
33966
|
+
if (update.shouldPatch) {
|
|
33967
|
+
await apiFetch(`/v1/console/agents/${opts.agent}/draft`, {
|
|
33968
|
+
method: "PATCH",
|
|
33969
|
+
body: JSON.stringify({
|
|
33970
|
+
knowledge_base_raw: update.nextKnowledge,
|
|
33971
|
+
knowledge_base: update.nextKnowledge
|
|
33972
|
+
}),
|
|
33973
|
+
orgId: opts.org,
|
|
33974
|
+
apiUrlOverride: opts.apiUrl
|
|
33975
|
+
});
|
|
33976
|
+
}
|
|
33945
33977
|
data = {
|
|
33946
33978
|
ok: true,
|
|
33947
33979
|
source: "agent_draft_direct",
|
|
33948
|
-
length: nextKnowledge.length
|
|
33980
|
+
length: update.nextKnowledge.length,
|
|
33981
|
+
draft_knowledge_updated: update.shouldPatch,
|
|
33982
|
+
draft_knowledge_deduped: update.duplicate,
|
|
33983
|
+
segment_count: update.segmentCount
|
|
33949
33984
|
};
|
|
33950
33985
|
} else {
|
|
33951
33986
|
data = await apiFetch("/v1/knowledge/documents", {
|
|
@@ -33953,7 +33988,7 @@ ${content}` : content;
|
|
|
33953
33988
|
body: JSON.stringify({
|
|
33954
33989
|
name: (0, import_path2.basename)(opts.file),
|
|
33955
33990
|
source_type: "text",
|
|
33956
|
-
source_value: content,
|
|
33991
|
+
source_value: normalizeKnowledgeText(content),
|
|
33957
33992
|
agent_id: opts.agent
|
|
33958
33993
|
}),
|
|
33959
33994
|
orgId: opts.org,
|
|
@@ -34078,47 +34113,8 @@ function registerLeads(program3) {
|
|
|
34078
34113
|
// src/commands/setup.ts
|
|
34079
34114
|
var import_crypto3 = require("crypto");
|
|
34080
34115
|
|
|
34081
|
-
// src/lib/signed-report.ts
|
|
34082
|
-
var import_crypto2 = require("crypto");
|
|
34083
|
-
var import_fs4 = require("fs");
|
|
34084
|
-
var import_path3 = require("path");
|
|
34085
|
-
function canonicalize(value) {
|
|
34086
|
-
if (value === null || value === void 0) return null;
|
|
34087
|
-
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
|
|
34088
|
-
if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
|
|
34089
|
-
if (typeof value === "object") {
|
|
34090
|
-
const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
|
|
34091
|
-
return Object.fromEntries(sortedEntries);
|
|
34092
|
-
}
|
|
34093
|
-
return String(value);
|
|
34094
|
-
}
|
|
34095
|
-
function stableStringify(value) {
|
|
34096
|
-
return JSON.stringify(canonicalize(value), null, 2) + "\n";
|
|
34097
|
-
}
|
|
34098
|
-
function sha256Hex(input) {
|
|
34099
|
-
return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
|
|
34100
|
-
}
|
|
34101
|
-
function signReport(reportPayload) {
|
|
34102
|
-
const canonical = stableStringify(reportPayload);
|
|
34103
|
-
return {
|
|
34104
|
-
...reportPayload,
|
|
34105
|
-
report_hash: {
|
|
34106
|
-
algorithm: "sha256",
|
|
34107
|
-
digest_hex: sha256Hex(canonical),
|
|
34108
|
-
canonicalization: "sorted-json-v1",
|
|
34109
|
-
verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
|
|
34110
|
-
}
|
|
34111
|
-
};
|
|
34112
|
-
}
|
|
34113
|
-
function writeSignedJsonArtifact(path2, value) {
|
|
34114
|
-
const absolutePath = (0, import_path3.resolve)(path2);
|
|
34115
|
-
(0, import_fs4.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
|
|
34116
|
-
(0, import_fs4.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
|
|
34117
|
-
return absolutePath;
|
|
34118
|
-
}
|
|
34119
|
-
|
|
34120
34116
|
// src/commands/manifest.ts
|
|
34121
|
-
var
|
|
34117
|
+
var import_fs4 = require("fs");
|
|
34122
34118
|
var import_picocolors3 = __toESM(require_picocolors());
|
|
34123
34119
|
function formatDiff(diffs) {
|
|
34124
34120
|
if (diffs.length === 0) return "No changes";
|
|
@@ -34154,7 +34150,7 @@ function formatDiff(diffs) {
|
|
|
34154
34150
|
function loadManifestFile(filePath) {
|
|
34155
34151
|
let raw;
|
|
34156
34152
|
try {
|
|
34157
|
-
raw = (0,
|
|
34153
|
+
raw = (0, import_fs4.readFileSync)(filePath, "utf-8");
|
|
34158
34154
|
} catch {
|
|
34159
34155
|
throw new FohError({
|
|
34160
34156
|
step: "manifest.load",
|
|
@@ -34379,80 +34375,13 @@ function normalizeAgentCertMode(value) {
|
|
|
34379
34375
|
return agentCertModeValues.includes(value) ? value : "quick";
|
|
34380
34376
|
}
|
|
34381
34377
|
|
|
34382
|
-
// src/commands/setup.ts
|
|
34383
|
-
var SETUP_STEP_ORDER = [
|
|
34384
|
-
"check_credentials",
|
|
34385
|
-
"check_org_access",
|
|
34386
|
-
"submit_compliance",
|
|
34387
|
-
"wait_compliance",
|
|
34388
|
-
"provision_phone",
|
|
34389
|
-
"create_agent",
|
|
34390
|
-
"validate_agent",
|
|
34391
|
-
"seed_guardrails",
|
|
34392
|
-
"ensure_widget",
|
|
34393
|
-
"set_widget_domains",
|
|
34394
|
-
"configure_voice",
|
|
34395
|
-
"run_smoke_test",
|
|
34396
|
-
"sim_certify_loop",
|
|
34397
|
-
"widget_smoke",
|
|
34398
|
-
"publish_agent",
|
|
34399
|
-
"emit_summary"
|
|
34400
|
-
];
|
|
34378
|
+
// src/commands/setup-apply.ts
|
|
34401
34379
|
function extractGuardrailsList(response) {
|
|
34402
34380
|
if (Array.isArray(response)) return response;
|
|
34403
34381
|
if (Array.isArray(response.guardrails)) return response.guardrails;
|
|
34404
34382
|
if (Array.isArray(response.rules)) return response.rules;
|
|
34405
34383
|
return [];
|
|
34406
34384
|
}
|
|
34407
|
-
function resolveResumeIndex(resumeFromRaw) {
|
|
34408
|
-
if (!resumeFromRaw) {
|
|
34409
|
-
return { resumeFrom: null, resumeIndex: 0 };
|
|
34410
|
-
}
|
|
34411
|
-
const resumeFrom = String(resumeFromRaw).trim();
|
|
34412
|
-
const resumeIndex = SETUP_STEP_ORDER.indexOf(resumeFrom);
|
|
34413
|
-
if (resumeIndex < 0) {
|
|
34414
|
-
throw new FohError({
|
|
34415
|
-
step: "setup.resume",
|
|
34416
|
-
error: `Invalid --resume-from step: ${String(resumeFromRaw)}`,
|
|
34417
|
-
remediation: `Use one of: ${SETUP_STEP_ORDER.join(", ")}`
|
|
34418
|
-
});
|
|
34419
|
-
}
|
|
34420
|
-
return { resumeFrom, resumeIndex };
|
|
34421
|
-
}
|
|
34422
|
-
function nowIso() {
|
|
34423
|
-
return (/* @__PURE__ */ new Date()).toISOString();
|
|
34424
|
-
}
|
|
34425
|
-
function timedStepResult(result, startedAtIso, startedAtMs) {
|
|
34426
|
-
return {
|
|
34427
|
-
...result,
|
|
34428
|
-
started_at: startedAtIso,
|
|
34429
|
-
completed_at: nowIso(),
|
|
34430
|
-
duration_ms: Math.max(0, Date.now() - startedAtMs)
|
|
34431
|
-
};
|
|
34432
|
-
}
|
|
34433
|
-
function optionNameToFlag(key) {
|
|
34434
|
-
return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
|
|
34435
|
-
}
|
|
34436
|
-
function normalizeSetupPhoneMode(raw) {
|
|
34437
|
-
const value = String(raw || "purchase").trim().toLowerCase();
|
|
34438
|
-
if (value === "observe" || value === "skip" || value === "purchase") return value;
|
|
34439
|
-
throw new FohError({
|
|
34440
|
-
step: "setup.phone_mode",
|
|
34441
|
-
error: `Invalid --phone-mode "${String(raw)}"`,
|
|
34442
|
-
remediation: "Use one of: observe, skip, purchase.",
|
|
34443
|
-
reasonCode: "setup_invalid_phone_mode"
|
|
34444
|
-
});
|
|
34445
|
-
}
|
|
34446
|
-
function complianceSkipDetail(phoneMode) {
|
|
34447
|
-
return {
|
|
34448
|
-
reason_code: `compliance_skipped_phone_mode_${phoneMode}`,
|
|
34449
|
-
phone_mode: phoneMode,
|
|
34450
|
-
spend_policy: resolveCliSpendPolicy(),
|
|
34451
|
-
spend_class: "free",
|
|
34452
|
-
safe_to_retry: true,
|
|
34453
|
-
operator_note: "Compliance is only required before paid FOH-owned phone purchase."
|
|
34454
|
-
};
|
|
34455
|
-
}
|
|
34456
34385
|
function isMissingAgentTestsError(error2) {
|
|
34457
34386
|
if (!(error2 instanceof FohError)) return false;
|
|
34458
34387
|
if (error2.statusCode !== 404) return false;
|
|
@@ -34510,6 +34439,11 @@ async function rebaseEvalAgentDraftFromTemplate(params) {
|
|
|
34510
34439
|
draft_keys: Object.keys(draft).sort()
|
|
34511
34440
|
};
|
|
34512
34441
|
}
|
|
34442
|
+
|
|
34443
|
+
// src/commands/setup-missing-options.ts
|
|
34444
|
+
function optionNameToFlag(key) {
|
|
34445
|
+
return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
|
|
34446
|
+
}
|
|
34513
34447
|
function buildMissingOptionsPlan(missing, opts) {
|
|
34514
34448
|
const missingFlags = missing.map(optionNameToFlag);
|
|
34515
34449
|
const signInUrl = buildConsoleSignInUrl(resolveConsoleBaseUrl(opts.consoleUrl));
|
|
@@ -34559,20 +34493,137 @@ function emitMissingOptionsPlan(missing, opts) {
|
|
|
34559
34493
|
`);
|
|
34560
34494
|
}
|
|
34561
34495
|
}
|
|
34562
|
-
|
|
34563
|
-
|
|
34564
|
-
|
|
34565
|
-
|
|
34566
|
-
|
|
34567
|
-
|
|
34568
|
-
|
|
34569
|
-
|
|
34570
|
-
|
|
34571
|
-
|
|
34572
|
-
|
|
34573
|
-
|
|
34574
|
-
|
|
34575
|
-
|
|
34496
|
+
|
|
34497
|
+
// src/commands/setup-plan.ts
|
|
34498
|
+
var SETUP_STEP_ORDER = [
|
|
34499
|
+
"check_credentials",
|
|
34500
|
+
"check_org_access",
|
|
34501
|
+
"submit_compliance",
|
|
34502
|
+
"wait_compliance",
|
|
34503
|
+
"provision_phone",
|
|
34504
|
+
"create_agent",
|
|
34505
|
+
"validate_agent",
|
|
34506
|
+
"seed_guardrails",
|
|
34507
|
+
"ensure_widget",
|
|
34508
|
+
"set_widget_domains",
|
|
34509
|
+
"configure_voice",
|
|
34510
|
+
"run_smoke_test",
|
|
34511
|
+
"sim_certify_loop",
|
|
34512
|
+
"widget_smoke",
|
|
34513
|
+
"publish_agent",
|
|
34514
|
+
"emit_summary"
|
|
34515
|
+
];
|
|
34516
|
+
function resolveResumeIndex(resumeFromRaw) {
|
|
34517
|
+
if (!resumeFromRaw) {
|
|
34518
|
+
return { resumeFrom: null, resumeIndex: 0 };
|
|
34519
|
+
}
|
|
34520
|
+
const resumeFrom = String(resumeFromRaw).trim();
|
|
34521
|
+
const resumeIndex = SETUP_STEP_ORDER.indexOf(resumeFrom);
|
|
34522
|
+
if (resumeIndex < 0) {
|
|
34523
|
+
throw new FohError({
|
|
34524
|
+
step: "setup.resume",
|
|
34525
|
+
error: `Invalid --resume-from step: ${String(resumeFromRaw)}`,
|
|
34526
|
+
remediation: `Use one of: ${SETUP_STEP_ORDER.join(", ")}`
|
|
34527
|
+
});
|
|
34528
|
+
}
|
|
34529
|
+
return { resumeFrom, resumeIndex };
|
|
34530
|
+
}
|
|
34531
|
+
function nowIso() {
|
|
34532
|
+
return (/* @__PURE__ */ new Date()).toISOString();
|
|
34533
|
+
}
|
|
34534
|
+
function timedStepResult(result, startedAtIso, startedAtMs) {
|
|
34535
|
+
return {
|
|
34536
|
+
...result,
|
|
34537
|
+
started_at: startedAtIso,
|
|
34538
|
+
completed_at: nowIso(),
|
|
34539
|
+
duration_ms: Math.max(0, Date.now() - startedAtMs)
|
|
34540
|
+
};
|
|
34541
|
+
}
|
|
34542
|
+
function normalizeSetupPhoneMode(raw) {
|
|
34543
|
+
const value = String(raw || "purchase").trim().toLowerCase();
|
|
34544
|
+
if (value === "observe" || value === "skip" || value === "purchase") return value;
|
|
34545
|
+
throw new FohError({
|
|
34546
|
+
step: "setup.phone_mode",
|
|
34547
|
+
error: `Invalid --phone-mode "${String(raw)}"`,
|
|
34548
|
+
remediation: "Use one of: observe, skip, purchase.",
|
|
34549
|
+
reasonCode: "setup_invalid_phone_mode"
|
|
34550
|
+
});
|
|
34551
|
+
}
|
|
34552
|
+
function complianceSkipDetail(phoneMode) {
|
|
34553
|
+
return {
|
|
34554
|
+
reason_code: `compliance_skipped_phone_mode_${phoneMode}`,
|
|
34555
|
+
phone_mode: phoneMode,
|
|
34556
|
+
spend_policy: resolveCliSpendPolicy(),
|
|
34557
|
+
spend_class: "free",
|
|
34558
|
+
safe_to_retry: true,
|
|
34559
|
+
operator_note: "Compliance is only required before paid FOH-owned phone purchase."
|
|
34560
|
+
};
|
|
34561
|
+
}
|
|
34562
|
+
|
|
34563
|
+
// src/lib/signed-report.ts
|
|
34564
|
+
var import_crypto2 = require("crypto");
|
|
34565
|
+
var import_fs5 = require("fs");
|
|
34566
|
+
var import_path3 = require("path");
|
|
34567
|
+
function canonicalize(value) {
|
|
34568
|
+
if (value === null || value === void 0) return null;
|
|
34569
|
+
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
|
|
34570
|
+
if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
|
|
34571
|
+
if (typeof value === "object") {
|
|
34572
|
+
const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
|
|
34573
|
+
return Object.fromEntries(sortedEntries);
|
|
34574
|
+
}
|
|
34575
|
+
return String(value);
|
|
34576
|
+
}
|
|
34577
|
+
function stableStringify(value) {
|
|
34578
|
+
return JSON.stringify(canonicalize(value), null, 2) + "\n";
|
|
34579
|
+
}
|
|
34580
|
+
function sha256Hex(input) {
|
|
34581
|
+
return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
|
|
34582
|
+
}
|
|
34583
|
+
function signReport(reportPayload) {
|
|
34584
|
+
const canonical = stableStringify(reportPayload);
|
|
34585
|
+
return {
|
|
34586
|
+
...reportPayload,
|
|
34587
|
+
report_hash: {
|
|
34588
|
+
algorithm: "sha256",
|
|
34589
|
+
digest_hex: sha256Hex(canonical),
|
|
34590
|
+
canonicalization: "sorted-json-v1",
|
|
34591
|
+
verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
|
|
34592
|
+
}
|
|
34593
|
+
};
|
|
34594
|
+
}
|
|
34595
|
+
function writeSignedJsonArtifact(path2, value) {
|
|
34596
|
+
const absolutePath = (0, import_path3.resolve)(path2);
|
|
34597
|
+
(0, import_fs5.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
|
|
34598
|
+
(0, import_fs5.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
|
|
34599
|
+
return absolutePath;
|
|
34600
|
+
}
|
|
34601
|
+
|
|
34602
|
+
// src/commands/setup-report.ts
|
|
34603
|
+
function writeSetupRunReport(reportPayload, reportOut) {
|
|
34604
|
+
const signed = signReport(reportPayload);
|
|
34605
|
+
const reportPath = reportOut ? writeSignedJsonArtifact(String(reportOut), signed) : null;
|
|
34606
|
+
return {
|
|
34607
|
+
reportHash: signed.report_hash.digest_hex,
|
|
34608
|
+
reportPath
|
|
34609
|
+
};
|
|
34610
|
+
}
|
|
34611
|
+
|
|
34612
|
+
// src/commands/setup.ts
|
|
34613
|
+
function registerSetup(program3) {
|
|
34614
|
+
program3.command("setup").description("Fully provision a new agency customer in one command").option("--org <id>", "Org ID (default: stored org from foh org use)").option("--agent-template <id>", "Agent template ID (e.g. viewing-request)").option("--agent-name <name>", "Name for the new agent").option("--phone-country <cc>", "Phone number country code", "GB").option("--phone-area-code <code>", "Phone area code preference").option("--phone-mode <mode>", "Phone path: observe, skip, or purchase", "purchase").option("--widget-domains <domains>", "Comma-separated widget domain allowlist").option("--voice-provider <p>", "TTS provider: openai, azure, twilio").option("--voice-id <id>", "Voice ID").option("--skip-compliance", "Skip compliance submission and wait").option("--skip-voice", "Skip voice configuration").option("--skip-tests", "Skip smoke tests").option("--cert-mode <m>", "Simulation cert mode: quick, full, stress", "quick").option("--cert-adaptive-runs <n>", "Adaptive run count for certification loop", "30").option("--cert-max-improvement-rounds <n>", "Max instruction improvement rounds in cert loop (0-5)", "1").option("--resume-from <step>", `Resume from a setup step (${SETUP_STEP_ORDER.join(", ")})`).option("--report-out <path>", "Optional path to write signed setup run report JSON").option("--dry-run", "Print all steps that would run without making any API calls").option("--api-url <url>", "API base URL override").option("--console-url <url>", "Console sign-in URL override").option("--json", "Output as JSON").action(async (opts) => {
|
|
34615
|
+
if (!opts.org) {
|
|
34616
|
+
try {
|
|
34617
|
+
opts.org = loadCredentials(opts.apiUrl).orgId;
|
|
34618
|
+
} catch {
|
|
34619
|
+
}
|
|
34620
|
+
}
|
|
34621
|
+
const missing = ["org", "agentTemplate", "agentName"].filter((key) => !opts[key]);
|
|
34622
|
+
if (missing.length) {
|
|
34623
|
+
emitMissingOptionsPlan(missing, { json: opts.json, consoleUrl: opts.consoleUrl });
|
|
34624
|
+
markCommandFailed(1);
|
|
34625
|
+
return;
|
|
34626
|
+
}
|
|
34576
34627
|
let resumeState;
|
|
34577
34628
|
try {
|
|
34578
34629
|
resumeState = resolveResumeIndex(opts.resumeFrom);
|
|
@@ -34644,12 +34695,7 @@ function registerSetup(program3) {
|
|
|
34644
34695
|
steps: completed,
|
|
34645
34696
|
failure: failure ?? null
|
|
34646
34697
|
};
|
|
34647
|
-
|
|
34648
|
-
const reportPath = opts.reportOut ? writeSignedJsonArtifact(String(opts.reportOut), signed) : null;
|
|
34649
|
-
return {
|
|
34650
|
-
reportHash: signed.report_hash.digest_hex,
|
|
34651
|
-
reportPath
|
|
34652
|
-
};
|
|
34698
|
+
return writeSetupRunReport(reportPayload, opts.reportOut);
|
|
34653
34699
|
};
|
|
34654
34700
|
const shouldResumeSkip = (stepName) => {
|
|
34655
34701
|
if (!resumeState.resumeFrom) return false;
|
|
@@ -35140,8 +35186,8 @@ function registerSetup(program3) {
|
|
|
35140
35186
|
}
|
|
35141
35187
|
try {
|
|
35142
35188
|
const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
|
|
35143
|
-
const { writeFileSync:
|
|
35144
|
-
|
|
35189
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
35190
|
+
writeFileSync13(
|
|
35145
35191
|
"tenant.yaml",
|
|
35146
35192
|
`# tenant.yaml - Front Of House agent manifest
|
|
35147
35193
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -35311,8 +35357,8 @@ function registerSim(program3) {
|
|
|
35311
35357
|
}
|
|
35312
35358
|
const cert = response.certificate;
|
|
35313
35359
|
if (opts.out) {
|
|
35314
|
-
const { writeFileSync:
|
|
35315
|
-
|
|
35360
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
35361
|
+
writeFileSync13(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
|
|
35316
35362
|
process.stderr.write(` Certificate written to ${opts.out}
|
|
35317
35363
|
`);
|
|
35318
35364
|
}
|
|
@@ -35362,8 +35408,8 @@ function registerSim(program3) {
|
|
|
35362
35408
|
});
|
|
35363
35409
|
}
|
|
35364
35410
|
if (opts.out) {
|
|
35365
|
-
const { writeFileSync:
|
|
35366
|
-
|
|
35411
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
35412
|
+
writeFileSync13(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
|
|
35367
35413
|
process.stderr.write(` Final certificate written to ${opts.out}
|
|
35368
35414
|
`);
|
|
35369
35415
|
}
|
|
@@ -38967,9 +39013,9 @@ function registerUpdate(program3) {
|
|
|
38967
39013
|
}
|
|
38968
39014
|
|
|
38969
39015
|
// src/commands/eval.ts
|
|
38970
|
-
var
|
|
38971
|
-
var
|
|
38972
|
-
var
|
|
39016
|
+
var import_fs19 = require("fs");
|
|
39017
|
+
var import_path18 = require("path");
|
|
39018
|
+
var import_child_process6 = require("child_process");
|
|
38973
39019
|
|
|
38974
39020
|
// src/lib/external-agent-artifact-safety.ts
|
|
38975
39021
|
var import_fs12 = require("fs");
|
|
@@ -39303,63 +39349,13 @@ function readCommandRecords(runDir) {
|
|
|
39303
39349
|
}
|
|
39304
39350
|
|
|
39305
39351
|
// src/lib/external-agent-executor.ts
|
|
39306
|
-
var
|
|
39352
|
+
var import_fs18 = require("fs");
|
|
39307
39353
|
var import_os2 = require("os");
|
|
39308
|
-
var
|
|
39309
|
-
var
|
|
39354
|
+
var import_path17 = require("path");
|
|
39355
|
+
var import_child_process5 = require("child_process");
|
|
39310
39356
|
|
|
39311
|
-
// src/lib/external-agent-
|
|
39312
|
-
var import_fs14 = require("fs");
|
|
39357
|
+
// src/lib/external-agent-executor-env.ts
|
|
39313
39358
|
var import_path12 = require("path");
|
|
39314
|
-
var EXTERNAL_AGENT_METADATA_FILENAMES = [
|
|
39315
|
-
"external-agent-metadata.json",
|
|
39316
|
-
"agent-metadata.json"
|
|
39317
|
-
];
|
|
39318
|
-
var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
|
|
39319
|
-
function normalizeDocUrl(value) {
|
|
39320
|
-
const raw = typeof value === "string" ? value : value && typeof value === "object" && typeof value.url === "string" ? String(value.url) : "";
|
|
39321
|
-
const url2 = raw.trim().replace(/[.?!:]+$/g, "");
|
|
39322
|
-
if (!PUBLIC_DOC_URL_RE.test(url2)) return null;
|
|
39323
|
-
return url2;
|
|
39324
|
-
}
|
|
39325
|
-
function collectDocsFrom(value, docs) {
|
|
39326
|
-
if (Array.isArray(value)) {
|
|
39327
|
-
for (const entry of value) {
|
|
39328
|
-
const url2 = normalizeDocUrl(entry);
|
|
39329
|
-
if (url2) docs.add(url2);
|
|
39330
|
-
}
|
|
39331
|
-
}
|
|
39332
|
-
}
|
|
39333
|
-
function readExternalAgentMetadata(runDir) {
|
|
39334
|
-
for (const filename of EXTERNAL_AGENT_METADATA_FILENAMES) {
|
|
39335
|
-
const path2 = (0, import_path12.join)(runDir, filename);
|
|
39336
|
-
if (!(0, import_fs14.existsSync)(path2)) continue;
|
|
39337
|
-
try {
|
|
39338
|
-
const parsed = JSON.parse((0, import_fs14.readFileSync)(path2, "utf8"));
|
|
39339
|
-
const docs = /* @__PURE__ */ new Set();
|
|
39340
|
-
collectDocsFrom(parsed.docs_pages_used, docs);
|
|
39341
|
-
collectDocsFrom(parsed.docs_pages_observed, docs);
|
|
39342
|
-
collectDocsFrom(parsed.docs_used, docs);
|
|
39343
|
-
collectDocsFrom(parsed.public_docs_used, docs);
|
|
39344
|
-
return {
|
|
39345
|
-
path: filename,
|
|
39346
|
-
docs_pages_used: Array.from(docs).sort()
|
|
39347
|
-
};
|
|
39348
|
-
} catch {
|
|
39349
|
-
return {
|
|
39350
|
-
path: filename,
|
|
39351
|
-
docs_pages_used: []
|
|
39352
|
-
};
|
|
39353
|
-
}
|
|
39354
|
-
}
|
|
39355
|
-
return {
|
|
39356
|
-
path: null,
|
|
39357
|
-
docs_pages_used: []
|
|
39358
|
-
};
|
|
39359
|
-
}
|
|
39360
|
-
|
|
39361
|
-
// src/lib/external-agent-executor.ts
|
|
39362
|
-
var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
|
|
39363
39359
|
var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
|
|
39364
39360
|
"SUPABASE_",
|
|
39365
39361
|
"DATABASE_",
|
|
@@ -39403,15 +39399,6 @@ var EXTERNAL_AGENT_EVAL_AUTH_ENV_MAP = {
|
|
|
39403
39399
|
FOH_EXTERNAL_AGENT_EVAL_API_URL: "FOH_API_URL",
|
|
39404
39400
|
FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT: "FOH_TOKEN_EXPIRES_AT"
|
|
39405
39401
|
};
|
|
39406
|
-
var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
|
|
39407
|
-
var ExternalAgentExecutorError = class extends Error {
|
|
39408
|
-
reasonCode;
|
|
39409
|
-
constructor(reasonCode, message) {
|
|
39410
|
-
super(message);
|
|
39411
|
-
this.name = "ExternalAgentExecutorError";
|
|
39412
|
-
this.reasonCode = reasonCode;
|
|
39413
|
-
}
|
|
39414
|
-
};
|
|
39415
39402
|
function isDeniedEnvKey(key) {
|
|
39416
39403
|
const upper = key.toUpperCase();
|
|
39417
39404
|
if (CODEX_EXECUTOR_DENIED_ENV_NAMES.some((name) => upper === name)) return true;
|
|
@@ -39432,7 +39419,7 @@ function buildCodexExecutorEnv(input) {
|
|
|
39432
39419
|
env[childKey] = value;
|
|
39433
39420
|
}
|
|
39434
39421
|
}
|
|
39435
|
-
env.npm_config_cache = (0,
|
|
39422
|
+
env.npm_config_cache = (0, import_path12.join)((0, import_path12.dirname)(input.runDir), ".npm-cache");
|
|
39436
39423
|
env.npm_config_prefer_online = "true";
|
|
39437
39424
|
env.npm_config_update_notifier = "false";
|
|
39438
39425
|
env.npm_config_yes = "true";
|
|
@@ -39442,21 +39429,384 @@ function buildCodexExecutorEnv(input) {
|
|
|
39442
39429
|
env.FOH_CLI_SUPPRESS_BANNER = "1";
|
|
39443
39430
|
return env;
|
|
39444
39431
|
}
|
|
39445
|
-
|
|
39432
|
+
|
|
39433
|
+
// src/lib/external-agent-executor-artifacts.ts
|
|
39434
|
+
var import_fs15 = require("fs");
|
|
39435
|
+
var import_path14 = require("path");
|
|
39436
|
+
|
|
39437
|
+
// src/lib/external-agent-metadata.ts
|
|
39438
|
+
var import_fs14 = require("fs");
|
|
39439
|
+
var import_path13 = require("path");
|
|
39440
|
+
var EXTERNAL_AGENT_METADATA_FILENAMES = [
|
|
39441
|
+
"external-agent-metadata.json",
|
|
39442
|
+
"agent-metadata.json"
|
|
39443
|
+
];
|
|
39444
|
+
var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
|
|
39445
|
+
function normalizeDocUrl(value) {
|
|
39446
|
+
const raw = typeof value === "string" ? value : value && typeof value === "object" && typeof value.url === "string" ? String(value.url) : "";
|
|
39447
|
+
const url2 = raw.trim().replace(/[.?!:]+$/g, "");
|
|
39448
|
+
if (!PUBLIC_DOC_URL_RE.test(url2)) return null;
|
|
39449
|
+
return url2;
|
|
39450
|
+
}
|
|
39451
|
+
function collectDocsFrom(value, docs) {
|
|
39452
|
+
if (Array.isArray(value)) {
|
|
39453
|
+
for (const entry of value) {
|
|
39454
|
+
const url2 = normalizeDocUrl(entry);
|
|
39455
|
+
if (url2) docs.add(url2);
|
|
39456
|
+
}
|
|
39457
|
+
}
|
|
39458
|
+
}
|
|
39459
|
+
function readExternalAgentMetadata(runDir) {
|
|
39460
|
+
for (const filename of EXTERNAL_AGENT_METADATA_FILENAMES) {
|
|
39461
|
+
const path2 = (0, import_path13.join)(runDir, filename);
|
|
39462
|
+
if (!(0, import_fs14.existsSync)(path2)) continue;
|
|
39463
|
+
try {
|
|
39464
|
+
const parsed = JSON.parse((0, import_fs14.readFileSync)(path2, "utf8"));
|
|
39465
|
+
const docs = /* @__PURE__ */ new Set();
|
|
39466
|
+
collectDocsFrom(parsed.docs_pages_used, docs);
|
|
39467
|
+
collectDocsFrom(parsed.docs_pages_observed, docs);
|
|
39468
|
+
collectDocsFrom(parsed.docs_used, docs);
|
|
39469
|
+
collectDocsFrom(parsed.public_docs_used, docs);
|
|
39470
|
+
return {
|
|
39471
|
+
path: filename,
|
|
39472
|
+
docs_pages_used: Array.from(docs).sort()
|
|
39473
|
+
};
|
|
39474
|
+
} catch {
|
|
39475
|
+
return {
|
|
39476
|
+
path: filename,
|
|
39477
|
+
docs_pages_used: []
|
|
39478
|
+
};
|
|
39479
|
+
}
|
|
39480
|
+
}
|
|
39446
39481
|
return {
|
|
39447
|
-
|
|
39448
|
-
|
|
39449
|
-
apiUrl: String(env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_API_URL || DEFAULT_FOH_API_URL2).trim() || DEFAULT_FOH_API_URL2,
|
|
39450
|
-
expiresAt: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT || "").trim()
|
|
39482
|
+
path: null,
|
|
39483
|
+
docs_pages_used: []
|
|
39451
39484
|
};
|
|
39452
39485
|
}
|
|
39453
|
-
|
|
39454
|
-
|
|
39455
|
-
|
|
39456
|
-
);
|
|
39486
|
+
|
|
39487
|
+
// src/lib/external-agent-executor-artifacts.ts
|
|
39488
|
+
function redactArtifactFile(path2, input = {}) {
|
|
39489
|
+
if (!(0, import_fs15.existsSync)(path2)) return;
|
|
39490
|
+
const original = (0, import_fs15.readFileSync)(path2, "utf8");
|
|
39491
|
+
const redacted = redactExternalAgentArtifactText(original, input);
|
|
39492
|
+
if (redacted !== original) (0, import_fs15.writeFileSync)(path2, redacted, "utf8");
|
|
39457
39493
|
}
|
|
39458
|
-
|
|
39459
|
-
|
|
39494
|
+
function redactExternalAgentOutputArtifacts(run, input = {}) {
|
|
39495
|
+
redactArtifactFile(run.outputs.jsonl, input);
|
|
39496
|
+
redactArtifactFile(run.outputs.last_message, input);
|
|
39497
|
+
redactArtifactFile(run.outputs.stderr, input);
|
|
39498
|
+
redactArtifactFile((0, import_path14.join)(run.run_dir, "commands.ndjson"), input);
|
|
39499
|
+
if (!(0, import_fs15.existsSync)(run.run_dir)) return;
|
|
39500
|
+
for (const name of (0, import_fs15.readdirSync)(run.run_dir)) {
|
|
39501
|
+
if (name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")) {
|
|
39502
|
+
redactArtifactFile((0, import_path14.join)(run.run_dir, name), input);
|
|
39503
|
+
}
|
|
39504
|
+
}
|
|
39505
|
+
}
|
|
39506
|
+
function copyExternalAgentCommandCaptureArtifacts(input) {
|
|
39507
|
+
const commandLog = (0, import_path14.join)(input.captureDir, "commands.ndjson");
|
|
39508
|
+
if ((0, import_fs15.existsSync)(commandLog)) {
|
|
39509
|
+
(0, import_fs15.writeFileSync)((0, import_path14.join)(input.runDir, "commands.ndjson"), (0, import_fs15.readFileSync)(commandLog, "utf8"), "utf8");
|
|
39510
|
+
}
|
|
39511
|
+
for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
|
|
39512
|
+
if (name.startsWith("command-output-cmd_")) {
|
|
39513
|
+
(0, import_fs15.copyFileSync)((0, import_path14.join)(input.captureDir, name), (0, import_path14.join)(input.runDir, name));
|
|
39514
|
+
} else if (EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
|
|
39515
|
+
(0, import_fs15.copyFileSync)((0, import_path14.join)(input.captureDir, name), (0, import_path14.join)(input.runDir, name));
|
|
39516
|
+
}
|
|
39517
|
+
}
|
|
39518
|
+
}
|
|
39519
|
+
|
|
39520
|
+
// src/lib/external-agent-executor-classification.ts
|
|
39521
|
+
var import_fs16 = require("fs");
|
|
39522
|
+
var import_path15 = require("path");
|
|
39523
|
+
function proofArtifactPasses(runDir) {
|
|
39524
|
+
const proofPath = (0, import_path15.join)(runDir, "proof.json");
|
|
39525
|
+
if (!(0, import_fs16.existsSync)(proofPath)) return false;
|
|
39526
|
+
try {
|
|
39527
|
+
const parsed = JSON.parse((0, import_fs16.readFileSync)(proofPath, "utf8"));
|
|
39528
|
+
return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
|
|
39529
|
+
} catch {
|
|
39530
|
+
return false;
|
|
39531
|
+
}
|
|
39532
|
+
}
|
|
39533
|
+
function readIfExists(path2) {
|
|
39534
|
+
return (0, import_fs16.existsSync)(path2) ? (0, import_fs16.readFileSync)(path2, "utf8") : "";
|
|
39535
|
+
}
|
|
39536
|
+
function relativeArtifactName(path2) {
|
|
39537
|
+
return (0, import_path15.basename)(path2);
|
|
39538
|
+
}
|
|
39539
|
+
function externalAgentSummaryCommand(root) {
|
|
39540
|
+
return [
|
|
39541
|
+
"node",
|
|
39542
|
+
"scripts/summarize-external-agent-runs.mjs",
|
|
39543
|
+
"--root",
|
|
39544
|
+
quoteShellArg(root),
|
|
39545
|
+
"--out",
|
|
39546
|
+
quoteShellArg((0, import_path15.join)(root, "latest-summary.json")),
|
|
39547
|
+
"--report",
|
|
39548
|
+
quoteShellArg((0, import_path15.join)(root, "summary.report.json"))
|
|
39549
|
+
].join(" ");
|
|
39550
|
+
}
|
|
39551
|
+
function quoteShellArg(value) {
|
|
39552
|
+
const text = String(value);
|
|
39553
|
+
if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
|
|
39554
|
+
return `"${text.replace(/(["$`])/g, "\\$1")}"`;
|
|
39555
|
+
}
|
|
39556
|
+
function classifyExternalAgentRun(input) {
|
|
39557
|
+
if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
|
|
39558
|
+
if (!input.artifactSafetyOk) return { status: "fail", reasonCode: "external_agent_artifact_safety_blocked" };
|
|
39559
|
+
const completedCommands = readCommandRecords(input.run.run_dir).filter((record2) => record2.phase === "completed");
|
|
39560
|
+
const observedVersions = completedCommands.map((record2) => String(record2.cli_version || "").trim()).filter((version2) => /^\d+\.\d+\.\d+$/.test(version2));
|
|
39561
|
+
if (observedVersions.some((version2) => version2 !== CLI_VERSION)) {
|
|
39562
|
+
return { status: "hold", reasonCode: "external_agent_cli_version_drift" };
|
|
39563
|
+
}
|
|
39564
|
+
const commandReasonCodes = completedCommands.flatMap((record2) => [
|
|
39565
|
+
String(record2.reason_code || ""),
|
|
39566
|
+
...Array.isArray(record2.check_reason_codes) ? record2.check_reason_codes.map((code) => String(code || "")) : []
|
|
39567
|
+
]).filter(Boolean);
|
|
39568
|
+
const hasCommandReason = (pattern) => commandReasonCodes.some((reason) => pattern.test(reason));
|
|
39569
|
+
if (hasCommandReason(new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i"))) {
|
|
39570
|
+
return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
|
|
39571
|
+
}
|
|
39572
|
+
if (hasCommandReason(/provider_capacity_blocked/i)) {
|
|
39573
|
+
return { status: "hold", reasonCode: "provider_capacity_blocked" };
|
|
39574
|
+
}
|
|
39575
|
+
if (hasCommandReason(/byon_voice_number_not_configured/i)) {
|
|
39576
|
+
return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
|
|
39577
|
+
}
|
|
39578
|
+
if (hasCommandReason(/contact_phone_provisioning_failed/i)) {
|
|
39579
|
+
return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
|
|
39580
|
+
}
|
|
39581
|
+
if (hasCommandReason(/voice_contact_expected_no_spend_hold/i)) {
|
|
39582
|
+
return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
|
|
39583
|
+
}
|
|
39584
|
+
if (hasCommandReason(/contact_phone_missing/i)) {
|
|
39585
|
+
return { status: "hold", reasonCode: "voice_contact_phone_missing" };
|
|
39586
|
+
}
|
|
39587
|
+
if (hasCommandReason(/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i)) {
|
|
39588
|
+
return { status: "hold", reasonCode: "simulation_certification_failed" };
|
|
39589
|
+
}
|
|
39590
|
+
if (hasCommandReason(/proof_held/i)) {
|
|
39591
|
+
return { status: "hold", reasonCode: "external_agent_proof_held" };
|
|
39592
|
+
}
|
|
39593
|
+
if (hasCommandReason(/agent_limit_reached/i)) {
|
|
39594
|
+
return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
|
|
39595
|
+
}
|
|
39596
|
+
const lastMessage = readIfExists(input.run.outputs.last_message);
|
|
39597
|
+
const stderr = readIfExists(input.run.outputs.stderr);
|
|
39598
|
+
const combined = `${lastMessage}
|
|
39599
|
+
${stderr}`;
|
|
39600
|
+
if (/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i.test(combined)) {
|
|
39601
|
+
return { status: "fail", reasonCode: "private_repo_assumption_detected" };
|
|
39602
|
+
}
|
|
39603
|
+
if (/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i.test(combined)) {
|
|
39604
|
+
return { status: "hold", reasonCode: "codex_exec_policy_blocked" };
|
|
39605
|
+
}
|
|
39606
|
+
if (/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i.test(combined)) {
|
|
39607
|
+
return { status: "hold", reasonCode: "codex_sandbox_exec_blocked" };
|
|
39608
|
+
}
|
|
39609
|
+
if (/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i.test(combined)) {
|
|
39610
|
+
return { status: "hold", reasonCode: "codex_network_dns_blocked" };
|
|
39611
|
+
}
|
|
39612
|
+
if (new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i").test(combined)) {
|
|
39613
|
+
return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
|
|
39614
|
+
}
|
|
39615
|
+
if (/provider_capacity_blocked/i.test(combined)) {
|
|
39616
|
+
return { status: "hold", reasonCode: "provider_capacity_blocked" };
|
|
39617
|
+
}
|
|
39618
|
+
if (/byon_voice_number_not_configured/i.test(combined)) {
|
|
39619
|
+
return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
|
|
39620
|
+
}
|
|
39621
|
+
if (/contact_phone_provisioning_failed/i.test(combined)) {
|
|
39622
|
+
return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
|
|
39623
|
+
}
|
|
39624
|
+
if (/voice_contact_expected_no_spend_hold/i.test(combined)) {
|
|
39625
|
+
return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
|
|
39626
|
+
}
|
|
39627
|
+
if (/contact_phone_missing/i.test(combined)) {
|
|
39628
|
+
return { status: "hold", reasonCode: "voice_contact_phone_missing" };
|
|
39629
|
+
}
|
|
39630
|
+
if (/simulation_certification_failed/i.test(combined)) {
|
|
39631
|
+
return { status: "hold", reasonCode: "simulation_certification_failed" };
|
|
39632
|
+
}
|
|
39633
|
+
if (/proof_held/i.test(combined)) {
|
|
39634
|
+
return { status: "hold", reasonCode: "external_agent_proof_held" };
|
|
39635
|
+
}
|
|
39636
|
+
if (/agent_limit_reached/i.test(combined)) {
|
|
39637
|
+
return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
|
|
39638
|
+
}
|
|
39639
|
+
if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(input.run.run_dir)) {
|
|
39640
|
+
return { status: "hold", reasonCode: "auth_browser_approval_required" };
|
|
39641
|
+
}
|
|
39642
|
+
if (input.exitCode !== 0) return { status: "hold", reasonCode: `${input.run.command}_runner_nonzero_exit` };
|
|
39643
|
+
if (proofArtifactPasses(input.run.run_dir)) return { status: "pass", reasonCode: null };
|
|
39644
|
+
return { status: "hold", reasonCode: "external_agent_proof_artifact_missing" };
|
|
39645
|
+
}
|
|
39646
|
+
function buildExecutedExternalAgentRunArtifact(input) {
|
|
39647
|
+
const commands = readCommandRecords(input.run.run_dir);
|
|
39648
|
+
const agentMetadata = readExternalAgentMetadata(input.run.run_dir);
|
|
39649
|
+
return {
|
|
39650
|
+
schema_version: "external_agent_run.v1",
|
|
39651
|
+
run_id: input.run.run_id,
|
|
39652
|
+
status: input.status,
|
|
39653
|
+
failure_reason_code: input.reasonCode,
|
|
39654
|
+
model_provider: input.run.model_provider,
|
|
39655
|
+
model_name: input.run.model_name,
|
|
39656
|
+
runner_model: input.run.runner_model,
|
|
39657
|
+
agent_shell: `${input.run.command}-exec`,
|
|
39658
|
+
workspace_type: "clean-no-repo-programmatic",
|
|
39659
|
+
prompt_version: input.run.prompt_version,
|
|
39660
|
+
prompt_path: "prompt.txt",
|
|
39661
|
+
started_at: input.startedAt,
|
|
39662
|
+
ended_at: input.endedAt,
|
|
39663
|
+
manual_intervention_count: 0,
|
|
39664
|
+
manual_interventions: [],
|
|
39665
|
+
environment: {
|
|
39666
|
+
os: process.platform,
|
|
39667
|
+
node_version: process.version,
|
|
39668
|
+
npm_version: null,
|
|
39669
|
+
foh_cli_version: CLI_VERSION,
|
|
39670
|
+
runner_exit_code: input.exitCode,
|
|
39671
|
+
runner_timed_out: input.timedOut,
|
|
39672
|
+
duration_ms: input.durationMs
|
|
39673
|
+
},
|
|
39674
|
+
public_entrypoints: [
|
|
39675
|
+
"https://frontofhouse.okii.uk",
|
|
39676
|
+
"https://frontofhouse.okii.uk/llms.txt",
|
|
39677
|
+
"https://frontofhouse.okii.uk/openapi.yaml",
|
|
39678
|
+
"npx --yes @f-o-h/cli@latest"
|
|
39679
|
+
],
|
|
39680
|
+
commands_run: commands.map((command) => command.command),
|
|
39681
|
+
docs_pages_used: agentMetadata.docs_pages_used,
|
|
39682
|
+
eval_state: {
|
|
39683
|
+
lifecycle_strategy: "reuse_existing_eval_state",
|
|
39684
|
+
org_reuse_expected: true,
|
|
39685
|
+
agent_reuse_expected: true,
|
|
39686
|
+
widget_reuse_expected: true,
|
|
39687
|
+
fresh_org_expected: false,
|
|
39688
|
+
ephemeral_org_expected: false,
|
|
39689
|
+
fresh_agent_expected: false,
|
|
39690
|
+
phone_purchase_expected: false,
|
|
39691
|
+
paid_resource_creation_expected: false,
|
|
39692
|
+
spend_policy_expected: NO_SPEND_POLICY,
|
|
39693
|
+
cleanup_expected: false,
|
|
39694
|
+
cleanup_strategy: "no_cleanup_for_reused_eval_state",
|
|
39695
|
+
paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
|
|
39696
|
+
rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
|
|
39697
|
+
},
|
|
39698
|
+
artifacts: {
|
|
39699
|
+
terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
|
|
39700
|
+
command_log: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
|
|
39701
|
+
proof_bundle: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
|
|
39702
|
+
replay_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
|
|
39703
|
+
knowledge_packet: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
|
|
39704
|
+
improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
|
|
39705
|
+
agent_metadata: agentMetadata.path,
|
|
39706
|
+
notes: (0, import_fs16.existsSync)((0, import_path15.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
|
|
39707
|
+
runner_last_message: relativeArtifactName(input.run.outputs.last_message),
|
|
39708
|
+
runner_stderr: relativeArtifactName(input.run.outputs.stderr),
|
|
39709
|
+
codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
|
|
39710
|
+
codex_stderr: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.stderr) : null,
|
|
39711
|
+
artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
|
|
39712
|
+
},
|
|
39713
|
+
summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
|
|
39714
|
+
next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))] : [
|
|
39715
|
+
"foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
|
|
39716
|
+
"foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
|
|
39717
|
+
externalAgentSummaryCommand((0, import_path15.dirname)(input.run.run_dir))
|
|
39718
|
+
]
|
|
39719
|
+
};
|
|
39720
|
+
}
|
|
39721
|
+
|
|
39722
|
+
// src/lib/external-agent-runner-execution.ts
|
|
39723
|
+
var import_child_process4 = require("child_process");
|
|
39724
|
+
var import_fs17 = require("fs");
|
|
39725
|
+
var import_path16 = require("path");
|
|
39726
|
+
function buildCommandInvocation(command, args) {
|
|
39727
|
+
if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
|
|
39728
|
+
const binDir = (0, import_path16.dirname)(command);
|
|
39729
|
+
const codexEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
|
|
39730
|
+
if ((0, import_fs17.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
|
|
39731
|
+
const geminiEntrypoint = (0, import_path16.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
|
|
39732
|
+
if ((0, import_fs17.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
|
|
39733
|
+
}
|
|
39734
|
+
return { command, args };
|
|
39735
|
+
}
|
|
39736
|
+
function spawnExternalAgentRunner(input) {
|
|
39737
|
+
return new Promise((resolveRun) => {
|
|
39738
|
+
const started = Date.now();
|
|
39739
|
+
const commandInvocation = buildCommandInvocation(input.command, input.args);
|
|
39740
|
+
const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
|
|
39741
|
+
cwd: input.cwd,
|
|
39742
|
+
env: input.env,
|
|
39743
|
+
shell: false,
|
|
39744
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
39745
|
+
windowsHide: true
|
|
39746
|
+
});
|
|
39747
|
+
const stdout = (0, import_fs17.createWriteStream)(input.stdoutPath, { flags: "w" });
|
|
39748
|
+
const stderr = (0, import_fs17.createWriteStream)(input.stderrPath, { flags: "w" });
|
|
39749
|
+
child.stdout.pipe(stdout);
|
|
39750
|
+
child.stderr.pipe(stderr);
|
|
39751
|
+
child.stdin.end(input.prompt);
|
|
39752
|
+
let timedOut = false;
|
|
39753
|
+
const timer = setTimeout(() => {
|
|
39754
|
+
timedOut = true;
|
|
39755
|
+
if (child.pid && process.platform === "win32") {
|
|
39756
|
+
(0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
|
|
39757
|
+
} else {
|
|
39758
|
+
child.kill("SIGKILL");
|
|
39759
|
+
}
|
|
39760
|
+
}, input.timeoutMs);
|
|
39761
|
+
child.on("close", (exitCode) => {
|
|
39762
|
+
clearTimeout(timer);
|
|
39763
|
+
stdout.end();
|
|
39764
|
+
stderr.end();
|
|
39765
|
+
resolveRun({
|
|
39766
|
+
exitCode,
|
|
39767
|
+
timedOut,
|
|
39768
|
+
durationMs: Date.now() - started
|
|
39769
|
+
});
|
|
39770
|
+
});
|
|
39771
|
+
child.on("error", () => {
|
|
39772
|
+
clearTimeout(timer);
|
|
39773
|
+
stdout.end();
|
|
39774
|
+
stderr.end();
|
|
39775
|
+
resolveRun({
|
|
39776
|
+
exitCode: null,
|
|
39777
|
+
timedOut,
|
|
39778
|
+
durationMs: Date.now() - started
|
|
39779
|
+
});
|
|
39780
|
+
});
|
|
39781
|
+
});
|
|
39782
|
+
}
|
|
39783
|
+
|
|
39784
|
+
// src/lib/external-agent-executor.ts
|
|
39785
|
+
var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
|
|
39786
|
+
var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
|
|
39787
|
+
var ExternalAgentExecutorError = class extends Error {
|
|
39788
|
+
reasonCode;
|
|
39789
|
+
constructor(reasonCode, message) {
|
|
39790
|
+
super(message);
|
|
39791
|
+
this.name = "ExternalAgentExecutorError";
|
|
39792
|
+
this.reasonCode = reasonCode;
|
|
39793
|
+
}
|
|
39794
|
+
};
|
|
39795
|
+
function readExternalAgentEvalAuthEnv(env = process.env) {
|
|
39796
|
+
return {
|
|
39797
|
+
token: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || "").trim(),
|
|
39798
|
+
orgId: String(env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || "").trim(),
|
|
39799
|
+
apiUrl: String(env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_API_URL || DEFAULT_FOH_API_URL2).trim() || DEFAULT_FOH_API_URL2,
|
|
39800
|
+
expiresAt: String(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT || "").trim()
|
|
39801
|
+
};
|
|
39802
|
+
}
|
|
39803
|
+
function shouldRunExternalAgentEvalAuthPreflight(env = process.env) {
|
|
39804
|
+
return Boolean(
|
|
39805
|
+
env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT || env.FOH_SERVICE_TOKEN || env.FOH_ORG_ID || env.FOH_API_URL || env.FOH_TOKEN_EXPIRES_AT
|
|
39806
|
+
);
|
|
39807
|
+
}
|
|
39808
|
+
async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}) {
|
|
39809
|
+
const hasExplicitEvalAuth = Boolean(
|
|
39460
39810
|
env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT
|
|
39461
39811
|
);
|
|
39462
39812
|
if (!shouldRunExternalAgentEvalAuthPreflight(env) && !options.requireExplicitEvalAuth) return null;
|
|
@@ -39507,14 +39857,14 @@ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}
|
|
|
39507
39857
|
};
|
|
39508
39858
|
}
|
|
39509
39859
|
function normalizeForCompare(path2) {
|
|
39510
|
-
const resolved = (0,
|
|
39860
|
+
const resolved = (0, import_path17.resolve)(path2);
|
|
39511
39861
|
return process.platform === "win32" ? resolved.toLowerCase() : resolved;
|
|
39512
39862
|
}
|
|
39513
39863
|
function isPathInside(childPath, parentPath) {
|
|
39514
39864
|
const child = normalizeForCompare(childPath);
|
|
39515
39865
|
const parent = normalizeForCompare(parentPath);
|
|
39516
|
-
const rel = (0,
|
|
39517
|
-
return rel === "" || !!rel && !rel.startsWith("..") && !(0,
|
|
39866
|
+
const rel = (0, import_path17.relative)(parent, child);
|
|
39867
|
+
return rel === "" || !!rel && !rel.startsWith("..") && !(0, import_path17.isAbsolute)(rel);
|
|
39518
39868
|
}
|
|
39519
39869
|
function requireString(value, field) {
|
|
39520
39870
|
if (typeof value !== "string" || value.trim() === "") {
|
|
@@ -39523,10 +39873,10 @@ function requireString(value, field) {
|
|
|
39523
39873
|
return value;
|
|
39524
39874
|
}
|
|
39525
39875
|
function readBatch(batchPath) {
|
|
39526
|
-
if (!(0,
|
|
39876
|
+
if (!(0, import_fs18.existsSync)(batchPath)) {
|
|
39527
39877
|
throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
|
|
39528
39878
|
}
|
|
39529
|
-
const parsed = JSON.parse((0,
|
|
39879
|
+
const parsed = JSON.parse((0, import_fs18.readFileSync)(batchPath, "utf8"));
|
|
39530
39880
|
if (parsed.schema_version !== "external_agent_batch_plan.v1") {
|
|
39531
39881
|
throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
|
|
39532
39882
|
}
|
|
@@ -39541,47 +39891,30 @@ function defaultRunnerProbe(command, args) {
|
|
|
39541
39891
|
encoding: "utf8",
|
|
39542
39892
|
timeout: isGeminiHeadlessSmoke ? GEMINI_HEADLESS_PROBE_TIMEOUT_MS : void 0
|
|
39543
39893
|
};
|
|
39544
|
-
const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0,
|
|
39894
|
+
const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process5.spawnSync)(
|
|
39545
39895
|
"powershell.exe",
|
|
39546
39896
|
["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${[command, ...args].map(quotePowerShellArg).join(" ")}`],
|
|
39547
39897
|
spawnOptions
|
|
39548
|
-
) : (0,
|
|
39898
|
+
) : (0, import_child_process5.spawnSync)(command, args, spawnOptions);
|
|
39549
39899
|
return {
|
|
39550
39900
|
status: typeof result.status === "number" ? result.status : null,
|
|
39551
39901
|
stdout: String(result.stdout || ""),
|
|
39552
39902
|
stderr: String(result.stderr || ""),
|
|
39553
|
-
error: result.error
|
|
39554
|
-
};
|
|
39555
|
-
}
|
|
39556
|
-
function geminiCapacityUnavailable(text) {
|
|
39557
|
-
return /MODEL_CAPACITY_EXHAUSTED|RESOURCE_EXHAUSTED|No capacity available|rateLimitExceeded|exhausted your capacity|status 429/i.test(text);
|
|
39558
|
-
}
|
|
39559
|
-
function quotePowerShellArg(value) {
|
|
39560
|
-
return `'${value.replace(/'/g, "''")}'`;
|
|
39561
|
-
}
|
|
39562
|
-
function quoteShellArg(value) {
|
|
39563
|
-
const text = String(value);
|
|
39564
|
-
if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
|
|
39565
|
-
return `"${text.replace(/(["$`])/g, "\\$1")}"`;
|
|
39566
|
-
}
|
|
39567
|
-
function externalAgentSummaryCommand(root) {
|
|
39568
|
-
return [
|
|
39569
|
-
"node",
|
|
39570
|
-
"scripts/summarize-external-agent-runs.mjs",
|
|
39571
|
-
"--root",
|
|
39572
|
-
quoteShellArg(root),
|
|
39573
|
-
"--out",
|
|
39574
|
-
quoteShellArg((0, import_path13.join)(root, "latest-summary.json")),
|
|
39575
|
-
"--report",
|
|
39576
|
-
quoteShellArg((0, import_path13.join)(root, "summary.report.json"))
|
|
39577
|
-
].join(" ");
|
|
39903
|
+
error: result.error
|
|
39904
|
+
};
|
|
39905
|
+
}
|
|
39906
|
+
function geminiCapacityUnavailable(text) {
|
|
39907
|
+
return /MODEL_CAPACITY_EXHAUSTED|RESOURCE_EXHAUSTED|No capacity available|rateLimitExceeded|exhausted your capacity|status 429/i.test(text);
|
|
39908
|
+
}
|
|
39909
|
+
function quotePowerShellArg(value) {
|
|
39910
|
+
return `'${value.replace(/'/g, "''")}'`;
|
|
39578
39911
|
}
|
|
39579
39912
|
function resolveCodexProbeCommand() {
|
|
39580
39913
|
if (process.platform !== "win32") return "codex";
|
|
39581
39914
|
const appData = process.env.APPDATA;
|
|
39582
39915
|
if (appData) {
|
|
39583
|
-
const appDataShim = (0,
|
|
39584
|
-
if ((0,
|
|
39916
|
+
const appDataShim = (0, import_path17.join)(appData, "npm", "codex.cmd");
|
|
39917
|
+
if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
|
|
39585
39918
|
}
|
|
39586
39919
|
return "codex.cmd";
|
|
39587
39920
|
}
|
|
@@ -39592,8 +39925,8 @@ function resolveGeminiProbeCommand() {
|
|
|
39592
39925
|
if (process.platform !== "win32") return "gemini";
|
|
39593
39926
|
const appData = process.env.APPDATA;
|
|
39594
39927
|
if (appData) {
|
|
39595
|
-
const appDataShim = (0,
|
|
39596
|
-
if ((0,
|
|
39928
|
+
const appDataShim = (0, import_path17.join)(appData, "npm", "gemini.cmd");
|
|
39929
|
+
if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
|
|
39597
39930
|
}
|
|
39598
39931
|
return "gemini.cmd";
|
|
39599
39932
|
}
|
|
@@ -39864,13 +40197,34 @@ function safeRunId(value) {
|
|
|
39864
40197
|
return value.toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "run";
|
|
39865
40198
|
}
|
|
39866
40199
|
function resolveWorkspaceRoot(input) {
|
|
39867
|
-
if (input.workspaceRoot) return (0,
|
|
39868
|
-
const batchStem = (0,
|
|
39869
|
-
const repoStem = (0,
|
|
39870
|
-
return (0,
|
|
40200
|
+
if (input.workspaceRoot) return (0, import_path17.resolve)(input.workspaceRoot);
|
|
40201
|
+
const batchStem = (0, import_path17.basename)((0, import_path17.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
|
|
40202
|
+
const repoStem = (0, import_path17.basename)((0, import_path17.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
|
|
40203
|
+
return (0, import_path17.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
|
|
40204
|
+
}
|
|
40205
|
+
function findNearestGitRoot(startPath) {
|
|
40206
|
+
let current = (0, import_path17.resolve)(startPath);
|
|
40207
|
+
while (true) {
|
|
40208
|
+
if ((0, import_fs18.existsSync)((0, import_path17.join)(current, ".git"))) return current;
|
|
40209
|
+
const parent = (0, import_path17.dirname)(current);
|
|
40210
|
+
if (parent === current) return null;
|
|
40211
|
+
current = parent;
|
|
40212
|
+
}
|
|
40213
|
+
}
|
|
40214
|
+
function resolvePrivateRepoRoot(input) {
|
|
40215
|
+
if (input.explicitPrivateRepoRoot) {
|
|
40216
|
+
return { root: (0, import_path17.resolve)(input.explicitPrivateRepoRoot), explicit: true };
|
|
40217
|
+
}
|
|
40218
|
+
const cwd = (0, import_path17.resolve)(input.cwd || process.cwd());
|
|
40219
|
+
const gitRoot = findNearestGitRoot(cwd);
|
|
40220
|
+
if (gitRoot) return { root: gitRoot, explicit: false };
|
|
40221
|
+
return {
|
|
40222
|
+
root: (0, import_path17.join)(cwd, ".foh-no-private-repo-root-sentinel"),
|
|
40223
|
+
explicit: false
|
|
40224
|
+
};
|
|
39871
40225
|
}
|
|
39872
40226
|
function promptVersionFromPath(promptPath) {
|
|
39873
|
-
const raw = (0,
|
|
40227
|
+
const raw = (0, import_fs18.readFileSync)(promptPath, "utf8");
|
|
39874
40228
|
if (raw.includes("Do not assume access to the private source repository")) return "blank-setup.v1";
|
|
39875
40229
|
return "unknown";
|
|
39876
40230
|
}
|
|
@@ -39879,14 +40233,18 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39879
40233
|
if (runner !== "codex" && runner !== "gemini") {
|
|
39880
40234
|
throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
|
|
39881
40235
|
}
|
|
39882
|
-
const batchPath = (0,
|
|
40236
|
+
const batchPath = (0, import_path17.resolve)(options.batchPath);
|
|
39883
40237
|
const batch = readBatch(batchPath);
|
|
39884
40238
|
const runnerProbe = validateRunner(options, runner);
|
|
39885
40239
|
const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
|
|
39886
40240
|
const codexSandboxMode = normalizeCodexSandboxMode(options.codexSandboxMode);
|
|
39887
40241
|
const codexModel = runner === "codex" ? normalizeCodexModel(options.codexModel) : null;
|
|
39888
40242
|
const codexNetworkAccess = options.codexNetworkAccess === true;
|
|
39889
|
-
const
|
|
40243
|
+
const privateRepo = resolvePrivateRepoRoot({
|
|
40244
|
+
explicitPrivateRepoRoot: options.privateRepoRoot,
|
|
40245
|
+
cwd: options.cwd
|
|
40246
|
+
});
|
|
40247
|
+
const privateRepoRoot = privateRepo.root;
|
|
39890
40248
|
const workspaceRoot = resolveWorkspaceRoot({ batchPath, workspaceRoot: options.workspaceRoot, privateRepoRoot });
|
|
39891
40249
|
if (isPathInside(workspaceRoot, privateRepoRoot)) {
|
|
39892
40250
|
throw new ExternalAgentExecutorError(
|
|
@@ -39894,17 +40252,17 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39894
40252
|
`Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
|
|
39895
40253
|
);
|
|
39896
40254
|
}
|
|
39897
|
-
(0,
|
|
39898
|
-
const batchDir = (0,
|
|
40255
|
+
(0, import_fs18.mkdirSync)(workspaceRoot, { recursive: true });
|
|
40256
|
+
const batchDir = (0, import_path17.resolve)(String(batch.batch_dir || (0, import_path17.resolve)(batchPath, "..")));
|
|
39899
40257
|
const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
|
|
39900
40258
|
const runs = batch.runs.map((run) => {
|
|
39901
40259
|
const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
|
|
39902
|
-
const runDir = (0,
|
|
39903
|
-
const promptPath = (0,
|
|
39904
|
-
const workspaceDir = (0,
|
|
39905
|
-
(0,
|
|
39906
|
-
(0,
|
|
39907
|
-
(0,
|
|
40260
|
+
const runDir = (0, import_path17.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
|
|
40261
|
+
const promptPath = (0, import_path17.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
|
|
40262
|
+
const workspaceDir = (0, import_path17.join)(workspaceRoot, runId);
|
|
40263
|
+
(0, import_fs18.mkdirSync)(workspaceDir, { recursive: true });
|
|
40264
|
+
(0, import_fs18.writeFileSync)(
|
|
40265
|
+
(0, import_path17.join)(workspaceDir, "README.md"),
|
|
39908
40266
|
[
|
|
39909
40267
|
"# FOH External-Agent Workspace",
|
|
39910
40268
|
"",
|
|
@@ -39922,11 +40280,11 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39922
40280
|
});
|
|
39923
40281
|
const promptVersion = String(env[EXTERNAL_AGENT_PROMPT_VERSION_ENV] || "unknown");
|
|
39924
40282
|
const outputStem = runner === "gemini" ? "gemini" : "codex";
|
|
39925
|
-
const jsonlPath = (0,
|
|
39926
|
-
const lastMessagePath = (0,
|
|
39927
|
-
const stderrPath = (0,
|
|
39928
|
-
const runPath = (0,
|
|
39929
|
-
const artifactSafetyPath = (0,
|
|
40283
|
+
const jsonlPath = (0, import_path17.join)(runDir, `${outputStem}-exec.jsonl`);
|
|
40284
|
+
const lastMessagePath = (0, import_path17.join)(runDir, `${outputStem}-last-message.md`);
|
|
40285
|
+
const stderrPath = (0, import_path17.join)(runDir, `${outputStem}-stderr.txt`);
|
|
40286
|
+
const runPath = (0, import_path17.join)(runDir, "run.json");
|
|
40287
|
+
const artifactSafetyPath = (0, import_path17.join)(runDir, "artifact-safety.json");
|
|
39930
40288
|
const args = runner === "gemini" ? [
|
|
39931
40289
|
...runnerProbe.globalArgs,
|
|
39932
40290
|
...runnerProbe.execArgs
|
|
@@ -39977,7 +40335,7 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39977
40335
|
batch_path: batchPath,
|
|
39978
40336
|
batch_dir: batchDir,
|
|
39979
40337
|
private_repo_root: privateRepoRoot,
|
|
39980
|
-
private_repo_root_explicit:
|
|
40338
|
+
private_repo_root_explicit: privateRepo.explicit,
|
|
39981
40339
|
workspace_root: workspaceRoot,
|
|
39982
40340
|
timeout_minutes: timeoutMinutes,
|
|
39983
40341
|
safety: {
|
|
@@ -40017,281 +40375,12 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
40017
40375
|
};
|
|
40018
40376
|
}
|
|
40019
40377
|
function writeExternalAgentExecutorPlan(plan) {
|
|
40020
|
-
const path2 = (0,
|
|
40021
|
-
(0,
|
|
40022
|
-
(0,
|
|
40378
|
+
const path2 = (0, import_path17.join)(plan.batch_dir, "executor-plan.json");
|
|
40379
|
+
(0, import_fs18.mkdirSync)(plan.batch_dir, { recursive: true });
|
|
40380
|
+
(0, import_fs18.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
|
|
40023
40381
|
`, "utf8");
|
|
40024
40382
|
return path2;
|
|
40025
40383
|
}
|
|
40026
|
-
function proofArtifactPasses(runDir) {
|
|
40027
|
-
const proofPath = (0, import_path13.join)(runDir, "proof.json");
|
|
40028
|
-
if (!(0, import_fs15.existsSync)(proofPath)) return false;
|
|
40029
|
-
try {
|
|
40030
|
-
const parsed = JSON.parse((0, import_fs15.readFileSync)(proofPath, "utf8"));
|
|
40031
|
-
return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
|
|
40032
|
-
} catch {
|
|
40033
|
-
return false;
|
|
40034
|
-
}
|
|
40035
|
-
}
|
|
40036
|
-
function readIfExists(path2) {
|
|
40037
|
-
return (0, import_fs15.existsSync)(path2) ? (0, import_fs15.readFileSync)(path2, "utf8") : "";
|
|
40038
|
-
}
|
|
40039
|
-
function redactArtifactFile(path2, input = {}) {
|
|
40040
|
-
if (!(0, import_fs15.existsSync)(path2)) return;
|
|
40041
|
-
const original = (0, import_fs15.readFileSync)(path2, "utf8");
|
|
40042
|
-
const redacted = redactExternalAgentArtifactText(original, input);
|
|
40043
|
-
if (redacted !== original) (0, import_fs15.writeFileSync)(path2, redacted, "utf8");
|
|
40044
|
-
}
|
|
40045
|
-
function redactOutputArtifacts(run, input = {}) {
|
|
40046
|
-
redactArtifactFile(run.outputs.jsonl, input);
|
|
40047
|
-
redactArtifactFile(run.outputs.last_message, input);
|
|
40048
|
-
redactArtifactFile(run.outputs.stderr, input);
|
|
40049
|
-
redactArtifactFile((0, import_path13.join)(run.run_dir, "commands.ndjson"), input);
|
|
40050
|
-
if (!(0, import_fs15.existsSync)(run.run_dir)) return;
|
|
40051
|
-
for (const name of (0, import_fs15.readdirSync)(run.run_dir)) {
|
|
40052
|
-
if (name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")) {
|
|
40053
|
-
redactArtifactFile((0, import_path13.join)(run.run_dir, name), input);
|
|
40054
|
-
}
|
|
40055
|
-
}
|
|
40056
|
-
}
|
|
40057
|
-
function copyCommandCaptureArtifacts(input) {
|
|
40058
|
-
const commandLog = (0, import_path13.join)(input.captureDir, "commands.ndjson");
|
|
40059
|
-
if ((0, import_fs15.existsSync)(commandLog)) {
|
|
40060
|
-
(0, import_fs15.writeFileSync)((0, import_path13.join)(input.runDir, "commands.ndjson"), (0, import_fs15.readFileSync)(commandLog, "utf8"), "utf8");
|
|
40061
|
-
}
|
|
40062
|
-
for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
|
|
40063
|
-
if (name.startsWith("command-output-cmd_")) {
|
|
40064
|
-
(0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
|
|
40065
|
-
} else if (EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
|
|
40066
|
-
(0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
|
|
40067
|
-
}
|
|
40068
|
-
}
|
|
40069
|
-
}
|
|
40070
|
-
function relativeArtifactName(path2) {
|
|
40071
|
-
return (0, import_path13.basename)(path2);
|
|
40072
|
-
}
|
|
40073
|
-
function classifyRun(input) {
|
|
40074
|
-
if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
|
|
40075
|
-
if (!input.artifactSafetyOk) return { status: "fail", reasonCode: "external_agent_artifact_safety_blocked" };
|
|
40076
|
-
const completedCommands = readCommandRecords(input.run.run_dir).filter((record2) => record2.phase === "completed");
|
|
40077
|
-
const observedVersions = completedCommands.map((record2) => String(record2.cli_version || "").trim()).filter((version2) => /^\d+\.\d+\.\d+$/.test(version2));
|
|
40078
|
-
if (observedVersions.some((version2) => version2 !== CLI_VERSION)) {
|
|
40079
|
-
return { status: "hold", reasonCode: "external_agent_cli_version_drift" };
|
|
40080
|
-
}
|
|
40081
|
-
const commandReasonCodes = completedCommands.flatMap((record2) => [
|
|
40082
|
-
String(record2.reason_code || ""),
|
|
40083
|
-
...Array.isArray(record2.check_reason_codes) ? record2.check_reason_codes.map((code) => String(code || "")) : []
|
|
40084
|
-
]).filter(Boolean);
|
|
40085
|
-
const hasCommandReason = (pattern) => commandReasonCodes.some((reason) => pattern.test(reason));
|
|
40086
|
-
if (hasCommandReason(new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i"))) {
|
|
40087
|
-
return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
|
|
40088
|
-
}
|
|
40089
|
-
if (hasCommandReason(/provider_capacity_blocked/i)) {
|
|
40090
|
-
return { status: "hold", reasonCode: "provider_capacity_blocked" };
|
|
40091
|
-
}
|
|
40092
|
-
if (hasCommandReason(/byon_voice_number_not_configured/i)) {
|
|
40093
|
-
return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
|
|
40094
|
-
}
|
|
40095
|
-
if (hasCommandReason(/contact_phone_provisioning_failed/i)) {
|
|
40096
|
-
return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
|
|
40097
|
-
}
|
|
40098
|
-
if (hasCommandReason(/voice_contact_expected_no_spend_hold/i)) {
|
|
40099
|
-
return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
|
|
40100
|
-
}
|
|
40101
|
-
if (hasCommandReason(/contact_phone_missing/i)) {
|
|
40102
|
-
return { status: "hold", reasonCode: "voice_contact_phone_missing" };
|
|
40103
|
-
}
|
|
40104
|
-
if (hasCommandReason(/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i)) {
|
|
40105
|
-
return { status: "hold", reasonCode: "simulation_certification_failed" };
|
|
40106
|
-
}
|
|
40107
|
-
if (hasCommandReason(/proof_held/i)) {
|
|
40108
|
-
return { status: "hold", reasonCode: "external_agent_proof_held" };
|
|
40109
|
-
}
|
|
40110
|
-
if (hasCommandReason(/agent_limit_reached/i)) {
|
|
40111
|
-
return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
|
|
40112
|
-
}
|
|
40113
|
-
const lastMessage = readIfExists(input.run.outputs.last_message);
|
|
40114
|
-
const stderr = readIfExists(input.run.outputs.stderr);
|
|
40115
|
-
const combined = `${lastMessage}
|
|
40116
|
-
${stderr}`;
|
|
40117
|
-
if (/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i.test(combined)) {
|
|
40118
|
-
return { status: "fail", reasonCode: "private_repo_assumption_detected" };
|
|
40119
|
-
}
|
|
40120
|
-
if (/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i.test(combined)) {
|
|
40121
|
-
return { status: "hold", reasonCode: "codex_exec_policy_blocked" };
|
|
40122
|
-
}
|
|
40123
|
-
if (/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i.test(combined)) {
|
|
40124
|
-
return { status: "hold", reasonCode: "codex_sandbox_exec_blocked" };
|
|
40125
|
-
}
|
|
40126
|
-
if (/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i.test(combined)) {
|
|
40127
|
-
return { status: "hold", reasonCode: "codex_network_dns_blocked" };
|
|
40128
|
-
}
|
|
40129
|
-
if (new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i").test(combined)) {
|
|
40130
|
-
return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
|
|
40131
|
-
}
|
|
40132
|
-
if (/provider_capacity_blocked/i.test(combined)) {
|
|
40133
|
-
return { status: "hold", reasonCode: "provider_capacity_blocked" };
|
|
40134
|
-
}
|
|
40135
|
-
if (/byon_voice_number_not_configured/i.test(combined)) {
|
|
40136
|
-
return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
|
|
40137
|
-
}
|
|
40138
|
-
if (/contact_phone_provisioning_failed/i.test(combined)) {
|
|
40139
|
-
return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
|
|
40140
|
-
}
|
|
40141
|
-
if (/voice_contact_expected_no_spend_hold/i.test(combined)) {
|
|
40142
|
-
return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
|
|
40143
|
-
}
|
|
40144
|
-
if (/contact_phone_missing/i.test(combined)) {
|
|
40145
|
-
return { status: "hold", reasonCode: "voice_contact_phone_missing" };
|
|
40146
|
-
}
|
|
40147
|
-
if (/simulation_certification_failed/i.test(combined)) {
|
|
40148
|
-
return { status: "hold", reasonCode: "simulation_certification_failed" };
|
|
40149
|
-
}
|
|
40150
|
-
if (/proof_held/i.test(combined)) {
|
|
40151
|
-
return { status: "hold", reasonCode: "external_agent_proof_held" };
|
|
40152
|
-
}
|
|
40153
|
-
if (/agent_limit_reached/i.test(combined)) {
|
|
40154
|
-
return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
|
|
40155
|
-
}
|
|
40156
|
-
if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(input.run.run_dir)) {
|
|
40157
|
-
return { status: "hold", reasonCode: "auth_browser_approval_required" };
|
|
40158
|
-
}
|
|
40159
|
-
if (input.exitCode !== 0) return { status: "hold", reasonCode: `${input.run.command}_runner_nonzero_exit` };
|
|
40160
|
-
if (proofArtifactPasses(input.run.run_dir)) return { status: "pass", reasonCode: null };
|
|
40161
|
-
return { status: "hold", reasonCode: "external_agent_proof_artifact_missing" };
|
|
40162
|
-
}
|
|
40163
|
-
function buildExecutedRunArtifact(input) {
|
|
40164
|
-
const commands = readCommandRecords(input.run.run_dir);
|
|
40165
|
-
const agentMetadata = readExternalAgentMetadata(input.run.run_dir);
|
|
40166
|
-
return {
|
|
40167
|
-
schema_version: "external_agent_run.v1",
|
|
40168
|
-
run_id: input.run.run_id,
|
|
40169
|
-
status: input.status,
|
|
40170
|
-
failure_reason_code: input.reasonCode,
|
|
40171
|
-
model_provider: input.run.model_provider,
|
|
40172
|
-
model_name: input.run.model_name,
|
|
40173
|
-
runner_model: input.run.runner_model,
|
|
40174
|
-
agent_shell: `${input.run.command}-exec`,
|
|
40175
|
-
workspace_type: "clean-no-repo-programmatic",
|
|
40176
|
-
prompt_version: input.run.prompt_version,
|
|
40177
|
-
prompt_path: "prompt.txt",
|
|
40178
|
-
started_at: input.startedAt,
|
|
40179
|
-
ended_at: input.endedAt,
|
|
40180
|
-
manual_intervention_count: 0,
|
|
40181
|
-
manual_interventions: [],
|
|
40182
|
-
environment: {
|
|
40183
|
-
os: process.platform,
|
|
40184
|
-
node_version: process.version,
|
|
40185
|
-
npm_version: null,
|
|
40186
|
-
foh_cli_version: CLI_VERSION,
|
|
40187
|
-
runner_exit_code: input.exitCode,
|
|
40188
|
-
runner_timed_out: input.timedOut,
|
|
40189
|
-
duration_ms: input.durationMs
|
|
40190
|
-
},
|
|
40191
|
-
public_entrypoints: [
|
|
40192
|
-
"https://frontofhouse.okii.uk",
|
|
40193
|
-
"https://frontofhouse.okii.uk/llms.txt",
|
|
40194
|
-
"https://frontofhouse.okii.uk/openapi.yaml",
|
|
40195
|
-
"npx --yes @f-o-h/cli@latest"
|
|
40196
|
-
],
|
|
40197
|
-
commands_run: commands.map((command) => command.command),
|
|
40198
|
-
docs_pages_used: agentMetadata.docs_pages_used,
|
|
40199
|
-
eval_state: {
|
|
40200
|
-
lifecycle_strategy: "reuse_existing_eval_state",
|
|
40201
|
-
org_reuse_expected: true,
|
|
40202
|
-
agent_reuse_expected: true,
|
|
40203
|
-
widget_reuse_expected: true,
|
|
40204
|
-
fresh_org_expected: false,
|
|
40205
|
-
ephemeral_org_expected: false,
|
|
40206
|
-
fresh_agent_expected: false,
|
|
40207
|
-
phone_purchase_expected: false,
|
|
40208
|
-
paid_resource_creation_expected: false,
|
|
40209
|
-
spend_policy_expected: NO_SPEND_POLICY,
|
|
40210
|
-
cleanup_expected: false,
|
|
40211
|
-
cleanup_strategy: "no_cleanup_for_reused_eval_state",
|
|
40212
|
-
paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
|
|
40213
|
-
rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
|
|
40214
|
-
},
|
|
40215
|
-
artifacts: {
|
|
40216
|
-
terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
|
|
40217
|
-
command_log: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
|
|
40218
|
-
proof_bundle: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
|
|
40219
|
-
replay_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
|
|
40220
|
-
knowledge_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
|
|
40221
|
-
improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
|
|
40222
|
-
agent_metadata: agentMetadata.path,
|
|
40223
|
-
notes: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
|
|
40224
|
-
runner_last_message: relativeArtifactName(input.run.outputs.last_message),
|
|
40225
|
-
runner_stderr: relativeArtifactName(input.run.outputs.stderr),
|
|
40226
|
-
codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
|
|
40227
|
-
codex_stderr: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.stderr) : null,
|
|
40228
|
-
artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
|
|
40229
|
-
},
|
|
40230
|
-
summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
|
|
40231
|
-
next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))] : [
|
|
40232
|
-
"foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
|
|
40233
|
-
"foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
|
|
40234
|
-
externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))
|
|
40235
|
-
]
|
|
40236
|
-
};
|
|
40237
|
-
}
|
|
40238
|
-
function spawnRunner(input) {
|
|
40239
|
-
return new Promise((resolveRun) => {
|
|
40240
|
-
const started = Date.now();
|
|
40241
|
-
const commandInvocation = buildCommandInvocation(input.command, input.args);
|
|
40242
|
-
const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
|
|
40243
|
-
cwd: input.cwd,
|
|
40244
|
-
env: input.env,
|
|
40245
|
-
shell: false,
|
|
40246
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
40247
|
-
windowsHide: true
|
|
40248
|
-
});
|
|
40249
|
-
const stdout = (0, import_fs15.createWriteStream)(input.stdoutPath, { flags: "w" });
|
|
40250
|
-
const stderr = (0, import_fs15.createWriteStream)(input.stderrPath, { flags: "w" });
|
|
40251
|
-
child.stdout.pipe(stdout);
|
|
40252
|
-
child.stderr.pipe(stderr);
|
|
40253
|
-
child.stdin.end(input.prompt);
|
|
40254
|
-
let timedOut = false;
|
|
40255
|
-
const timer = setTimeout(() => {
|
|
40256
|
-
timedOut = true;
|
|
40257
|
-
if (child.pid && process.platform === "win32") {
|
|
40258
|
-
(0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
|
|
40259
|
-
} else {
|
|
40260
|
-
child.kill("SIGKILL");
|
|
40261
|
-
}
|
|
40262
|
-
}, input.timeoutMs);
|
|
40263
|
-
child.on("close", (exitCode) => {
|
|
40264
|
-
clearTimeout(timer);
|
|
40265
|
-
stdout.end();
|
|
40266
|
-
stderr.end();
|
|
40267
|
-
resolveRun({
|
|
40268
|
-
exitCode,
|
|
40269
|
-
timedOut,
|
|
40270
|
-
durationMs: Date.now() - started
|
|
40271
|
-
});
|
|
40272
|
-
});
|
|
40273
|
-
child.on("error", () => {
|
|
40274
|
-
clearTimeout(timer);
|
|
40275
|
-
stdout.end();
|
|
40276
|
-
stderr.end();
|
|
40277
|
-
resolveRun({
|
|
40278
|
-
exitCode: null,
|
|
40279
|
-
timedOut,
|
|
40280
|
-
durationMs: Date.now() - started
|
|
40281
|
-
});
|
|
40282
|
-
});
|
|
40283
|
-
});
|
|
40284
|
-
}
|
|
40285
|
-
function buildCommandInvocation(command, args) {
|
|
40286
|
-
if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
|
|
40287
|
-
const binDir = (0, import_path13.dirname)(command);
|
|
40288
|
-
const codexEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
|
|
40289
|
-
if ((0, import_fs15.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
|
|
40290
|
-
const geminiEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
|
|
40291
|
-
if ((0, import_fs15.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
|
|
40292
|
-
}
|
|
40293
|
-
return { command, args };
|
|
40294
|
-
}
|
|
40295
40384
|
async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
40296
40385
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40297
40386
|
const results = [];
|
|
@@ -40303,8 +40392,8 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40303
40392
|
if (authPreflight && !authPreflight.ok) {
|
|
40304
40393
|
const endedAt2 = (/* @__PURE__ */ new Date()).toISOString();
|
|
40305
40394
|
const blockedResults = plan.runs.map((run) => {
|
|
40306
|
-
(0,
|
|
40307
|
-
const runArtifact =
|
|
40395
|
+
(0, import_fs18.mkdirSync)(run.run_dir, { recursive: true });
|
|
40396
|
+
const runArtifact = buildExecutedExternalAgentRunArtifact({
|
|
40308
40397
|
run,
|
|
40309
40398
|
startedAt,
|
|
40310
40399
|
endedAt: endedAt2,
|
|
@@ -40314,7 +40403,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40314
40403
|
timedOut: false,
|
|
40315
40404
|
durationMs: 0
|
|
40316
40405
|
});
|
|
40317
|
-
(0,
|
|
40406
|
+
(0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
|
|
40318
40407
|
`, "utf8");
|
|
40319
40408
|
return {
|
|
40320
40409
|
run_id: run.run_id,
|
|
@@ -40341,41 +40430,41 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40341
40430
|
}
|
|
40342
40431
|
for (const run of plan.runs) {
|
|
40343
40432
|
const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40344
|
-
const commandCaptureDir = (0,
|
|
40345
|
-
(0,
|
|
40433
|
+
const commandCaptureDir = (0, import_path17.join)(run.workspace_dir, ".foh-capture");
|
|
40434
|
+
(0, import_fs18.mkdirSync)(commandCaptureDir, { recursive: true });
|
|
40346
40435
|
const env = buildCodexExecutorEnv({
|
|
40347
40436
|
sourceEnv: options.env,
|
|
40348
40437
|
runDir: commandCaptureDir,
|
|
40349
40438
|
promptVersion: run.prompt_version
|
|
40350
40439
|
});
|
|
40351
|
-
const spawned = await
|
|
40440
|
+
const spawned = await spawnExternalAgentRunner({
|
|
40352
40441
|
command: runnerCommand,
|
|
40353
40442
|
args: run.args,
|
|
40354
40443
|
cwd: run.workspace_dir,
|
|
40355
40444
|
env,
|
|
40356
|
-
prompt: (0,
|
|
40445
|
+
prompt: (0, import_fs18.readFileSync)(run.prompt_path, "utf8"),
|
|
40357
40446
|
stdoutPath: run.outputs.jsonl,
|
|
40358
40447
|
stderrPath: run.outputs.stderr,
|
|
40359
40448
|
timeoutMs: plan.timeout_minutes * 60 * 1e3
|
|
40360
40449
|
});
|
|
40361
|
-
|
|
40450
|
+
copyExternalAgentCommandCaptureArtifacts({ captureDir: commandCaptureDir, runDir: run.run_dir });
|
|
40362
40451
|
const privateRepoRoot = options.privateRepoRoot || plan.private_repo_root;
|
|
40363
|
-
|
|
40452
|
+
redactExternalAgentOutputArtifacts(run, { privateRepoRoot });
|
|
40364
40453
|
const artifactSafety = scanExternalAgentArtifacts({
|
|
40365
40454
|
runDir: run.run_dir,
|
|
40366
40455
|
privateRepoRoot,
|
|
40367
40456
|
writeRedacted: true
|
|
40368
40457
|
});
|
|
40369
|
-
(0,
|
|
40458
|
+
(0, import_fs18.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
|
|
40370
40459
|
`, "utf8");
|
|
40371
40460
|
const runEndedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40372
|
-
const classification =
|
|
40461
|
+
const classification = classifyExternalAgentRun({
|
|
40373
40462
|
run,
|
|
40374
40463
|
exitCode: spawned.exitCode,
|
|
40375
40464
|
timedOut: spawned.timedOut,
|
|
40376
40465
|
artifactSafetyOk: artifactSafety.ok
|
|
40377
40466
|
});
|
|
40378
|
-
const runArtifact =
|
|
40467
|
+
const runArtifact = buildExecutedExternalAgentRunArtifact({
|
|
40379
40468
|
run,
|
|
40380
40469
|
startedAt: runStartedAt,
|
|
40381
40470
|
endedAt: runEndedAt,
|
|
@@ -40385,7 +40474,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40385
40474
|
timedOut: spawned.timedOut,
|
|
40386
40475
|
durationMs: spawned.durationMs
|
|
40387
40476
|
});
|
|
40388
|
-
(0,
|
|
40477
|
+
(0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
|
|
40389
40478
|
`, "utf8");
|
|
40390
40479
|
results.push({
|
|
40391
40480
|
run_id: run.run_id,
|
|
@@ -40434,13 +40523,13 @@ function defaultRunDir(modelName, promptVersion) {
|
|
|
40434
40523
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
|
|
40435
40524
|
const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
|
|
40436
40525
|
const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
|
|
40437
|
-
return (0,
|
|
40526
|
+
return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
|
|
40438
40527
|
}
|
|
40439
40528
|
function defaultBatchDir(promptVersion) {
|
|
40440
40529
|
const date4 = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
|
|
40441
40530
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
|
|
40442
40531
|
const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
|
|
40443
|
-
return (0,
|
|
40532
|
+
return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
|
|
40444
40533
|
}
|
|
40445
40534
|
function safeSlug(value) {
|
|
40446
40535
|
return String(value || "unknown").toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "unknown";
|
|
@@ -40455,8 +40544,8 @@ function scanArtifactsCommand(runDir, privateRepoRoot) {
|
|
|
40455
40544
|
return `foh eval external-agent scan-artifacts --run-dir ${quoteArg(runDir)}${privateRootArg} --write-redacted --json`;
|
|
40456
40545
|
}
|
|
40457
40546
|
function externalAgentSummaryCommand2(root) {
|
|
40458
|
-
const summaryPath = (0,
|
|
40459
|
-
const reportPath = (0,
|
|
40547
|
+
const summaryPath = (0, import_path18.join)(root, "latest-summary.json");
|
|
40548
|
+
const reportPath = (0, import_path18.join)(root, "summary.report.json");
|
|
40460
40549
|
return [
|
|
40461
40550
|
"node",
|
|
40462
40551
|
"scripts/summarize-external-agent-runs.mjs",
|
|
@@ -40570,14 +40659,14 @@ function writePrompt(runDir, promptVersion, context = {}) {
|
|
|
40570
40659
|
replayPromptContext(context.replayFile),
|
|
40571
40660
|
knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
|
|
40572
40661
|
].join("");
|
|
40573
|
-
const path2 = (0,
|
|
40574
|
-
(0,
|
|
40662
|
+
const path2 = (0, import_path18.join)(runDir, "prompt.txt");
|
|
40663
|
+
(0, import_fs19.writeFileSync)(path2, `${prompt}
|
|
40575
40664
|
`, "utf8");
|
|
40576
40665
|
return path2;
|
|
40577
40666
|
}
|
|
40578
40667
|
function writeSession(runDir, session) {
|
|
40579
|
-
const path2 = (0,
|
|
40580
|
-
(0,
|
|
40668
|
+
const path2 = (0, import_path18.join)(runDir, "session.json");
|
|
40669
|
+
(0, import_fs19.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
|
|
40581
40670
|
`, "utf8");
|
|
40582
40671
|
return path2;
|
|
40583
40672
|
}
|
|
@@ -40653,9 +40742,9 @@ function buildRunArtifact(input) {
|
|
|
40653
40742
|
notes: "notes.md"
|
|
40654
40743
|
},
|
|
40655
40744
|
summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
|
|
40656
|
-
next_commands: status === "pass" ? [externalAgentSummaryCommand2((0,
|
|
40657
|
-
`foh bug improve --from external-agent-run --file ${(0,
|
|
40658
|
-
externalAgentSummaryCommand2((0,
|
|
40745
|
+
next_commands: status === "pass" ? [externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))] : [
|
|
40746
|
+
`foh bug improve --from external-agent-run --file ${(0, import_path18.join)(input.runDir, "run.json")} --out ${(0, import_path18.join)(input.runDir, "improvement-packet.json")} --json`,
|
|
40747
|
+
externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))
|
|
40659
40748
|
]
|
|
40660
40749
|
};
|
|
40661
40750
|
}
|
|
@@ -40664,16 +40753,16 @@ function registerEval(program3) {
|
|
|
40664
40753
|
const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
|
|
40665
40754
|
external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
|
|
40666
40755
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
40667
|
-
const batchDir = (0,
|
|
40668
|
-
const replayFile = opts.replayFile ? (0,
|
|
40756
|
+
const batchDir = (0, import_path18.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
|
|
40757
|
+
const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
|
|
40669
40758
|
const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
|
|
40670
40759
|
const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
|
|
40671
40760
|
const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
|
|
40672
|
-
(0,
|
|
40761
|
+
(0, import_fs19.mkdirSync)(batchDir, { recursive: true });
|
|
40673
40762
|
const runs = models.map((model, index) => {
|
|
40674
40763
|
const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
|
|
40675
|
-
const runDir = (0,
|
|
40676
|
-
(0,
|
|
40764
|
+
const runDir = (0, import_path18.join)(batchDir, runId);
|
|
40765
|
+
(0, import_fs19.mkdirSync)(runDir, { recursive: true });
|
|
40677
40766
|
const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
|
|
40678
40767
|
const commandArgs = [
|
|
40679
40768
|
"eval",
|
|
@@ -40720,8 +40809,8 @@ function registerEval(program3) {
|
|
|
40720
40809
|
runs,
|
|
40721
40810
|
summary_command: externalAgentSummaryCommand2(batchDir)
|
|
40722
40811
|
};
|
|
40723
|
-
const batchPath = (0,
|
|
40724
|
-
(0,
|
|
40812
|
+
const batchPath = (0, import_path18.join)(batchDir, "batch.json");
|
|
40813
|
+
(0, import_fs19.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
|
|
40725
40814
|
`, "utf8");
|
|
40726
40815
|
format(cliEnvelope({
|
|
40727
40816
|
schemaVersion: "external_agent_batch_plan_result.v1",
|
|
@@ -40741,11 +40830,11 @@ function registerEval(program3) {
|
|
|
40741
40830
|
external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
|
|
40742
40831
|
const status = normalizeStatus(opts.status);
|
|
40743
40832
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
40744
|
-
const runDir = (0,
|
|
40745
|
-
const replayFile = opts.replayFile ? (0,
|
|
40833
|
+
const runDir = (0, import_path18.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
|
|
40834
|
+
const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
|
|
40746
40835
|
const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
|
|
40747
40836
|
const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
|
|
40748
|
-
(0,
|
|
40837
|
+
(0, import_fs19.mkdirSync)(runDir, { recursive: true });
|
|
40749
40838
|
const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
|
|
40750
40839
|
const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
|
|
40751
40840
|
const shell = inferShell(opts.shell);
|
|
@@ -40771,7 +40860,7 @@ function registerEval(program3) {
|
|
|
40771
40860
|
}
|
|
40772
40861
|
};
|
|
40773
40862
|
writeSession(runDir, session);
|
|
40774
|
-
(0,
|
|
40863
|
+
(0, import_fs19.writeFileSync)((0, import_path18.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
|
|
40775
40864
|
let shellExitCode = null;
|
|
40776
40865
|
if (opts.shell !== false) {
|
|
40777
40866
|
process.stdout.write(`
|
|
@@ -40781,7 +40870,7 @@ Prompt: ${promptPath}
|
|
|
40781
40870
|
Exit the shell to finalize run.json.
|
|
40782
40871
|
|
|
40783
40872
|
`);
|
|
40784
|
-
const result = (0,
|
|
40873
|
+
const result = (0, import_child_process6.spawnSync)(shell.command, shell.args, {
|
|
40785
40874
|
stdio: "inherit",
|
|
40786
40875
|
env: {
|
|
40787
40876
|
...process.env,
|
|
@@ -40793,8 +40882,8 @@ Exit the shell to finalize run.json.
|
|
|
40793
40882
|
shellExitCode = typeof result.status === "number" ? result.status : null;
|
|
40794
40883
|
}
|
|
40795
40884
|
const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
|
|
40796
|
-
const runPath = (0,
|
|
40797
|
-
(0,
|
|
40885
|
+
const runPath = (0, import_path18.join)(runDir, "run.json");
|
|
40886
|
+
(0, import_fs19.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
|
|
40798
40887
|
`, "utf8");
|
|
40799
40888
|
format(cliEnvelope({
|
|
40800
40889
|
schemaVersion: "external_agent_capture_result.v1",
|
|
@@ -40804,7 +40893,7 @@ Exit the shell to finalize run.json.
|
|
|
40804
40893
|
artifacts: {
|
|
40805
40894
|
run: runPath,
|
|
40806
40895
|
prompt: promptPath,
|
|
40807
|
-
commands: (0,
|
|
40896
|
+
commands: (0, import_path18.join)(runDir, "commands.ndjson")
|
|
40808
40897
|
},
|
|
40809
40898
|
nextCommands: artifact.next_commands,
|
|
40810
40899
|
extra: { run: artifact }
|
|
@@ -40867,8 +40956,8 @@ Exit the shell to finalize run.json.
|
|
|
40867
40956
|
requireExplicitEvalAuth: true,
|
|
40868
40957
|
minimumEvalAuthTtlMs: (plan.timeout_minutes + 5) * 60 * 1e3
|
|
40869
40958
|
});
|
|
40870
|
-
const resultPath = (0,
|
|
40871
|
-
(0,
|
|
40959
|
+
const resultPath = (0, import_path18.join)(plan.batch_dir, "execution-result.json");
|
|
40960
|
+
(0, import_fs19.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
|
|
40872
40961
|
`, "utf8");
|
|
40873
40962
|
format(cliEnvelope({
|
|
40874
40963
|
schemaVersion: "external_agent_execution_result.v1",
|