@f-o-h/cli 0.1.69 → 0.1.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/foh.js +725 -661
- package/package.json +1 -1
package/dist/foh.js
CHANGED
|
@@ -14172,8 +14172,8 @@ function registerAgentGuardrailCommands(agent) {
|
|
|
14172
14172
|
try {
|
|
14173
14173
|
rule = JSON.parse(opts.rule);
|
|
14174
14174
|
} catch {
|
|
14175
|
-
const { readFileSync:
|
|
14176
|
-
rule = JSON.parse(
|
|
14175
|
+
const { readFileSync: readFileSync16 } = await import("fs");
|
|
14176
|
+
rule = JSON.parse(readFileSync16(opts.rule, "utf-8"));
|
|
14177
14177
|
}
|
|
14178
14178
|
const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
|
|
14179
14179
|
method: "POST",
|
|
@@ -14323,7 +14323,7 @@ async function publishAgentFromCurrentDraft(agentId, options) {
|
|
|
14323
14323
|
orgId
|
|
14324
14324
|
});
|
|
14325
14325
|
try {
|
|
14326
|
-
await apiFetch(`/v1/console/agents/${agentId}/publish`, {
|
|
14326
|
+
return await apiFetch(`/v1/console/agents/${agentId}/publish`, {
|
|
14327
14327
|
method: "POST",
|
|
14328
14328
|
body: JSON.stringify({ flowDraft }),
|
|
14329
14329
|
apiUrlOverride,
|
|
@@ -14399,7 +14399,7 @@ async function validateCertifyAndPublishAgent(opts) {
|
|
|
14399
14399
|
remediation: `Run: foh agent validate --agent ${opts.agentId} to see details.`
|
|
14400
14400
|
});
|
|
14401
14401
|
}
|
|
14402
|
-
await publishAgentFromCurrentDraft(opts.agentId, {
|
|
14402
|
+
const publish = await publishAgentFromCurrentDraft(opts.agentId, {
|
|
14403
14403
|
apiUrlOverride: opts.apiUrlOverride,
|
|
14404
14404
|
orgId: opts.orgId
|
|
14405
14405
|
});
|
|
@@ -14409,7 +14409,7 @@ async function validateCertifyAndPublishAgent(opts) {
|
|
|
14409
14409
|
status: "not_run",
|
|
14410
14410
|
reason_code: "publish_consumes_existing_certification_evidence"
|
|
14411
14411
|
},
|
|
14412
|
-
publish
|
|
14412
|
+
publish
|
|
14413
14413
|
};
|
|
14414
14414
|
}
|
|
14415
14415
|
|
|
@@ -14719,9 +14719,9 @@ function registerAgent(program3) {
|
|
|
14719
14719
|
process.stdout.write(yaml);
|
|
14720
14720
|
return;
|
|
14721
14721
|
}
|
|
14722
|
-
const { writeFileSync:
|
|
14722
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
14723
14723
|
const outputPath = opts.output ?? "tenant.yaml";
|
|
14724
|
-
|
|
14724
|
+
writeFileSync13(
|
|
14725
14725
|
outputPath,
|
|
14726
14726
|
`# tenant.yaml - Front Of House agent manifest
|
|
14727
14727
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -15084,11 +15084,6 @@ function registerInstagramChannelCommands(instagram, addCommonOptions) {
|
|
|
15084
15084
|
}));
|
|
15085
15085
|
}
|
|
15086
15086
|
|
|
15087
|
-
// src/commands/channel-whatsapp.ts
|
|
15088
|
-
var import_node_crypto = require("node:crypto");
|
|
15089
|
-
var import_node_fs = require("node:fs");
|
|
15090
|
-
var path = __toESM(require("node:path"));
|
|
15091
|
-
|
|
15092
15087
|
// src/commands/channel-whatsapp-helpers.ts
|
|
15093
15088
|
function parsePositiveNumber(value, fallback) {
|
|
15094
15089
|
if (value === void 0 || value === null || String(value).trim() === "") return fallback;
|
|
@@ -15215,7 +15210,11 @@ function buildReasonedNextSteps({
|
|
|
15215
15210
|
return dedupeSteps(steps);
|
|
15216
15211
|
}
|
|
15217
15212
|
|
|
15218
|
-
//
|
|
15213
|
+
// src/commands/channel-whatsapp-live-proof.ts
|
|
15214
|
+
var import_node_fs = require("node:fs");
|
|
15215
|
+
var path = __toESM(require("node:path"));
|
|
15216
|
+
|
|
15217
|
+
// src/lib/channel-live-proof-evaluator.mjs
|
|
15219
15218
|
function normalizeStatusValue(value) {
|
|
15220
15219
|
return String(value || "").trim().toLowerCase();
|
|
15221
15220
|
}
|
|
@@ -15282,55 +15281,7 @@ function evaluateChannelLiveProofArtifact({
|
|
|
15282
15281
|
};
|
|
15283
15282
|
}
|
|
15284
15283
|
|
|
15285
|
-
// src/commands/channel-whatsapp.ts
|
|
15286
|
-
var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
|
|
15287
|
-
var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
|
|
15288
|
-
var WHATSAPP_SENDER_MODEL = {
|
|
15289
|
-
test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
|
|
15290
|
-
production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
|
|
15291
|
-
runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
|
|
15292
|
-
};
|
|
15293
|
-
function parseBooleanOption({
|
|
15294
|
-
value,
|
|
15295
|
-
fallback,
|
|
15296
|
-
optionName,
|
|
15297
|
-
step
|
|
15298
|
-
}) {
|
|
15299
|
-
if (typeof value === "boolean") return value;
|
|
15300
|
-
const normalized = String(value ?? "").trim().toLowerCase();
|
|
15301
|
-
if (!normalized) return fallback;
|
|
15302
|
-
if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
|
|
15303
|
-
if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
|
|
15304
|
-
throw new FohError({
|
|
15305
|
-
step,
|
|
15306
|
-
error: `Invalid boolean value for ${optionName}: ${String(value)}`,
|
|
15307
|
-
remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
|
|
15308
|
-
});
|
|
15309
|
-
}
|
|
15310
|
-
async function runWhatsAppReadinessChecks({
|
|
15311
|
-
orgId,
|
|
15312
|
-
apiUrlOverride,
|
|
15313
|
-
verifyToken
|
|
15314
|
-
}) {
|
|
15315
|
-
const status = await apiFetch("/v1/console/channels/whatsapp/status", {
|
|
15316
|
-
orgId,
|
|
15317
|
-
apiUrlOverride
|
|
15318
|
-
});
|
|
15319
|
-
const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
|
|
15320
|
-
const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
|
|
15321
|
-
method: "POST",
|
|
15322
|
-
body: JSON.stringify(verifyPayload),
|
|
15323
|
-
orgId,
|
|
15324
|
-
apiUrlOverride
|
|
15325
|
-
});
|
|
15326
|
-
const test = await apiFetch("/v1/console/channels/whatsapp/test", {
|
|
15327
|
-
method: "POST",
|
|
15328
|
-
body: JSON.stringify({ dryRun: true }),
|
|
15329
|
-
orgId,
|
|
15330
|
-
apiUrlOverride
|
|
15331
|
-
});
|
|
15332
|
-
return { status, verify, test };
|
|
15333
|
-
}
|
|
15284
|
+
// src/commands/channel-whatsapp-live-proof.ts
|
|
15334
15285
|
function resolveLiveProof({
|
|
15335
15286
|
enabled,
|
|
15336
15287
|
artifactPathRaw,
|
|
@@ -15386,6 +15337,57 @@ function resolveLiveProof({
|
|
|
15386
15337
|
freshness: evaluated.freshness
|
|
15387
15338
|
};
|
|
15388
15339
|
}
|
|
15340
|
+
|
|
15341
|
+
// src/commands/channel-whatsapp-setup.ts
|
|
15342
|
+
var import_node_crypto = require("node:crypto");
|
|
15343
|
+
var WHATSAPP_WEBHOOK_CHALLENGE_TIMEOUT_MS = 1e4;
|
|
15344
|
+
var WHATSAPP_VERIFY_TOKEN_PREFIX = "foh-wa";
|
|
15345
|
+
var WHATSAPP_SENDER_MODEL = {
|
|
15346
|
+
test_number: "Meta test numbers are sandbox senders for free temporary API tests only; do not store them as production sender config.",
|
|
15347
|
+
production_sender: "Production traffic requires a verified WhatsApp Business phone number; store that sender phone_number_id and WABA-owned credentials in FOH channel config.",
|
|
15348
|
+
runtime_source_of_truth: "FOH runtime resolves WhatsApp credentials from org/channel config, not from personal mobile numbers or hidden environment fallbacks."
|
|
15349
|
+
};
|
|
15350
|
+
function parseBooleanOption({
|
|
15351
|
+
value,
|
|
15352
|
+
fallback,
|
|
15353
|
+
optionName,
|
|
15354
|
+
step
|
|
15355
|
+
}) {
|
|
15356
|
+
if (typeof value === "boolean") return value;
|
|
15357
|
+
const normalized = String(value ?? "").trim().toLowerCase();
|
|
15358
|
+
if (!normalized) return fallback;
|
|
15359
|
+
if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") return true;
|
|
15360
|
+
if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") return false;
|
|
15361
|
+
throw new FohError({
|
|
15362
|
+
step,
|
|
15363
|
+
error: `Invalid boolean value for ${optionName}: ${String(value)}`,
|
|
15364
|
+
remediation: `Use ${optionName} true|false (also accepts 1|0, yes|no).`
|
|
15365
|
+
});
|
|
15366
|
+
}
|
|
15367
|
+
async function runWhatsAppReadinessChecks({
|
|
15368
|
+
orgId,
|
|
15369
|
+
apiUrlOverride,
|
|
15370
|
+
verifyToken
|
|
15371
|
+
}) {
|
|
15372
|
+
const status = await apiFetch("/v1/console/channels/whatsapp/status", {
|
|
15373
|
+
orgId,
|
|
15374
|
+
apiUrlOverride
|
|
15375
|
+
});
|
|
15376
|
+
const verifyPayload = verifyToken ? { verifyToken: String(verifyToken) } : {};
|
|
15377
|
+
const verify = await apiFetch("/v1/console/channels/whatsapp/verify", {
|
|
15378
|
+
method: "POST",
|
|
15379
|
+
body: JSON.stringify(verifyPayload),
|
|
15380
|
+
orgId,
|
|
15381
|
+
apiUrlOverride
|
|
15382
|
+
});
|
|
15383
|
+
const test = await apiFetch("/v1/console/channels/whatsapp/test", {
|
|
15384
|
+
method: "POST",
|
|
15385
|
+
body: JSON.stringify({ dryRun: true }),
|
|
15386
|
+
orgId,
|
|
15387
|
+
apiUrlOverride
|
|
15388
|
+
});
|
|
15389
|
+
return { status, verify, test };
|
|
15390
|
+
}
|
|
15389
15391
|
function buildWebhookUrl(apiBaseUrl) {
|
|
15390
15392
|
return `${apiBaseUrl.replace(/\/$/, "")}/v1/whatsapp/webhook`;
|
|
15391
15393
|
}
|
|
@@ -15522,6 +15524,8 @@ function assertProofPass(strict, reasons) {
|
|
|
15522
15524
|
markCommandFailed(1);
|
|
15523
15525
|
}
|
|
15524
15526
|
}
|
|
15527
|
+
|
|
15528
|
+
// src/commands/channel-whatsapp.ts
|
|
15525
15529
|
function registerWhatsAppChannelCommands(whatsapp, addCommonOptions) {
|
|
15526
15530
|
addCommonOptions(
|
|
15527
15531
|
whatsapp.command("start").description("Assess WhatsApp onboarding readiness and print fastest setup path")
|
|
@@ -16169,11 +16173,11 @@ function registerVoice(program3) {
|
|
|
16169
16173
|
}
|
|
16170
16174
|
const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
|
|
16171
16175
|
const audio = Buffer.from(await res.arrayBuffer());
|
|
16172
|
-
const { mkdirSync: mkdirSync8, writeFileSync:
|
|
16173
|
-
const { dirname:
|
|
16176
|
+
const { mkdirSync: mkdirSync8, writeFileSync: writeFileSync13 } = await import("fs");
|
|
16177
|
+
const { dirname: dirname11, resolve: resolve13 } = await import("path");
|
|
16174
16178
|
const absolutePath = resolve13(outputPath);
|
|
16175
|
-
mkdirSync8(
|
|
16176
|
-
|
|
16179
|
+
mkdirSync8(dirname11(absolutePath), { recursive: true });
|
|
16180
|
+
writeFileSync13(absolutePath, audio);
|
|
16177
16181
|
format({
|
|
16178
16182
|
status: "ok",
|
|
16179
16183
|
provider,
|
|
@@ -32786,7 +32790,7 @@ var StdioServerTransport = class {
|
|
|
32786
32790
|
};
|
|
32787
32791
|
|
|
32788
32792
|
// src/lib/cli-version.ts
|
|
32789
|
-
var CLI_VERSION = "0.1.69";
|
|
32793
|
+
var CLI_VERSION = "0.1.70";
|
|
32790
32794
|
|
|
32791
32795
|
// src/commands/mcp-serve.ts
|
|
32792
32796
|
var DEFAULT_TIMEOUT_MS = 12e4;
|
|
@@ -33806,6 +33810,35 @@ function readDraftKnowledgeText(draft) {
|
|
|
33806
33810
|
const fromLegacy = typeof draft.knowledge_base === "string" ? draft.knowledge_base : "";
|
|
33807
33811
|
return fromLegacy;
|
|
33808
33812
|
}
|
|
33813
|
+
function normalizeKnowledgeText(value) {
|
|
33814
|
+
return value.replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n").trim();
|
|
33815
|
+
}
|
|
33816
|
+
function splitDraftKnowledgeSegments(value) {
|
|
33817
|
+
return value.replace(/\r\n?/g, "\n").split(/\n\s*---+\s*\n/g).map((segment) => normalizeKnowledgeText(segment)).filter(Boolean);
|
|
33818
|
+
}
|
|
33819
|
+
function buildDraftKnowledgeUpdate(existing, fileContent) {
|
|
33820
|
+
const normalizedContent = normalizeKnowledgeText(fileContent);
|
|
33821
|
+
if (normalizedContent.length === 0) {
|
|
33822
|
+
throw new FohError({
|
|
33823
|
+
step: "knowledge.ingest-file",
|
|
33824
|
+
error: "Knowledge file is empty after normalization",
|
|
33825
|
+
remediation: "Pass a file with non-empty text content.",
|
|
33826
|
+
statusCode: 400
|
|
33827
|
+
});
|
|
33828
|
+
}
|
|
33829
|
+
const existingSegments = splitDraftKnowledgeSegments(existing);
|
|
33830
|
+
const duplicate = existingSegments.includes(normalizedContent);
|
|
33831
|
+
const nextSegments = duplicate ? existingSegments : [...existingSegments, normalizedContent];
|
|
33832
|
+
const nextKnowledge = nextSegments.join("\n\n---\n\n");
|
|
33833
|
+
const normalizedExisting = existingSegments.join("\n\n---\n\n");
|
|
33834
|
+
return {
|
|
33835
|
+
nextKnowledge,
|
|
33836
|
+
normalizedContent,
|
|
33837
|
+
duplicate,
|
|
33838
|
+
shouldPatch: nextKnowledge !== normalizedExisting || normalizeKnowledgeText(existing) !== normalizedExisting,
|
|
33839
|
+
segmentCount: nextSegments.length
|
|
33840
|
+
};
|
|
33841
|
+
}
|
|
33809
33842
|
function tokenize(value) {
|
|
33810
33843
|
return value.toLowerCase().split(/[^a-z0-9]+/g).map((token) => token.trim()).filter((token) => token.length >= 3);
|
|
33811
33844
|
}
|
|
@@ -33929,23 +33962,25 @@ function registerKnowledge(program3) {
|
|
|
33929
33962
|
apiUrlOverride: opts.apiUrl
|
|
33930
33963
|
});
|
|
33931
33964
|
const existing = readDraftKnowledgeText(draft);
|
|
33932
|
-
const
|
|
33933
|
-
|
|
33934
|
-
|
|
33935
|
-
|
|
33936
|
-
|
|
33937
|
-
|
|
33938
|
-
|
|
33939
|
-
|
|
33940
|
-
|
|
33941
|
-
|
|
33942
|
-
|
|
33943
|
-
|
|
33944
|
-
});
|
|
33965
|
+
const update = buildDraftKnowledgeUpdate(existing, content);
|
|
33966
|
+
if (update.shouldPatch) {
|
|
33967
|
+
await apiFetch(`/v1/console/agents/${opts.agent}/draft`, {
|
|
33968
|
+
method: "PATCH",
|
|
33969
|
+
body: JSON.stringify({
|
|
33970
|
+
knowledge_base_raw: update.nextKnowledge,
|
|
33971
|
+
knowledge_base: update.nextKnowledge
|
|
33972
|
+
}),
|
|
33973
|
+
orgId: opts.org,
|
|
33974
|
+
apiUrlOverride: opts.apiUrl
|
|
33975
|
+
});
|
|
33976
|
+
}
|
|
33945
33977
|
data = {
|
|
33946
33978
|
ok: true,
|
|
33947
33979
|
source: "agent_draft_direct",
|
|
33948
|
-
length: nextKnowledge.length
|
|
33980
|
+
length: update.nextKnowledge.length,
|
|
33981
|
+
draft_knowledge_updated: update.shouldPatch,
|
|
33982
|
+
draft_knowledge_deduped: update.duplicate,
|
|
33983
|
+
segment_count: update.segmentCount
|
|
33949
33984
|
};
|
|
33950
33985
|
} else {
|
|
33951
33986
|
data = await apiFetch("/v1/knowledge/documents", {
|
|
@@ -33953,7 +33988,7 @@ ${content}` : content;
|
|
|
33953
33988
|
body: JSON.stringify({
|
|
33954
33989
|
name: (0, import_path2.basename)(opts.file),
|
|
33955
33990
|
source_type: "text",
|
|
33956
|
-
source_value: content,
|
|
33991
|
+
source_value: normalizeKnowledgeText(content),
|
|
33957
33992
|
agent_id: opts.agent
|
|
33958
33993
|
}),
|
|
33959
33994
|
orgId: opts.org,
|
|
@@ -34078,47 +34113,8 @@ function registerLeads(program3) {
|
|
|
34078
34113
|
// src/commands/setup.ts
|
|
34079
34114
|
var import_crypto3 = require("crypto");
|
|
34080
34115
|
|
|
34081
|
-
// src/lib/signed-report.ts
|
|
34082
|
-
var import_crypto2 = require("crypto");
|
|
34083
|
-
var import_fs4 = require("fs");
|
|
34084
|
-
var import_path3 = require("path");
|
|
34085
|
-
function canonicalize(value) {
|
|
34086
|
-
if (value === null || value === void 0) return null;
|
|
34087
|
-
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
|
|
34088
|
-
if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
|
|
34089
|
-
if (typeof value === "object") {
|
|
34090
|
-
const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
|
|
34091
|
-
return Object.fromEntries(sortedEntries);
|
|
34092
|
-
}
|
|
34093
|
-
return String(value);
|
|
34094
|
-
}
|
|
34095
|
-
function stableStringify(value) {
|
|
34096
|
-
return JSON.stringify(canonicalize(value), null, 2) + "\n";
|
|
34097
|
-
}
|
|
34098
|
-
function sha256Hex(input) {
|
|
34099
|
-
return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
|
|
34100
|
-
}
|
|
34101
|
-
function signReport(reportPayload) {
|
|
34102
|
-
const canonical = stableStringify(reportPayload);
|
|
34103
|
-
return {
|
|
34104
|
-
...reportPayload,
|
|
34105
|
-
report_hash: {
|
|
34106
|
-
algorithm: "sha256",
|
|
34107
|
-
digest_hex: sha256Hex(canonical),
|
|
34108
|
-
canonicalization: "sorted-json-v1",
|
|
34109
|
-
verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
|
|
34110
|
-
}
|
|
34111
|
-
};
|
|
34112
|
-
}
|
|
34113
|
-
function writeSignedJsonArtifact(path2, value) {
|
|
34114
|
-
const absolutePath = (0, import_path3.resolve)(path2);
|
|
34115
|
-
(0, import_fs4.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
|
|
34116
|
-
(0, import_fs4.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
|
|
34117
|
-
return absolutePath;
|
|
34118
|
-
}
|
|
34119
|
-
|
|
34120
34116
|
// src/commands/manifest.ts
|
|
34121
|
-
var import_fs5 = require("fs");
|
|
34117
|
+
var import_fs4 = require("fs");
|
|
34122
34118
|
var import_picocolors3 = __toESM(require_picocolors());
|
|
34123
34119
|
function formatDiff(diffs) {
|
|
34124
34120
|
if (diffs.length === 0) return "No changes";
|
|
@@ -34154,7 +34150,7 @@ function formatDiff(diffs) {
|
|
|
34154
34150
|
function loadManifestFile(filePath) {
|
|
34155
34151
|
let raw;
|
|
34156
34152
|
try {
|
|
34157
|
-
raw = (0, import_fs5.readFileSync)(filePath, "utf-8");
|
|
34153
|
+
raw = (0, import_fs4.readFileSync)(filePath, "utf-8");
|
|
34158
34154
|
} catch {
|
|
34159
34155
|
throw new FohError({
|
|
34160
34156
|
step: "manifest.load",
|
|
@@ -34379,80 +34375,13 @@ function normalizeAgentCertMode(value) {
|
|
|
34379
34375
|
return agentCertModeValues.includes(value) ? value : "quick";
|
|
34380
34376
|
}
|
|
34381
34377
|
|
|
34382
|
-
// src/commands/setup.ts
|
|
34383
|
-
var SETUP_STEP_ORDER = [
|
|
34384
|
-
"check_credentials",
|
|
34385
|
-
"check_org_access",
|
|
34386
|
-
"submit_compliance",
|
|
34387
|
-
"wait_compliance",
|
|
34388
|
-
"provision_phone",
|
|
34389
|
-
"create_agent",
|
|
34390
|
-
"validate_agent",
|
|
34391
|
-
"seed_guardrails",
|
|
34392
|
-
"ensure_widget",
|
|
34393
|
-
"set_widget_domains",
|
|
34394
|
-
"configure_voice",
|
|
34395
|
-
"run_smoke_test",
|
|
34396
|
-
"sim_certify_loop",
|
|
34397
|
-
"widget_smoke",
|
|
34398
|
-
"publish_agent",
|
|
34399
|
-
"emit_summary"
|
|
34400
|
-
];
|
|
34378
|
+
// src/commands/setup-apply.ts
|
|
34401
34379
|
function extractGuardrailsList(response) {
|
|
34402
34380
|
if (Array.isArray(response)) return response;
|
|
34403
34381
|
if (Array.isArray(response.guardrails)) return response.guardrails;
|
|
34404
34382
|
if (Array.isArray(response.rules)) return response.rules;
|
|
34405
34383
|
return [];
|
|
34406
34384
|
}
|
|
34407
|
-
function resolveResumeIndex(resumeFromRaw) {
|
|
34408
|
-
if (!resumeFromRaw) {
|
|
34409
|
-
return { resumeFrom: null, resumeIndex: 0 };
|
|
34410
|
-
}
|
|
34411
|
-
const resumeFrom = String(resumeFromRaw).trim();
|
|
34412
|
-
const resumeIndex = SETUP_STEP_ORDER.indexOf(resumeFrom);
|
|
34413
|
-
if (resumeIndex < 0) {
|
|
34414
|
-
throw new FohError({
|
|
34415
|
-
step: "setup.resume",
|
|
34416
|
-
error: `Invalid --resume-from step: ${String(resumeFromRaw)}`,
|
|
34417
|
-
remediation: `Use one of: ${SETUP_STEP_ORDER.join(", ")}`
|
|
34418
|
-
});
|
|
34419
|
-
}
|
|
34420
|
-
return { resumeFrom, resumeIndex };
|
|
34421
|
-
}
|
|
34422
|
-
function nowIso() {
|
|
34423
|
-
return (/* @__PURE__ */ new Date()).toISOString();
|
|
34424
|
-
}
|
|
34425
|
-
function timedStepResult(result, startedAtIso, startedAtMs) {
|
|
34426
|
-
return {
|
|
34427
|
-
...result,
|
|
34428
|
-
started_at: startedAtIso,
|
|
34429
|
-
completed_at: nowIso(),
|
|
34430
|
-
duration_ms: Math.max(0, Date.now() - startedAtMs)
|
|
34431
|
-
};
|
|
34432
|
-
}
|
|
34433
|
-
function optionNameToFlag(key) {
|
|
34434
|
-
return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
|
|
34435
|
-
}
|
|
34436
|
-
function normalizeSetupPhoneMode(raw) {
|
|
34437
|
-
const value = String(raw || "purchase").trim().toLowerCase();
|
|
34438
|
-
if (value === "observe" || value === "skip" || value === "purchase") return value;
|
|
34439
|
-
throw new FohError({
|
|
34440
|
-
step: "setup.phone_mode",
|
|
34441
|
-
error: `Invalid --phone-mode "${String(raw)}"`,
|
|
34442
|
-
remediation: "Use one of: observe, skip, purchase.",
|
|
34443
|
-
reasonCode: "setup_invalid_phone_mode"
|
|
34444
|
-
});
|
|
34445
|
-
}
|
|
34446
|
-
function complianceSkipDetail(phoneMode) {
|
|
34447
|
-
return {
|
|
34448
|
-
reason_code: `compliance_skipped_phone_mode_${phoneMode}`,
|
|
34449
|
-
phone_mode: phoneMode,
|
|
34450
|
-
spend_policy: resolveCliSpendPolicy(),
|
|
34451
|
-
spend_class: "free",
|
|
34452
|
-
safe_to_retry: true,
|
|
34453
|
-
operator_note: "Compliance is only required before paid FOH-owned phone purchase."
|
|
34454
|
-
};
|
|
34455
|
-
}
|
|
34456
34385
|
function isMissingAgentTestsError(error2) {
|
|
34457
34386
|
if (!(error2 instanceof FohError)) return false;
|
|
34458
34387
|
if (error2.statusCode !== 404) return false;
|
|
@@ -34510,6 +34439,11 @@ async function rebaseEvalAgentDraftFromTemplate(params) {
|
|
|
34510
34439
|
draft_keys: Object.keys(draft).sort()
|
|
34511
34440
|
};
|
|
34512
34441
|
}
|
|
34442
|
+
|
|
34443
|
+
// src/commands/setup-missing-options.ts
|
|
34444
|
+
function optionNameToFlag(key) {
|
|
34445
|
+
return "--" + key.replace(/([A-Z])/g, "-$1").toLowerCase();
|
|
34446
|
+
}
|
|
34513
34447
|
function buildMissingOptionsPlan(missing, opts) {
|
|
34514
34448
|
const missingFlags = missing.map(optionNameToFlag);
|
|
34515
34449
|
const signInUrl = buildConsoleSignInUrl(resolveConsoleBaseUrl(opts.consoleUrl));
|
|
@@ -34559,20 +34493,137 @@ function emitMissingOptionsPlan(missing, opts) {
|
|
|
34559
34493
|
`);
|
|
34560
34494
|
}
|
|
34561
34495
|
}
|
|
34562
|
-
|
|
34563
|
-
|
|
34564
|
-
|
|
34565
|
-
|
|
34566
|
-
|
|
34567
|
-
|
|
34568
|
-
|
|
34569
|
-
|
|
34570
|
-
|
|
34571
|
-
|
|
34572
|
-
|
|
34573
|
-
|
|
34574
|
-
|
|
34575
|
-
|
|
34496
|
+
|
|
34497
|
+
// src/commands/setup-plan.ts
|
|
34498
|
+
var SETUP_STEP_ORDER = [
|
|
34499
|
+
"check_credentials",
|
|
34500
|
+
"check_org_access",
|
|
34501
|
+
"submit_compliance",
|
|
34502
|
+
"wait_compliance",
|
|
34503
|
+
"provision_phone",
|
|
34504
|
+
"create_agent",
|
|
34505
|
+
"validate_agent",
|
|
34506
|
+
"seed_guardrails",
|
|
34507
|
+
"ensure_widget",
|
|
34508
|
+
"set_widget_domains",
|
|
34509
|
+
"configure_voice",
|
|
34510
|
+
"run_smoke_test",
|
|
34511
|
+
"sim_certify_loop",
|
|
34512
|
+
"widget_smoke",
|
|
34513
|
+
"publish_agent",
|
|
34514
|
+
"emit_summary"
|
|
34515
|
+
];
|
|
34516
|
+
function resolveResumeIndex(resumeFromRaw) {
|
|
34517
|
+
if (!resumeFromRaw) {
|
|
34518
|
+
return { resumeFrom: null, resumeIndex: 0 };
|
|
34519
|
+
}
|
|
34520
|
+
const resumeFrom = String(resumeFromRaw).trim();
|
|
34521
|
+
const resumeIndex = SETUP_STEP_ORDER.indexOf(resumeFrom);
|
|
34522
|
+
if (resumeIndex < 0) {
|
|
34523
|
+
throw new FohError({
|
|
34524
|
+
step: "setup.resume",
|
|
34525
|
+
error: `Invalid --resume-from step: ${String(resumeFromRaw)}`,
|
|
34526
|
+
remediation: `Use one of: ${SETUP_STEP_ORDER.join(", ")}`
|
|
34527
|
+
});
|
|
34528
|
+
}
|
|
34529
|
+
return { resumeFrom, resumeIndex };
|
|
34530
|
+
}
|
|
34531
|
+
function nowIso() {
|
|
34532
|
+
return (/* @__PURE__ */ new Date()).toISOString();
|
|
34533
|
+
}
|
|
34534
|
+
function timedStepResult(result, startedAtIso, startedAtMs) {
|
|
34535
|
+
return {
|
|
34536
|
+
...result,
|
|
34537
|
+
started_at: startedAtIso,
|
|
34538
|
+
completed_at: nowIso(),
|
|
34539
|
+
duration_ms: Math.max(0, Date.now() - startedAtMs)
|
|
34540
|
+
};
|
|
34541
|
+
}
|
|
34542
|
+
function normalizeSetupPhoneMode(raw) {
|
|
34543
|
+
const value = String(raw || "purchase").trim().toLowerCase();
|
|
34544
|
+
if (value === "observe" || value === "skip" || value === "purchase") return value;
|
|
34545
|
+
throw new FohError({
|
|
34546
|
+
step: "setup.phone_mode",
|
|
34547
|
+
error: `Invalid --phone-mode "${String(raw)}"`,
|
|
34548
|
+
remediation: "Use one of: observe, skip, purchase.",
|
|
34549
|
+
reasonCode: "setup_invalid_phone_mode"
|
|
34550
|
+
});
|
|
34551
|
+
}
|
|
34552
|
+
function complianceSkipDetail(phoneMode) {
|
|
34553
|
+
return {
|
|
34554
|
+
reason_code: `compliance_skipped_phone_mode_${phoneMode}`,
|
|
34555
|
+
phone_mode: phoneMode,
|
|
34556
|
+
spend_policy: resolveCliSpendPolicy(),
|
|
34557
|
+
spend_class: "free",
|
|
34558
|
+
safe_to_retry: true,
|
|
34559
|
+
operator_note: "Compliance is only required before paid FOH-owned phone purchase."
|
|
34560
|
+
};
|
|
34561
|
+
}
|
|
34562
|
+
|
|
34563
|
+
// src/lib/signed-report.ts
|
|
34564
|
+
var import_crypto2 = require("crypto");
|
|
34565
|
+
var import_fs5 = require("fs");
|
|
34566
|
+
var import_path3 = require("path");
|
|
34567
|
+
function canonicalize(value) {
|
|
34568
|
+
if (value === null || value === void 0) return null;
|
|
34569
|
+
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") return value;
|
|
34570
|
+
if (Array.isArray(value)) return value.map((entry) => canonicalize(entry));
|
|
34571
|
+
if (typeof value === "object") {
|
|
34572
|
+
const sortedEntries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)).map(([key, entryValue]) => [key, canonicalize(entryValue)]);
|
|
34573
|
+
return Object.fromEntries(sortedEntries);
|
|
34574
|
+
}
|
|
34575
|
+
return String(value);
|
|
34576
|
+
}
|
|
34577
|
+
function stableStringify(value) {
|
|
34578
|
+
return JSON.stringify(canonicalize(value), null, 2) + "\n";
|
|
34579
|
+
}
|
|
34580
|
+
function sha256Hex(input) {
|
|
34581
|
+
return (0, import_crypto2.createHash)("sha256").update(input).digest("hex");
|
|
34582
|
+
}
|
|
34583
|
+
function signReport(reportPayload) {
|
|
34584
|
+
const canonical = stableStringify(reportPayload);
|
|
34585
|
+
return {
|
|
34586
|
+
...reportPayload,
|
|
34587
|
+
report_hash: {
|
|
34588
|
+
algorithm: "sha256",
|
|
34589
|
+
digest_hex: sha256Hex(canonical),
|
|
34590
|
+
canonicalization: "sorted-json-v1",
|
|
34591
|
+
verification_note: "Recompute SHA-256 over canonical payload JSON before report_hash and compare digest_hex."
|
|
34592
|
+
}
|
|
34593
|
+
};
|
|
34594
|
+
}
|
|
34595
|
+
function writeSignedJsonArtifact(path2, value) {
|
|
34596
|
+
const absolutePath = (0, import_path3.resolve)(path2);
|
|
34597
|
+
(0, import_fs5.mkdirSync)((0, import_path3.dirname)(absolutePath), { recursive: true });
|
|
34598
|
+
(0, import_fs5.writeFileSync)(absolutePath, stableStringify(value), "utf-8");
|
|
34599
|
+
return absolutePath;
|
|
34600
|
+
}
|
|
34601
|
+
|
|
34602
|
+
// src/commands/setup-report.ts
|
|
34603
|
+
function writeSetupRunReport(reportPayload, reportOut) {
|
|
34604
|
+
const signed = signReport(reportPayload);
|
|
34605
|
+
const reportPath = reportOut ? writeSignedJsonArtifact(String(reportOut), signed) : null;
|
|
34606
|
+
return {
|
|
34607
|
+
reportHash: signed.report_hash.digest_hex,
|
|
34608
|
+
reportPath
|
|
34609
|
+
};
|
|
34610
|
+
}
|
|
34611
|
+
|
|
34612
|
+
// src/commands/setup.ts
|
|
34613
|
+
function registerSetup(program3) {
|
|
34614
|
+
program3.command("setup").description("Fully provision a new agency customer in one command").option("--org <id>", "Org ID (default: stored org from foh org use)").option("--agent-template <id>", "Agent template ID (e.g. viewing-request)").option("--agent-name <name>", "Name for the new agent").option("--phone-country <cc>", "Phone number country code", "GB").option("--phone-area-code <code>", "Phone area code preference").option("--phone-mode <mode>", "Phone path: observe, skip, or purchase", "purchase").option("--widget-domains <domains>", "Comma-separated widget domain allowlist").option("--voice-provider <p>", "TTS provider: openai, azure, twilio").option("--voice-id <id>", "Voice ID").option("--skip-compliance", "Skip compliance submission and wait").option("--skip-voice", "Skip voice configuration").option("--skip-tests", "Skip smoke tests").option("--cert-mode <m>", "Simulation cert mode: quick, full, stress", "quick").option("--cert-adaptive-runs <n>", "Adaptive run count for certification loop", "30").option("--cert-max-improvement-rounds <n>", "Max instruction improvement rounds in cert loop (0-5)", "1").option("--resume-from <step>", `Resume from a setup step (${SETUP_STEP_ORDER.join(", ")})`).option("--report-out <path>", "Optional path to write signed setup run report JSON").option("--dry-run", "Print all steps that would run without making any API calls").option("--api-url <url>", "API base URL override").option("--console-url <url>", "Console sign-in URL override").option("--json", "Output as JSON").action(async (opts) => {
|
|
34615
|
+
if (!opts.org) {
|
|
34616
|
+
try {
|
|
34617
|
+
opts.org = loadCredentials(opts.apiUrl).orgId;
|
|
34618
|
+
} catch {
|
|
34619
|
+
}
|
|
34620
|
+
}
|
|
34621
|
+
const missing = ["org", "agentTemplate", "agentName"].filter((key) => !opts[key]);
|
|
34622
|
+
if (missing.length) {
|
|
34623
|
+
emitMissingOptionsPlan(missing, { json: opts.json, consoleUrl: opts.consoleUrl });
|
|
34624
|
+
markCommandFailed(1);
|
|
34625
|
+
return;
|
|
34626
|
+
}
|
|
34576
34627
|
let resumeState;
|
|
34577
34628
|
try {
|
|
34578
34629
|
resumeState = resolveResumeIndex(opts.resumeFrom);
|
|
@@ -34644,12 +34695,7 @@ function registerSetup(program3) {
|
|
|
34644
34695
|
steps: completed,
|
|
34645
34696
|
failure: failure ?? null
|
|
34646
34697
|
};
|
|
34647
|
-
const signed = signReport(reportPayload);
|
34648
|
-
const reportPath = opts.reportOut ? writeSignedJsonArtifact(String(opts.reportOut), signed) : null;
|
|
34649
|
-
return {
|
|
34650
|
-
reportHash: signed.report_hash.digest_hex,
|
|
34651
|
-
reportPath
|
|
34652
|
-
};
|
|
34698
|
+
return writeSetupRunReport(reportPayload, opts.reportOut);
|
|
34653
34699
|
};
|
|
34654
34700
|
const shouldResumeSkip = (stepName) => {
|
|
34655
34701
|
if (!resumeState.resumeFrom) return false;
|
|
@@ -35140,8 +35186,8 @@ function registerSetup(program3) {
|
|
|
35140
35186
|
}
|
|
35141
35187
|
try {
|
|
35142
35188
|
const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
|
|
35143
|
-
const { writeFileSync:
|
|
35144
|
-
|
|
35189
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
35190
|
+
writeFileSync13(
|
|
35145
35191
|
"tenant.yaml",
|
|
35146
35192
|
`# tenant.yaml - Front Of House agent manifest
|
|
35147
35193
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -35311,8 +35357,8 @@ function registerSim(program3) {
|
|
|
35311
35357
|
}
|
|
35312
35358
|
const cert = response.certificate;
|
|
35313
35359
|
if (opts.out) {
|
|
35314
|
-
const { writeFileSync:
|
|
35315
|
-
|
|
35360
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
35361
|
+
writeFileSync13(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
|
|
35316
35362
|
process.stderr.write(` Certificate written to ${opts.out}
|
|
35317
35363
|
`);
|
|
35318
35364
|
}
|
|
@@ -35362,8 +35408,8 @@ function registerSim(program3) {
|
|
|
35362
35408
|
});
|
|
35363
35409
|
}
|
|
35364
35410
|
if (opts.out) {
|
|
35365
|
-
const { writeFileSync:
|
|
35366
|
-
|
|
35411
|
+
const { writeFileSync: writeFileSync13 } = await import("fs");
|
|
35412
|
+
writeFileSync13(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
|
|
35367
35413
|
process.stderr.write(` Final certificate written to ${opts.out}
|
|
35368
35414
|
`);
|
|
35369
35415
|
}
|
|
@@ -38967,9 +39013,9 @@ function registerUpdate(program3) {
|
|
|
38967
39013
|
}
|
|
38968
39014
|
|
|
38969
39015
|
// src/commands/eval.ts
|
|
38970
|
-
var
|
|
38971
|
-
var
|
|
38972
|
-
var
|
|
39016
|
+
var import_fs19 = require("fs");
|
|
39017
|
+
var import_path18 = require("path");
|
|
39018
|
+
var import_child_process6 = require("child_process");
|
|
38973
39019
|
|
|
38974
39020
|
// src/lib/external-agent-artifact-safety.ts
|
|
38975
39021
|
var import_fs12 = require("fs");
|
|
@@ -39303,63 +39349,13 @@ function readCommandRecords(runDir) {
|
|
|
39303
39349
|
}
|
|
39304
39350
|
|
|
39305
39351
|
// src/lib/external-agent-executor.ts
|
|
39306
|
-
var
|
|
39352
|
+
var import_fs18 = require("fs");
|
|
39307
39353
|
var import_os2 = require("os");
|
|
39308
|
-
var
|
|
39309
|
-
var
|
|
39354
|
+
var import_path17 = require("path");
|
|
39355
|
+
var import_child_process5 = require("child_process");
|
|
39310
39356
|
|
|
39311
|
-
// src/lib/external-agent-
|
|
39312
|
-
var import_fs14 = require("fs");
|
|
39357
|
+
// src/lib/external-agent-executor-env.ts
|
|
39313
39358
|
var import_path12 = require("path");
|
|
39314
|
-
var EXTERNAL_AGENT_METADATA_FILENAMES = [
|
|
39315
|
-
"external-agent-metadata.json",
|
|
39316
|
-
"agent-metadata.json"
|
|
39317
|
-
];
|
|
39318
|
-
var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
|
|
39319
|
-
function normalizeDocUrl(value) {
|
|
39320
|
-
const raw = typeof value === "string" ? value : value && typeof value === "object" && typeof value.url === "string" ? String(value.url) : "";
|
|
39321
|
-
const url2 = raw.trim().replace(/[.?!:]+$/g, "");
|
|
39322
|
-
if (!PUBLIC_DOC_URL_RE.test(url2)) return null;
|
|
39323
|
-
return url2;
|
|
39324
|
-
}
|
|
39325
|
-
function collectDocsFrom(value, docs) {
|
|
39326
|
-
if (Array.isArray(value)) {
|
|
39327
|
-
for (const entry of value) {
|
|
39328
|
-
const url2 = normalizeDocUrl(entry);
|
|
39329
|
-
if (url2) docs.add(url2);
|
|
39330
|
-
}
|
|
39331
|
-
}
|
|
39332
|
-
}
|
|
39333
|
-
function readExternalAgentMetadata(runDir) {
|
|
39334
|
-
for (const filename of EXTERNAL_AGENT_METADATA_FILENAMES) {
|
|
39335
|
-
const path2 = (0, import_path12.join)(runDir, filename);
|
|
39336
|
-
if (!(0, import_fs14.existsSync)(path2)) continue;
|
|
39337
|
-
try {
|
|
39338
|
-
const parsed = JSON.parse((0, import_fs14.readFileSync)(path2, "utf8"));
|
|
39339
|
-
const docs = /* @__PURE__ */ new Set();
|
|
39340
|
-
collectDocsFrom(parsed.docs_pages_used, docs);
|
|
39341
|
-
collectDocsFrom(parsed.docs_pages_observed, docs);
|
|
39342
|
-
collectDocsFrom(parsed.docs_used, docs);
|
|
39343
|
-
collectDocsFrom(parsed.public_docs_used, docs);
|
|
39344
|
-
return {
|
|
39345
|
-
path: filename,
|
|
39346
|
-
docs_pages_used: Array.from(docs).sort()
|
|
39347
|
-
};
|
|
39348
|
-
} catch {
|
|
39349
|
-
return {
|
|
39350
|
-
path: filename,
|
|
39351
|
-
docs_pages_used: []
|
|
39352
|
-
};
|
|
39353
|
-
}
|
|
39354
|
-
}
|
|
39355
|
-
return {
|
|
39356
|
-
path: null,
|
|
39357
|
-
docs_pages_used: []
|
|
39358
|
-
};
|
|
39359
|
-
}
|
|
39360
|
-
|
|
39361
|
-
// src/lib/external-agent-executor.ts
|
|
39362
|
-
var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
|
|
39363
39359
|
var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
|
|
39364
39360
|
"SUPABASE_",
|
|
39365
39361
|
"DATABASE_",
|
|
@@ -39403,15 +39399,6 @@ var EXTERNAL_AGENT_EVAL_AUTH_ENV_MAP = {
|
|
|
39403
39399
|
FOH_EXTERNAL_AGENT_EVAL_API_URL: "FOH_API_URL",
|
|
39404
39400
|
FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT: "FOH_TOKEN_EXPIRES_AT"
|
|
39405
39401
|
};
|
|
39406
|
-
var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
|
|
39407
|
-
var ExternalAgentExecutorError = class extends Error {
|
|
39408
|
-
reasonCode;
|
|
39409
|
-
constructor(reasonCode, message) {
|
|
39410
|
-
super(message);
|
|
39411
|
-
this.name = "ExternalAgentExecutorError";
|
|
39412
|
-
this.reasonCode = reasonCode;
|
|
39413
|
-
}
|
|
39414
|
-
};
|
|
39415
39402
|
function isDeniedEnvKey(key) {
|
|
39416
39403
|
const upper = key.toUpperCase();
|
|
39417
39404
|
if (CODEX_EXECUTOR_DENIED_ENV_NAMES.some((name) => upper === name)) return true;
|
|
@@ -39432,7 +39419,7 @@ function buildCodexExecutorEnv(input) {
|
|
|
39432
39419
|
env[childKey] = value;
|
|
39433
39420
|
}
|
|
39434
39421
|
}
|
|
39435
|
-
env.npm_config_cache = (0,
|
|
39422
|
+
env.npm_config_cache = (0, import_path12.join)((0, import_path12.dirname)(input.runDir), ".npm-cache");
|
|
39436
39423
|
env.npm_config_prefer_online = "true";
|
|
39437
39424
|
env.npm_config_update_notifier = "false";
|
|
39438
39425
|
env.npm_config_yes = "true";
|
|
@@ -39442,21 +39429,384 @@ function buildCodexExecutorEnv(input) {
|
|
|
39442
39429
|
env.FOH_CLI_SUPPRESS_BANNER = "1";
|
|
39443
39430
|
return env;
|
|
39444
39431
|
}
|
|
39445
|
-
|
|
39432
|
+
|
|
39433
|
+
// src/lib/external-agent-executor-artifacts.ts
|
|
39434
|
+
var import_fs15 = require("fs");
|
|
39435
|
+
var import_path14 = require("path");
|
|
39436
|
+
|
|
39437
|
+
// src/lib/external-agent-metadata.ts
|
|
39438
|
+
var import_fs14 = require("fs");
|
|
39439
|
+
var import_path13 = require("path");
|
|
39440
|
+
var EXTERNAL_AGENT_METADATA_FILENAMES = [
|
|
39441
|
+
"external-agent-metadata.json",
|
|
39442
|
+
"agent-metadata.json"
|
|
39443
|
+
];
|
|
39444
|
+
var PUBLIC_DOC_URL_RE = /^https:\/\/frontofhouse\.okii\.uk(?:\/[A-Za-z0-9._~:/?#[\]@!$&'()*+,;=%-]*)?$/;
|
|
39445
|
+
/**
 * Reduce a docs reference to a public documentation URL.
 *
 * @param {unknown} value - A URL string, or an object carrying a string `url` property.
 * @returns {string|null} The trimmed URL with trailing `.`, `?`, `!` and `:` characters
 *   removed, or null when the result does not match PUBLIC_DOC_URL_RE.
 */
function normalizeDocUrl(value) {
  let candidate = "";
  if (typeof value === "string") {
    candidate = value;
  } else if (value && typeof value === "object" && typeof value.url === "string") {
    candidate = String(value.url);
  }
  const cleaned = candidate.trim().replace(/[.?!:]+$/g, "");
  return PUBLIC_DOC_URL_RE.test(cleaned) ? cleaned : null;
}
|
|
39451
|
+
/**
 * Add every entry of `value` that normalizes to a docs URL into `docs`.
 * Anything that is not an array is ignored.
 *
 * @param {unknown} value - Candidate list of docs references.
 * @param {Set<string>} docs - Accumulator that receives the normalized URLs.
 */
function collectDocsFrom(value, docs) {
  if (!Array.isArray(value)) return;
  for (let index = 0; index < value.length; index += 1) {
    const normalized = normalizeDocUrl(value[index]);
    if (normalized) docs.add(normalized);
  }
}
|
|
39459
|
+
/**
 * Read the first metadata file (in EXTERNAL_AGENT_METADATA_FILENAMES order) that exists
 * in `runDir` and gather the docs URLs it lists.
 *
 * @param {string} runDir - Directory that may contain an agent metadata file.
 * @returns {{path: string|null, docs_pages_used: string[]}} The metadata filename (null when
 *   no file exists) and the sorted, de-duplicated docs URLs; the list is empty when the file
 *   cannot be read or parsed.
 */
function readExternalAgentMetadata(runDir) {
  const presentName = EXTERNAL_AGENT_METADATA_FILENAMES.find(
    (filename) => (0, import_fs14.existsSync)((0, import_path13.join)(runDir, filename))
  );
  if (presentName === undefined) {
    return { path: null, docs_pages_used: [] };
  }
  const collected = /* @__PURE__ */ new Set();
  try {
    const metadataPath = (0, import_path13.join)(runDir, presentName);
    const parsed = JSON.parse((0, import_fs14.readFileSync)(metadataPath, "utf8"));
    const docFields = ["docs_pages_used", "docs_pages_observed", "docs_used", "public_docs_used"];
    for (const field of docFields) {
      collectDocsFrom(parsed[field], collected);
    }
  } catch {
    // Unreadable or malformed metadata still reports which file was found.
    return { path: presentName, docs_pages_used: [] };
  }
  return { path: presentName, docs_pages_used: Array.from(collected).sort() };
}
|
|
39453
|
-
|
|
39454
|
-
|
|
39455
|
-
|
|
39456
|
-
);
|
|
39486
|
+
|
|
39487
|
+
// src/lib/external-agent-executor-artifacts.ts
|
|
39488
|
+
/**
 * Redact one artifact file in place. Missing files are ignored, and the file is rewritten
 * only when redaction actually changed its text.
 *
 * @param {string} path2 - File to redact.
 * @param {object} [input] - Options forwarded to redactExternalAgentArtifactText.
 */
function redactArtifactFile(path2, input = {}) {
  if ((0, import_fs15.existsSync)(path2)) {
    const before = (0, import_fs15.readFileSync)(path2, "utf8");
    const after = redactExternalAgentArtifactText(before, input);
    if (after === before) return;
    (0, import_fs15.writeFileSync)(path2, after, "utf8");
  }
}
|
|
39458
|
-
|
|
39459
|
-
|
|
39494
|
+
/**
 * Redact every text artifact of a run: the three runner output files, the command log, and
 * each `command-output-cmd_*` capture in the run directory that is not already a
 * `.redacted` copy.
 *
 * @param {{run_dir: string, outputs: {jsonl: string, last_message: string, stderr: string}}} run
 * @param {object} [input] - Options forwarded to redactArtifactFile.
 */
function redactExternalAgentOutputArtifacts(run, input = {}) {
  for (const outputKey of ["jsonl", "last_message", "stderr"]) {
    redactArtifactFile(run.outputs[outputKey], input);
  }
  redactArtifactFile((0, import_path14.join)(run.run_dir, "commands.ndjson"), input);
  if (!(0, import_fs15.existsSync)(run.run_dir)) return;
  const captureNames = (0, import_fs15.readdirSync)(run.run_dir).filter(
    (name) => name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")
  );
  for (const captureName of captureNames) {
    redactArtifactFile((0, import_path14.join)(run.run_dir, captureName), input);
  }
}
|
|
39506
|
+
/**
 * Copy captured command artifacts from `input.captureDir` into `input.runDir`:
 * `commands.ndjson`, every `command-output-cmd_*` file, and any agent metadata file named
 * in EXTERNAL_AGENT_METADATA_FILENAMES.
 *
 * Returns without copying anything when the capture directory does not exist. The command
 * log was already treated as optional; listing a missing directory used to throw ENOENT.
 *
 * @param {{captureDir: string, runDir: string}} input
 */
function copyExternalAgentCommandCaptureArtifacts(input) {
  if (!(0, import_fs15.existsSync)(input.captureDir)) return;
  const commandLog = (0, import_path14.join)(input.captureDir, "commands.ndjson");
  if ((0, import_fs15.existsSync)(commandLog)) {
    (0, import_fs15.writeFileSync)(
      (0, import_path14.join)(input.runDir, "commands.ndjson"),
      (0, import_fs15.readFileSync)(commandLog, "utf8"),
      "utf8"
    );
  }
  for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
    const isCommandOutput = name.startsWith("command-output-cmd_");
    if (isCommandOutput || EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
      (0, import_fs15.copyFileSync)((0, import_path14.join)(input.captureDir, name), (0, import_path14.join)(input.runDir, name));
    }
  }
}
|
|
39519
|
+
|
|
39520
|
+
// src/lib/external-agent-executor-classification.ts
|
|
39521
|
+
var import_fs16 = require("fs");
|
|
39522
|
+
var import_path15 = require("path");
|
|
39523
|
+
/**
 * Report whether `<runDir>/proof.json` records a passing proof.
 *
 * @param {string} runDir - Run directory to inspect.
 * @returns {boolean} True when the file parses and has `ok === true` or a `status` of
 *   "pass"/"passed"; false when it is missing, unreadable or says anything else.
 */
function proofArtifactPasses(runDir) {
  const proofFile = (0, import_path15.join)(runDir, "proof.json");
  if ((0, import_fs16.existsSync)(proofFile)) {
    try {
      const proof = JSON.parse((0, import_fs16.readFileSync)(proofFile, "utf8"));
      if (proof.ok === true) return true;
      return ["pass", "passed"].includes(proof.status);
    } catch {
      return false;
    }
  }
  return false;
}
|
|
39533
|
+
/**
 * Read a file as UTF-8 text, or return an empty string when it does not exist.
 *
 * @param {string} path2 - File to read.
 * @returns {string} The file contents, or "".
 */
function readIfExists(path2) {
  if (!(0, import_fs16.existsSync)(path2)) return "";
  return (0, import_fs16.readFileSync)(path2, "utf8");
}
|
|
39536
|
+
/**
 * Return the final path segment of an artifact path.
 *
 * @param {string} path2 - Artifact path.
 * @returns {string} The basename of `path2`.
 */
function relativeArtifactName(path2) {
  const { basename: leafOf } = import_path15;
  return leafOf(path2);
}
|
|
39539
|
+
/**
 * Build the command line that summarizes all runs under `root`, writing
 * `latest-summary.json` and `summary.report.json` inside that directory.
 *
 * @param {string} root - Directory that holds the run directories.
 * @returns {string} The command line, with each path passed through quoteShellArg.
 */
function externalAgentSummaryCommand(root) {
  const summaryOut = (0, import_path15.join)(root, "latest-summary.json");
  const reportOut = (0, import_path15.join)(root, "summary.report.json");
  const script = "node scripts/summarize-external-agent-runs.mjs";
  return `${script} --root ${quoteShellArg(root)} --out ${quoteShellArg(summaryOut)} --report ${quoteShellArg(reportOut)}`;
}
|
|
39551
|
+
/**
 * Quote a value for use as a single word in a shell command line.
 *
 * Values made only of `A-Z a-z 0-9 _ . / : = @ -` are returned unchanged. Anything else is
 * wrapped in double quotes with `"`, `$` and backticks backslash-escaped.
 *
 * A backslash is doubled only where a POSIX shell would treat it as an escape inside
 * double quotes: before `"`, `$`, a backtick or another backslash, and at the very end of
 * the value (where it would otherwise escape the closing quote). Other backslashes, such
 * as the separators in `C:\Users\My Name`, are left as they are.
 *
 * @param {unknown} value - Value to quote; it is converted with String().
 * @returns {string} The bare or double-quoted word.
 */
function quoteShellArg(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
  const escaped = text.replace(/\\(?=[\\"$`]|$)|["$`]/g, (match) => `\\${match}`);
  return `"${escaped}"`;
}
|
|
39556
|
+
/**
 * Classify a finished external-agent run as "pass", "hold" or "fail" with a reason code.
 *
 * Checks run in a fixed order and the first match wins:
 *   1. runner timeout, then a failed artifact-safety scan;
 *   2. CLI version drift among completed command records;
 *   3. reason codes reported by completed command records;
 *   4. patterns in the runner's last message and stderr;
 *   5. auth/approval wording without a passing proof, a non-zero exit code, and finally
 *      presence of a passing proof artifact.
 *
 * @param {{run: object, timedOut: boolean, artifactSafetyOk: boolean, exitCode: number|null}} input
 * @returns {{status: "pass"|"hold"|"fail", reasonCode: string|null}}
 */
function classifyExternalAgentRun(input) {
  const { run } = input;
  const hold = (reasonCode) => ({ status: "hold", reasonCode });
  const fail = (reasonCode) => ({ status: "fail", reasonCode });

  if (input.timedOut) return hold(`${run.command}_runner_timeout`);
  if (!input.artifactSafetyOk) return fail("external_agent_artifact_safety_blocked");

  const finished = readCommandRecords(run.run_dir).filter((entry) => entry.phase === "completed");
  const versionDrifted = finished
    .map((entry) => String(entry.cli_version || "").trim())
    .filter((candidate) => /^\d+\.\d+\.\d+$/.test(candidate))
    .some((candidate) => candidate !== CLI_VERSION);
  if (versionDrifted) return hold("external_agent_cli_version_drift");

  // Flatten each record's reason_code and check_reason_codes, dropping empty values.
  const reportedReasons = [];
  for (const entry of finished) {
    const checkCodes = Array.isArray(entry.check_reason_codes) ? entry.check_reason_codes : [];
    for (const raw of [entry.reason_code, ...checkCodes]) {
      const reason = String(raw || "");
      if (reason) reportedReasons.push(reason);
    }
  }

  const paidResourceBlocked = new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i");
  const commandReasonRules = [
    [paidResourceBlocked, PAID_RESOURCE_BLOCKED_REASON_CODE],
    [/provider_capacity_blocked/i, "provider_capacity_blocked"],
    [/byon_voice_number_not_configured/i, "byon_voice_number_not_configured"],
    [/contact_phone_provisioning_failed/i, "voice_contact_phone_provisioning_failed"],
    [/voice_contact_expected_no_spend_hold/i, "voice_contact_expected_no_spend_hold"],
    [/contact_phone_missing/i, "voice_contact_phone_missing"],
    [/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i, "simulation_certification_failed"],
    [/proof_held/i, "external_agent_proof_held"],
    [/agent_limit_reached/i, "eval_org_agent_limit_reached"]
  ];
  for (const [pattern, reasonCode] of commandReasonRules) {
    if (reportedReasons.some((reason) => pattern.test(reason))) return hold(reasonCode);
  }

  // Runner output is only read once the structured command records gave no verdict.
  const lastMessage = readIfExists(run.outputs.last_message);
  const stderr = readIfExists(run.outputs.stderr);
  const combined = [lastMessage, stderr].join("\n");

  const outputRules = [
    [/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i, "fail", "private_repo_assumption_detected"],
    [/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i, "hold", "codex_exec_policy_blocked"],
    [/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i, "hold", "codex_sandbox_exec_blocked"],
    [/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i, "hold", "codex_network_dns_blocked"],
    [paidResourceBlocked, "hold", PAID_RESOURCE_BLOCKED_REASON_CODE],
    [/provider_capacity_blocked/i, "hold", "provider_capacity_blocked"],
    [/byon_voice_number_not_configured/i, "hold", "byon_voice_number_not_configured"],
    [/contact_phone_provisioning_failed/i, "hold", "voice_contact_phone_provisioning_failed"],
    [/voice_contact_expected_no_spend_hold/i, "hold", "voice_contact_expected_no_spend_hold"],
    [/contact_phone_missing/i, "hold", "voice_contact_phone_missing"],
    [/simulation_certification_failed/i, "hold", "simulation_certification_failed"],
    [/proof_held/i, "hold", "external_agent_proof_held"],
    [/agent_limit_reached/i, "hold", "eval_org_agent_limit_reached"]
  ];
  for (const [pattern, status, reasonCode] of outputRules) {
    if (pattern.test(combined)) return { status, reasonCode };
  }

  if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(run.run_dir)) {
    return hold("auth_browser_approval_required");
  }
  if (input.exitCode !== 0) return hold(`${run.command}_runner_nonzero_exit`);
  if (proofArtifactPasses(run.run_dir)) return { status: "pass", reasonCode: null };
  return hold("external_agent_proof_artifact_missing");
}
|
|
39646
|
+
/**
 * Assemble the `external_agent_run.v1` record for an executed run.
 *
 * Combines the classification result and timing from `input` with the command records and
 * agent metadata found in the run directory. Optional artifacts are listed by filename only
 * when the file exists in the run directory, otherwise as null.
 *
 * @param {{run: object, status: string, reasonCode: string|null, startedAt: string,
 *   endedAt: string, exitCode: number|null, timedOut: boolean, durationMs: number}} input
 * @returns {object} The run artifact, ready to be serialized.
 */
function buildExecutedExternalAgentRunArtifact(input) {
  const { run } = input;
  const commands = readCommandRecords(run.run_dir);
  const agentMetadata = readExternalAgentMetadata(run.run_dir);
  const passed = input.status === "pass";
  const isCodex = run.command === "codex";
  const presentOrNull = (fileName) => (0, import_fs16.existsSync)((0, import_path15.join)(run.run_dir, fileName)) ? fileName : null;
  const summaryCommand = () => externalAgentSummaryCommand((0, import_path15.dirname)(run.run_dir));

  const environment = {
    os: process.platform,
    node_version: process.version,
    npm_version: null,
    foh_cli_version: CLI_VERSION,
    runner_exit_code: input.exitCode,
    runner_timed_out: input.timedOut,
    duration_ms: input.durationMs
  };
  const evalState = {
    lifecycle_strategy: "reuse_existing_eval_state",
    org_reuse_expected: true,
    agent_reuse_expected: true,
    widget_reuse_expected: true,
    fresh_org_expected: false,
    ephemeral_org_expected: false,
    fresh_agent_expected: false,
    phone_purchase_expected: false,
    paid_resource_creation_expected: false,
    spend_policy_expected: NO_SPEND_POLICY,
    cleanup_expected: false,
    cleanup_strategy: "no_cleanup_for_reused_eval_state",
    paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
    rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
  };
  const artifacts = {
    terminal_transcript: relativeArtifactName(run.outputs.jsonl),
    command_log: presentOrNull("commands.ndjson"),
    proof_bundle: presentOrNull("proof.json"),
    replay_packet: presentOrNull("replay.json"),
    knowledge_packet: presentOrNull("knowledge.json"),
    // Only non-passing runs are expected to get an improvement packet.
    improvement_packet: passed ? null : "improvement-packet.json",
    agent_metadata: agentMetadata.path,
    notes: presentOrNull("notes.md"),
    runner_last_message: relativeArtifactName(run.outputs.last_message),
    runner_stderr: relativeArtifactName(run.outputs.stderr),
    codex_last_message: isCodex ? relativeArtifactName(run.outputs.last_message) : null,
    codex_stderr: isCodex ? relativeArtifactName(run.outputs.stderr) : null,
    artifact_safety: relativeArtifactName(run.outputs.artifact_safety)
  };
  const summary = passed
    ? `Controlled ${run.command} external-agent run produced passing proof evidence.`
    : `Controlled ${run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`;
  const nextCommands = passed
    ? [summaryCommand()]
    : [
        "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
        "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
        summaryCommand()
      ];

  return {
    schema_version: "external_agent_run.v1",
    run_id: run.run_id,
    status: input.status,
    failure_reason_code: input.reasonCode,
    model_provider: run.model_provider,
    model_name: run.model_name,
    runner_model: run.runner_model,
    agent_shell: `${run.command}-exec`,
    workspace_type: "clean-no-repo-programmatic",
    prompt_version: run.prompt_version,
    prompt_path: "prompt.txt",
    started_at: input.startedAt,
    ended_at: input.endedAt,
    manual_intervention_count: 0,
    manual_interventions: [],
    environment,
    public_entrypoints: [
      "https://frontofhouse.okii.uk",
      "https://frontofhouse.okii.uk/llms.txt",
      "https://frontofhouse.okii.uk/openapi.yaml",
      "npx --yes @f-o-h/cli@latest"
    ],
    commands_run: commands.map((record) => record.command),
    docs_pages_used: agentMetadata.docs_pages_used,
    eval_state: evalState,
    artifacts,
    summary,
    next_commands: nextCommands
  };
}
|
|
39721
|
+
|
|
39722
|
+
// src/lib/external-agent-runner-execution.ts
|
|
39723
|
+
var import_child_process4 = require("child_process");
|
|
39724
|
+
var import_fs17 = require("fs");
|
|
39725
|
+
var import_path16 = require("path");
|
|
39726
|
+
/**
 * Decide how to launch a runner command.
 *
 * On Windows, when `command` is a `.cmd` shim and the Codex or Gemini CLI JavaScript
 * entrypoint exists under the shim's `node_modules`, the entrypoint is run with the current
 * Node binary instead of the shim. In every other case the command and args are returned
 * unchanged.
 *
 * @param {string} command - Executable name or path.
 * @param {string[]} args - Arguments for the command.
 * @returns {{command: string, args: string[]}}
 */
function buildCommandInvocation(command, args) {
  const isWindowsCmdShim = process.platform === "win32" && command.toLowerCase().endsWith(".cmd");
  if (!isWindowsCmdShim) return { command, args };
  const shimDir = (0, import_path16.dirname)(command);
  const entrypoints = [
    {
      script: (0, import_path16.join)(shimDir, "node_modules", "@openai", "codex", "bin", "codex.js"),
      nodeFlags: []
    },
    {
      script: (0, import_path16.join)(shimDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js"),
      nodeFlags: ["--no-warnings=DEP0040"]
    }
  ];
  for (const { script, nodeFlags } of entrypoints) {
    if ((0, import_fs17.existsSync)(script)) {
      return { command: process.execPath, args: [...nodeFlags, script, ...args] };
    }
  }
  return { command, args };
}
|
|
39736
|
+
/**
 * Run an external agent CLI, feeding it the prompt on stdin and streaming its stdout and
 * stderr into files.
 *
 * The returned promise never rejects for runner failures. It resolves exactly once, after
 * both output files have been flushed and closed, so callers can read them right away.
 *
 * @param {{command: string, args: string[], cwd: string, env: object, prompt: string,
 *   stdoutPath: string, stderrPath: string, timeoutMs: number}} input
 * @returns {Promise<{exitCode: number|null, timedOut: boolean, durationMs: number}>}
 *   `exitCode` is null when the process could not be started or was killed by a signal;
 *   `durationMs` is measured up to the moment the process closed or failed.
 */
function spawnExternalAgentRunner(input) {
  return new Promise((resolveRun) => {
    const started = Date.now();
    const commandInvocation = buildCommandInvocation(input.command, input.args);
    const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
      cwd: input.cwd,
      env: input.env,
      shell: false,
      stdio: ["pipe", "pipe", "pipe"],
      windowsHide: true
    });
    const stdout = (0, import_fs17.createWriteStream)(input.stdoutPath, { flags: "w" });
    const stderr = (0, import_fs17.createWriteStream)(input.stderrPath, { flags: "w" });
    child.stdout.pipe(stdout);
    child.stderr.pipe(stderr);
    // A runner that fails to start, or exits before draining stdin, makes the pipe emit
    // "error" (EPIPE). Without a listener that is an unhandled stream error; the outcome is
    // already reported through the child's "close"/"error" events.
    child.stdin.on("error", () => {});
    child.stdin.end(input.prompt);

    let timedOut = false;
    let settled = false;
    const timer = setTimeout(() => {
      timedOut = true;
      if (child.pid && process.platform === "win32") {
        // taskkill /t also terminates processes started by the runner.
        (0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
      } else {
        child.kill("SIGKILL");
      }
    }, input.timeoutMs);

    // A failed spawn emits "error" and may also emit "close"; only the first one counts.
    const settle = (exitCode) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      const result = { exitCode, timedOut, durationMs: Date.now() - started };
      let pendingSinks = 2;
      const onSinkClosed = () => {
        pendingSinks -= 1;
        if (pendingSinks === 0) resolveRun(result);
      };
      for (const sink of [stdout, stderr]) {
        if (sink.destroyed) {
          // Already finished and torn down (pipe() ends the sink when the source ends).
          onSinkClosed();
        } else {
          // Wait for buffered writes to reach the file before reporting completion.
          sink.once("close", onSinkClosed);
          sink.end();
        }
      }
    };
    child.on("close", (exitCode) => settle(exitCode));
    child.on("error", () => settle(null));
  });
}
|
|
39783
|
+
|
|
39784
|
+
// src/lib/external-agent-executor.ts
|
|
39785
|
+
var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
|
|
39786
|
+
var DEFAULT_FOH_API_URL2 = "https://api.frontofhouse.okii.uk";
|
|
39787
|
+
/**
 * Error raised by the external-agent executor. `reasonCode` carries a machine-readable
 * code alongside the human-readable message.
 */
var ExternalAgentExecutorError = class extends Error {
  reasonCode;
  /**
   * @param {string} reasonCode - Machine-readable failure code.
   * @param {string} message - Human-readable description.
   */
  constructor(reasonCode, message) {
    super(message);
    Object.assign(this, { name: "ExternalAgentExecutorError", reasonCode });
  }
};
|
|
39795
|
+
/**
 * Read the external-agent eval credentials from environment variables.
 *
 * @param {Record<string, string|undefined>} [env] - Environment to read; defaults to process.env.
 * @returns {{token: string, orgId: string, apiUrl: string, expiresAt: string}} Trimmed
 *   values, "" when unset. `apiUrl` falls back from FOH_EXTERNAL_AGENT_EVAL_API_URL to
 *   FOH_API_URL to DEFAULT_FOH_API_URL2, and a blank value also yields the default.
 */
function readExternalAgentEvalAuthEnv(env = process.env) {
  const trimmedOrEmpty = (raw) => String(raw || "").trim();
  const configuredApiUrl = env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_API_URL || DEFAULT_FOH_API_URL2;
  const token = trimmedOrEmpty(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN);
  const orgId = trimmedOrEmpty(env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID);
  const apiUrl = String(configuredApiUrl).trim() || DEFAULT_FOH_API_URL2;
  const expiresAt = trimmedOrEmpty(env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT);
  return { token, orgId, apiUrl, expiresAt };
}
|
|
39803
|
+
/**
 * Report whether any eval-auth or general FOH auth variable is set to a non-empty value.
 *
 * @param {Record<string, string|undefined>} [env] - Environment to inspect; defaults to process.env.
 * @returns {boolean} True when at least one of the listed variables is truthy.
 */
function shouldRunExternalAgentEvalAuthPreflight(env = process.env) {
  const authVariables = [
    "FOH_EXTERNAL_AGENT_EVAL_TOKEN",
    "FOH_EXTERNAL_AGENT_EVAL_ORG_ID",
    "FOH_EXTERNAL_AGENT_EVAL_API_URL",
    "FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT",
    "FOH_SERVICE_TOKEN",
    "FOH_ORG_ID",
    "FOH_API_URL",
    "FOH_TOKEN_EXPIRES_AT"
  ];
  return authVariables.some((name) => Boolean(env[name]));
}
|
|
39808
|
+
async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}) {
|
|
39809
|
+
const hasExplicitEvalAuth = Boolean(
|
|
39460
39810
|
env.FOH_EXTERNAL_AGENT_EVAL_TOKEN || env.FOH_EXTERNAL_AGENT_EVAL_ORG_ID || env.FOH_EXTERNAL_AGENT_EVAL_API_URL || env.FOH_EXTERNAL_AGENT_EVAL_TOKEN_EXPIRES_AT
|
|
39461
39811
|
);
|
|
39462
39812
|
if (!shouldRunExternalAgentEvalAuthPreflight(env) && !options.requireExplicitEvalAuth) return null;
|
|
@@ -39507,14 +39857,14 @@ async function runExternalAgentEvalAuthPreflight(env = process.env, options = {}
|
|
|
39507
39857
|
};
|
|
39508
39858
|
}
|
|
39509
39859
|
/**
 * Resolve a path to absolute form for comparison, lower-casing it on Windows.
 *
 * @param {string} path2 - Path to normalize.
 * @returns {string} The comparable absolute path.
 */
function normalizeForCompare(path2) {
  const absolute = (0, import_path17.resolve)(path2);
  if (process.platform !== "win32") return absolute;
  return absolute.toLowerCase();
}
|
|
39513
39863
|
/**
 * Report whether `childPath` is `parentPath` itself or lies somewhere beneath it.
 *
 * Both paths are compared after normalizeForCompare. A relative path escapes the parent
 * only when it is exactly `..` or starts with a `..` path segment; a child whose first
 * segment merely begins with two dots (for example `..cache`) is still inside.
 *
 * @param {string} childPath - Path being tested.
 * @param {string} parentPath - Candidate containing directory.
 * @returns {boolean}
 */
function isPathInside(childPath, parentPath) {
  const child = normalizeForCompare(childPath);
  const parent = normalizeForCompare(parentPath);
  const rel = (0, import_path17.relative)(parent, child);
  if (rel === "") return true;
  const escapesParent = rel === ".." || rel.startsWith(`..${import_path17.sep}`);
  // An absolute result means no relative route exists (e.g. another drive on Windows).
  return !escapesParent && !(0, import_path17.isAbsolute)(rel);
}
|
|
39519
39869
|
function requireString(value, field) {
|
|
39520
39870
|
if (typeof value !== "string" || value.trim() === "") {
|
|
@@ -39523,10 +39873,10 @@ function requireString(value, field) {
|
|
|
39523
39873
|
return value;
|
|
39524
39874
|
}
|
|
39525
39875
|
function readBatch(batchPath) {
|
|
39526
|
-
if (!(0,
|
|
39876
|
+
if (!(0, import_fs18.existsSync)(batchPath)) {
|
|
39527
39877
|
throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
|
|
39528
39878
|
}
|
|
39529
|
-
const parsed = JSON.parse((0,
|
|
39879
|
+
const parsed = JSON.parse((0, import_fs18.readFileSync)(batchPath, "utf8"));
|
|
39530
39880
|
if (parsed.schema_version !== "external_agent_batch_plan.v1") {
|
|
39531
39881
|
throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
|
|
39532
39882
|
}
|
|
@@ -39541,11 +39891,11 @@ function defaultRunnerProbe(command, args) {
|
|
|
39541
39891
|
encoding: "utf8",
|
|
39542
39892
|
timeout: isGeminiHeadlessSmoke ? GEMINI_HEADLESS_PROBE_TIMEOUT_MS : void 0
|
|
39543
39893
|
};
|
|
39544
|
-
const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0,
|
|
39894
|
+
const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process5.spawnSync)(
|
|
39545
39895
|
"powershell.exe",
|
|
39546
39896
|
["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${[command, ...args].map(quotePowerShellArg).join(" ")}`],
|
|
39547
39897
|
spawnOptions
|
|
39548
|
-
) : (0,
|
|
39898
|
+
) : (0, import_child_process5.spawnSync)(command, args, spawnOptions);
|
|
39549
39899
|
return {
|
|
39550
39900
|
status: typeof result.status === "number" ? result.status : null,
|
|
39551
39901
|
stdout: String(result.stdout || ""),
|
|
@@ -39559,29 +39909,12 @@ function geminiCapacityUnavailable(text) {
|
|
|
39559
39909
|
/**
 * Wrap a string in single quotes for PowerShell, doubling any embedded single quotes.
 *
 * @param {string} value - Text to quote.
 * @returns {string} The single-quoted literal.
 */
function quotePowerShellArg(value) {
  const doubled = value.replace(/'/g, "''");
  return ["'", doubled, "'"].join("");
}
|
|
39562
|
-
function quoteShellArg(value) {
|
|
39563
|
-
const text = String(value);
|
|
39564
|
-
if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
|
|
39565
|
-
return `"${text.replace(/(["$`])/g, "\\$1")}"`;
|
|
39566
|
-
}
|
|
39567
|
-
function externalAgentSummaryCommand(root) {
|
|
39568
|
-
return [
|
|
39569
|
-
"node",
|
|
39570
|
-
"scripts/summarize-external-agent-runs.mjs",
|
|
39571
|
-
"--root",
|
|
39572
|
-
quoteShellArg(root),
|
|
39573
|
-
"--out",
|
|
39574
|
-
quoteShellArg((0, import_path13.join)(root, "latest-summary.json")),
|
|
39575
|
-
"--report",
|
|
39576
|
-
quoteShellArg((0, import_path13.join)(root, "summary.report.json"))
|
|
39577
|
-
].join(" ");
|
|
39578
|
-
}
|
|
39579
39912
|
/**
 * Pick the command used to probe for the Codex CLI.
 *
 * @returns {string} "codex" off Windows. On Windows, the `%APPDATA%\npm\codex.cmd` shim
 *   when APPDATA is set and that file exists, otherwise "codex.cmd".
 */
function resolveCodexProbeCommand() {
  if (process.platform !== "win32") return "codex";
  const appData = process.env.APPDATA;
  const appDataShim = appData ? (0, import_path17.join)(appData, "npm", "codex.cmd") : null;
  return appDataShim !== null && (0, import_fs18.existsSync)(appDataShim) ? appDataShim : "codex.cmd";
}
|
|
@@ -39592,8 +39925,8 @@ function resolveGeminiProbeCommand() {
|
|
|
39592
39925
|
if (process.platform !== "win32") return "gemini";
|
|
39593
39926
|
const appData = process.env.APPDATA;
|
|
39594
39927
|
if (appData) {
|
|
39595
|
-
const appDataShim = (0,
|
|
39596
|
-
if ((0,
|
|
39928
|
+
const appDataShim = (0, import_path17.join)(appData, "npm", "gemini.cmd");
|
|
39929
|
+
if ((0, import_fs18.existsSync)(appDataShim)) return appDataShim;
|
|
39597
39930
|
}
|
|
39598
39931
|
return "gemini.cmd";
|
|
39599
39932
|
}
|
|
@@ -39864,34 +40197,34 @@ function safeRunId(value) {
|
|
|
39864
40197
|
return value.toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "run";
|
|
39865
40198
|
}
|
|
39866
40199
|
function resolveWorkspaceRoot(input) {
|
|
39867
|
-
if (input.workspaceRoot) return (0,
|
|
39868
|
-
const batchStem = (0,
|
|
39869
|
-
const repoStem = (0,
|
|
39870
|
-
return (0,
|
|
40200
|
+
if (input.workspaceRoot) return (0, import_path17.resolve)(input.workspaceRoot);
|
|
40201
|
+
const batchStem = (0, import_path17.basename)((0, import_path17.resolve)(input.batchPath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
|
|
40202
|
+
const repoStem = (0, import_path17.basename)((0, import_path17.resolve)(input.privateRepoRoot)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
|
|
40203
|
+
return (0, import_path17.resolve)((0, import_os2.tmpdir)(), "foh-external-agent-workspaces", repoStem, batchStem);
|
|
39871
40204
|
}
|
|
39872
40205
|
function findNearestGitRoot(startPath) {
|
|
39873
|
-
let current = (0,
|
|
40206
|
+
let current = (0, import_path17.resolve)(startPath);
|
|
39874
40207
|
while (true) {
|
|
39875
|
-
if ((0,
|
|
39876
|
-
const parent = (0,
|
|
40208
|
+
if ((0, import_fs18.existsSync)((0, import_path17.join)(current, ".git"))) return current;
|
|
40209
|
+
const parent = (0, import_path17.dirname)(current);
|
|
39877
40210
|
if (parent === current) return null;
|
|
39878
40211
|
current = parent;
|
|
39879
40212
|
}
|
|
39880
40213
|
}
|
|
39881
40214
|
function resolvePrivateRepoRoot(input) {
|
|
39882
40215
|
if (input.explicitPrivateRepoRoot) {
|
|
39883
|
-
return { root: (0,
|
|
40216
|
+
return { root: (0, import_path17.resolve)(input.explicitPrivateRepoRoot), explicit: true };
|
|
39884
40217
|
}
|
|
39885
|
-
const cwd = (0,
|
|
40218
|
+
const cwd = (0, import_path17.resolve)(input.cwd || process.cwd());
|
|
39886
40219
|
const gitRoot = findNearestGitRoot(cwd);
|
|
39887
40220
|
if (gitRoot) return { root: gitRoot, explicit: false };
|
|
39888
40221
|
return {
|
|
39889
|
-
root: (0,
|
|
40222
|
+
root: (0, import_path17.join)(cwd, ".foh-no-private-repo-root-sentinel"),
|
|
39890
40223
|
explicit: false
|
|
39891
40224
|
};
|
|
39892
40225
|
}
|
|
39893
40226
|
function promptVersionFromPath(promptPath) {
|
|
39894
|
-
const raw = (0,
|
|
40227
|
+
const raw = (0, import_fs18.readFileSync)(promptPath, "utf8");
|
|
39895
40228
|
if (raw.includes("Do not assume access to the private source repository")) return "blank-setup.v1";
|
|
39896
40229
|
return "unknown";
|
|
39897
40230
|
}
|
|
@@ -39900,7 +40233,7 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39900
40233
|
if (runner !== "codex" && runner !== "gemini") {
|
|
39901
40234
|
throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
|
|
39902
40235
|
}
|
|
39903
|
-
const batchPath = (0,
|
|
40236
|
+
const batchPath = (0, import_path17.resolve)(options.batchPath);
|
|
39904
40237
|
const batch = readBatch(batchPath);
|
|
39905
40238
|
const runnerProbe = validateRunner(options, runner);
|
|
39906
40239
|
const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
|
|
@@ -39919,17 +40252,17 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39919
40252
|
`Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
|
|
39920
40253
|
);
|
|
39921
40254
|
}
|
|
39922
|
-
(0,
|
|
39923
|
-
const batchDir = (0,
|
|
40255
|
+
(0, import_fs18.mkdirSync)(workspaceRoot, { recursive: true });
|
|
40256
|
+
const batchDir = (0, import_path17.resolve)(String(batch.batch_dir || (0, import_path17.resolve)(batchPath, "..")));
|
|
39924
40257
|
const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
|
|
39925
40258
|
const runs = batch.runs.map((run) => {
|
|
39926
40259
|
const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
|
|
39927
|
-
const runDir = (0,
|
|
39928
|
-
const promptPath = (0,
|
|
39929
|
-
const workspaceDir = (0,
|
|
39930
|
-
(0,
|
|
39931
|
-
(0,
|
|
39932
|
-
(0,
|
|
40260
|
+
const runDir = (0, import_path17.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
|
|
40261
|
+
const promptPath = (0, import_path17.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
|
|
40262
|
+
const workspaceDir = (0, import_path17.join)(workspaceRoot, runId);
|
|
40263
|
+
(0, import_fs18.mkdirSync)(workspaceDir, { recursive: true });
|
|
40264
|
+
(0, import_fs18.writeFileSync)(
|
|
40265
|
+
(0, import_path17.join)(workspaceDir, "README.md"),
|
|
39933
40266
|
[
|
|
39934
40267
|
"# FOH External-Agent Workspace",
|
|
39935
40268
|
"",
|
|
@@ -39947,11 +40280,11 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39947
40280
|
});
|
|
39948
40281
|
const promptVersion = String(env[EXTERNAL_AGENT_PROMPT_VERSION_ENV] || "unknown");
|
|
39949
40282
|
const outputStem = runner === "gemini" ? "gemini" : "codex";
|
|
39950
|
-
const jsonlPath = (0,
|
|
39951
|
-
const lastMessagePath = (0,
|
|
39952
|
-
const stderrPath = (0,
|
|
39953
|
-
const runPath = (0,
|
|
39954
|
-
const artifactSafetyPath = (0,
|
|
40283
|
+
const jsonlPath = (0, import_path17.join)(runDir, `${outputStem}-exec.jsonl`);
|
|
40284
|
+
const lastMessagePath = (0, import_path17.join)(runDir, `${outputStem}-last-message.md`);
|
|
40285
|
+
const stderrPath = (0, import_path17.join)(runDir, `${outputStem}-stderr.txt`);
|
|
40286
|
+
const runPath = (0, import_path17.join)(runDir, "run.json");
|
|
40287
|
+
const artifactSafetyPath = (0, import_path17.join)(runDir, "artifact-safety.json");
|
|
39955
40288
|
const args = runner === "gemini" ? [
|
|
39956
40289
|
...runnerProbe.globalArgs,
|
|
39957
40290
|
...runnerProbe.execArgs
|
|
@@ -40042,281 +40375,12 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
40042
40375
|
};
|
|
40043
40376
|
}
|
|
40044
40377
|
function writeExternalAgentExecutorPlan(plan) {
|
|
40045
|
-
const path2 = (0,
|
|
40046
|
-
(0,
|
|
40047
|
-
(0,
|
|
40378
|
+
const path2 = (0, import_path17.join)(plan.batch_dir, "executor-plan.json");
|
|
40379
|
+
(0, import_fs18.mkdirSync)(plan.batch_dir, { recursive: true });
|
|
40380
|
+
(0, import_fs18.writeFileSync)(path2, `${JSON.stringify(plan, null, 2)}
|
|
40048
40381
|
`, "utf8");
|
|
40049
40382
|
return path2;
|
|
40050
40383
|
}
|
|
40051
|
-
function proofArtifactPasses(runDir) {
|
|
40052
|
-
const proofPath = (0, import_path13.join)(runDir, "proof.json");
|
|
40053
|
-
if (!(0, import_fs15.existsSync)(proofPath)) return false;
|
|
40054
|
-
try {
|
|
40055
|
-
const parsed = JSON.parse((0, import_fs15.readFileSync)(proofPath, "utf8"));
|
|
40056
|
-
return parsed.ok === true || parsed.status === "pass" || parsed.status === "passed";
|
|
40057
|
-
} catch {
|
|
40058
|
-
return false;
|
|
40059
|
-
}
|
|
40060
|
-
}
|
|
40061
|
-
function readIfExists(path2) {
|
|
40062
|
-
return (0, import_fs15.existsSync)(path2) ? (0, import_fs15.readFileSync)(path2, "utf8") : "";
|
|
40063
|
-
}
|
|
40064
|
-
function redactArtifactFile(path2, input = {}) {
|
|
40065
|
-
if (!(0, import_fs15.existsSync)(path2)) return;
|
|
40066
|
-
const original = (0, import_fs15.readFileSync)(path2, "utf8");
|
|
40067
|
-
const redacted = redactExternalAgentArtifactText(original, input);
|
|
40068
|
-
if (redacted !== original) (0, import_fs15.writeFileSync)(path2, redacted, "utf8");
|
|
40069
|
-
}
|
|
40070
|
-
function redactOutputArtifacts(run, input = {}) {
|
|
40071
|
-
redactArtifactFile(run.outputs.jsonl, input);
|
|
40072
|
-
redactArtifactFile(run.outputs.last_message, input);
|
|
40073
|
-
redactArtifactFile(run.outputs.stderr, input);
|
|
40074
|
-
redactArtifactFile((0, import_path13.join)(run.run_dir, "commands.ndjson"), input);
|
|
40075
|
-
if (!(0, import_fs15.existsSync)(run.run_dir)) return;
|
|
40076
|
-
for (const name of (0, import_fs15.readdirSync)(run.run_dir)) {
|
|
40077
|
-
if (name.startsWith("command-output-cmd_") && !name.endsWith(".redacted")) {
|
|
40078
|
-
redactArtifactFile((0, import_path13.join)(run.run_dir, name), input);
|
|
40079
|
-
}
|
|
40080
|
-
}
|
|
40081
|
-
}
|
|
40082
|
-
function copyCommandCaptureArtifacts(input) {
|
|
40083
|
-
const commandLog = (0, import_path13.join)(input.captureDir, "commands.ndjson");
|
|
40084
|
-
if ((0, import_fs15.existsSync)(commandLog)) {
|
|
40085
|
-
(0, import_fs15.writeFileSync)((0, import_path13.join)(input.runDir, "commands.ndjson"), (0, import_fs15.readFileSync)(commandLog, "utf8"), "utf8");
|
|
40086
|
-
}
|
|
40087
|
-
for (const name of (0, import_fs15.readdirSync)(input.captureDir)) {
|
|
40088
|
-
if (name.startsWith("command-output-cmd_")) {
|
|
40089
|
-
(0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
|
|
40090
|
-
} else if (EXTERNAL_AGENT_METADATA_FILENAMES.includes(name)) {
|
|
40091
|
-
(0, import_fs15.copyFileSync)((0, import_path13.join)(input.captureDir, name), (0, import_path13.join)(input.runDir, name));
|
|
40092
|
-
}
|
|
40093
|
-
}
|
|
40094
|
-
}
|
|
40095
|
-
function relativeArtifactName(path2) {
|
|
40096
|
-
return (0, import_path13.basename)(path2);
|
|
40097
|
-
}
|
|
40098
|
-
function classifyRun(input) {
|
|
40099
|
-
if (input.timedOut) return { status: "hold", reasonCode: `${input.run.command}_runner_timeout` };
|
|
40100
|
-
if (!input.artifactSafetyOk) return { status: "fail", reasonCode: "external_agent_artifact_safety_blocked" };
|
|
40101
|
-
const completedCommands = readCommandRecords(input.run.run_dir).filter((record2) => record2.phase === "completed");
|
|
40102
|
-
const observedVersions = completedCommands.map((record2) => String(record2.cli_version || "").trim()).filter((version2) => /^\d+\.\d+\.\d+$/.test(version2));
|
|
40103
|
-
if (observedVersions.some((version2) => version2 !== CLI_VERSION)) {
|
|
40104
|
-
return { status: "hold", reasonCode: "external_agent_cli_version_drift" };
|
|
40105
|
-
}
|
|
40106
|
-
const commandReasonCodes = completedCommands.flatMap((record2) => [
|
|
40107
|
-
String(record2.reason_code || ""),
|
|
40108
|
-
...Array.isArray(record2.check_reason_codes) ? record2.check_reason_codes.map((code) => String(code || "")) : []
|
|
40109
|
-
]).filter(Boolean);
|
|
40110
|
-
const hasCommandReason = (pattern) => commandReasonCodes.some((reason) => pattern.test(reason));
|
|
40111
|
-
if (hasCommandReason(new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i"))) {
|
|
40112
|
-
return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
|
|
40113
|
-
}
|
|
40114
|
-
if (hasCommandReason(/provider_capacity_blocked/i)) {
|
|
40115
|
-
return { status: "hold", reasonCode: "provider_capacity_blocked" };
|
|
40116
|
-
}
|
|
40117
|
-
if (hasCommandReason(/byon_voice_number_not_configured/i)) {
|
|
40118
|
-
return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
|
|
40119
|
-
}
|
|
40120
|
-
if (hasCommandReason(/contact_phone_provisioning_failed/i)) {
|
|
40121
|
-
return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
|
|
40122
|
-
}
|
|
40123
|
-
if (hasCommandReason(/voice_contact_expected_no_spend_hold/i)) {
|
|
40124
|
-
return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
|
|
40125
|
-
}
|
|
40126
|
-
if (hasCommandReason(/contact_phone_missing/i)) {
|
|
40127
|
-
return { status: "hold", reasonCode: "voice_contact_phone_missing" };
|
|
40128
|
-
}
|
|
40129
|
-
if (hasCommandReason(/sim(?:ulation)?[_-]?cert(?:ify|ification)?.*failed|simulation_certification_failed/i)) {
|
|
40130
|
-
return { status: "hold", reasonCode: "simulation_certification_failed" };
|
|
40131
|
-
}
|
|
40132
|
-
if (hasCommandReason(/proof_held/i)) {
|
|
40133
|
-
return { status: "hold", reasonCode: "external_agent_proof_held" };
|
|
40134
|
-
}
|
|
40135
|
-
if (hasCommandReason(/agent_limit_reached/i)) {
|
|
40136
|
-
return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
|
|
40137
|
-
}
|
|
40138
|
-
const lastMessage = readIfExists(input.run.outputs.last_message);
|
|
40139
|
-
const stderr = readIfExists(input.run.outputs.stderr);
|
|
40140
|
-
const combined = `${lastMessage}
|
|
40141
|
-
${stderr}`;
|
|
40142
|
-
if (/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i.test(combined)) {
|
|
40143
|
-
return { status: "fail", reasonCode: "private_repo_assumption_detected" };
|
|
40144
|
-
}
|
|
40145
|
-
if (/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i.test(combined)) {
|
|
40146
|
-
return { status: "hold", reasonCode: "codex_exec_policy_blocked" };
|
|
40147
|
-
}
|
|
40148
|
-
if (/bwrap:.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|bubblewrap.*(?:RTM_NEWADDR|Operation not permitted|setting up uid map: Permission denied)|Failed RTM_NEWADDR|ENV_SANDBOX_EXEC_BLOCKED|permission profiles requiring direct runtime enforcement are incompatible with --use-legacy-landlock|legacy[_ -]?landlock.*incompatible/i.test(combined)) {
|
|
40149
|
-
return { status: "hold", reasonCode: "codex_sandbox_exec_blocked" };
|
|
40150
|
-
}
|
|
40151
|
-
if (/ENV_NETWORK_DNS_BLOCK|Could not resolve host|npm ping.*timeout|NO_EXECUTABLE_INSTALL/i.test(combined)) {
|
|
40152
|
-
return { status: "hold", reasonCode: "codex_network_dns_blocked" };
|
|
40153
|
-
}
|
|
40154
|
-
if (new RegExp(PAID_RESOURCE_BLOCKED_REASON_CODE, "i").test(combined)) {
|
|
40155
|
-
return { status: "hold", reasonCode: PAID_RESOURCE_BLOCKED_REASON_CODE };
|
|
40156
|
-
}
|
|
40157
|
-
if (/provider_capacity_blocked/i.test(combined)) {
|
|
40158
|
-
return { status: "hold", reasonCode: "provider_capacity_blocked" };
|
|
40159
|
-
}
|
|
40160
|
-
if (/byon_voice_number_not_configured/i.test(combined)) {
|
|
40161
|
-
return { status: "hold", reasonCode: "byon_voice_number_not_configured" };
|
|
40162
|
-
}
|
|
40163
|
-
if (/contact_phone_provisioning_failed/i.test(combined)) {
|
|
40164
|
-
return { status: "hold", reasonCode: "voice_contact_phone_provisioning_failed" };
|
|
40165
|
-
}
|
|
40166
|
-
if (/voice_contact_expected_no_spend_hold/i.test(combined)) {
|
|
40167
|
-
return { status: "hold", reasonCode: "voice_contact_expected_no_spend_hold" };
|
|
40168
|
-
}
|
|
40169
|
-
if (/contact_phone_missing/i.test(combined)) {
|
|
40170
|
-
return { status: "hold", reasonCode: "voice_contact_phone_missing" };
|
|
40171
|
-
}
|
|
40172
|
-
if (/simulation_certification_failed/i.test(combined)) {
|
|
40173
|
-
return { status: "hold", reasonCode: "simulation_certification_failed" };
|
|
40174
|
-
}
|
|
40175
|
-
if (/proof_held/i.test(combined)) {
|
|
40176
|
-
return { status: "hold", reasonCode: "external_agent_proof_held" };
|
|
40177
|
-
}
|
|
40178
|
-
if (/agent_limit_reached/i.test(combined)) {
|
|
40179
|
-
return { status: "hold", reasonCode: "eval_org_agent_limit_reached" };
|
|
40180
|
-
}
|
|
40181
|
-
if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(input.run.run_dir)) {
|
|
40182
|
-
return { status: "hold", reasonCode: "auth_browser_approval_required" };
|
|
40183
|
-
}
|
|
40184
|
-
if (input.exitCode !== 0) return { status: "hold", reasonCode: `${input.run.command}_runner_nonzero_exit` };
|
|
40185
|
-
if (proofArtifactPasses(input.run.run_dir)) return { status: "pass", reasonCode: null };
|
|
40186
|
-
return { status: "hold", reasonCode: "external_agent_proof_artifact_missing" };
|
|
40187
|
-
}
|
|
40188
|
-
function buildExecutedRunArtifact(input) {
|
|
40189
|
-
const commands = readCommandRecords(input.run.run_dir);
|
|
40190
|
-
const agentMetadata = readExternalAgentMetadata(input.run.run_dir);
|
|
40191
|
-
return {
|
|
40192
|
-
schema_version: "external_agent_run.v1",
|
|
40193
|
-
run_id: input.run.run_id,
|
|
40194
|
-
status: input.status,
|
|
40195
|
-
failure_reason_code: input.reasonCode,
|
|
40196
|
-
model_provider: input.run.model_provider,
|
|
40197
|
-
model_name: input.run.model_name,
|
|
40198
|
-
runner_model: input.run.runner_model,
|
|
40199
|
-
agent_shell: `${input.run.command}-exec`,
|
|
40200
|
-
workspace_type: "clean-no-repo-programmatic",
|
|
40201
|
-
prompt_version: input.run.prompt_version,
|
|
40202
|
-
prompt_path: "prompt.txt",
|
|
40203
|
-
started_at: input.startedAt,
|
|
40204
|
-
ended_at: input.endedAt,
|
|
40205
|
-
manual_intervention_count: 0,
|
|
40206
|
-
manual_interventions: [],
|
|
40207
|
-
environment: {
|
|
40208
|
-
os: process.platform,
|
|
40209
|
-
node_version: process.version,
|
|
40210
|
-
npm_version: null,
|
|
40211
|
-
foh_cli_version: CLI_VERSION,
|
|
40212
|
-
runner_exit_code: input.exitCode,
|
|
40213
|
-
runner_timed_out: input.timedOut,
|
|
40214
|
-
duration_ms: input.durationMs
|
|
40215
|
-
},
|
|
40216
|
-
public_entrypoints: [
|
|
40217
|
-
"https://frontofhouse.okii.uk",
|
|
40218
|
-
"https://frontofhouse.okii.uk/llms.txt",
|
|
40219
|
-
"https://frontofhouse.okii.uk/openapi.yaml",
|
|
40220
|
-
"npx --yes @f-o-h/cli@latest"
|
|
40221
|
-
],
|
|
40222
|
-
commands_run: commands.map((command) => command.command),
|
|
40223
|
-
docs_pages_used: agentMetadata.docs_pages_used,
|
|
40224
|
-
eval_state: {
|
|
40225
|
-
lifecycle_strategy: "reuse_existing_eval_state",
|
|
40226
|
-
org_reuse_expected: true,
|
|
40227
|
-
agent_reuse_expected: true,
|
|
40228
|
-
widget_reuse_expected: true,
|
|
40229
|
-
fresh_org_expected: false,
|
|
40230
|
-
ephemeral_org_expected: false,
|
|
40231
|
-
fresh_agent_expected: false,
|
|
40232
|
-
phone_purchase_expected: false,
|
|
40233
|
-
paid_resource_creation_expected: false,
|
|
40234
|
-
spend_policy_expected: NO_SPEND_POLICY,
|
|
40235
|
-
cleanup_expected: false,
|
|
40236
|
-
cleanup_strategy: "no_cleanup_for_reused_eval_state",
|
|
40237
|
-
paid_resource_strategy: "blocked_unless_explicit_byon_or_operator_approved",
|
|
40238
|
-
rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
|
|
40239
|
-
},
|
|
40240
|
-
artifacts: {
|
|
40241
|
-
terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
|
|
40242
|
-
command_log: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
|
|
40243
|
-
proof_bundle: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
|
|
40244
|
-
replay_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
|
|
40245
|
-
knowledge_packet: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
|
|
40246
|
-
improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
|
|
40247
|
-
agent_metadata: agentMetadata.path,
|
|
40248
|
-
notes: (0, import_fs15.existsSync)((0, import_path13.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
|
|
40249
|
-
runner_last_message: relativeArtifactName(input.run.outputs.last_message),
|
|
40250
|
-
runner_stderr: relativeArtifactName(input.run.outputs.stderr),
|
|
40251
|
-
codex_last_message: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.last_message) : null,
|
|
40252
|
-
codex_stderr: input.run.command === "codex" ? relativeArtifactName(input.run.outputs.stderr) : null,
|
|
40253
|
-
artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
|
|
40254
|
-
},
|
|
40255
|
-
summary: input.status === "pass" ? `Controlled ${input.run.command} external-agent run produced passing proof evidence.` : `Controlled ${input.run.command} external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
|
|
40256
|
-
next_commands: input.status === "pass" ? [externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))] : [
|
|
40257
|
-
"foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
|
|
40258
|
-
"foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
|
|
40259
|
-
externalAgentSummaryCommand((0, import_path13.dirname)(input.run.run_dir))
|
|
40260
|
-
]
|
|
40261
|
-
};
|
|
40262
|
-
}
|
|
40263
|
-
function spawnRunner(input) {
|
|
40264
|
-
return new Promise((resolveRun) => {
|
|
40265
|
-
const started = Date.now();
|
|
40266
|
-
const commandInvocation = buildCommandInvocation(input.command, input.args);
|
|
40267
|
-
const child = (0, import_child_process4.spawn)(commandInvocation.command, commandInvocation.args, {
|
|
40268
|
-
cwd: input.cwd,
|
|
40269
|
-
env: input.env,
|
|
40270
|
-
shell: false,
|
|
40271
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
40272
|
-
windowsHide: true
|
|
40273
|
-
});
|
|
40274
|
-
const stdout = (0, import_fs15.createWriteStream)(input.stdoutPath, { flags: "w" });
|
|
40275
|
-
const stderr = (0, import_fs15.createWriteStream)(input.stderrPath, { flags: "w" });
|
|
40276
|
-
child.stdout.pipe(stdout);
|
|
40277
|
-
child.stderr.pipe(stderr);
|
|
40278
|
-
child.stdin.end(input.prompt);
|
|
40279
|
-
let timedOut = false;
|
|
40280
|
-
const timer = setTimeout(() => {
|
|
40281
|
-
timedOut = true;
|
|
40282
|
-
if (child.pid && process.platform === "win32") {
|
|
40283
|
-
(0, import_child_process4.spawnSync)("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
|
|
40284
|
-
} else {
|
|
40285
|
-
child.kill("SIGKILL");
|
|
40286
|
-
}
|
|
40287
|
-
}, input.timeoutMs);
|
|
40288
|
-
child.on("close", (exitCode) => {
|
|
40289
|
-
clearTimeout(timer);
|
|
40290
|
-
stdout.end();
|
|
40291
|
-
stderr.end();
|
|
40292
|
-
resolveRun({
|
|
40293
|
-
exitCode,
|
|
40294
|
-
timedOut,
|
|
40295
|
-
durationMs: Date.now() - started
|
|
40296
|
-
});
|
|
40297
|
-
});
|
|
40298
|
-
child.on("error", () => {
|
|
40299
|
-
clearTimeout(timer);
|
|
40300
|
-
stdout.end();
|
|
40301
|
-
stderr.end();
|
|
40302
|
-
resolveRun({
|
|
40303
|
-
exitCode: null,
|
|
40304
|
-
timedOut,
|
|
40305
|
-
durationMs: Date.now() - started
|
|
40306
|
-
});
|
|
40307
|
-
});
|
|
40308
|
-
});
|
|
40309
|
-
}
|
|
40310
|
-
function buildCommandInvocation(command, args) {
|
|
40311
|
-
if (process.platform === "win32" && command.toLowerCase().endsWith(".cmd")) {
|
|
40312
|
-
const binDir = (0, import_path13.dirname)(command);
|
|
40313
|
-
const codexEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@openai", "codex", "bin", "codex.js");
|
|
40314
|
-
if ((0, import_fs15.existsSync)(codexEntrypoint)) return { command: process.execPath, args: [codexEntrypoint, ...args] };
|
|
40315
|
-
const geminiEntrypoint = (0, import_path13.join)(binDir, "node_modules", "@google", "gemini-cli", "bundle", "gemini.js");
|
|
40316
|
-
if ((0, import_fs15.existsSync)(geminiEntrypoint)) return { command: process.execPath, args: ["--no-warnings=DEP0040", geminiEntrypoint, ...args] };
|
|
40317
|
-
}
|
|
40318
|
-
return { command, args };
|
|
40319
|
-
}
|
|
40320
40384
|
async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
40321
40385
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40322
40386
|
const results = [];
|
|
@@ -40328,8 +40392,8 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40328
40392
|
if (authPreflight && !authPreflight.ok) {
|
|
40329
40393
|
const endedAt2 = (/* @__PURE__ */ new Date()).toISOString();
|
|
40330
40394
|
const blockedResults = plan.runs.map((run) => {
|
|
40331
|
-
(0,
|
|
40332
|
-
const runArtifact =
|
|
40395
|
+
(0, import_fs18.mkdirSync)(run.run_dir, { recursive: true });
|
|
40396
|
+
const runArtifact = buildExecutedExternalAgentRunArtifact({
|
|
40333
40397
|
run,
|
|
40334
40398
|
startedAt,
|
|
40335
40399
|
endedAt: endedAt2,
|
|
@@ -40339,7 +40403,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40339
40403
|
timedOut: false,
|
|
40340
40404
|
durationMs: 0
|
|
40341
40405
|
});
|
|
40342
|
-
(0,
|
|
40406
|
+
(0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
|
|
40343
40407
|
`, "utf8");
|
|
40344
40408
|
return {
|
|
40345
40409
|
run_id: run.run_id,
|
|
@@ -40366,41 +40430,41 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40366
40430
|
}
|
|
40367
40431
|
for (const run of plan.runs) {
|
|
40368
40432
|
const runStartedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40369
|
-
const commandCaptureDir = (0,
|
|
40370
|
-
(0,
|
|
40433
|
+
const commandCaptureDir = (0, import_path17.join)(run.workspace_dir, ".foh-capture");
|
|
40434
|
+
(0, import_fs18.mkdirSync)(commandCaptureDir, { recursive: true });
|
|
40371
40435
|
const env = buildCodexExecutorEnv({
|
|
40372
40436
|
sourceEnv: options.env,
|
|
40373
40437
|
runDir: commandCaptureDir,
|
|
40374
40438
|
promptVersion: run.prompt_version
|
|
40375
40439
|
});
|
|
40376
|
-
const spawned = await
|
|
40440
|
+
const spawned = await spawnExternalAgentRunner({
|
|
40377
40441
|
command: runnerCommand,
|
|
40378
40442
|
args: run.args,
|
|
40379
40443
|
cwd: run.workspace_dir,
|
|
40380
40444
|
env,
|
|
40381
|
-
prompt: (0,
|
|
40445
|
+
prompt: (0, import_fs18.readFileSync)(run.prompt_path, "utf8"),
|
|
40382
40446
|
stdoutPath: run.outputs.jsonl,
|
|
40383
40447
|
stderrPath: run.outputs.stderr,
|
|
40384
40448
|
timeoutMs: plan.timeout_minutes * 60 * 1e3
|
|
40385
40449
|
});
|
|
40386
|
-
|
|
40450
|
+
copyExternalAgentCommandCaptureArtifacts({ captureDir: commandCaptureDir, runDir: run.run_dir });
|
|
40387
40451
|
const privateRepoRoot = options.privateRepoRoot || plan.private_repo_root;
|
|
40388
|
-
|
|
40452
|
+
redactExternalAgentOutputArtifacts(run, { privateRepoRoot });
|
|
40389
40453
|
const artifactSafety = scanExternalAgentArtifacts({
|
|
40390
40454
|
runDir: run.run_dir,
|
|
40391
40455
|
privateRepoRoot,
|
|
40392
40456
|
writeRedacted: true
|
|
40393
40457
|
});
|
|
40394
|
-
(0,
|
|
40458
|
+
(0, import_fs18.writeFileSync)(run.outputs.artifact_safety, `${JSON.stringify(artifactSafety, null, 2)}
|
|
40395
40459
|
`, "utf8");
|
|
40396
40460
|
const runEndedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
40397
|
-
const classification =
|
|
40461
|
+
const classification = classifyExternalAgentRun({
|
|
40398
40462
|
run,
|
|
40399
40463
|
exitCode: spawned.exitCode,
|
|
40400
40464
|
timedOut: spawned.timedOut,
|
|
40401
40465
|
artifactSafetyOk: artifactSafety.ok
|
|
40402
40466
|
});
|
|
40403
|
-
const runArtifact =
|
|
40467
|
+
const runArtifact = buildExecutedExternalAgentRunArtifact({
|
|
40404
40468
|
run,
|
|
40405
40469
|
startedAt: runStartedAt,
|
|
40406
40470
|
endedAt: runEndedAt,
|
|
@@ -40410,7 +40474,7 @@ async function executeExternalAgentExecutorPlan(plan, options = {}) {
|
|
|
40410
40474
|
timedOut: spawned.timedOut,
|
|
40411
40475
|
durationMs: spawned.durationMs
|
|
40412
40476
|
});
|
|
40413
|
-
(0,
|
|
40477
|
+
(0, import_fs18.writeFileSync)(run.outputs.run, `${JSON.stringify(runArtifact, null, 2)}
|
|
40414
40478
|
`, "utf8");
|
|
40415
40479
|
results.push({
|
|
40416
40480
|
run_id: run.run_id,
|
|
@@ -40459,13 +40523,13 @@ function defaultRunDir(modelName, promptVersion) {
|
|
|
40459
40523
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
|
|
40460
40524
|
const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
|
|
40461
40525
|
const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
|
|
40462
|
-
return (0,
|
|
40526
|
+
return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
|
|
40463
40527
|
}
|
|
40464
40528
|
function defaultBatchDir(promptVersion) {
|
|
40465
40529
|
const date4 = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
|
|
40466
40530
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
|
|
40467
40531
|
const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
|
|
40468
|
-
return (0,
|
|
40532
|
+
return (0, import_path18.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
|
|
40469
40533
|
}
|
|
40470
40534
|
function safeSlug(value) {
|
|
40471
40535
|
return String(value || "unknown").toLowerCase().replace(/[^a-z0-9_.-]+/g, "-").replace(/^-+|-+$/g, "") || "unknown";
|
|
@@ -40480,8 +40544,8 @@ function scanArtifactsCommand(runDir, privateRepoRoot) {
|
|
|
40480
40544
|
return `foh eval external-agent scan-artifacts --run-dir ${quoteArg(runDir)}${privateRootArg} --write-redacted --json`;
|
|
40481
40545
|
}
|
|
40482
40546
|
function externalAgentSummaryCommand2(root) {
|
|
40483
|
-
const summaryPath = (0,
|
|
40484
|
-
const reportPath = (0,
|
|
40547
|
+
const summaryPath = (0, import_path18.join)(root, "latest-summary.json");
|
|
40548
|
+
const reportPath = (0, import_path18.join)(root, "summary.report.json");
|
|
40485
40549
|
return [
|
|
40486
40550
|
"node",
|
|
40487
40551
|
"scripts/summarize-external-agent-runs.mjs",
|
|
@@ -40595,14 +40659,14 @@ function writePrompt(runDir, promptVersion, context = {}) {
|
|
|
40595
40659
|
replayPromptContext(context.replayFile),
|
|
40596
40660
|
knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
|
|
40597
40661
|
].join("");
|
|
40598
|
-
const path2 = (0,
|
|
40599
|
-
(0,
|
|
40662
|
+
const path2 = (0, import_path18.join)(runDir, "prompt.txt");
|
|
40663
|
+
(0, import_fs19.writeFileSync)(path2, `${prompt}
|
|
40600
40664
|
`, "utf8");
|
|
40601
40665
|
return path2;
|
|
40602
40666
|
}
|
|
40603
40667
|
function writeSession(runDir, session) {
|
|
40604
|
-
const path2 = (0,
|
|
40605
|
-
(0,
|
|
40668
|
+
const path2 = (0, import_path18.join)(runDir, "session.json");
|
|
40669
|
+
(0, import_fs19.writeFileSync)(path2, `${JSON.stringify(session, null, 2)}
|
|
40606
40670
|
`, "utf8");
|
|
40607
40671
|
return path2;
|
|
40608
40672
|
}
|
|
@@ -40678,9 +40742,9 @@ function buildRunArtifact(input) {
|
|
|
40678
40742
|
notes: "notes.md"
|
|
40679
40743
|
},
|
|
40680
40744
|
summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
|
|
40681
|
-
next_commands: status === "pass" ? [externalAgentSummaryCommand2((0,
|
|
40682
|
-
`foh bug improve --from external-agent-run --file ${(0,
|
|
40683
|
-
externalAgentSummaryCommand2((0,
|
|
40745
|
+
next_commands: status === "pass" ? [externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))] : [
|
|
40746
|
+
`foh bug improve --from external-agent-run --file ${(0, import_path18.join)(input.runDir, "run.json")} --out ${(0, import_path18.join)(input.runDir, "improvement-packet.json")} --json`,
|
|
40747
|
+
externalAgentSummaryCommand2((0, import_path18.dirname)(input.runDir))
|
|
40684
40748
|
]
|
|
40685
40749
|
};
|
|
40686
40750
|
}
|
|
@@ -40689,16 +40753,16 @@ function registerEval(program3) {
|
|
|
40689
40753
|
const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
|
|
40690
40754
|
external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
|
|
40691
40755
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
40692
|
-
const batchDir = (0,
|
|
40693
|
-
const replayFile = opts.replayFile ? (0,
|
|
40756
|
+
const batchDir = (0, import_path18.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
|
|
40757
|
+
const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
|
|
40694
40758
|
const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
|
|
40695
40759
|
const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
|
|
40696
40760
|
const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
|
|
40697
|
-
(0,
|
|
40761
|
+
(0, import_fs19.mkdirSync)(batchDir, { recursive: true });
|
|
40698
40762
|
const runs = models.map((model, index) => {
|
|
40699
40763
|
const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
|
|
40700
|
-
const runDir = (0,
|
|
40701
|
-
(0,
|
|
40764
|
+
const runDir = (0, import_path18.join)(batchDir, runId);
|
|
40765
|
+
(0, import_fs19.mkdirSync)(runDir, { recursive: true });
|
|
40702
40766
|
const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
|
|
40703
40767
|
const commandArgs = [
|
|
40704
40768
|
"eval",
|
|
@@ -40745,8 +40809,8 @@ function registerEval(program3) {
|
|
|
40745
40809
|
runs,
|
|
40746
40810
|
summary_command: externalAgentSummaryCommand2(batchDir)
|
|
40747
40811
|
};
|
|
40748
|
-
const batchPath = (0,
|
|
40749
|
-
(0,
|
|
40812
|
+
const batchPath = (0, import_path18.join)(batchDir, "batch.json");
|
|
40813
|
+
(0, import_fs19.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
|
|
40750
40814
|
`, "utf8");
|
|
40751
40815
|
format(cliEnvelope({
|
|
40752
40816
|
schemaVersion: "external_agent_batch_plan_result.v1",
|
|
@@ -40766,11 +40830,11 @@ function registerEval(program3) {
|
|
|
40766
40830
|
external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
|
|
40767
40831
|
const status = normalizeStatus(opts.status);
|
|
40768
40832
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
40769
|
-
const runDir = (0,
|
|
40770
|
-
const replayFile = opts.replayFile ? (0,
|
|
40833
|
+
const runDir = (0, import_path18.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
|
|
40834
|
+
const replayFile = opts.replayFile ? (0, import_path18.resolve)(String(opts.replayFile)) : void 0;
|
|
40771
40835
|
const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
|
|
40772
40836
|
const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
|
|
40773
|
-
(0,
|
|
40837
|
+
(0, import_fs19.mkdirSync)(runDir, { recursive: true });
|
|
40774
40838
|
const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
|
|
40775
40839
|
const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
|
|
40776
40840
|
const shell = inferShell(opts.shell);
|
|
@@ -40796,7 +40860,7 @@ function registerEval(program3) {
|
|
|
40796
40860
|
}
|
|
40797
40861
|
};
|
|
40798
40862
|
writeSession(runDir, session);
|
|
40799
|
-
(0,
|
|
40863
|
+
(0, import_fs19.writeFileSync)((0, import_path18.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
|
|
40800
40864
|
let shellExitCode = null;
|
|
40801
40865
|
if (opts.shell !== false) {
|
|
40802
40866
|
process.stdout.write(`
|
|
@@ -40806,7 +40870,7 @@ Prompt: ${promptPath}
|
|
|
40806
40870
|
Exit the shell to finalize run.json.
|
|
40807
40871
|
|
|
40808
40872
|
`);
|
|
40809
|
-
const result = (0,
|
|
40873
|
+
const result = (0, import_child_process6.spawnSync)(shell.command, shell.args, {
|
|
40810
40874
|
stdio: "inherit",
|
|
40811
40875
|
env: {
|
|
40812
40876
|
...process.env,
|
|
@@ -40818,8 +40882,8 @@ Exit the shell to finalize run.json.
|
|
|
40818
40882
|
shellExitCode = typeof result.status === "number" ? result.status : null;
|
|
40819
40883
|
}
|
|
40820
40884
|
const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
|
|
40821
|
-
const runPath = (0,
|
|
40822
|
-
(0,
|
|
40885
|
+
const runPath = (0, import_path18.join)(runDir, "run.json");
|
|
40886
|
+
(0, import_fs19.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
|
|
40823
40887
|
`, "utf8");
|
|
40824
40888
|
format(cliEnvelope({
|
|
40825
40889
|
schemaVersion: "external_agent_capture_result.v1",
|
|
@@ -40829,7 +40893,7 @@ Exit the shell to finalize run.json.
|
|
|
40829
40893
|
artifacts: {
|
|
40830
40894
|
run: runPath,
|
|
40831
40895
|
prompt: promptPath,
|
|
40832
|
-
commands: (0,
|
|
40896
|
+
commands: (0, import_path18.join)(runDir, "commands.ndjson")
|
|
40833
40897
|
},
|
|
40834
40898
|
nextCommands: artifact.next_commands,
|
|
40835
40899
|
extra: { run: artifact }
|
|
@@ -40892,8 +40956,8 @@ Exit the shell to finalize run.json.
|
|
|
40892
40956
|
requireExplicitEvalAuth: true,
|
|
40893
40957
|
minimumEvalAuthTtlMs: (plan.timeout_minutes + 5) * 60 * 1e3
|
|
40894
40958
|
});
|
|
40895
|
-
const resultPath = (0,
|
|
40896
|
-
(0,
|
|
40959
|
+
const resultPath = (0, import_path18.join)(plan.batch_dir, "execution-result.json");
|
|
40960
|
+
(0, import_fs19.writeFileSync)(resultPath, `${JSON.stringify(result, null, 2)}
|
|
40897
40961
|
`, "utf8");
|
|
40898
40962
|
format(cliEnvelope({
|
|
40899
40963
|
schemaVersion: "external_agent_execution_result.v1",
|