@f-o-h/cli 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -11
- package/dist/foh.js +515 -88
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ AI-operator provisioning CLI for Front Of House.
|
|
|
4
4
|
|
|
5
5
|
Public mirror: https://github.com/iiko38/front-of-house-cli
|
|
6
6
|
|
|
7
|
-
Current published baseline: `@f-o-h/cli@0.1.9`
|
|
7
|
+
Current published baseline: `@f-o-h/cli@0.1.11`
|
|
8
8
|
|
|
9
9
|
This mirror is a generated release artifact. The private product monorepo is not
|
|
10
10
|
published here, and no open-source license is granted unless stated separately.
|
|
@@ -85,21 +85,44 @@ The CLI defaults to the production API at `https://api.frontofhouse.okii.uk`.
|
|
|
85
85
|
|
|
86
86
|
## External-Agent Eval Capture
|
|
87
87
|
|
|
88
|
-
Use this when testing whether a clean coding agent can start from public docs
|
|
89
|
-
and the public npm package without private repo context:
|
|
90
|
-
|
|
91
|
-
```bash
|
|
92
|
-
foh eval external-agent
|
|
88
|
+
Use this when testing whether a clean coding agent can start from public docs
|
|
89
|
+
and the public npm package without private repo context:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
foh eval external-agent batch \
|
|
93
|
+
--models openai/codex,anthropic/claude,cursor/agent \
|
|
94
|
+
--prompt-version blank-setup.v1 \
|
|
95
|
+
--json
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Run each returned launch command in a clean agent terminal:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
foh eval external-agent run \
|
|
93
102
|
--model-provider openai \
|
|
94
103
|
--model-name codex \
|
|
95
104
|
--prompt-version blank-setup.v1
|
|
96
105
|
```
|
|
97
106
|
|
|
98
|
-
The command writes a versioned prompt, launches an instrumented shell, captures
|
|
99
|
-
FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
|
|
100
|
-
`external_agent_run.v1` artifact when the shell exits.
|
|
101
|
-
|
|
102
|
-
|
|
107
|
+
The command writes a versioned prompt, launches an instrumented shell, captures
|
|
108
|
+
FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
|
|
109
|
+
`external_agent_run.v1` artifact when the shell exits.
|
|
110
|
+
|
|
111
|
+
For guarded programmable-runner planning:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
foh eval external-agent execute \
|
|
115
|
+
--runner codex \
|
|
116
|
+
--batch test-results/external-agent-runs/<batch>/batch.json \
|
|
117
|
+
--dry-run \
|
|
118
|
+
--json
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
This writes `executor-plan.json`, creates intentionally empty clean workspaces
|
|
122
|
+
outside the private repo, validates the local Codex binary/help flags, and
|
|
123
|
+
prints exact `codex exec` commands without executing them.
|
|
124
|
+
|
|
125
|
+
## Local Scenario Suites
|
|
103
126
|
|
|
104
127
|
`foh test run --suite <file>` runs deterministic widget-runtime checks for a
|
|
105
128
|
specific agent. The suite format supports reply text checks plus structured
|
package/dist/foh.js
CHANGED
|
@@ -6046,7 +6046,7 @@ var require_compile = __commonJS({
|
|
|
6046
6046
|
const schOrFunc = root.refs[ref];
|
|
6047
6047
|
if (schOrFunc)
|
|
6048
6048
|
return schOrFunc;
|
|
6049
|
-
let _sch =
|
|
6049
|
+
let _sch = resolve11.call(this, root, ref);
|
|
6050
6050
|
if (_sch === void 0) {
|
|
6051
6051
|
const schema2 = (_a2 = root.localRefs) === null || _a2 === void 0 ? void 0 : _a2[ref];
|
|
6052
6052
|
const { schemaId } = this.opts;
|
|
@@ -6073,7 +6073,7 @@ var require_compile = __commonJS({
|
|
|
6073
6073
|
function sameSchemaEnv(s1, s2) {
|
|
6074
6074
|
return s1.schema === s2.schema && s1.root === s2.root && s1.baseId === s2.baseId;
|
|
6075
6075
|
}
|
|
6076
|
-
function
|
|
6076
|
+
function resolve11(root, ref) {
|
|
6077
6077
|
let sch;
|
|
6078
6078
|
while (typeof (sch = this.refs[ref]) == "string")
|
|
6079
6079
|
ref = sch;
|
|
@@ -6648,55 +6648,55 @@ var require_fast_uri = __commonJS({
|
|
|
6648
6648
|
}
|
|
6649
6649
|
return uri;
|
|
6650
6650
|
}
|
|
6651
|
-
function
|
|
6651
|
+
function resolve11(baseURI, relativeURI, options) {
|
|
6652
6652
|
const schemelessOptions = options ? Object.assign({ scheme: "null" }, options) : { scheme: "null" };
|
|
6653
6653
|
const resolved = resolveComponent(parse3(baseURI, schemelessOptions), parse3(relativeURI, schemelessOptions), schemelessOptions, true);
|
|
6654
6654
|
schemelessOptions.skipEscape = true;
|
|
6655
6655
|
return serialize(resolved, schemelessOptions);
|
|
6656
6656
|
}
|
|
6657
|
-
function resolveComponent(base,
|
|
6657
|
+
function resolveComponent(base, relative2, options, skipNormalization) {
|
|
6658
6658
|
const target = {};
|
|
6659
6659
|
if (!skipNormalization) {
|
|
6660
6660
|
base = parse3(serialize(base, options), options);
|
|
6661
|
-
|
|
6661
|
+
relative2 = parse3(serialize(relative2, options), options);
|
|
6662
6662
|
}
|
|
6663
6663
|
options = options || {};
|
|
6664
|
-
if (!options.tolerant &&
|
|
6665
|
-
target.scheme =
|
|
6666
|
-
target.userinfo =
|
|
6667
|
-
target.host =
|
|
6668
|
-
target.port =
|
|
6669
|
-
target.path = removeDotSegments(
|
|
6670
|
-
target.query =
|
|
6664
|
+
if (!options.tolerant && relative2.scheme) {
|
|
6665
|
+
target.scheme = relative2.scheme;
|
|
6666
|
+
target.userinfo = relative2.userinfo;
|
|
6667
|
+
target.host = relative2.host;
|
|
6668
|
+
target.port = relative2.port;
|
|
6669
|
+
target.path = removeDotSegments(relative2.path || "");
|
|
6670
|
+
target.query = relative2.query;
|
|
6671
6671
|
} else {
|
|
6672
|
-
if (
|
|
6673
|
-
target.userinfo =
|
|
6674
|
-
target.host =
|
|
6675
|
-
target.port =
|
|
6676
|
-
target.path = removeDotSegments(
|
|
6677
|
-
target.query =
|
|
6672
|
+
if (relative2.userinfo !== void 0 || relative2.host !== void 0 || relative2.port !== void 0) {
|
|
6673
|
+
target.userinfo = relative2.userinfo;
|
|
6674
|
+
target.host = relative2.host;
|
|
6675
|
+
target.port = relative2.port;
|
|
6676
|
+
target.path = removeDotSegments(relative2.path || "");
|
|
6677
|
+
target.query = relative2.query;
|
|
6678
6678
|
} else {
|
|
6679
|
-
if (!
|
|
6679
|
+
if (!relative2.path) {
|
|
6680
6680
|
target.path = base.path;
|
|
6681
|
-
if (
|
|
6682
|
-
target.query =
|
|
6681
|
+
if (relative2.query !== void 0) {
|
|
6682
|
+
target.query = relative2.query;
|
|
6683
6683
|
} else {
|
|
6684
6684
|
target.query = base.query;
|
|
6685
6685
|
}
|
|
6686
6686
|
} else {
|
|
6687
|
-
if (
|
|
6688
|
-
target.path = removeDotSegments(
|
|
6687
|
+
if (relative2.path[0] === "/") {
|
|
6688
|
+
target.path = removeDotSegments(relative2.path);
|
|
6689
6689
|
} else {
|
|
6690
6690
|
if ((base.userinfo !== void 0 || base.host !== void 0 || base.port !== void 0) && !base.path) {
|
|
6691
|
-
target.path = "/" +
|
|
6691
|
+
target.path = "/" + relative2.path;
|
|
6692
6692
|
} else if (!base.path) {
|
|
6693
|
-
target.path =
|
|
6693
|
+
target.path = relative2.path;
|
|
6694
6694
|
} else {
|
|
6695
|
-
target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) +
|
|
6695
|
+
target.path = base.path.slice(0, base.path.lastIndexOf("/") + 1) + relative2.path;
|
|
6696
6696
|
}
|
|
6697
6697
|
target.path = removeDotSegments(target.path);
|
|
6698
6698
|
}
|
|
6699
|
-
target.query =
|
|
6699
|
+
target.query = relative2.query;
|
|
6700
6700
|
}
|
|
6701
6701
|
target.userinfo = base.userinfo;
|
|
6702
6702
|
target.host = base.host;
|
|
@@ -6704,7 +6704,7 @@ var require_fast_uri = __commonJS({
|
|
|
6704
6704
|
}
|
|
6705
6705
|
target.scheme = base.scheme;
|
|
6706
6706
|
}
|
|
6707
|
-
target.fragment =
|
|
6707
|
+
target.fragment = relative2.fragment;
|
|
6708
6708
|
return target;
|
|
6709
6709
|
}
|
|
6710
6710
|
function equal(uriA, uriB, options) {
|
|
@@ -6875,7 +6875,7 @@ var require_fast_uri = __commonJS({
|
|
|
6875
6875
|
var fastUri = {
|
|
6876
6876
|
SCHEMES,
|
|
6877
6877
|
normalize,
|
|
6878
|
-
resolve:
|
|
6878
|
+
resolve: resolve11,
|
|
6879
6879
|
resolveComponent,
|
|
6880
6880
|
equal,
|
|
6881
6881
|
serialize,
|
|
@@ -10105,21 +10105,21 @@ async function promptLine(label, {
|
|
|
10105
10105
|
allowEmpty = false,
|
|
10106
10106
|
defaultValue
|
|
10107
10107
|
} = {}) {
|
|
10108
|
-
return await new Promise((
|
|
10108
|
+
return await new Promise((resolve11) => {
|
|
10109
10109
|
const suffix = defaultValue ? ` [${defaultValue}]` : "";
|
|
10110
10110
|
const rl = (0, import_readline.createInterface)({ input: process.stdin, output: process.stdout, terminal: true });
|
|
10111
10111
|
rl.question(`${label}${suffix}: `, (answer) => {
|
|
10112
10112
|
rl.close();
|
|
10113
10113
|
const value = String(answer ?? "").trim();
|
|
10114
10114
|
if (!value && typeof defaultValue === "string") {
|
|
10115
|
-
|
|
10115
|
+
resolve11(defaultValue);
|
|
10116
10116
|
return;
|
|
10117
10117
|
}
|
|
10118
10118
|
if (!value && !allowEmpty) {
|
|
10119
|
-
|
|
10119
|
+
resolve11("");
|
|
10120
10120
|
return;
|
|
10121
10121
|
}
|
|
10122
|
-
|
|
10122
|
+
resolve11(value);
|
|
10123
10123
|
});
|
|
10124
10124
|
});
|
|
10125
10125
|
}
|
|
@@ -10127,7 +10127,7 @@ async function promptSecret(label) {
|
|
|
10127
10127
|
if (!process.stdin.isTTY || !process.stdout.isTTY || typeof process.stdin.setRawMode !== "function") {
|
|
10128
10128
|
return await promptLine(label);
|
|
10129
10129
|
}
|
|
10130
|
-
return await new Promise((
|
|
10130
|
+
return await new Promise((resolve11) => {
|
|
10131
10131
|
const stdin = process.stdin;
|
|
10132
10132
|
const stdout = process.stdout;
|
|
10133
10133
|
const wasRaw = Boolean(stdin.isRaw);
|
|
@@ -10141,7 +10141,7 @@ async function promptSecret(label) {
|
|
|
10141
10141
|
const finish = () => {
|
|
10142
10142
|
cleanup();
|
|
10143
10143
|
stdout.write("\n");
|
|
10144
|
-
|
|
10144
|
+
resolve11(value);
|
|
10145
10145
|
};
|
|
10146
10146
|
const onData = (chunk) => {
|
|
10147
10147
|
const text = typeof chunk === "string" ? chunk : chunk.toString("utf8");
|
|
@@ -10150,7 +10150,7 @@ async function promptSecret(label) {
|
|
|
10150
10150
|
cleanup();
|
|
10151
10151
|
process.exitCode = 130;
|
|
10152
10152
|
stdout.write("\n");
|
|
10153
|
-
return
|
|
10153
|
+
return resolve11("");
|
|
10154
10154
|
}
|
|
10155
10155
|
if (char === "\r" || char === "\n") {
|
|
10156
10156
|
finish();
|
|
@@ -10419,7 +10419,7 @@ async function storeAuthenticatedSession(params) {
|
|
|
10419
10419
|
return output;
|
|
10420
10420
|
}
|
|
10421
10421
|
function sleep(ms) {
|
|
10422
|
-
return new Promise((
|
|
10422
|
+
return new Promise((resolve11) => setTimeout(resolve11, ms));
|
|
10423
10423
|
}
|
|
10424
10424
|
async function runDeviceLogin(opts) {
|
|
10425
10425
|
const jsonMode = Boolean(opts.json);
|
|
@@ -10957,7 +10957,7 @@ async function pollUntil(check2, opts) {
|
|
|
10957
10957
|
}
|
|
10958
10958
|
}
|
|
10959
10959
|
function sleep2(ms) {
|
|
10960
|
-
return new Promise((
|
|
10960
|
+
return new Promise((resolve11) => setTimeout(resolve11, ms));
|
|
10961
10961
|
}
|
|
10962
10962
|
|
|
10963
10963
|
// src/commands/compliance.ts
|
|
@@ -13995,8 +13995,8 @@ function registerAgentGuardrailCommands(agent) {
|
|
|
13995
13995
|
try {
|
|
13996
13996
|
rule = JSON.parse(opts.rule);
|
|
13997
13997
|
} catch {
|
|
13998
|
-
const { readFileSync:
|
|
13999
|
-
rule = JSON.parse(
|
|
13998
|
+
const { readFileSync: readFileSync11 } = await import("fs");
|
|
13999
|
+
rule = JSON.parse(readFileSync11(opts.rule, "utf-8"));
|
|
14000
14000
|
}
|
|
14001
14001
|
const data = await apiFetch(`/v1/console/agents/${opts.agent}/guardrails`, {
|
|
14002
14002
|
method: "POST",
|
|
@@ -14596,9 +14596,9 @@ function registerAgent(program3) {
|
|
|
14596
14596
|
process.stdout.write(yaml);
|
|
14597
14597
|
return;
|
|
14598
14598
|
}
|
|
14599
|
-
const { writeFileSync:
|
|
14599
|
+
const { writeFileSync: writeFileSync8 } = await import("fs");
|
|
14600
14600
|
const outputPath = opts.output ?? "tenant.yaml";
|
|
14601
|
-
|
|
14601
|
+
writeFileSync8(
|
|
14602
14602
|
outputPath,
|
|
14603
14603
|
`# tenant.yaml - Front Of House agent manifest
|
|
14604
14604
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -16033,11 +16033,11 @@ function registerVoice(program3) {
|
|
|
16033
16033
|
}
|
|
16034
16034
|
const outputPath = String(opts.out || `foh-voice-preview-${provider}-${voiceId}.mp3`).trim();
|
|
16035
16035
|
const audio = Buffer.from(await res.arrayBuffer());
|
|
16036
|
-
const { mkdirSync:
|
|
16037
|
-
const { dirname: dirname5, resolve:
|
|
16038
|
-
const absolutePath =
|
|
16039
|
-
|
|
16040
|
-
|
|
16036
|
+
const { mkdirSync: mkdirSync7, writeFileSync: writeFileSync8 } = await import("fs");
|
|
16037
|
+
const { dirname: dirname5, resolve: resolve11 } = await import("path");
|
|
16038
|
+
const absolutePath = resolve11(outputPath);
|
|
16039
|
+
mkdirSync7(dirname5(absolutePath), { recursive: true });
|
|
16040
|
+
writeFileSync8(absolutePath, audio);
|
|
16041
16041
|
format({
|
|
16042
16042
|
status: "ok",
|
|
16043
16043
|
provider,
|
|
@@ -30518,7 +30518,7 @@ var Protocol = class {
|
|
|
30518
30518
|
return;
|
|
30519
30519
|
}
|
|
30520
30520
|
const pollInterval = task2.pollInterval ?? this._options?.defaultTaskPollInterval ?? 1e3;
|
|
30521
|
-
await new Promise((
|
|
30521
|
+
await new Promise((resolve11) => setTimeout(resolve11, pollInterval));
|
|
30522
30522
|
options?.signal?.throwIfAborted();
|
|
30523
30523
|
}
|
|
30524
30524
|
} catch (error2) {
|
|
@@ -30535,7 +30535,7 @@ var Protocol = class {
|
|
|
30535
30535
|
*/
|
|
30536
30536
|
request(request, resultSchema, options) {
|
|
30537
30537
|
const { relatedRequestId, resumptionToken, onresumptiontoken, task, relatedTask } = options ?? {};
|
|
30538
|
-
return new Promise((
|
|
30538
|
+
return new Promise((resolve11, reject) => {
|
|
30539
30539
|
const earlyReject = (error2) => {
|
|
30540
30540
|
reject(error2);
|
|
30541
30541
|
};
|
|
@@ -30613,7 +30613,7 @@ var Protocol = class {
|
|
|
30613
30613
|
if (!parseResult.success) {
|
|
30614
30614
|
reject(parseResult.error);
|
|
30615
30615
|
} else {
|
|
30616
|
-
|
|
30616
|
+
resolve11(parseResult.data);
|
|
30617
30617
|
}
|
|
30618
30618
|
} catch (error2) {
|
|
30619
30619
|
reject(error2);
|
|
@@ -30874,12 +30874,12 @@ var Protocol = class {
|
|
|
30874
30874
|
}
|
|
30875
30875
|
} catch {
|
|
30876
30876
|
}
|
|
30877
|
-
return new Promise((
|
|
30877
|
+
return new Promise((resolve11, reject) => {
|
|
30878
30878
|
if (signal.aborted) {
|
|
30879
30879
|
reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
|
|
30880
30880
|
return;
|
|
30881
30881
|
}
|
|
30882
|
-
const timeoutId = setTimeout(
|
|
30882
|
+
const timeoutId = setTimeout(resolve11, interval);
|
|
30883
30883
|
signal.addEventListener("abort", () => {
|
|
30884
30884
|
clearTimeout(timeoutId);
|
|
30885
30885
|
reject(new McpError(ErrorCode.InvalidRequest, "Request cancelled"));
|
|
@@ -31979,7 +31979,7 @@ var McpServer = class {
|
|
|
31979
31979
|
let task = createTaskResult.task;
|
|
31980
31980
|
const pollInterval = task.pollInterval ?? 5e3;
|
|
31981
31981
|
while (task.status !== "completed" && task.status !== "failed" && task.status !== "cancelled") {
|
|
31982
|
-
await new Promise((
|
|
31982
|
+
await new Promise((resolve11) => setTimeout(resolve11, pollInterval));
|
|
31983
31983
|
const updatedTask = await extra.taskStore.getTask(taskId);
|
|
31984
31984
|
if (!updatedTask) {
|
|
31985
31985
|
throw new McpError(ErrorCode.InternalError, `Task ${taskId} not found during polling`);
|
|
@@ -32628,19 +32628,19 @@ var StdioServerTransport = class {
|
|
|
32628
32628
|
this.onclose?.();
|
|
32629
32629
|
}
|
|
32630
32630
|
send(message) {
|
|
32631
|
-
return new Promise((
|
|
32631
|
+
return new Promise((resolve11) => {
|
|
32632
32632
|
const json3 = serializeMessage(message);
|
|
32633
32633
|
if (this._stdout.write(json3)) {
|
|
32634
|
-
|
|
32634
|
+
resolve11();
|
|
32635
32635
|
} else {
|
|
32636
|
-
this._stdout.once("drain",
|
|
32636
|
+
this._stdout.once("drain", resolve11);
|
|
32637
32637
|
}
|
|
32638
32638
|
});
|
|
32639
32639
|
}
|
|
32640
32640
|
};
|
|
32641
32641
|
|
|
32642
32642
|
// src/lib/cli-version.ts
|
|
32643
|
-
var CLI_VERSION = "0.1.
|
|
32643
|
+
var CLI_VERSION = "0.1.11";
|
|
32644
32644
|
|
|
32645
32645
|
// src/commands/mcp-serve.ts
|
|
32646
32646
|
var DEFAULT_TIMEOUT_MS = 12e4;
|
|
@@ -32825,7 +32825,7 @@ async function runFohCli(params) {
|
|
|
32825
32825
|
effectiveArgv.push("--json");
|
|
32826
32826
|
}
|
|
32827
32827
|
const command = `foh ${effectiveArgv.join(" ")}`;
|
|
32828
|
-
return await new Promise((
|
|
32828
|
+
return await new Promise((resolve11) => {
|
|
32829
32829
|
const child = (0, import_node_child_process.spawn)(process.execPath, [cliEntry, ...effectiveArgv], {
|
|
32830
32830
|
stdio: ["ignore", "pipe", "pipe"],
|
|
32831
32831
|
env: {
|
|
@@ -32850,7 +32850,7 @@ async function runFohCli(params) {
|
|
|
32850
32850
|
});
|
|
32851
32851
|
child.once("error", (error2) => {
|
|
32852
32852
|
clearTimeout(timeoutHandle);
|
|
32853
|
-
|
|
32853
|
+
resolve11({
|
|
32854
32854
|
ok: false,
|
|
32855
32855
|
command,
|
|
32856
32856
|
argv: effectiveArgv,
|
|
@@ -32866,7 +32866,7 @@ async function runFohCli(params) {
|
|
|
32866
32866
|
const stderrText = finalizeBoundedText(stderrBuffer);
|
|
32867
32867
|
const exitCode = Number.isFinite(code ?? NaN) ? Number(code) : 1;
|
|
32868
32868
|
const stdoutJson = tryParseJson(stdoutText);
|
|
32869
|
-
|
|
32869
|
+
resolve11({
|
|
32870
32870
|
ok: !timedOut && exitCode === 0,
|
|
32871
32871
|
command,
|
|
32872
32872
|
argv: effectiveArgv,
|
|
@@ -34775,8 +34775,8 @@ function registerSetup(program3) {
|
|
|
34775
34775
|
}
|
|
34776
34776
|
try {
|
|
34777
34777
|
const manifest = await agentExport(resolvedAgentId, { apiUrlOverride: opts.apiUrl });
|
|
34778
|
-
const { writeFileSync:
|
|
34779
|
-
|
|
34778
|
+
const { writeFileSync: writeFileSync8 } = await import("fs");
|
|
34779
|
+
writeFileSync8(
|
|
34780
34780
|
"tenant.yaml",
|
|
34781
34781
|
`# tenant.yaml - Front Of House agent manifest
|
|
34782
34782
|
# Edit this file and run: foh plan tenant.yaml
|
|
@@ -34944,8 +34944,8 @@ function registerSim(program3) {
|
|
|
34944
34944
|
}
|
|
34945
34945
|
const cert = response.certificate;
|
|
34946
34946
|
if (opts.out) {
|
|
34947
|
-
const { writeFileSync:
|
|
34948
|
-
|
|
34947
|
+
const { writeFileSync: writeFileSync8 } = await import("fs");
|
|
34948
|
+
writeFileSync8(opts.out, JSON.stringify(cert, null, 2) + "\n", "utf-8");
|
|
34949
34949
|
process.stderr.write(` Certificate written to ${opts.out}
|
|
34950
34950
|
`);
|
|
34951
34951
|
}
|
|
@@ -34995,8 +34995,8 @@ function registerSim(program3) {
|
|
|
34995
34995
|
});
|
|
34996
34996
|
}
|
|
34997
34997
|
if (opts.out) {
|
|
34998
|
-
const { writeFileSync:
|
|
34999
|
-
|
|
34998
|
+
const { writeFileSync: writeFileSync8 } = await import("fs");
|
|
34999
|
+
writeFileSync8(opts.out, JSON.stringify(response.certificate, null, 2) + "\n", "utf-8");
|
|
35000
35000
|
process.stderr.write(` Final certificate written to ${opts.out}
|
|
35001
35001
|
`);
|
|
35002
35002
|
}
|
|
@@ -37710,7 +37710,7 @@ async function runSelf(args, apiUrlOverride) {
|
|
|
37710
37710
|
if (apiUrlOverride && !spawnArgs.includes("--api-url")) {
|
|
37711
37711
|
spawnArgs.push("--api-url", apiUrlOverride);
|
|
37712
37712
|
}
|
|
37713
|
-
return await new Promise((
|
|
37713
|
+
return await new Promise((resolve11, reject) => {
|
|
37714
37714
|
const child = (0, import_child_process2.spawn)(process.execPath, [process.argv[1], ...spawnArgs], {
|
|
37715
37715
|
stdio: "inherit",
|
|
37716
37716
|
env: {
|
|
@@ -37720,7 +37720,7 @@ async function runSelf(args, apiUrlOverride) {
|
|
|
37720
37720
|
}
|
|
37721
37721
|
});
|
|
37722
37722
|
child.once("error", reject);
|
|
37723
|
-
child.once("close", (code) =>
|
|
37723
|
+
child.once("close", (code) => resolve11(typeof code === "number" ? code : 1));
|
|
37724
37724
|
});
|
|
37725
37725
|
}
|
|
37726
37726
|
function shouldUseInteractiveHome(argv) {
|
|
@@ -38098,17 +38098,17 @@ function detectUpdateAvailability(currentVersion, cwd = process.cwd()) {
|
|
|
38098
38098
|
async function applyRepoUpdate(repoRoot) {
|
|
38099
38099
|
const scriptPath = (0, import_path9.join)(repoRoot, "scripts", "Install-FohCli.ps1");
|
|
38100
38100
|
if (process.platform === "win32") {
|
|
38101
|
-
return await new Promise((
|
|
38101
|
+
return await new Promise((resolve11, reject) => {
|
|
38102
38102
|
const child = (0, import_child_process3.spawn)(
|
|
38103
38103
|
"powershell",
|
|
38104
38104
|
["-ExecutionPolicy", "Bypass", "-File", scriptPath],
|
|
38105
38105
|
{ stdio: "inherit" }
|
|
38106
38106
|
);
|
|
38107
38107
|
child.once("error", reject);
|
|
38108
|
-
child.once("close", (code) =>
|
|
38108
|
+
child.once("close", (code) => resolve11(typeof code === "number" ? code : 1));
|
|
38109
38109
|
});
|
|
38110
38110
|
}
|
|
38111
|
-
return await new Promise((
|
|
38111
|
+
return await new Promise((resolve11, reject) => {
|
|
38112
38112
|
const child = (0, import_child_process3.spawn)(
|
|
38113
38113
|
"corepack",
|
|
38114
38114
|
["pnpm", "cli:install:global"],
|
|
@@ -38118,7 +38118,7 @@ async function applyRepoUpdate(repoRoot) {
|
|
|
38118
38118
|
}
|
|
38119
38119
|
);
|
|
38120
38120
|
child.once("error", reject);
|
|
38121
|
-
child.once("close", (code) =>
|
|
38121
|
+
child.once("close", (code) => resolve11(typeof code === "number" ? code : 1));
|
|
38122
38122
|
});
|
|
38123
38123
|
}
|
|
38124
38124
|
function shouldShowUpdateNotice(argv = process.argv) {
|
|
@@ -38254,9 +38254,9 @@ function registerUpdate(program3) {
|
|
|
38254
38254
|
}
|
|
38255
38255
|
|
|
38256
38256
|
// src/commands/eval.ts
|
|
38257
|
-
var
|
|
38258
|
-
var
|
|
38259
|
-
var
|
|
38257
|
+
var import_fs14 = require("fs");
|
|
38258
|
+
var import_path12 = require("path");
|
|
38259
|
+
var import_child_process5 = require("child_process");
|
|
38260
38260
|
|
|
38261
38261
|
// src/lib/external-agent-capture.ts
|
|
38262
38262
|
var import_fs12 = require("fs");
|
|
@@ -38307,8 +38307,287 @@ function readCommandRecords(runDir) {
|
|
|
38307
38307
|
return (0, import_fs12.readFileSync)(commandLogPath, "utf8").split(/\r?\n/).map((line) => line.trim()).filter(Boolean).map((line) => JSON.parse(line));
|
|
38308
38308
|
}
|
|
38309
38309
|
|
|
38310
|
+
// src/lib/external-agent-executor.ts
|
|
38311
|
+
var import_fs13 = require("fs");
|
|
38312
|
+
var import_os2 = require("os");
|
|
38313
|
+
var import_path11 = require("path");
|
|
38314
|
+
var import_child_process4 = require("child_process");
|
|
38315
|
+
// Upper-case name prefixes: isDeniedEnvKey() reports any environment variable
// whose upper-cased name starts with one of these as denied.
var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
  "SUPABASE_", "DATABASE_", "OPENAI_", "XAI_",
  "ANTHROPIC_", "WHATSAPP_", "TWILIO_", "STRIPE_"
];

// Exact upper-case names: isDeniedEnvKey() reports an environment variable
// whose upper-cased name equals one of these as denied.
var CODEX_EXECUTOR_DENIED_ENV_NAMES = [
  "DATABASE_URL", "NPM_TOKEN", "GITHUB_TOKEN",
  "GH_TOKEN", "META_APP_SECRET", "FACEBOOK_APP_SECRET",
  "GOOGLE_CLIENT_SECRET", "JWT_SECRET", "SESSION_SECRET"
];

// The only variable names buildCodexExecutorEnv() copies from the source
// environment. Both "PATH" and "Path" spellings are listed.
var CHILD_ENV_ALLOWLIST = [
  "APPDATA", "CODEX_HOME", "ComSpec", "HOME",
  "LOCALAPPDATA", "PATH", "Path", "SystemRoot",
  "TEMP", "TMP", "USERPROFILE", "WINDIR"
];
|
|
38350
|
+
/**
 * Error type thrown by the external-agent executor helpers.
 *
 * In addition to the usual message it carries `reasonCode`, a short
 * machine-readable identifier for the failure.
 */
var ExternalAgentExecutorError = class extends Error {
  reasonCode;
  /**
   * @param {string} reasonCode - Machine-readable failure identifier.
   * @param {string} message - Human-readable description.
   */
  constructor(reasonCode, message) {
    super(message);
    Object.assign(this, { name: "ExternalAgentExecutorError", reasonCode });
  }
};
|
|
38358
|
+
/**
 * Reports whether an environment variable name is on the deny lists.
 *
 * The comparison is case-insensitive: the name is upper-cased and then checked
 * for an exact match against CODEX_EXECUTOR_DENIED_ENV_NAMES, followed by a
 * prefix match against CODEX_EXECUTOR_DENIED_ENV_PREFIXES.
 *
 * @param {string} key - Environment variable name.
 * @returns {boolean} true when the name is denied.
 */
function isDeniedEnvKey(key) {
  const normalized = key.toUpperCase();
  for (const deniedName of CODEX_EXECUTOR_DENIED_ENV_NAMES) {
    if (normalized === deniedName) return true;
  }
  for (const deniedPrefix of CODEX_EXECUTOR_DENIED_ENV_PREFIXES) {
    if (normalized.startsWith(deniedPrefix)) return true;
  }
  return false;
}
|
|
38363
|
+
/**
 * Builds a fresh environment object for an executor run.
 *
 * Starts from an empty object and copies only the CHILD_ENV_ALLOWLIST entries
 * whose source value is a non-empty string and whose name is not denied by
 * isDeniedEnvKey(). It then sets the run-directory and prompt-version
 * variables and FOH_CLI_SUPPRESS_BANNER="1".
 *
 * @param {{sourceEnv?: object, runDir: string, promptVersion: string}} input
 *   `sourceEnv` defaults to `process.env` when null or undefined.
 * @returns {object} The new environment map; the source is not modified.
 */
function buildCodexExecutorEnv(input) {
  const sourceEnv = input.sourceEnv ?? process.env;
  const childEnv = {};
  CHILD_ENV_ALLOWLIST.forEach((name) => {
    const candidate = sourceEnv[name];
    if (typeof candidate !== "string" || candidate.length === 0) return;
    if (isDeniedEnvKey(name)) return;
    childEnv[name] = candidate;
  });
  childEnv[EXTERNAL_AGENT_RUN_DIR_ENV] = input.runDir;
  childEnv[EXTERNAL_AGENT_PROMPT_VERSION_ENV] = input.promptVersion;
  childEnv.FOH_CLI_SUPPRESS_BANNER = "1";
  return childEnv;
}
|
|
38377
|
+
/**
 * Resolves a path to absolute form so two paths can be compared.
 * On win32 the result is additionally lower-cased.
 *
 * @param {string} path2 - Path to normalize.
 * @returns {string} The absolute (and, on win32, lower-cased) path.
 */
function normalizeForCompare(path2) {
  const absolute = (0, import_path11.resolve)(path2);
  if (process.platform !== "win32") return absolute;
  return absolute.toLowerCase();
}
|
|
38381
|
+
/**
 * Reports whether `childPath` is the same location as `parentPath` or lies
 * somewhere beneath it.
 *
 * Both paths are normalized with normalizeForCompare() and the decision is
 * made on the relative path from parent to child:
 *   - empty relative path: same directory, so inside;
 *   - absolute relative path: no relative route exists, so outside;
 *   - exactly ".." or starting with ".." plus a separator: climbs out of
 *     the parent, so outside.
 *
 * Only a whole ".." segment counts as climbing out. An entry whose name merely
 * begins with two dots (for example "..cache") is a normal child and is
 * reported as inside.
 *
 * @param {string} childPath - Candidate path.
 * @param {string} parentPath - Directory to test containment against.
 * @returns {boolean} true when childPath is parentPath or a descendant of it.
 */
function isPathInside(childPath, parentPath) {
  const child = normalizeForCompare(childPath);
  const parent = normalizeForCompare(parentPath);
  const rel = (0, import_path11.relative)(parent, child);
  if (rel === "") return true;
  if ((0, import_path11.isAbsolute)(rel)) return false;
  // Match ".." only as a complete leading segment, not as a name prefix.
  return rel !== ".." && !rel.startsWith(`..${import_path11.sep}`);
}
|
|
38387
|
+
/**
 * Validates that a batch field holds a non-blank string.
 *
 * @param {*} value - Value read from the batch document.
 * @param {string} field - Field label used in the error message.
 * @returns {string} The value unchanged (it is not trimmed).
 * @throws {ExternalAgentExecutorError} reasonCode "invalid_external_agent_batch"
 *   when the value is not a string or contains only whitespace.
 */
function requireString(value, field) {
  const isUsable = typeof value === "string" && value.trim() !== "";
  if (isUsable) return value;
  throw new ExternalAgentExecutorError(
    "invalid_external_agent_batch",
    `Batch field ${field} must be a non-empty string.`
  );
}
|
|
38393
|
+
/**
 * Loads and validates an external-agent batch plan from disk.
 *
 * Every validation failure is reported as an ExternalAgentExecutorError, so
 * callers can branch on `reasonCode`. Malformed JSON and a JSON `null`
 * document are treated as invalid batches rather than leaking a raw
 * SyntaxError or TypeError.
 *
 * @param {string} batchPath - Path to the batch JSON file.
 * @returns {object} The parsed batch document. Only `schema_version` and
 *   `runs` are validated here.
 * @throws {ExternalAgentExecutorError} reasonCode
 *   "external_agent_batch_not_found" when the file does not exist, or
 *   "invalid_external_agent_batch" when the content is not valid JSON, has the
 *   wrong `schema_version`, or `runs` is not a non-empty array.
 */
function readBatch(batchPath) {
  if (!(0, import_fs13.existsSync)(batchPath)) {
    throw new ExternalAgentExecutorError("external_agent_batch_not_found", `Batch file not found: ${batchPath}`);
  }
  // Read outside the try block so genuine I/O errors propagate unchanged.
  const raw = (0, import_fs13.readFileSync)(batchPath, "utf8");
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (parseError) {
    const failure = new ExternalAgentExecutorError(
      "invalid_external_agent_batch",
      `Batch file is not valid JSON: ${batchPath}`
    );
    failure.cause = parseError;
    throw failure;
  }
  // Optional chaining keeps a JSON `null` document on the validation path.
  if (parsed?.schema_version !== "external_agent_batch_plan.v1") {
    throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch schema_version must be external_agent_batch_plan.v1.");
  }
  if (!Array.isArray(parsed.runs) || parsed.runs.length === 0) {
    throw new ExternalAgentExecutorError("invalid_external_agent_batch", "Batch runs must be a non-empty array.");
  }
  return parsed;
}
|
|
38406
|
+
/**
 * Runs a command synchronously and returns a normalized summary of the result.
 *
 * On win32, a command ending in ".cmd" (case-insensitive) is invoked through
 * powershell.exe using the `&` call operator with every token single-quoted.
 * Anything else is spawned directly.
 *
 * @param {string} command - Executable name or path.
 * @param {string[]} args - Arguments for the command.
 * @returns {{status: (number|null), stdout: string, stderr: string, error: *}}
 *   `status` is null when the spawn result has no numeric exit status;
 *   `error` is the spawn error, if any.
 */
function defaultRunnerProbe(command, args) {
  const viaPowerShell = process.platform === "win32" && command.toLowerCase().endsWith(".cmd");
  let outcome;
  if (viaPowerShell) {
    const invocation = [command, ...args].map(quotePowerShellArg).join(" ");
    outcome = (0, import_child_process4.spawnSync)(
      "powershell.exe",
      ["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${invocation}`],
      { encoding: "utf8" }
    );
  } else {
    outcome = (0, import_child_process4.spawnSync)(command, args, { encoding: "utf8" });
  }
  const exitStatus = typeof outcome.status === "number" ? outcome.status : null;
  return {
    status: exitStatus,
    stdout: String(outcome.stdout || ""),
    stderr: String(outcome.stderr || ""),
    error: outcome.error
  };
}
|
|
38419
|
+
/**
 * Wraps a string in single quotes for use as a PowerShell literal, doubling
 * every embedded single quote.
 *
 * @param {string} value - Raw argument text.
 * @returns {string} The single-quoted argument.
 */
function quotePowerShellArg(value) {
  const escaped = value.split("'").join("''");
  return `'${escaped}'`;
}
|
|
38422
|
+
/**
 * Chooses the command used to probe the Codex CLI.
 *
 * Off win32 this is always "codex". On win32 it is the shim at
 * `%APPDATA%\npm\codex.cmd` when APPDATA is set and that file exists;
 * otherwise it is the bare name "codex.cmd".
 *
 * @returns {string} Command name or absolute shim path.
 */
function resolveCodexProbeCommand() {
  if (process.platform !== "win32") return "codex";
  const fallback = "codex.cmd";
  const appData = process.env.APPDATA;
  if (!appData) return fallback;
  const shimPath = (0, import_path11.join)(appData, "npm", "codex.cmd");
  return (0, import_fs13.existsSync)(shimPath) ? shimPath : fallback;
}
|
|
38431
|
+
/**
 * Checks that a usable Codex CLI is available before a plan is built.
 *
 * Unless `options.skipRunnerProbe` is truthy, two probes are run with the
 * command from resolveCodexProbeCommand(): `--version`, then `exec --help`.
 * Each must finish without an error and with status 0. The combined help
 * output (stdout, newline, stderr) must then mention every required flag;
 * this is a plain substring check.
 *
 * @param {{skipRunnerProbe?: boolean, runnerProbe?: Function}} options
 *   `runnerProbe(command, args)` overrides defaultRunnerProbe and must return
 *   an object with `status`, `stdout`, `stderr` and optional `error`.
 * @returns {{binaryChecked: boolean, requiredFlagsChecked: boolean}}
 *   Both false when probing was skipped, both true when every check passed.
 * @throws {ExternalAgentExecutorError} reasonCode
 *   "external_agent_runner_binary_missing",
 *   "external_agent_runner_help_unavailable" or
 *   "external_agent_runner_required_flags_missing".
 */
function validateCodexRunner(options) {
  if (options.skipRunnerProbe) {
    return { binaryChecked: false, requiredFlagsChecked: false };
  }
  const runProbe = options.runnerProbe ?? defaultRunnerProbe;
  const codexCommand = resolveCodexProbeCommand();
  const probeFailed = (outcome) => Boolean(outcome.error) || outcome.status !== 0;

  const versionOutcome = runProbe(codexCommand, ["--version"]);
  if (probeFailed(versionOutcome)) {
    throw new ExternalAgentExecutorError("external_agent_runner_binary_missing", "Codex runner probe failed: `codex --version` did not exit 0.");
  }

  const helpOutcome = runProbe(codexCommand, ["exec", "--help"]);
  if (probeFailed(helpOutcome)) {
    throw new ExternalAgentExecutorError("external_agent_runner_help_unavailable", "Codex runner probe failed: `codex exec --help` did not exit 0.");
  }

  const helpText = `${helpOutcome.stdout}\n${helpOutcome.stderr}`;
  const absentFlags = [];
  for (const flag of [
    "--cd",
    "--skip-git-repo-check",
    "--ephemeral",
    "--ignore-rules",
    "--sandbox",
    "--full-auto",
    "--json",
    "--output-last-message"
  ]) {
    if (!helpText.includes(flag)) absentFlags.push(flag);
  }
  if (absentFlags.length > 0) {
    throw new ExternalAgentExecutorError(
      "external_agent_runner_required_flags_missing",
      `Codex runner is missing required exec flag(s): ${absentFlags.join(", ")}`
    );
  }
  return { binaryChecked: true, requiredFlagsChecked: true };
}
|
|
38466
|
+
/**
 * Converts a run identifier into a string that is safe to use as a single
 * directory name.
 *
 * The value is lower-cased, every run of characters outside `a-z 0-9 _ . -`
 * becomes one "-", and leading/trailing dashes are removed. If nothing usable
 * remains, or the result consists solely of dots, the fallback "run" is
 * returned. The dots case matters because "." and ".." are special path
 * segments: joined onto a workspace root they would resolve to the root
 * itself or to its parent instead of a child directory.
 *
 * @param {string} value - Raw run identifier.
 * @returns {string} Sanitized, non-empty identifier that is never "." or "..".
 */
function safeRunId(value) {
  const slug = value
    .toLowerCase()
    .replace(/[^a-z0-9_.-]+/g, "-")
    .replace(/^-+|-+$/g, "");
  // Reject empty and dots-only results; both are unusable as a directory name.
  if (slug === "" || /^\.+$/.test(slug)) return "run";
  return slug;
}
|
|
38469
|
+
/**
 * Determines the directory under which per-run workspaces are created.
 *
 * An explicit `input.workspaceRoot` wins and is returned as an absolute path.
 * Otherwise the root is placed in the OS temp directory at
 * `foh-external-agent-workspaces/<repo>/<batch>`, where `<repo>` is the base
 * name of the private repo root and `<batch>` is the base name of the batch
 * file (extension included). In both, runs of characters outside
 * `a-z A-Z 0-9 _ . -` are replaced by "-".
 *
 * @param {{workspaceRoot?: string, batchPath: string, privateRepoRoot: string}} input
 * @returns {string} Absolute workspace root path.
 */
function resolveWorkspaceRoot(input) {
  if (input.workspaceRoot) {
    return (0, import_path11.resolve)(input.workspaceRoot);
  }
  const toSegment = (somePath) =>
    (0, import_path11.basename)((0, import_path11.resolve)(somePath)).replace(/[^a-zA-Z0-9_.-]+/g, "-");
  const batchSegment = toSegment(input.batchPath);
  const repoSegment = toSegment(input.privateRepoRoot);
  return (0, import_path11.resolve)(
    (0, import_os2.tmpdir)(),
    "foh-external-agent-workspaces",
    repoSegment,
    batchSegment
  );
}
|
|
38475
|
+
/**
 * Infers the prompt version from the text of a prompt file.
 *
 * The file is read as UTF-8. If it contains the marker sentence below, the
 * version is "blank-setup.v1"; any other content yields "unknown".
 *
 * @param {string} promptPath - Path to the prompt file.
 * @returns {string} "blank-setup.v1" or "unknown".
 */
function promptVersionFromPath(promptPath) {
  const marker = "Do not assume access to the private source repository";
  const promptText = (0, import_fs13.readFileSync)(promptPath, "utf8");
  return promptText.includes(marker) ? "blank-setup.v1" : "unknown";
}
|
|
38480
|
+
/**
 * Builds a dry-run executor plan for every run listed in a batch file.
 *
 * Only the "codex" runner is accepted. The workspace root must not be inside
 * the private repository. As a side effect the workspace root and one
 * sub-directory per run are created, and each receives a README.md
 * placeholder. No runner process is launched here; the returned plan only
 * describes the command line, output paths and environment key names.
 *
 * @param {object} options
 * @param {string} options.batchPath - Path to the batch JSON file.
 * @param {string} [options.runner] - Runner name; defaults to "codex".
 * @param {string} [options.workspaceRoot] - Explicit workspace root.
 * @param {string} [options.privateRepoRoot] - Repo root to guard against;
 *   falls back to `options.cwd`, then `process.cwd()`.
 * @param {number} [options.timeoutMinutes] - Positive finite number; else 30.
 * @param {object} [options.env] - Source environment handed to the env builder.
 * @returns {object} Plan object with schema_version "external_agent_executor_plan.v1".
 * @throws {ExternalAgentExecutorError} On an unsupported runner, a workspace
 *   inside the private repo, or run ids that collide after sanitization.
 *   Errors raised by the batch reader, runner probe and field validators
 *   propagate unchanged.
 */
function createExternalAgentExecutorPlan(options) {
  const runner = String(options.runner || "codex");
  if (runner !== "codex") {
    throw new ExternalAgentExecutorError("unsupported_external_agent_runner", `Unsupported runner: ${runner}`);
  }
  const batchPath = (0, import_path11.resolve)(options.batchPath);
  const batch = readBatch(batchPath);
  const runnerProbe = validateCodexRunner(options);
  const privateRepoRoot = (0, import_path11.resolve)(options.privateRepoRoot || options.cwd || process.cwd());
  const workspaceRoot = resolveWorkspaceRoot({ batchPath, workspaceRoot: options.workspaceRoot, privateRepoRoot });
  if (isPathInside(workspaceRoot, privateRepoRoot)) {
    throw new ExternalAgentExecutorError(
      "external_agent_workspace_inside_private_repo",
      `Workspace root must be outside the private repository. workspace=${workspaceRoot} repo=${privateRepoRoot}`
    );
  }
  (0, import_fs13.mkdirSync)(workspaceRoot, { recursive: true });
  const batchDir = (0, import_path11.resolve)(String(batch.batch_dir || (0, import_path11.resolve)(batchPath, "..")));
  const timeoutMinutes = Number.isFinite(options.timeoutMinutes) && Number(options.timeoutMinutes) > 0 ? Number(options.timeoutMinutes) : 30;
  const workspaceReadme = [
    "# FOH External-Agent Workspace",
    "",
    "This directory is intentionally empty.",
    "Use only public FOH docs, public API docs, and the public npm CLI package.",
    "Do not assume access to the private source repository.",
    ""
  ].join("\n");
  // Sanitization can map distinct raw ids onto the same slug; two runs would
  // then share one workspace directory and be indistinguishable in the plan.
  const seenRunIds = new Set();
  const runs = batch.runs.map((run) => {
    const runId = safeRunId(requireString(run.run_id, "runs[].run_id"));
    if (seenRunIds.has(runId)) {
      throw new ExternalAgentExecutorError(
        "external_agent_duplicate_run_id",
        `Batch contains more than one run whose id normalizes to "${runId}"; run ids must be unique after sanitization.`
      );
    }
    seenRunIds.add(runId);
    const runDir = (0, import_path11.resolve)(requireString(run.run_dir, `runs[${runId}].run_dir`));
    const promptPath = (0, import_path11.resolve)(requireString(run.prompt_path, `runs[${runId}].prompt_path`));
    const workspaceDir = (0, import_path11.join)(workspaceRoot, runId);
    (0, import_fs13.mkdirSync)(workspaceDir, { recursive: true });
    (0, import_fs13.writeFileSync)((0, import_path11.join)(workspaceDir, "README.md"), workspaceReadme, "utf8");
    // Only the key names of the child environment are recorded in the plan.
    const env = buildCodexExecutorEnv({
      sourceEnv: options.env,
      runDir,
      promptVersion: promptVersionFromPath(promptPath)
    });
    const jsonlPath = (0, import_path11.join)(runDir, "codex-exec.jsonl");
    const lastMessagePath = (0, import_path11.join)(runDir, "codex-last-message.md");
    const args = [
      "exec",
      "--cd",
      workspaceDir,
      "--skip-git-repo-check",
      "--ephemeral",
      "--ignore-rules",
      "--ignore-user-config",
      "--sandbox",
      "workspace-write",
      "--full-auto",
      "--json",
      "--output-last-message",
      lastMessagePath,
      "-"
    ];
    return {
      run_id: runId,
      model_provider: String(run.model_provider || "unknown"),
      model_name: String(run.model_name || "unknown-model"),
      run_dir: runDir,
      prompt_path: promptPath,
      workspace_dir: workspaceDir,
      command: "codex",
      args,
      env_keys: Object.keys(env).sort(),
      outputs: {
        jsonl: jsonlPath,
        last_message: lastMessagePath
      }
    };
  });
  return {
    schema_version: "external_agent_executor_plan.v1",
    created_at: new Date().toISOString(),
    runner,
    mode: "dry_run",
    batch_path: batchPath,
    batch_dir: batchDir,
    private_repo_root: privateRepoRoot,
    workspace_root: workspaceRoot,
    timeout_minutes: timeoutMinutes,
    safety: {
      workspace_outside_private_repo: true,
      repo_files_copied: false,
      child_env_mode: "allowlist",
      denied_env_prefixes: [...CODEX_EXECUTOR_DENIED_ENV_PREFIXES],
      denied_env_names: [...CODEX_EXECUTOR_DENIED_ENV_NAMES],
      runner_probe: {
        binary_checked: runnerProbe.binaryChecked,
        required_flags_checked: runnerProbe.requiredFlagsChecked
      }
    },
    runs
  };
}
|
|
38580
|
+
/**
 * Persists an executor plan as pretty-printed JSON (with a trailing newline)
 * to `executor-plan.json` inside the plan's batch directory, creating that
 * directory if needed.
 *
 * @param {{ batch_dir: string }} plan - Plan object to serialize.
 * @returns {string} Path of the written file.
 */
function writeExternalAgentExecutorPlan(plan) {
  const targetDir = plan.batch_dir;
  const planFile = (0, import_path11.join)(targetDir, "executor-plan.json");
  (0, import_fs13.mkdirSync)(targetDir, { recursive: true });
  const serialized = JSON.stringify(plan, null, 2) + "\n";
  (0, import_fs13.writeFileSync)(planFile, serialized, "utf8");
  return planFile;
}
|
|
38587
|
+
|
|
38310
38588
|
// src/commands/eval.ts
|
|
38311
38589
|
var DEFAULT_PROMPT_VERSION = "blank-setup.v1";
|
|
38590
|
+
var DEFAULT_BATCH_MODELS = "openai/codex,anthropic/claude,cursor/agent";
|
|
38312
38591
|
var PROMPTS = {
|
|
38313
38592
|
"blank-setup.v1": "Go to https://frontofhouse.okii.uk. Use only public docs, public API docs, and the public npm CLI package. Install the FOH CLI, authenticate or reach a deterministic auth blocker, create or configure a Front Of House voice agent and website widget, run proof/smoke/certification where available, and produce a final evidence summary with commands run, docs used, artifacts created, and any blocker reason codes. Do not assume access to the private source repository.",
|
|
38314
38593
|
"debug-proof-failure.v1": "You are given a FOH proof or debug artifact. Use public docs and FOH CLI/API behavior to classify whether the blocker is docs, auth, org setup, agent config, widget, channel, runtime, or product bug. Produce a redacted improvement packet or the exact command needed to produce one. Do not ask the human to interpret logs manually unless no machine-readable artifact exists.",
|
|
@@ -38325,7 +38604,32 @@ function defaultRunDir(modelName, promptVersion) {
|
|
|
38325
38604
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
|
|
38326
38605
|
const safeModel = String(modelName || "unknown-model").toLowerCase().replace(/[^a-z0-9_-]+/g, "-");
|
|
38327
38606
|
const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
|
|
38328
|
-
return (0,
|
|
38607
|
+
return (0, import_path12.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
|
|
38608
|
+
}
|
|
38609
|
+
/**
 * Computes the default output directory for a batch:
 * `test-results/external-agent-runs/<YYYY-MM-DD>/batch-<prompt>-<timestamp>`,
 * resolved against the current working directory.
 *
 * @param {string} [promptVersion] - Prompt version label; falls back to
 *   DEFAULT_PROMPT_VERSION when falsy.
 * @param {Date} [now] - Instant used for both the date folder and the
 *   timestamp suffix; defaults to the current time.
 * @returns {string} Absolute directory path.
 */
function defaultBatchDir(promptVersion, now = new Date()) {
  // A single instant feeds both parts so the date folder and the timestamp
  // suffix cannot disagree when the call straddles midnight (UTC).
  const iso = now.toISOString();
  const day = iso.slice(0, 10);
  const stamp = iso.replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
  return (0, import_path12.resolve)("test-results", "external-agent-runs", day, `batch-${safePrompt}-${stamp}`);
}
|
|
38615
|
+
/**
 * Turns an arbitrary label into a lower-case slug: runs of characters outside
 * `[a-z0-9_.-]` become a single dash and edge dashes are removed.
 *
 * @param {*} value - Label to slugify; falsy values are treated as "unknown".
 * @returns {string} The slug, or "unknown" when nothing remains.
 */
function safeSlug(value) {
  const fallback = "unknown";
  const lowered = String(value || fallback).toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9_.-]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed === "" ? fallback : trimmed;
}
|
|
38618
|
+
/**
 * Renders one argument for display inside a copy-pasteable shell command.
 *
 * Values made only of `[A-Za-z0-9_./:=@-]` are returned unchanged. Anything
 * else (including the empty string) is wrapped in double quotes with the
 * characters that stay special inside double quotes backslash-escaped.
 *
 * @param {*} value - Argument value; coerced with String().
 * @returns {string} Shell-safe representation of the argument.
 */
function quoteArg(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
  // Backslash must be escaped together with ", $ and `: otherwise a trailing
  // "\" swallows the closing quote and an existing "\"" changes meaning.
  const escaped = text.replace(/(["$`\\])/g, "\\$1");
  return '"' + escaped + '"';
}
|
|
38623
|
+
/**
 * Splits a "provider/model" spec at its first slash.
 *
 * Everything before the first "/" is the provider and everything after it
 * (further slashes included) is the model name. Both parts are trimmed; an
 * empty provider becomes "unknown" and an empty name becomes "unknown-model".
 *
 * @param {*} raw - Spec text; falsy values are treated as an empty string.
 * @returns {{ provider: string, name: string }}
 */
function parseModelSpec(raw) {
  const spec = String(raw || "");
  const slashAt = spec.indexOf("/");
  const providerPart = slashAt === -1 ? spec : spec.slice(0, slashAt);
  const namePart = slashAt === -1 ? "" : spec.slice(slashAt + 1);
  return {
    provider: providerPart.trim() || "unknown",
    name: namePart.trim() || "unknown-model"
  };
}
|
|
38631
|
+
/**
 * Parses a comma-separated list of model specs.
 *
 * Entries are trimmed, blank entries are dropped, and each remaining entry is
 * handed to parseModelSpec. A falsy `raw` falls back to DEFAULT_BATCH_MODELS.
 *
 * @param {*} raw - Comma-separated spec list.
 * @returns {Array} One parsed spec per non-empty entry, in input order.
 */
function parseModelList(raw) {
  const specs = [];
  for (const piece of String(raw || DEFAULT_BATCH_MODELS).split(",")) {
    const entry = piece.trim();
    if (entry) specs.push(parseModelSpec(entry));
  }
  return specs;
}
|
|
38330
38634
|
function inferShell(raw) {
|
|
38331
38635
|
if (raw && raw.trim()) return { command: raw, args: [], label: raw };
|
|
@@ -38334,14 +38638,14 @@ function inferShell(raw) {
|
|
|
38334
38638
|
}
|
|
38335
38639
|
/**
 * Writes the prompt text for a prompt version to `prompt.txt` in the run
 * directory (with a trailing newline).
 *
 * @param {string} runDir - Existing run directory.
 * @param {string} promptVersion - Key into PROMPTS; unknown versions fall back
 *   to the DEFAULT_PROMPT_VERSION prompt.
 * @returns {string} Path of the written prompt file.
 */
function writePrompt(runDir, promptVersion) {
  // Only own keys count as known versions; a plain lookup would also resolve
  // inherited members such as "constructor" or "toString" and write a
  // function's source text as the prompt.
  const isKnown = Object.prototype.hasOwnProperty.call(PROMPTS, promptVersion);
  const prompt = (isKnown ? PROMPTS[promptVersion] : void 0) ?? PROMPTS[DEFAULT_PROMPT_VERSION];
  const path2 = (0, import_path12.join)(runDir, "prompt.txt");
  (0, import_fs14.writeFileSync)(path2, `${prompt}\n`, "utf8");
  return path2;
}
|
|
38342
38646
|
/**
 * Serializes the session object as pretty-printed JSON (with a trailing
 * newline) to `session.json` inside the run directory.
 *
 * @param {string} runDir - Existing run directory.
 * @param {*} session - Value to serialize.
 * @returns {string} Path of the written session file.
 */
function writeSession(runDir, session) {
  const sessionFile = (0, import_path12.join)(runDir, "session.json");
  const payload = JSON.stringify(session, null, 2) + "\n";
  (0, import_fs14.writeFileSync)(sessionFile, payload, "utf8");
  return sessionFile;
}
|
|
@@ -38392,7 +38696,7 @@ function buildRunArtifact(input) {
|
|
|
38392
38696
|
},
|
|
38393
38697
|
summary: status === "pass" ? "External-agent capture session completed and was marked pass." : `External-agent capture session completed with ${commands.length} captured FOH command(s); classify and improve reason ${reasonCode}.`,
|
|
38394
38698
|
next_commands: status === "pass" ? ["corepack pnpm eval:external-agent:runs:summary"] : [
|
|
38395
|
-
`foh bug improve --from external-agent-run --file ${(0,
|
|
38699
|
+
`foh bug improve --from external-agent-run --file ${(0, import_path12.join)(input.runDir, "run.json")} --out ${(0, import_path12.join)(input.runDir, "improvement-packet.json")} --json`,
|
|
38396
38700
|
"corepack pnpm eval:external-agent:runs:summary"
|
|
38397
38701
|
]
|
|
38398
38702
|
};
|
|
@@ -38400,11 +38704,78 @@ function buildRunArtifact(input) {
|
|
|
38400
38704
|
function registerEval(program3) {
|
|
38401
38705
|
const evalCommand = program3.command("eval").description("Run or summarize external-agent evaluation workflows");
|
|
38402
38706
|
const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
|
|
38707
|
+
external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
|
|
38708
|
+
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
38709
|
+
const batchDir = (0, import_path12.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
|
|
38710
|
+
const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
|
|
38711
|
+
(0, import_fs14.mkdirSync)(batchDir, { recursive: true });
|
|
38712
|
+
const runs = models.map((model, index) => {
|
|
38713
|
+
const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
|
|
38714
|
+
const runDir = (0, import_path12.join)(batchDir, runId);
|
|
38715
|
+
(0, import_fs14.mkdirSync)(runDir, { recursive: true });
|
|
38716
|
+
const promptPath = writePrompt(runDir, promptVersion);
|
|
38717
|
+
const commandArgs = [
|
|
38718
|
+
"eval",
|
|
38719
|
+
"external-agent",
|
|
38720
|
+
"run",
|
|
38721
|
+
"--model-provider",
|
|
38722
|
+
model.provider,
|
|
38723
|
+
"--model-name",
|
|
38724
|
+
model.name,
|
|
38725
|
+
"--prompt-version",
|
|
38726
|
+
promptVersion,
|
|
38727
|
+
"--workspace-type",
|
|
38728
|
+
String(opts.workspaceType || "clean-no-repo"),
|
|
38729
|
+
"--agent-shell",
|
|
38730
|
+
String(opts.agentShell || "vscode-terminal"),
|
|
38731
|
+
"--out-dir",
|
|
38732
|
+
runDir
|
|
38733
|
+
];
|
|
38734
|
+
return {
|
|
38735
|
+
run_id: runId,
|
|
38736
|
+
model_provider: model.provider,
|
|
38737
|
+
model_name: model.name,
|
|
38738
|
+
prompt_version: promptVersion,
|
|
38739
|
+
run_dir: runDir,
|
|
38740
|
+
prompt_path: promptPath,
|
|
38741
|
+
launch_args: commandArgs,
|
|
38742
|
+
launch_command: `npx --yes @f-o-h/cli@latest ${commandArgs.map(quoteArg).join(" ")}`
|
|
38743
|
+
};
|
|
38744
|
+
});
|
|
38745
|
+
const batch = {
|
|
38746
|
+
schema_version: "external_agent_batch_plan.v1",
|
|
38747
|
+
created_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
38748
|
+
batch_dir: batchDir,
|
|
38749
|
+
prompt_version: promptVersion,
|
|
38750
|
+
workspace_type: String(opts.workspaceType || "clean-no-repo"),
|
|
38751
|
+
agent_shell: String(opts.agentShell || "vscode-terminal"),
|
|
38752
|
+
run_count: runs.length,
|
|
38753
|
+
runs,
|
|
38754
|
+
summary_command: `corepack pnpm eval:external-agent:runs:summary -- --root ${batchDir}`
|
|
38755
|
+
};
|
|
38756
|
+
const batchPath = (0, import_path12.join)(batchDir, "batch.json");
|
|
38757
|
+
(0, import_fs14.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
|
|
38758
|
+
`, "utf8");
|
|
38759
|
+
format(cliEnvelope({
|
|
38760
|
+
schemaVersion: "external_agent_batch_plan_result.v1",
|
|
38761
|
+
status: "exported",
|
|
38762
|
+
reasonCode: "external_agent_batch_plan_created",
|
|
38763
|
+
summary: `External-agent batch plan created for ${runs.length} model(s).`,
|
|
38764
|
+
artifacts: {
|
|
38765
|
+
batch: batchPath
|
|
38766
|
+
},
|
|
38767
|
+
nextCommands: [
|
|
38768
|
+
...runs.map((run) => run.launch_command),
|
|
38769
|
+
batch.summary_command
|
|
38770
|
+
],
|
|
38771
|
+
extra: { batch }
|
|
38772
|
+
}), { json: Boolean(opts.json) });
|
|
38773
|
+
});
|
|
38403
38774
|
external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
|
|
38404
38775
|
const status = normalizeStatus(opts.status);
|
|
38405
38776
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
38406
|
-
const runDir = (0,
|
|
38407
|
-
(0,
|
|
38777
|
+
const runDir = (0, import_path12.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
|
|
38778
|
+
(0, import_fs14.mkdirSync)(runDir, { recursive: true });
|
|
38408
38779
|
const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
|
|
38409
38780
|
const promptPath = writePrompt(runDir, promptVersion);
|
|
38410
38781
|
const shell = inferShell(opts.shell);
|
|
@@ -38426,7 +38797,7 @@ function registerEval(program3) {
|
|
|
38426
38797
|
}
|
|
38427
38798
|
};
|
|
38428
38799
|
writeSession(runDir, session);
|
|
38429
|
-
(0,
|
|
38800
|
+
(0, import_fs14.writeFileSync)((0, import_path12.join)(runDir, "notes.md"), "# External Agent Run Notes\n\n", "utf8");
|
|
38430
38801
|
let shellExitCode = null;
|
|
38431
38802
|
if (opts.shell !== false) {
|
|
38432
38803
|
process.stdout.write(`
|
|
@@ -38436,7 +38807,7 @@ Prompt: ${promptPath}
|
|
|
38436
38807
|
Exit the shell to finalize run.json.
|
|
38437
38808
|
|
|
38438
38809
|
`);
|
|
38439
|
-
const result = (0,
|
|
38810
|
+
const result = (0, import_child_process5.spawnSync)(shell.command, shell.args, {
|
|
38440
38811
|
stdio: "inherit",
|
|
38441
38812
|
env: {
|
|
38442
38813
|
...process.env,
|
|
@@ -38448,8 +38819,8 @@ Exit the shell to finalize run.json.
|
|
|
38448
38819
|
shellExitCode = typeof result.status === "number" ? result.status : null;
|
|
38449
38820
|
}
|
|
38450
38821
|
const artifact = buildRunArtifact({ runDir, session, status, reasonCode: opts.reasonCode, shellExitCode });
|
|
38451
|
-
const runPath = (0,
|
|
38452
|
-
(0,
|
|
38822
|
+
const runPath = (0, import_path12.join)(runDir, "run.json");
|
|
38823
|
+
(0, import_fs14.writeFileSync)(runPath, `${JSON.stringify(artifact, null, 2)}
|
|
38453
38824
|
`, "utf8");
|
|
38454
38825
|
format(cliEnvelope({
|
|
38455
38826
|
schemaVersion: "external_agent_capture_result.v1",
|
|
@@ -38459,12 +38830,68 @@ Exit the shell to finalize run.json.
|
|
|
38459
38830
|
artifacts: {
|
|
38460
38831
|
run: runPath,
|
|
38461
38832
|
prompt: promptPath,
|
|
38462
|
-
commands: (0,
|
|
38833
|
+
commands: (0, import_path12.join)(runDir, "commands.ndjson")
|
|
38463
38834
|
},
|
|
38464
38835
|
nextCommands: artifact.next_commands,
|
|
38465
38836
|
extra: { run: artifact }
|
|
38466
38837
|
}), { json: Boolean(opts.json) });
|
|
38467
38838
|
});
|
|
38839
|
+
external.command("execute").description("Create a guarded dry-run executor plan for programmable external-agent runners").requiredOption("--batch <path>", "Path to external_agent_batch_plan.v1 batch.json").option("--runner <runner>", "Runner implementation", "codex").option("--workspace-root <path>", "Clean executor workspace root; must be outside the private repo").option("--private-repo-root <path>", "Private repository root to guard against").option("--timeout-minutes <minutes>", "Per-run timeout planned for future execution", "30").option("--skip-runner-probe", "Skip local runner binary/help probing").option("--dry-run", "Write the executor plan without launching the runner", true).option("--json", "Output as JSON").action(async (opts) => {
|
|
38840
|
+
if (opts.dryRun === false) {
|
|
38841
|
+
format(cliEnvelope({
|
|
38842
|
+
schemaVersion: "external_agent_executor_plan_result.v1",
|
|
38843
|
+
status: "blocked",
|
|
38844
|
+
reasonCode: "external_agent_executor_live_run_not_enabled",
|
|
38845
|
+
summary: "Live external-agent execution is intentionally blocked until dry-run safety gates have passed.",
|
|
38846
|
+
nextCommands: [
|
|
38847
|
+
`foh eval external-agent execute --runner ${opts.runner || "codex"} --batch ${opts.batch} --dry-run --json`
|
|
38848
|
+
]
|
|
38849
|
+
}), { json: Boolean(opts.json) });
|
|
38850
|
+
process.exitCode = 1;
|
|
38851
|
+
return;
|
|
38852
|
+
}
|
|
38853
|
+
try {
|
|
38854
|
+
const plan = createExternalAgentExecutorPlan({
|
|
38855
|
+
batchPath: String(opts.batch),
|
|
38856
|
+
runner: String(opts.runner || "codex"),
|
|
38857
|
+
workspaceRoot: opts.workspaceRoot ? String(opts.workspaceRoot) : void 0,
|
|
38858
|
+
privateRepoRoot: opts.privateRepoRoot ? String(opts.privateRepoRoot) : void 0,
|
|
38859
|
+
timeoutMinutes: Number(opts.timeoutMinutes || 30),
|
|
38860
|
+
skipRunnerProbe: Boolean(opts.skipRunnerProbe),
|
|
38861
|
+
cwd: process.cwd()
|
|
38862
|
+
});
|
|
38863
|
+
const planPath = writeExternalAgentExecutorPlan(plan);
|
|
38864
|
+
format(cliEnvelope({
|
|
38865
|
+
schemaVersion: "external_agent_executor_plan_result.v1",
|
|
38866
|
+
status: "exported",
|
|
38867
|
+
reasonCode: "external_agent_executor_plan_created",
|
|
38868
|
+
summary: `External-agent ${plan.runner} dry-run executor plan created for ${plan.runs.length} run(s).`,
|
|
38869
|
+
artifacts: {
|
|
38870
|
+
plan: planPath
|
|
38871
|
+
},
|
|
38872
|
+
nextCommands: [
|
|
38873
|
+
"Review executor-plan.json for workspace/env isolation before enabling any live runner.",
|
|
38874
|
+
...plan.runs.map((run) => `${run.command} ${run.args.map(quoteArg).join(" ")} < ${quoteArg(run.prompt_path)} > ${quoteArg(run.outputs.jsonl)}`)
|
|
38875
|
+
],
|
|
38876
|
+
extra: { plan }
|
|
38877
|
+
}), { json: Boolean(opts.json) });
|
|
38878
|
+
} catch (error2) {
|
|
38879
|
+
if (error2 instanceof ExternalAgentExecutorError) {
|
|
38880
|
+
format(cliEnvelope({
|
|
38881
|
+
schemaVersion: "external_agent_executor_plan_result.v1",
|
|
38882
|
+
status: "blocked",
|
|
38883
|
+
reasonCode: error2.reasonCode,
|
|
38884
|
+
summary: error2.message,
|
|
38885
|
+
nextCommands: [
|
|
38886
|
+
"Fix the executor plan input or workspace path and rerun with --dry-run."
|
|
38887
|
+
]
|
|
38888
|
+
}), { json: Boolean(opts.json) });
|
|
38889
|
+
process.exitCode = 1;
|
|
38890
|
+
return;
|
|
38891
|
+
}
|
|
38892
|
+
throw error2;
|
|
38893
|
+
}
|
|
38894
|
+
});
|
|
38468
38895
|
}
|
|
38469
38896
|
|
|
38470
38897
|
// src/lib/banner.ts
|