@f-o-h/cli 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +174 -165
- package/dist/foh.js +101 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,175 +1,184 @@
|
|
|
1
|
-
# Front Of House CLI
|
|
2
|
-
|
|
3
|
-
AI-operator provisioning CLI for Front Of House.
|
|
4
|
-
|
|
5
|
-
Public mirror: https://github.com/iiko38/front-of-house-cli
|
|
6
|
-
|
|
7
|
-
Current published baseline: `@f-o-h/cli@0.1.8`
|
|
8
|
-
|
|
9
|
-
This mirror is a generated release artifact. The private product monorepo is not
|
|
10
|
-
published here, and no open-source license is granted unless stated separately.
|
|
11
|
-
|
|
12
|
-
Package-local examples and schemas ship with the npm artifact:
|
|
13
|
-
|
|
14
|
-
- `examples/scenario-suite.viewing.yml`
|
|
15
|
-
- `examples/proof-report.example.json`
|
|
16
|
-
- `examples/transcript-export.example.json`
|
|
17
|
-
- `examples/improvement-packet.example.json`
|
|
18
|
-
- `examples/external-agent-run.example.json`
|
|
19
|
-
- `schemas/cli-envelope.schema.json`
|
|
20
|
-
- `schemas/scenario-suite.schema.json`
|
|
21
|
-
- `schemas/transcript-export.schema.json`
|
|
22
|
-
- `schemas/improvement-packet.schema.json`
|
|
23
|
-
- `schemas/external-agent-run.schema.json`
|
|
24
|
-
|
|
25
|
-
## Install
|
|
26
|
-
|
|
27
|
-
```bash
|
|
28
|
-
npx @f-o-h/cli setup
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
Or install globally:
|
|
32
|
-
|
|
33
|
-
```bash
|
|
34
|
-
npm install -g @f-o-h/cli
|
|
35
|
-
foh --help
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
Verify the package version:
|
|
39
|
-
|
|
40
|
-
```bash
|
|
41
|
-
npx @f-o-h/cli --version
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
## First Run
|
|
45
|
-
|
|
46
|
-
```bash
|
|
47
|
-
foh auth signup --web
|
|
48
|
-
foh auth login --web
|
|
49
|
-
foh auth login
|
|
50
|
-
foh org list
|
|
51
|
-
foh org use --org <org-id>
|
|
52
|
-
foh setup
|
|
53
|
-
foh prove --agent <agent-id> --json
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
For AI agents and text-only terminals:
|
|
57
|
-
|
|
58
|
-
```bash
|
|
59
|
-
foh auth signup --web --json
|
|
60
|
-
foh auth login --web --json
|
|
61
|
-
foh auth login --email "$FOH_EMAIL" --password "$FOH_PASSWORD" --json
|
|
62
|
-
foh org list --json
|
|
63
|
-
foh org use --org <org-id> --json
|
|
64
|
-
foh setup --org <org-id> --agent-template <template-id> --agent-name "Demo Agent" --json
|
|
65
|
-
foh prove --agent <agent-id> --json --out foh-proof.json
|
|
66
|
-
foh test run --suite ./suite.yml --agent <agent-id> --json --out foh-test-report.json
|
|
67
|
-
foh agent replay --file ./transcript-export.json --json
|
|
68
|
-
foh bug improve --from-file foh-proof.json --out foh-improvement.json --json
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
`auth signup --web` opens the console signup page when possible and always
|
|
72
|
-
prints the fallback URL. `auth login --web` starts browser device
|
|
73
|
-
authorization, opens `/cli-auth`, waits for console approval, and stores the
|
|
74
|
-
returned short-lived token. Credential auth remains available as fallback.
|
|
75
|
-
|
|
76
|
-
`foh prove` produces a compact signed proof report across auth, org context,
|
|
77
|
-
agent validation, contact phone readiness, voice provider health, widget
|
|
78
|
-
channel/embed readiness, widget smoke, and simulation certification. It is
|
|
79
|
-
read-only by default; pass `--mutation-mode ensure` or `--repair` only when you
|
|
80
|
-
explicitly want proof to ensure missing widget state. Use `--strict` in
|
|
81
|
-
automation when holds should fail the command, and `--mission voice` or
|
|
82
|
-
`--require-phone` when a voice/contact number is mandatory for the demo.
|
|
83
|
-
|
|
84
|
-
The CLI defaults to the production API at `https://api.frontofhouse.okii.uk`.
|
|
85
|
-
|
|
86
|
-
## External-Agent Eval Capture
|
|
87
|
-
|
|
1
|
+
# Front Of House CLI
|
|
2
|
+
|
|
3
|
+
AI-operator provisioning CLI for Front Of House.
|
|
4
|
+
|
|
5
|
+
Public mirror: https://github.com/iiko38/front-of-house-cli
|
|
6
|
+
|
|
7
|
+
Current published baseline: `@f-o-h/cli@0.1.10`
|
|
8
|
+
|
|
9
|
+
This mirror is a generated release artifact. The private product monorepo is not
|
|
10
|
+
published here, and no open-source license is granted unless stated separately.
|
|
11
|
+
|
|
12
|
+
Package-local examples and schemas ship with the npm artifact:
|
|
13
|
+
|
|
14
|
+
- `examples/scenario-suite.viewing.yml`
|
|
15
|
+
- `examples/proof-report.example.json`
|
|
16
|
+
- `examples/transcript-export.example.json`
|
|
17
|
+
- `examples/improvement-packet.example.json`
|
|
18
|
+
- `examples/external-agent-run.example.json`
|
|
19
|
+
- `schemas/cli-envelope.schema.json`
|
|
20
|
+
- `schemas/scenario-suite.schema.json`
|
|
21
|
+
- `schemas/transcript-export.schema.json`
|
|
22
|
+
- `schemas/improvement-packet.schema.json`
|
|
23
|
+
- `schemas/external-agent-run.schema.json`
|
|
24
|
+
|
|
25
|
+
## Install
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npx @f-o-h/cli setup
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Or install globally:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
npm install -g @f-o-h/cli
|
|
35
|
+
foh --help
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Verify the package version:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
npx @f-o-h/cli --version
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## First Run
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
foh auth signup --web
|
|
48
|
+
foh auth login --web
|
|
49
|
+
foh auth login
|
|
50
|
+
foh org list
|
|
51
|
+
foh org use --org <org-id>
|
|
52
|
+
foh setup
|
|
53
|
+
foh prove --agent <agent-id> --json
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
For AI agents and text-only terminals:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
foh auth signup --web --json
|
|
60
|
+
foh auth login --web --json
|
|
61
|
+
foh auth login --email "$FOH_EMAIL" --password "$FOH_PASSWORD" --json
|
|
62
|
+
foh org list --json
|
|
63
|
+
foh org use --org <org-id> --json
|
|
64
|
+
foh setup --org <org-id> --agent-template <template-id> --agent-name "Demo Agent" --json
|
|
65
|
+
foh prove --agent <agent-id> --json --out foh-proof.json
|
|
66
|
+
foh test run --suite ./suite.yml --agent <agent-id> --json --out foh-test-report.json
|
|
67
|
+
foh agent replay --file ./transcript-export.json --json
|
|
68
|
+
foh bug improve --from-file foh-proof.json --out foh-improvement.json --json
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
`auth signup --web` opens the console signup page when possible and always
|
|
72
|
+
prints the fallback URL. `auth login --web` starts browser device
|
|
73
|
+
authorization, opens `/cli-auth`, waits for console approval, and stores the
|
|
74
|
+
returned short-lived token. Credential auth remains available as fallback.
|
|
75
|
+
|
|
76
|
+
`foh prove` produces a compact signed proof report across auth, org context,
|
|
77
|
+
agent validation, contact phone readiness, voice provider health, widget
|
|
78
|
+
channel/embed readiness, widget smoke, and simulation certification. It is
|
|
79
|
+
read-only by default; pass `--mutation-mode ensure` or `--repair` only when you
|
|
80
|
+
explicitly want proof to ensure missing widget state. Use `--strict` in
|
|
81
|
+
automation when holds should fail the command, and `--mission voice` or
|
|
82
|
+
`--require-phone` when a voice/contact number is mandatory for the demo.
|
|
83
|
+
|
|
84
|
+
The CLI defaults to the production API at `https://api.frontofhouse.okii.uk`.
|
|
85
|
+
|
|
86
|
+
## External-Agent Eval Capture
|
|
87
|
+
|
|
88
88
|
Use this when testing whether a clean coding agent can start from public docs
|
|
89
89
|
and the public npm package without private repo context:
|
|
90
90
|
|
|
91
91
|
```bash
|
|
92
|
-
foh eval external-agent run \
|
|
93
|
-
--model-provider openai \
|
|
94
|
-
--model-name codex \
|
|
95
|
-
--prompt-version blank-setup.v1
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
The command writes a versioned prompt, launches an instrumented shell, captures
|
|
99
|
-
FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
|
|
100
|
-
`external_agent_run.v1` artifact when the shell exits.
|
|
101
|
-
|
|
102
|
-
## Local Scenario Suites
|
|
103
|
-
|
|
104
|
-
`foh test run --suite <file>` runs deterministic widget-runtime checks for a
|
|
105
|
-
specific agent. The suite format supports reply text checks plus structured
|
|
106
|
-
runtime assertions for trace/correlation IDs, action or terminal state, latency,
|
|
107
|
-
variables, tool calls, escalation/handoff, lead capture, and exact response
|
|
108
|
-
field paths.
|
|
109
|
-
|
|
110
|
-
```yaml
|
|
111
|
-
agent: agent_123
|
|
112
|
-
scenarios:
|
|
113
|
-
- id: viewing
|
|
114
|
-
turns:
|
|
115
|
-
- user: Can I book a viewing this week?
|
|
116
|
-
expect:
|
|
117
|
-
contains: viewing
|
|
118
|
-
trace_present: true
|
|
119
|
-
correlation_present: true
|
|
120
|
-
action: text
|
|
121
|
-
latency_ms:
|
|
122
|
-
max: 3000
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
Use transcript fixtures when turning real user conversations into regression
|
|
126
|
-
tests:
|
|
127
|
-
|
|
128
|
-
```yaml
|
|
129
|
-
agent: agent_123
|
|
130
|
-
scenarios:
|
|
131
|
-
- id: replay-viewing
|
|
132
|
-
fixture_transcript: ./fixtures/viewing-transcript.json
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
## Transcript Export
|
|
136
|
-
|
|
137
|
-
Use hydrated transcript export to turn real behavior into replay/debug artifacts:
|
|
138
|
-
|
|
139
|
-
```bash
|
|
140
|
-
foh transcripts export \
|
|
141
|
-
--agent <agent-id> \
|
|
142
|
-
--hydrate \
|
|
143
|
-
--include-traces \
|
|
144
|
-
--format json \
|
|
145
|
-
--out foh-transcripts.json \
|
|
92
|
+
foh eval external-agent batch \
|
|
93
|
+
--models openai/codex,anthropic/claude,cursor/agent \
|
|
94
|
+
--prompt-version blank-setup.v1 \
|
|
146
95
|
--json
|
|
147
96
|
```
|
|
148
97
|
|
|
149
|
-
|
|
150
|
-
Each exported conversation includes a `replay_command` and `test_fixture` seed
|
|
151
|
-
so operators or AI agents can move from observed failure to replay or scenario
|
|
152
|
-
regression without opening the console.
|
|
153
|
-
|
|
154
|
-
Replay a local export without API access:
|
|
155
|
-
|
|
156
|
-
```bash
|
|
157
|
-
foh agent replay --file foh-transcripts.json --json
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
## Improvement Packets
|
|
161
|
-
|
|
162
|
-
Use `foh bug improve` when a setup, proof, replay, knowledge, runtime, or
|
|
163
|
-
live-proof failure should become actionable backlog/test/config/docs work:
|
|
98
|
+
Run each returned launch command in a clean agent terminal:
|
|
164
99
|
|
|
165
100
|
```bash
|
|
166
|
-
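foh bug improve \
|
|
167
|
-
--from-file test-results/proof-or-replay-failure.json \
|
|
168
|
-
--out test-results/improvement-packet.json \
|
|
169
|
-
--json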
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
The command
|
|
173
|
-
|
|
174
|
-
|
|
101
|
+
foh eval external-agent run \
|
|
102
|
+
--model-provider openai \
|
|
103
|
+
--model-name codex \
|
|
104
|
+
--prompt-version blank-setup.v1
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
The command writes a versioned prompt, launches an instrumented shell, captures
|
|
108
|
+
FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
|
|
109
|
+
`external_agent_run.v1` artifact when the shell exits.
|
|
110
|
+
|
|
111
|
+
## Local Scenario Suites
|
|
112
|
+
|
|
113
|
+
`foh test run --suite <file>` runs deterministic widget-runtime checks for a
|
|
114
|
+
specific agent. The suite format supports reply text checks plus structured
|
|
115
|
+
runtime assertions for trace/correlation IDs, action or terminal state, latency,
|
|
116
|
+
variables, tool calls, escalation/handoff, lead capture, and exact response
|
|
117
|
+
field paths.
|
|
118
|
+
|
|
119
|
+
```yaml
|
|
120
|
+
agent: agent_123
|
|
121
|
+
scenarios:
|
|
122
|
+
- id: viewing
|
|
123
|
+
turns:
|
|
124
|
+
- user: Can I book a viewing this week?
|
|
125
|
+
expect:
|
|
126
|
+
contains: viewing
|
|
127
|
+
trace_present: true
|
|
128
|
+
correlation_present: true
|
|
129
|
+
action: text
|
|
130
|
+
latency_ms:
|
|
131
|
+
max: 3000
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Use transcript fixtures when turning real user conversations into regression
|
|
135
|
+
tests:
|
|
136
|
+
|
|
137
|
+
```yaml
|
|
138
|
+
agent: agent_123
|
|
139
|
+
scenarios:
|
|
140
|
+
- id: replay-viewing
|
|
141
|
+
fixture_transcript: ./fixtures/viewing-transcript.json
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Transcript Export
|
|
145
|
+
|
|
146
|
+
Use hydrated transcript export to turn real behavior into replay/debug artifacts:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
foh transcripts export \
|
|
150
|
+
--agent <agent-id> \
|
|
151
|
+
--hydrate \
|
|
152
|
+
--include-traces \
|
|
153
|
+
--format json \
|
|
154
|
+
--out foh-transcripts.json \
|
|
155
|
+
--json
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Exports redact obvious emails, phone numbers, and secret-like tokens by default.
|
|
159
|
+
Each exported conversation includes a `replay_command` and `test_fixture` seed
|
|
160
|
+
so operators or AI agents can move from observed failure to replay or scenario
|
|
161
|
+
regression without opening the console.
|
|
162
|
+
|
|
163
|
+
Replay a local export without API access:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
foh agent replay --file foh-transcripts.json --json
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Improvement Packets
|
|
170
|
+
|
|
171
|
+
Use `foh bug improve` when a setup, proof, replay, knowledge, runtime, or
|
|
172
|
+
live-proof failure should become actionable backlog/test/config/docs work:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
foh bug improve \
|
|
176
|
+
--from-file test-results/proof-or-replay-failure.json \
|
|
177
|
+
--out test-results/improvement-packet.json \
|
|
178
|
+
--json
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
The command emits a redacted `foh_improvement_packet.v1` with stable IDs,
|
|
182
|
+
reason code, promotion decision, evidence summary, and deterministic next
|
|
183
|
+
commands.
|
|
175
184
|
|
package/dist/foh.js
CHANGED
|
@@ -32640,7 +32640,7 @@ var StdioServerTransport = class {
|
|
|
32640
32640
|
};
|
|
32641
32641
|
|
|
32642
32642
|
// src/lib/cli-version.ts
|
|
32643
|
-
// Version string of the previous release (removed side of the diff); the
// literal is restored with its patch number, closing quote and semicolon.
var CLI_VERSION = "0.1.8";
|
|
32643
|
+
var CLI_VERSION = "0.1.10";
|
|
32644
32644
|
|
|
32645
32645
|
// src/commands/mcp-serve.ts
|
|
32646
32646
|
var DEFAULT_TIMEOUT_MS = 12e4;
|
|
@@ -38267,6 +38267,12 @@ var SECRET_RE2 = /\b(?:Bearer\s+)?(?:sk|pk|xai|whsec|EAAN|ghp|gho|github_pat|npm
|
|
|
38267
38267
|
/**
 * Mask whatever SECRET_RE2 matches in the given text.
 *
 * @param {string} value - Text to scrub; must be a string (`.replace` is called on it).
 * @returns {string} The text with matches swapped for "[redacted_secret]".
 */
function redactText(value) {
  const placeholder = "[redacted_secret]";
  const scrubbed = value.replace(SECRET_RE2, placeholder);
  return scrubbed;
}
|
|
38270
|
+
/**
 * Redact a filesystem path for inclusion in recorded artifacts.
 *
 * The value is first passed through redactText, then every occurrence of the
 * current user's home directory (USERPROFILE, falling back to HOME) is
 * replaced with "~".
 *
 * The home directory is only replaced when it is followed by a path separator
 * or the end of the string, so "/home/al" does not mangle "/home/alice".
 * A home directory that is just a filesystem root ("/" or "C:\") is ignored,
 * because replacing it would rewrite every path without hiding anything.
 *
 * @param {string} value - Path (or text containing paths) to redact.
 * @returns {string} The redacted text.
 */
function redactPath(value) {
  const redacted = redactText(value);
  const home = process.env.USERPROFILE || process.env.HOME;
  if (!home) return redacted;

  // Drop trailing separators so "/home/al/" and "/home/al" behave the same.
  const root = home.replace(/[\\/]+$/, "");
  if (!root || /^[A-Za-z]:$/.test(root)) return redacted;

  // The home path is data, not a pattern: escape regex metacharacters.
  const escaped = root.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const homePattern = new RegExp(`${escaped}(?=[\\\\/]|$)`, "g");
  return redacted.replace(homePattern, "~");
}
|
|
38270
38276
|
/**
 * Serialize a value as a single newline-terminated JSON line (NDJSON record).
 *
 * JSON.stringify returns undefined for values it cannot represent at the top
 * level (undefined, functions, symbols); those are written as "null" instead
 * of throwing, so one bad record cannot crash the writer.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} One line of JSON followed by "\n".
 */
function safeJsonLine(value) {
  const json = JSON.stringify(value) ?? "null";
  // Defensive: collapse any raw line breaks so the record stays on one line.
  return `${json.replace(/\r?\n/g, " ")}\n`;
}
|
|
@@ -38285,7 +38291,7 @@ function recordExternalAgentCliInvocation(input) {
|
|
|
38285
38291
|
schema_version: "external_agent_cli_command.v1",
|
|
38286
38292
|
recorded_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
38287
38293
|
cli_version: input.cliVersion,
|
|
38288
|
-
cwd:
|
|
38294
|
+
cwd: redactPath(process.cwd()),
|
|
38289
38295
|
argv: args,
|
|
38290
38296
|
command: args.join(" "),
|
|
38291
38297
|
json_requested: args.includes("--json"),
|
|
@@ -38303,6 +38309,7 @@ function readCommandRecords(runDir) {
|
|
|
38303
38309
|
|
|
38304
38310
|
// src/commands/eval.ts
|
|
38305
38311
|
var DEFAULT_PROMPT_VERSION = "blank-setup.v1";
|
|
38312
|
+
var DEFAULT_BATCH_MODELS = "openai/codex,anthropic/claude,cursor/agent";
|
|
38306
38313
|
var PROMPTS = {
|
|
38307
38314
|
"blank-setup.v1": "Go to https://frontofhouse.okii.uk. Use only public docs, public API docs, and the public npm CLI package. Install the FOH CLI, authenticate or reach a deterministic auth blocker, create or configure a Front Of House voice agent and website widget, run proof/smoke/certification where available, and produce a final evidence summary with commands run, docs used, artifacts created, and any blocker reason codes. Do not assume access to the private source repository.",
|
|
38308
38315
|
"debug-proof-failure.v1": "You are given a FOH proof or debug artifact. Use public docs and FOH CLI/API behavior to classify whether the blocker is docs, auth, org setup, agent config, widget, channel, runtime, or product bug. Produce a redacted improvement packet or the exact command needed to produce one. Do not ask the human to interpret logs manually unless no machine-readable artifact exists.",
|
|
@@ -38321,6 +38328,31 @@ function defaultRunDir(modelName, promptVersion) {
|
|
|
38321
38328
|
const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
|
|
38322
38329
|
return (0, import_path11.resolve)("test-results", "external-agent-runs", date4, `${safeModel}-${safePrompt}-${stamp}`);
|
|
38323
38330
|
}
|
|
38331
|
+
/**
 * Build the default output directory for an external-agent batch plan:
 * <cwd>/test-results/external-agent-runs/<YYYY-MM-DD>/batch-<prompt>-<stamp>.
 *
 * The date directory and the millisecond stamp are derived from a single
 * timestamp so they can never disagree when the call straddles midnight (UTC).
 *
 * @param {string} [promptVersion] - Prompt version label; falls back to
 *   DEFAULT_PROMPT_VERSION when empty. Lower-cased, with runs of characters
 *   outside [a-z0-9_.-] collapsed to "-".
 * @returns {string} Absolute directory path (resolved against the current working directory).
 */
function defaultBatchDir(promptVersion) {
  const iso = (/* @__PURE__ */ new Date()).toISOString();
  const date4 = iso.slice(0, 10);
  // "2024-01-02T03:04:05.678Z" -> "2024-01-02-03-04-05-678"
  const stamp = iso.replace(/[:.]/g, "-").replace("T", "-").slice(0, 23);
  const safePrompt = String(promptVersion || DEFAULT_PROMPT_VERSION).toLowerCase().replace(/[^a-z0-9_.-]+/g, "-");
  return (0, import_path11.resolve)("test-results", "external-agent-runs", date4, `batch-${safePrompt}-${stamp}`);
}
|
|
38337
|
+
/**
 * Turn an arbitrary label into a lowercase slug.
 *
 * Runs of characters outside [a-z0-9_.-] become a single "-", and leading or
 * trailing dashes are stripped. Falsy input, or input that slugs down to
 * nothing, yields "unknown".
 *
 * @param {unknown} value - Label to slugify.
 * @returns {string} Non-empty slug.
 */
function safeSlug(value) {
  const fallback = "unknown";
  const lowered = String(value || fallback).toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9_.-]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed || fallback;
}
|
|
38340
|
+
/**
 * Quote a single argument for display in a copy-pasteable shell command.
 *
 * Values made only of [A-Za-z0-9_./:=@-] are returned bare. Anything else is
 * wrapped in double quotes with `"`, `$`, backtick and backslash escaped.
 * Backslash must be escaped too: inside POSIX double quotes it escapes the
 * following `"`, `$`, backtick or `\`, so an unescaped trailing backslash
 * would swallow the closing quote.
 *
 * @param {unknown} value - Argument value; coerced with String().
 * @returns {string} Shell-safe representation of the argument.
 */
function quoteArg(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_./:=@-]+$/.test(text)) return text;
  const escaped = text.replace(/(["$`\\])/g, "\\$1");
  return `"${escaped}"`;
}
|
|
38345
|
+
/**
 * Split a "provider/model" spec at its first slash.
 *
 * Everything before the first "/" is the provider; everything after it
 * (further slashes included) is the model name. Both parts are trimmed, and
 * an empty part falls back to "unknown" / "unknown-model".
 *
 * @param {unknown} raw - Spec such as "openai/codex".
 * @returns {{ provider: string, name: string }} Parsed spec.
 */
function parseModelSpec(raw) {
  const spec = String(raw || "");
  const slashAt = spec.indexOf("/");
  const providerPart = slashAt === -1 ? spec : spec.slice(0, slashAt);
  const modelPart = slashAt === -1 ? "" : spec.slice(slashAt + 1);
  return {
    provider: providerPart.trim() || "unknown",
    name: modelPart.trim() || "unknown-model"
  };
}
|
|
38353
|
+
/**
 * Parse a comma-separated list of "provider/model" specs.
 *
 * A falsy input uses DEFAULT_BATCH_MODELS. Entries are trimmed, blank
 * entries are dropped, and each remaining entry goes through parseModelSpec.
 *
 * @param {unknown} raw - Comma-separated spec list.
 * @returns {Array<{ provider: string, name: string }>} Parsed specs in input order.
 */
function parseModelList(raw) {
  const specs = [];
  for (const piece of String(raw || DEFAULT_BATCH_MODELS).split(",")) {
    const entry = piece.trim();
    if (entry) specs.push(parseModelSpec(entry));
  }
  return specs;
}
|
|
38324
38356
|
function inferShell(raw) {
|
|
38325
38357
|
if (raw && raw.trim()) return { command: raw, args: [], label: raw };
|
|
38326
38358
|
if (process.platform === "win32") return { command: "powershell.exe", args: ["-NoLogo", "-NoProfile"], label: "powershell" };
|
|
@@ -38394,6 +38426,73 @@ function buildRunArtifact(input) {
|
|
|
38394
38426
|
function registerEval(program3) {
|
|
38395
38427
|
const evalCommand = program3.command("eval").description("Run or summarize external-agent evaluation workflows");
|
|
38396
38428
|
const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
|
|
38429
|
+
external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
|
|
38430
|
+
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|
|
38431
|
+
const batchDir = (0, import_path11.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
|
|
38432
|
+
const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
|
|
38433
|
+
(0, import_fs13.mkdirSync)(batchDir, { recursive: true });
|
|
38434
|
+
const runs = models.map((model, index) => {
|
|
38435
|
+
const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
|
|
38436
|
+
const runDir = (0, import_path11.join)(batchDir, runId);
|
|
38437
|
+
(0, import_fs13.mkdirSync)(runDir, { recursive: true });
|
|
38438
|
+
const promptPath = writePrompt(runDir, promptVersion);
|
|
38439
|
+
const commandArgs = [
|
|
38440
|
+
"eval",
|
|
38441
|
+
"external-agent",
|
|
38442
|
+
"run",
|
|
38443
|
+
"--model-provider",
|
|
38444
|
+
model.provider,
|
|
38445
|
+
"--model-name",
|
|
38446
|
+
model.name,
|
|
38447
|
+
"--prompt-version",
|
|
38448
|
+
promptVersion,
|
|
38449
|
+
"--workspace-type",
|
|
38450
|
+
String(opts.workspaceType || "clean-no-repo"),
|
|
38451
|
+
"--agent-shell",
|
|
38452
|
+
String(opts.agentShell || "vscode-terminal"),
|
|
38453
|
+
"--out-dir",
|
|
38454
|
+
runDir
|
|
38455
|
+
];
|
|
38456
|
+
return {
|
|
38457
|
+
run_id: runId,
|
|
38458
|
+
model_provider: model.provider,
|
|
38459
|
+
model_name: model.name,
|
|
38460
|
+
prompt_version: promptVersion,
|
|
38461
|
+
run_dir: runDir,
|
|
38462
|
+
prompt_path: promptPath,
|
|
38463
|
+
launch_args: commandArgs,
|
|
38464
|
+
launch_command: `npx --yes @f-o-h/cli@latest ${commandArgs.map(quoteArg).join(" ")}`
|
|
38465
|
+
};
|
|
38466
|
+
});
|
|
38467
|
+
const batch = {
|
|
38468
|
+
schema_version: "external_agent_batch_plan.v1",
|
|
38469
|
+
created_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
38470
|
+
batch_dir: batchDir,
|
|
38471
|
+
prompt_version: promptVersion,
|
|
38472
|
+
workspace_type: String(opts.workspaceType || "clean-no-repo"),
|
|
38473
|
+
agent_shell: String(opts.agentShell || "vscode-terminal"),
|
|
38474
|
+
run_count: runs.length,
|
|
38475
|
+
runs,
|
|
38476
|
+
summary_command: `corepack pnpm eval:external-agent:runs:summary -- --root ${batchDir}`
|
|
38477
|
+
};
|
|
38478
|
+
const batchPath = (0, import_path11.join)(batchDir, "batch.json");
|
|
38479
|
+
(0, import_fs13.writeFileSync)(batchPath, `${JSON.stringify(batch, null, 2)}
|
|
38480
|
+
`, "utf8");
|
|
38481
|
+
format(cliEnvelope({
|
|
38482
|
+
schemaVersion: "external_agent_batch_plan_result.v1",
|
|
38483
|
+
status: "exported",
|
|
38484
|
+
reasonCode: "external_agent_batch_plan_created",
|
|
38485
|
+
summary: `External-agent batch plan created for ${runs.length} model(s).`,
|
|
38486
|
+
artifacts: {
|
|
38487
|
+
batch: batchPath
|
|
38488
|
+
},
|
|
38489
|
+
nextCommands: [
|
|
38490
|
+
...runs.map((run) => run.launch_command),
|
|
38491
|
+
batch.summary_command
|
|
38492
|
+
],
|
|
38493
|
+
extra: { batch }
|
|
38494
|
+
}), { json: Boolean(opts.json) });
|
|
38495
|
+
});
|
|
38397
38496
|
external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
|
|
38398
38497
|
const status = normalizeStatus(opts.status);
|
|
38399
38498
|
const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
|