@f-o-h/cli 0.1.58 → 0.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -181,16 +181,19 @@ failure packet, and convert it with `foh bug improve`.
181
181
  For guarded programmable-runner planning:
182
182
 
183
183
  ```bash
184
- foh eval external-agent execute \
185
- --runner codex \
186
- --batch test-results/external-agent-runs/<batch>/batch.json \
187
- --dry-run \
188
- --json
189
- ```
190
-
191
- This writes `executor-plan.json`, creates intentionally empty clean workspaces
192
- outside the private repo, validates the local Codex binary/help flags, and
193
- prints exact `codex exec` commands without executing them.
184
+ foh eval external-agent execute \
185
+ --runner codex \
186
+ --batch test-results/external-agent-runs/<batch>/batch.json \
187
+ --codex-model gpt-5.4-mini \
188
+ --dry-run \
189
+ --json
190
+ ```
191
+
192
+ This writes `executor-plan.json`, creates intentionally empty clean workspaces
193
+ outside the private repo, validates the local Codex binary/help flags, and
194
+ prints exact `codex exec` commands without executing them. Use `--codex-model`
195
+ to compare the same prompt across explicit Codex models; the selected runner
196
+ model is recorded in `executor-plan.json` and controlled-run `run.json`.
194
197
 
195
198
  Before promoting run artifacts, scan and redact them:
196
199
 
package/dist/foh.js CHANGED
@@ -32853,7 +32853,7 @@ var StdioServerTransport = class {
32853
32853
  };
32854
32854
 
32855
32855
  // src/lib/cli-version.ts
32856
- var CLI_VERSION = "0.1.58";
32856
+ var CLI_VERSION = "0.1.60";
32857
32857
 
32858
32858
  // src/commands/mcp-serve.ts
32859
32859
  var DEFAULT_TIMEOUT_MS = 12e4;
@@ -39244,6 +39244,7 @@ function readExternalAgentMetadata(runDir) {
39244
39244
  }
39245
39245
 
39246
39246
  // src/lib/external-agent-executor.ts
39247
+ var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
39247
39248
  var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
39248
39249
  "SUPABASE_",
39249
39250
  "DATABASE_",
@@ -39411,11 +39412,16 @@ function readBatch(batchPath) {
39411
39412
  return parsed;
39412
39413
  }
39413
39414
  function defaultRunnerProbe(command, args) {
39415
+ const isGeminiHeadlessSmoke = args.includes("FOH_GEMINI_HEADLESS_PROBE");
39416
+ const spawnOptions = {
39417
+ encoding: "utf8",
39418
+ timeout: isGeminiHeadlessSmoke ? GEMINI_HEADLESS_PROBE_TIMEOUT_MS : void 0
39419
+ };
39414
39420
  const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process4.spawnSync)(
39415
39421
  "powershell.exe",
39416
39422
  ["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${[command, ...args].map(quotePowerShellArg).join(" ")}`],
39417
- { encoding: "utf8" }
39418
- ) : (0, import_child_process4.spawnSync)(command, args, { encoding: "utf8" });
39423
+ spawnOptions
39424
+ ) : (0, import_child_process4.spawnSync)(command, args, spawnOptions);
39419
39425
  return {
39420
39426
  status: typeof result.status === "number" ? result.status : null,
39421
39427
  stdout: String(result.stdout || ""),
@@ -39423,6 +39429,9 @@ function defaultRunnerProbe(command, args) {
39423
39429
  error: result.error
39424
39430
  };
39425
39431
  }
39432
+ function geminiCapacityUnavailable(text) {
39433
+ return /MODEL_CAPACITY_EXHAUSTED|RESOURCE_EXHAUSTED|No capacity available|rateLimitExceeded|exhausted your capacity|status 429/i.test(text);
39434
+ }
39426
39435
  function quotePowerShellArg(value) {
39427
39436
  return `'${value.replace(/'/g, "''")}'`;
39428
39437
  }
@@ -39517,6 +39526,9 @@ ${yoloHelp.stderr}`;
39517
39526
  "--output-last-message"
39518
39527
  ];
39519
39528
  const missing = commonExecFlags.filter((flag) => !execHelpText.includes(flag));
39529
+ if (String(options.codexModel || "").trim() && !execHelpText.includes("--model")) {
39530
+ missing.push("--model");
39531
+ }
39520
39532
  const supportsLegacyFullAuto = execHelpText.includes("--full-auto");
39521
39533
  const supportsModernApprovalMode = rootHelp.status === 0 && rootHelpText.includes("--ask-for-approval");
39522
39534
  if (!supportsLegacyFullAuto && !supportsModernApprovalMode) {
@@ -39629,7 +39641,19 @@ ${smoke.stderr}`;
39629
39641
  "Gemini runner sandbox is unavailable on this host. Install/configure Docker/Podman or rerun only on an externally isolated host with --gemini-sandbox-mode disabled."
39630
39642
  );
39631
39643
  }
39644
+ if (geminiCapacityUnavailable(smokeText)) {
39645
+ throw new ExternalAgentExecutorError(
39646
+ "external_agent_runner_capacity_unavailable",
39647
+ "Gemini runner reached the provider but the selected model has no available capacity. Retry later or configure a supported lower-contention Gemini model before live eval."
39648
+ );
39649
+ }
39632
39650
  if (smoke.error || smoke.status !== 0 && !/Auth method|GEMINI_API_KEY|GOOGLE_GENAI_USE_VERTEXAI|GOOGLE_GENAI_USE_GCA/i.test(smokeText)) {
39651
+ if (smoke.error?.code === "ETIMEDOUT") {
39652
+ throw new ExternalAgentExecutorError(
39653
+ "external_agent_runner_headless_probe_timed_out",
39654
+ "Gemini runner headless invocation probe timed out before reaching an auth boundary or provider response."
39655
+ );
39656
+ }
39633
39657
  throw new ExternalAgentExecutorError(
39634
39658
  "external_agent_runner_headless_probe_failed",
39635
39659
  "Gemini runner headless invocation probe failed before reaching an auth boundary."
@@ -39675,6 +39699,17 @@ function normalizeCodexSandboxMode(value) {
39675
39699
  `Unsupported Codex sandbox mode: ${value}. Use workspace-write or danger-full-access.`
39676
39700
  );
39677
39701
  }
39702
+ function normalizeCodexModel(value) {
39703
+ const normalized = String(value || "").trim();
39704
+ if (!normalized) return null;
39705
+ if (!/^[A-Za-z0-9._:@/-]+$/.test(normalized)) {
39706
+ throw new ExternalAgentExecutorError(
39707
+ "invalid_codex_model",
39708
+ "Unsupported Codex model value. Use a plain model id such as gpt-5.4-mini."
39709
+ );
39710
+ }
39711
+ return normalized;
39712
+ }
39678
39713
  function normalizeGeminiSandboxMode(value) {
39679
39714
  const normalized = (value || "required").trim().toLowerCase();
39680
39715
  if (normalized === "required" || normalized === "disabled") return normalized;
@@ -39683,6 +39718,9 @@ function normalizeGeminiSandboxMode(value) {
39683
39718
  `Unsupported Gemini sandbox mode: ${value}. Use required or disabled.`
39684
39719
  );
39685
39720
  }
39721
+ function codexModelArgs(model) {
39722
+ return model ? ["--model", model] : [];
39723
+ }
39686
39724
  function codexConfigArgs(input) {
39687
39725
  const args = [];
39688
39726
  if (input.backend === "legacy-landlock") {
@@ -39722,6 +39760,7 @@ function createExternalAgentExecutorPlan(options) {
39722
39760
  const runnerProbe = validateRunner(options, runner);
39723
39761
  const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
39724
39762
  const codexSandboxMode = normalizeCodexSandboxMode(options.codexSandboxMode);
39763
+ const codexModel = runner === "codex" ? normalizeCodexModel(options.codexModel) : null;
39725
39764
  const codexNetworkAccess = options.codexNetworkAccess === true;
39726
39765
  const privateRepoRoot = (0, import_path13.resolve)(options.privateRepoRoot || options.cwd || process.cwd());
39727
39766
  const workspaceRoot = resolveWorkspaceRoot({ batchPath, workspaceRoot: options.workspaceRoot, privateRepoRoot });
@@ -39770,6 +39809,7 @@ function createExternalAgentExecutorPlan(options) {
39770
39809
  ] : [
39771
39810
  ...runnerProbe.globalArgs,
39772
39811
  "exec",
39812
+ ...codexModelArgs(codexModel),
39773
39813
  ...codexConfigArgs({ backend: codexSandboxBackend, networkAccess: codexNetworkAccess }),
39774
39814
  "--cd",
39775
39815
  workspaceDir,
@@ -39788,6 +39828,7 @@ function createExternalAgentExecutorPlan(options) {
39788
39828
  run_id: runId,
39789
39829
  model_provider: String(run.model_provider || "unknown"),
39790
39830
  model_name: String(run.model_name || "unknown-model"),
39831
+ runner_model: runner === "codex" ? codexModel : null,
39791
39832
  prompt_version: promptVersion,
39792
39833
  run_dir: runDir,
39793
39834
  prompt_path: promptPath,
@@ -39842,6 +39883,7 @@ function createExternalAgentExecutorPlan(options) {
39842
39883
  },
39843
39884
  runner_automation_mode: runnerProbe.automationMode,
39844
39885
  codex_automation_mode: runner === "codex" ? runnerProbe.automationMode : null,
39886
+ codex_model: runner === "codex" ? codexModel : null,
39845
39887
  codex_sandbox_mode: codexSandboxMode,
39846
39888
  codex_sandbox_backend: codexSandboxBackend,
39847
39889
  codex_network_access: codexNetworkAccess
@@ -40003,6 +40045,7 @@ function buildExecutedRunArtifact(input) {
40003
40045
  failure_reason_code: input.reasonCode,
40004
40046
  model_provider: input.run.model_provider,
40005
40047
  model_name: input.run.model_name,
40048
+ runner_model: input.run.runner_model,
40006
40049
  agent_shell: `${input.run.command}-exec`,
40007
40050
  workspace_type: "clean-no-repo-programmatic",
40008
40051
  prompt_version: input.run.prompt_version,
@@ -40330,6 +40373,20 @@ function executorRecoveryCommands(reasonCode, runner) {
40330
40373
  "Upgrade, downgrade, or reconfigure the runner CLI until the planned non-interactive invocation reaches an auth boundary without parser errors."
40331
40374
  ];
40332
40375
  }
40376
+ if (reasonCode === "external_agent_runner_headless_probe_timed_out") {
40377
+ return [
40378
+ `${normalizedRunner} --version`,
40379
+ `${normalizedRunner} --help`,
40380
+ "Retry the dry-run once. If it repeats, reduce the probe/model contention or use a different subscribed runner before launching live evals."
40381
+ ];
40382
+ }
40383
+ if (reasonCode === "external_agent_runner_capacity_unavailable") {
40384
+ return [
40385
+ "gemini --version",
40386
+ "gemini --help",
40387
+ "Retry after the Gemini capacity window resets, or configure a supported lower-contention Gemini model before rerunning the same executor dry-run."
40388
+ ];
40389
+ }
40333
40390
  return [
40334
40391
  "Fix the executor plan input or workspace path and rerun with --dry-run."
40335
40392
  ];
@@ -40640,7 +40697,7 @@ Exit the shell to finalize run.json.
40640
40697
  }), { json: Boolean(opts.json) });
40641
40698
  if (!report.ok) process.exitCode = 1;
40642
40699
  });
40643
- external.command("execute").description("Create a guarded dry-run executor plan for programmable external-agent runners").requiredOption("--batch <path>", "Path to external_agent_batch_plan.v1 batch.json").option("--runner <runner>", "Runner implementation", "codex").option("--workspace-root <path>", "Clean executor workspace root; must be outside the private repo").option("--private-repo-root <path>", "Private repository root to guard against").option("--timeout-minutes <minutes>", "Per-run timeout planned for future execution", "30").option("--codex-sandbox-backend <backend>", "Codex sandbox backend override: default|legacy-landlock", "default").option("--codex-sandbox-mode <mode>", "Codex sandbox mode: workspace-write|danger-full-access", "workspace-write").option("--codex-network-access", "Allow Codex workspace-write sandbox network access for public docs/npm probes").option("--gemini-sandbox-mode <mode>", "Gemini sandbox mode: required|disabled", "required").option("--skip-runner-probe", "Skip local runner binary/help probing").option("--dry-run", "Write the executor plan without launching the runner", true).option("--live", "Launch one controlled external-agent run after writing the guarded plan").option("--json", "Output as JSON").action(async (opts) => {
40700
+ external.command("execute").description("Create a guarded dry-run executor plan for programmable external-agent runners").requiredOption("--batch <path>", "Path to external_agent_batch_plan.v1 batch.json").option("--runner <runner>", "Runner implementation", "codex").option("--workspace-root <path>", "Clean executor workspace root; must be outside the private repo").option("--private-repo-root <path>", "Private repository root to guard against").option("--timeout-minutes <minutes>", "Per-run timeout planned for future execution", "30").option("--codex-model <model>", "Codex model override for controlled evals; recorded in executor-plan.json and run.json").option("--codex-sandbox-backend <backend>", "Codex sandbox backend override: default|legacy-landlock", "default").option("--codex-sandbox-mode <mode>", "Codex sandbox mode: workspace-write|danger-full-access", "workspace-write").option("--codex-network-access", "Allow Codex workspace-write sandbox network access for public docs/npm probes").option("--gemini-sandbox-mode <mode>", "Gemini sandbox mode: required|disabled", "required").option("--skip-runner-probe", "Skip local runner binary/help probing").option("--dry-run", "Write the executor plan without launching the runner", true).option("--live", "Launch one controlled external-agent run after writing the guarded plan").option("--json", "Output as JSON").action(async (opts) => {
40644
40701
  try {
40645
40702
  const plan = createExternalAgentExecutorPlan({
40646
40703
  batchPath: String(opts.batch),
@@ -40648,6 +40705,7 @@ Exit the shell to finalize run.json.
40648
40705
  workspaceRoot: opts.workspaceRoot ? String(opts.workspaceRoot) : void 0,
40649
40706
  privateRepoRoot: opts.privateRepoRoot ? String(opts.privateRepoRoot) : void 0,
40650
40707
  timeoutMinutes: Number(opts.timeoutMinutes || 30),
40708
+ codexModel: opts.codexModel ? String(opts.codexModel) : void 0,
40651
40709
  codexSandboxBackend: String(opts.codexSandboxBackend || "default"),
40652
40710
  codexSandboxMode: String(opts.codexSandboxMode || "workspace-write"),
40653
40711
  codexNetworkAccess: Boolean(opts.codexNetworkAccess),
@@ -5,6 +5,7 @@
5
5
  "failure_reason_code": "docs_widget_proof_path_unclear",
6
6
  "model_provider": "openai",
7
7
  "model_name": "codex",
8
+ "runner_model": "gpt-5.4-mini",
8
9
  "agent_shell": "vscode-terminal",
9
10
  "workspace_type": "clean-no-repo",
10
11
  "prompt_version": "blank-setup.v1",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@f-o-h/cli",
3
- "version": "0.1.58",
3
+ "version": "0.1.60",
4
4
  "description": "FOH CLI - AI-operator provisioning tool for Front Of House",
5
5
  "license": "UNLICENSED",
6
6
  "bin": {
@@ -25,6 +25,7 @@
25
25
  "failure_reason_code": { "type": ["string", "null"] },
26
26
  "model_provider": { "type": "string", "minLength": 1 },
27
27
  "model_name": { "type": "string", "minLength": 1 },
28
+ "runner_model": { "type": ["string", "null"] },
28
29
  "agent_shell": { "type": "string" },
29
30
  "workspace_type": { "type": "string" },
30
31
  "prompt_version": { "type": "string", "minLength": 1 },