npm - @f-o-h/cli - Versions diffs - 0.1.58 → 0.1.60 - Mend

@f-o-h/cli 0.1.58 → 0.1.60

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md +13 -10
package/dist/foh.js +62 -4
package/examples/external-agent-run.example.json +1 -0
package/package.json +1 -1
package/schemas/external-agent-run.schema.json +1 -0

package/README.md CHANGED Viewed

@@ -181,16 +181,19 @@ failure packet, and convert it with `foh bug improve`.
 For guarded programmable-runner planning:
 ```bash
-foh eval external-agent execute \
-  --runner codex \
-  --batch test-results/external-agent-runs/<batch>/batch.json \
-  --dry-run \
-  --json
-```
-This writes `executor-plan.json`, creates intentionally empty clean workspaces
-outside the private repo, validates the local Codex binary/help flags, and
-prints exact `codex exec` commands without executing them.
+foh eval external-agent execute \
+  --runner codex \
+  --batch test-results/external-agent-runs/<batch>/batch.json \
+  --codex-model gpt-5.4-mini \
+  --dry-run \
+  --json
+```
+This writes `executor-plan.json`, creates intentionally empty clean workspaces
+outside the private repo, validates the local Codex binary/help flags, and
+prints exact `codex exec` commands without executing them. Use `--codex-model`
+to compare the same prompt across explicit Codex models; the selected runner
+model is recorded in `executor-plan.json` and controlled-run `run.json`.
 Before promoting run artifacts, scan and redact them:

package/dist/foh.js CHANGED Viewed

@@ -32853,7 +32853,7 @@ var StdioServerTransport = class {
 };
 // src/lib/cli-version.ts
-var CLI_VERSION = "0.1.58";
+var CLI_VERSION = "0.1.60";
 // src/commands/mcp-serve.ts
 var DEFAULT_TIMEOUT_MS = 12e4;
@@ -39244,6 +39244,7 @@ function readExternalAgentMetadata(runDir) {
 }
 // src/lib/external-agent-executor.ts
+var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
 var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
   "SUPABASE_",
   "DATABASE_",
@@ -39411,11 +39412,16 @@ function readBatch(batchPath) {
   return parsed;
 }
 function defaultRunnerProbe(command, args) {
+  const isGeminiHeadlessSmoke = args.includes("FOH_GEMINI_HEADLESS_PROBE");
+  const spawnOptions = {
+    encoding: "utf8",
+    timeout: isGeminiHeadlessSmoke ? GEMINI_HEADLESS_PROBE_TIMEOUT_MS : void 0
+  };
   const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process4.spawnSync)(
     "powershell.exe",
     ["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${[command, ...args].map(quotePowerShellArg).join(" ")}`],
-    { encoding: "utf8" }
-  ) : (0, import_child_process4.spawnSync)(command, args, { encoding: "utf8" });
+    spawnOptions
+  ) : (0, import_child_process4.spawnSync)(command, args, spawnOptions);
   return {
     status: typeof result.status === "number" ? result.status : null,
     stdout: String(result.stdout || ""),
@@ -39423,6 +39429,9 @@ function defaultRunnerProbe(command, args) {
     error: result.error
   };
 }
+function geminiCapacityUnavailable(text) {
+  return /MODEL_CAPACITY_EXHAUSTED|RESOURCE_EXHAUSTED|No capacity available|rateLimitExceeded|exhausted your capacity|status 429/i.test(text);
+}
 function quotePowerShellArg(value) {
   return `'${value.replace(/'/g, "''")}'`;
 }
@@ -39517,6 +39526,9 @@ ${yoloHelp.stderr}`;
     "--output-last-message"
   ];
   const missing = commonExecFlags.filter((flag) => !execHelpText.includes(flag));
+  if (String(options.codexModel || "").trim() && !execHelpText.includes("--model")) {
+    missing.push("--model");
+  }
   const supportsLegacyFullAuto = execHelpText.includes("--full-auto");
   const supportsModernApprovalMode = rootHelp.status === 0 && rootHelpText.includes("--ask-for-approval");
   if (!supportsLegacyFullAuto && !supportsModernApprovalMode) {
@@ -39629,7 +39641,19 @@ ${smoke.stderr}`;
       "Gemini runner sandbox is unavailable on this host. Install/configure Docker/Podman or rerun only on an externally isolated host with --gemini-sandbox-mode disabled."
     );
   }
+  if (geminiCapacityUnavailable(smokeText)) {
+    throw new ExternalAgentExecutorError(
+      "external_agent_runner_capacity_unavailable",
+      "Gemini runner reached the provider but the selected model has no available capacity. Retry later or configure a supported lower-contention Gemini model before live eval."
+    );
+  }
   if (smoke.error || smoke.status !== 0 && !/Auth method|GEMINI_API_KEY|GOOGLE_GENAI_USE_VERTEXAI|GOOGLE_GENAI_USE_GCA/i.test(smokeText)) {
+    if (smoke.error?.code === "ETIMEDOUT") {
+      throw new ExternalAgentExecutorError(
+        "external_agent_runner_headless_probe_timed_out",
+        "Gemini runner headless invocation probe timed out before reaching an auth boundary or provider response."
+      );
+    }
     throw new ExternalAgentExecutorError(
       "external_agent_runner_headless_probe_failed",
       "Gemini runner headless invocation probe failed before reaching an auth boundary."
@@ -39675,6 +39699,17 @@ function normalizeCodexSandboxMode(value) {
     `Unsupported Codex sandbox mode: ${value}. Use workspace-write or danger-full-access.`
   );
 }
+function normalizeCodexModel(value) {
+  const normalized = String(value || "").trim();
+  if (!normalized) return null;
+  if (!/^[A-Za-z0-9._:@/-]+$/.test(normalized)) {
+    throw new ExternalAgentExecutorError(
+      "invalid_codex_model",
+      "Unsupported Codex model value. Use a plain model id such as gpt-5.4-mini."
+    );
+  }
+  return normalized;
+}
 function normalizeGeminiSandboxMode(value) {
   const normalized = (value || "required").trim().toLowerCase();
   if (normalized === "required" || normalized === "disabled") return normalized;
@@ -39683,6 +39718,9 @@ function normalizeGeminiSandboxMode(value) {
     `Unsupported Gemini sandbox mode: ${value}. Use required or disabled.`
   );
 }
+function codexModelArgs(model) {
+  return model ? ["--model", model] : [];
+}
 function codexConfigArgs(input) {
   const args = [];
   if (input.backend === "legacy-landlock") {
@@ -39722,6 +39760,7 @@ function createExternalAgentExecutorPlan(options) {
   const runnerProbe = validateRunner(options, runner);
   const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
   const codexSandboxMode = normalizeCodexSandboxMode(options.codexSandboxMode);
+  const codexModel = runner === "codex" ? normalizeCodexModel(options.codexModel) : null;
   const codexNetworkAccess = options.codexNetworkAccess === true;
   const privateRepoRoot = (0, import_path13.resolve)(options.privateRepoRoot || options.cwd || process.cwd());
   const workspaceRoot = resolveWorkspaceRoot({ batchPath, workspaceRoot: options.workspaceRoot, privateRepoRoot });
@@ -39770,6 +39809,7 @@ function createExternalAgentExecutorPlan(options) {
     ] : [
       ...runnerProbe.globalArgs,
       "exec",
+      ...codexModelArgs(codexModel),
       ...codexConfigArgs({ backend: codexSandboxBackend, networkAccess: codexNetworkAccess }),
       "--cd",
       workspaceDir,
@@ -39788,6 +39828,7 @@ function createExternalAgentExecutorPlan(options) {
       run_id: runId,
       model_provider: String(run.model_provider || "unknown"),
       model_name: String(run.model_name || "unknown-model"),
+      runner_model: runner === "codex" ? codexModel : null,
       prompt_version: promptVersion,
       run_dir: runDir,
       prompt_path: promptPath,
@@ -39842,6 +39883,7 @@ function createExternalAgentExecutorPlan(options) {
       },
       runner_automation_mode: runnerProbe.automationMode,
       codex_automation_mode: runner === "codex" ? runnerProbe.automationMode : null,
+      codex_model: runner === "codex" ? codexModel : null,
       codex_sandbox_mode: codexSandboxMode,
       codex_sandbox_backend: codexSandboxBackend,
       codex_network_access: codexNetworkAccess
@@ -40003,6 +40045,7 @@ function buildExecutedRunArtifact(input) {
     failure_reason_code: input.reasonCode,
     model_provider: input.run.model_provider,
     model_name: input.run.model_name,
+    runner_model: input.run.runner_model,
     agent_shell: `${input.run.command}-exec`,
     workspace_type: "clean-no-repo-programmatic",
     prompt_version: input.run.prompt_version,
@@ -40330,6 +40373,20 @@ function executorRecoveryCommands(reasonCode, runner) {
       "Upgrade, downgrade, or reconfigure the runner CLI until the planned non-interactive invocation reaches an auth boundary without parser errors."
     ];
   }
+  if (reasonCode === "external_agent_runner_headless_probe_timed_out") {
+    return [
+      `${normalizedRunner} --version`,
+      `${normalizedRunner} --help`,
+      "Retry the dry-run once. If it repeats, reduce the probe/model contention or use a different subscribed runner before launching live evals."
+    ];
+  }
+  if (reasonCode === "external_agent_runner_capacity_unavailable") {
+    return [
+      "gemini --version",
+      "gemini --help",
+      "Retry after the Gemini capacity window resets, or configure a supported lower-contention Gemini model before rerunning the same executor dry-run."
+    ];
+  }
   return [
     "Fix the executor plan input or workspace path and rerun with --dry-run."
   ];
@@ -40640,7 +40697,7 @@ Exit the shell to finalize run.json.
     }), { json: Boolean(opts.json) });
     if (!report.ok) process.exitCode = 1;
   });
-  external.command("execute").description("Create a guarded dry-run executor plan for programmable external-agent runners").requiredOption("--batch <path>", "Path to external_agent_batch_plan.v1 batch.json").option("--runner <runner>", "Runner implementation", "codex").option("--workspace-root <path>", "Clean executor workspace root; must be outside the private repo").option("--private-repo-root <path>", "Private repository root to guard against").option("--timeout-minutes <minutes>", "Per-run timeout planned for future execution", "30").option("--codex-sandbox-backend <backend>", "Codex sandbox backend override: default|legacy-landlock", "default").option("--codex-sandbox-mode <mode>", "Codex sandbox mode: workspace-write|danger-full-access", "workspace-write").option("--codex-network-access", "Allow Codex workspace-write sandbox network access for public docs/npm probes").option("--gemini-sandbox-mode <mode>", "Gemini sandbox mode: required|disabled", "required").option("--skip-runner-probe", "Skip local runner binary/help probing").option("--dry-run", "Write the executor plan without launching the runner", true).option("--live", "Launch one controlled external-agent run after writing the guarded plan").option("--json", "Output as JSON").action(async (opts) => {
+  external.command("execute").description("Create a guarded dry-run executor plan for programmable external-agent runners").requiredOption("--batch <path>", "Path to external_agent_batch_plan.v1 batch.json").option("--runner <runner>", "Runner implementation", "codex").option("--workspace-root <path>", "Clean executor workspace root; must be outside the private repo").option("--private-repo-root <path>", "Private repository root to guard against").option("--timeout-minutes <minutes>", "Per-run timeout planned for future execution", "30").option("--codex-model <model>", "Codex model override for controlled evals; recorded in executor-plan.json and run.json").option("--codex-sandbox-backend <backend>", "Codex sandbox backend override: default|legacy-landlock", "default").option("--codex-sandbox-mode <mode>", "Codex sandbox mode: workspace-write|danger-full-access", "workspace-write").option("--codex-network-access", "Allow Codex workspace-write sandbox network access for public docs/npm probes").option("--gemini-sandbox-mode <mode>", "Gemini sandbox mode: required|disabled", "required").option("--skip-runner-probe", "Skip local runner binary/help probing").option("--dry-run", "Write the executor plan without launching the runner", true).option("--live", "Launch one controlled external-agent run after writing the guarded plan").option("--json", "Output as JSON").action(async (opts) => {
     try {
       const plan = createExternalAgentExecutorPlan({
         batchPath: String(opts.batch),
@@ -40648,6 +40705,7 @@ Exit the shell to finalize run.json.
         workspaceRoot: opts.workspaceRoot ? String(opts.workspaceRoot) : void 0,
         privateRepoRoot: opts.privateRepoRoot ? String(opts.privateRepoRoot) : void 0,
         timeoutMinutes: Number(opts.timeoutMinutes || 30),
+        codexModel: opts.codexModel ? String(opts.codexModel) : void 0,
         codexSandboxBackend: String(opts.codexSandboxBackend || "default"),
         codexSandboxMode: String(opts.codexSandboxMode || "workspace-write"),
         codexNetworkAccess: Boolean(opts.codexNetworkAccess),

package/examples/external-agent-run.example.json CHANGED Viewed

@@ -5,6 +5,7 @@
   "failure_reason_code": "docs_widget_proof_path_unclear",
   "model_provider": "openai",
   "model_name": "codex",
+  "runner_model": "gpt-5.4-mini",
   "agent_shell": "vscode-terminal",
   "workspace_type": "clean-no-repo",
   "prompt_version": "blank-setup.v1",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@f-o-h/cli",
-  "version": "0.1.58",
+  "version": "0.1.60",
   "description": "FOH CLI - AI-operator provisioning tool for Front Of House",
   "license": "UNLICENSED",
   "bin": {

package/schemas/external-agent-run.schema.json CHANGED Viewed

@@ -25,6 +25,7 @@
     "failure_reason_code": { "type": ["string", "null"] },
     "model_provider": { "type": "string", "minLength": 1 },
     "model_name": { "type": "string", "minLength": 1 },
+    "runner_model": { "type": ["string", "null"] },
     "agent_shell": { "type": "string" },
     "workspace_type": { "type": "string" },
     "prompt_version": { "type": "string", "minLength": 1 },