@f-o-h/cli 0.1.58 → 0.1.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -181,16 +181,19 @@ failure packet, and convert it with `foh bug improve`.
|
|
|
181
181
|
For guarded programmable-runner planning:
|
|
182
182
|
|
|
183
183
|
```bash
|
|
184
|
-
foh eval external-agent execute \
|
|
185
|
-
--runner codex \
|
|
186
|
-
--batch test-results/external-agent-runs/<batch>/batch.json \
|
|
187
|
-
--dry-run \
|
|
188
|
-
--json
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
184
|
+
foh eval external-agent execute \
|
|
185
|
+
--runner codex \
|
|
186
|
+
--batch test-results/external-agent-runs/<batch>/batch.json \
|
|
187
|
+
--codex-model gpt-5.4-mini \
|
|
188
|
+
--dry-run \
|
|
189
|
+
--json
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
This writes `executor-plan.json`, creates intentionally empty clean workspaces
|
|
193
|
+
outside the private repo, validates the local Codex binary/help flags, and
|
|
194
|
+
prints exact `codex exec` commands without executing them. Use `--codex-model`
|
|
195
|
+
to compare the same prompt across explicit Codex models; the selected runner
|
|
196
|
+
model is recorded in `executor-plan.json` and controlled-run `run.json`.
|
|
194
197
|
|
|
195
198
|
Before promoting run artifacts, scan and redact them:
|
|
196
199
|
|
package/dist/foh.js
CHANGED
|
@@ -32853,7 +32853,7 @@ var StdioServerTransport = class {
|
|
|
32853
32853
|
};
|
|
32854
32854
|
|
|
32855
32855
|
// src/lib/cli-version.ts
|
|
32856
|
-
var CLI_VERSION = "0.1.58";
|
|
32856
|
+
var CLI_VERSION = "0.1.60";
|
|
32857
32857
|
|
|
32858
32858
|
// src/commands/mcp-serve.ts
|
|
32859
32859
|
var DEFAULT_TIMEOUT_MS = 12e4;
|
|
@@ -39244,6 +39244,7 @@ function readExternalAgentMetadata(runDir) {
|
|
|
39244
39244
|
}
|
|
39245
39245
|
|
|
39246
39246
|
// src/lib/external-agent-executor.ts
|
|
39247
|
+
var GEMINI_HEADLESS_PROBE_TIMEOUT_MS = 15e3;
|
|
39247
39248
|
var CODEX_EXECUTOR_DENIED_ENV_PREFIXES = [
|
|
39248
39249
|
"SUPABASE_",
|
|
39249
39250
|
"DATABASE_",
|
|
@@ -39411,11 +39412,16 @@ function readBatch(batchPath) {
|
|
|
39411
39412
|
return parsed;
|
|
39412
39413
|
}
|
|
39413
39414
|
function defaultRunnerProbe(command, args) {
|
|
39415
|
+
const isGeminiHeadlessSmoke = args.includes("FOH_GEMINI_HEADLESS_PROBE");
|
|
39416
|
+
const spawnOptions = {
|
|
39417
|
+
encoding: "utf8",
|
|
39418
|
+
timeout: isGeminiHeadlessSmoke ? GEMINI_HEADLESS_PROBE_TIMEOUT_MS : void 0
|
|
39419
|
+
};
|
|
39414
39420
|
const result = process.platform === "win32" && command.toLowerCase().endsWith(".cmd") ? (0, import_child_process4.spawnSync)(
|
|
39415
39421
|
"powershell.exe",
|
|
39416
39422
|
["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", `& ${[command, ...args].map(quotePowerShellArg).join(" ")}`],
|
|
39417
|
-
|
|
39418
|
-
) : (0, import_child_process4.spawnSync)(command, args,
|
|
39423
|
+
spawnOptions
|
|
39424
|
+
) : (0, import_child_process4.spawnSync)(command, args, spawnOptions);
|
|
39419
39425
|
return {
|
|
39420
39426
|
status: typeof result.status === "number" ? result.status : null,
|
|
39421
39427
|
stdout: String(result.stdout || ""),
|
|
@@ -39423,6 +39429,9 @@ function defaultRunnerProbe(command, args) {
|
|
|
39423
39429
|
error: result.error
|
|
39424
39430
|
};
|
|
39425
39431
|
}
|
|
39432
|
+
function geminiCapacityUnavailable(text) {
|
|
39433
|
+
return /MODEL_CAPACITY_EXHAUSTED|RESOURCE_EXHAUSTED|No capacity available|rateLimitExceeded|exhausted your capacity|status 429/i.test(text);
|
|
39434
|
+
}
|
|
39426
39435
|
function quotePowerShellArg(value) {
|
|
39427
39436
|
return `'${value.replace(/'/g, "''")}'`;
|
|
39428
39437
|
}
|
|
@@ -39517,6 +39526,9 @@ ${yoloHelp.stderr}`;
|
|
|
39517
39526
|
"--output-last-message"
|
|
39518
39527
|
];
|
|
39519
39528
|
const missing = commonExecFlags.filter((flag) => !execHelpText.includes(flag));
|
|
39529
|
+
if (String(options.codexModel || "").trim() && !execHelpText.includes("--model")) {
|
|
39530
|
+
missing.push("--model");
|
|
39531
|
+
}
|
|
39520
39532
|
const supportsLegacyFullAuto = execHelpText.includes("--full-auto");
|
|
39521
39533
|
const supportsModernApprovalMode = rootHelp.status === 0 && rootHelpText.includes("--ask-for-approval");
|
|
39522
39534
|
if (!supportsLegacyFullAuto && !supportsModernApprovalMode) {
|
|
@@ -39629,7 +39641,19 @@ ${smoke.stderr}`;
|
|
|
39629
39641
|
"Gemini runner sandbox is unavailable on this host. Install/configure Docker/Podman or rerun only on an externally isolated host with --gemini-sandbox-mode disabled."
|
|
39630
39642
|
);
|
|
39631
39643
|
}
|
|
39644
|
+
if (geminiCapacityUnavailable(smokeText)) {
|
|
39645
|
+
throw new ExternalAgentExecutorError(
|
|
39646
|
+
"external_agent_runner_capacity_unavailable",
|
|
39647
|
+
"Gemini runner reached the provider but the selected model has no available capacity. Retry later or configure a supported lower-contention Gemini model before live eval."
|
|
39648
|
+
);
|
|
39649
|
+
}
|
|
39632
39650
|
if (smoke.error || smoke.status !== 0 && !/Auth method|GEMINI_API_KEY|GOOGLE_GENAI_USE_VERTEXAI|GOOGLE_GENAI_USE_GCA/i.test(smokeText)) {
|
|
39651
|
+
if (smoke.error?.code === "ETIMEDOUT") {
|
|
39652
|
+
throw new ExternalAgentExecutorError(
|
|
39653
|
+
"external_agent_runner_headless_probe_timed_out",
|
|
39654
|
+
"Gemini runner headless invocation probe timed out before reaching an auth boundary or provider response."
|
|
39655
|
+
);
|
|
39656
|
+
}
|
|
39633
39657
|
throw new ExternalAgentExecutorError(
|
|
39634
39658
|
"external_agent_runner_headless_probe_failed",
|
|
39635
39659
|
"Gemini runner headless invocation probe failed before reaching an auth boundary."
|
|
@@ -39675,6 +39699,17 @@ function normalizeCodexSandboxMode(value) {
|
|
|
39675
39699
|
`Unsupported Codex sandbox mode: ${value}. Use workspace-write or danger-full-access.`
|
|
39676
39700
|
);
|
|
39677
39701
|
}
|
|
39702
|
+
function normalizeCodexModel(value) {
|
|
39703
|
+
const normalized = String(value || "").trim();
|
|
39704
|
+
if (!normalized) return null;
|
|
39705
|
+
if (!/^[A-Za-z0-9._:@/-]+$/.test(normalized)) {
|
|
39706
|
+
throw new ExternalAgentExecutorError(
|
|
39707
|
+
"invalid_codex_model",
|
|
39708
|
+
"Unsupported Codex model value. Use a plain model id such as gpt-5.4-mini."
|
|
39709
|
+
);
|
|
39710
|
+
}
|
|
39711
|
+
return normalized;
|
|
39712
|
+
}
|
|
39678
39713
|
function normalizeGeminiSandboxMode(value) {
|
|
39679
39714
|
const normalized = (value || "required").trim().toLowerCase();
|
|
39680
39715
|
if (normalized === "required" || normalized === "disabled") return normalized;
|
|
@@ -39683,6 +39718,9 @@ function normalizeGeminiSandboxMode(value) {
|
|
|
39683
39718
|
`Unsupported Gemini sandbox mode: ${value}. Use required or disabled.`
|
|
39684
39719
|
);
|
|
39685
39720
|
}
|
|
39721
|
+
function codexModelArgs(model) {
|
|
39722
|
+
return model ? ["--model", model] : [];
|
|
39723
|
+
}
|
|
39686
39724
|
function codexConfigArgs(input) {
|
|
39687
39725
|
const args = [];
|
|
39688
39726
|
if (input.backend === "legacy-landlock") {
|
|
@@ -39722,6 +39760,7 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39722
39760
|
const runnerProbe = validateRunner(options, runner);
|
|
39723
39761
|
const codexSandboxBackend = normalizeCodexSandboxBackend(options.codexSandboxBackend);
|
|
39724
39762
|
const codexSandboxMode = normalizeCodexSandboxMode(options.codexSandboxMode);
|
|
39763
|
+
const codexModel = runner === "codex" ? normalizeCodexModel(options.codexModel) : null;
|
|
39725
39764
|
const codexNetworkAccess = options.codexNetworkAccess === true;
|
|
39726
39765
|
const privateRepoRoot = (0, import_path13.resolve)(options.privateRepoRoot || options.cwd || process.cwd());
|
|
39727
39766
|
const workspaceRoot = resolveWorkspaceRoot({ batchPath, workspaceRoot: options.workspaceRoot, privateRepoRoot });
|
|
@@ -39770,6 +39809,7 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39770
39809
|
] : [
|
|
39771
39810
|
...runnerProbe.globalArgs,
|
|
39772
39811
|
"exec",
|
|
39812
|
+
...codexModelArgs(codexModel),
|
|
39773
39813
|
...codexConfigArgs({ backend: codexSandboxBackend, networkAccess: codexNetworkAccess }),
|
|
39774
39814
|
"--cd",
|
|
39775
39815
|
workspaceDir,
|
|
@@ -39788,6 +39828,7 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39788
39828
|
run_id: runId,
|
|
39789
39829
|
model_provider: String(run.model_provider || "unknown"),
|
|
39790
39830
|
model_name: String(run.model_name || "unknown-model"),
|
|
39831
|
+
runner_model: runner === "codex" ? codexModel : null,
|
|
39791
39832
|
prompt_version: promptVersion,
|
|
39792
39833
|
run_dir: runDir,
|
|
39793
39834
|
prompt_path: promptPath,
|
|
@@ -39842,6 +39883,7 @@ function createExternalAgentExecutorPlan(options) {
|
|
|
39842
39883
|
},
|
|
39843
39884
|
runner_automation_mode: runnerProbe.automationMode,
|
|
39844
39885
|
codex_automation_mode: runner === "codex" ? runnerProbe.automationMode : null,
|
|
39886
|
+
codex_model: runner === "codex" ? codexModel : null,
|
|
39845
39887
|
codex_sandbox_mode: codexSandboxMode,
|
|
39846
39888
|
codex_sandbox_backend: codexSandboxBackend,
|
|
39847
39889
|
codex_network_access: codexNetworkAccess
|
|
@@ -40003,6 +40045,7 @@ function buildExecutedRunArtifact(input) {
|
|
|
40003
40045
|
failure_reason_code: input.reasonCode,
|
|
40004
40046
|
model_provider: input.run.model_provider,
|
|
40005
40047
|
model_name: input.run.model_name,
|
|
40048
|
+
runner_model: input.run.runner_model,
|
|
40006
40049
|
agent_shell: `${input.run.command}-exec`,
|
|
40007
40050
|
workspace_type: "clean-no-repo-programmatic",
|
|
40008
40051
|
prompt_version: input.run.prompt_version,
|
|
@@ -40330,6 +40373,20 @@ function executorRecoveryCommands(reasonCode, runner) {
|
|
|
40330
40373
|
"Upgrade, downgrade, or reconfigure the runner CLI until the planned non-interactive invocation reaches an auth boundary without parser errors."
|
|
40331
40374
|
];
|
|
40332
40375
|
}
|
|
40376
|
+
if (reasonCode === "external_agent_runner_headless_probe_timed_out") {
|
|
40377
|
+
return [
|
|
40378
|
+
`${normalizedRunner} --version`,
|
|
40379
|
+
`${normalizedRunner} --help`,
|
|
40380
|
+
"Retry the dry-run once. If it repeats, reduce the probe/model contention or use a different subscribed runner before launching live evals."
|
|
40381
|
+
];
|
|
40382
|
+
}
|
|
40383
|
+
if (reasonCode === "external_agent_runner_capacity_unavailable") {
|
|
40384
|
+
return [
|
|
40385
|
+
"gemini --version",
|
|
40386
|
+
"gemini --help",
|
|
40387
|
+
"Retry after the Gemini capacity window resets, or configure a supported lower-contention Gemini model before rerunning the same executor dry-run."
|
|
40388
|
+
];
|
|
40389
|
+
}
|
|
40333
40390
|
return [
|
|
40334
40391
|
"Fix the executor plan input or workspace path and rerun with --dry-run."
|
|
40335
40392
|
];
|
|
@@ -40640,7 +40697,7 @@ Exit the shell to finalize run.json.
|
|
|
40640
40697
|
}), { json: Boolean(opts.json) });
|
|
40641
40698
|
if (!report.ok) process.exitCode = 1;
|
|
40642
40699
|
});
|
|
40643
|
-
external.command("execute").description("Create a guarded dry-run executor plan for programmable external-agent runners").requiredOption("--batch <path>", "Path to external_agent_batch_plan.v1 batch.json").option("--runner <runner>", "Runner implementation", "codex").option("--workspace-root <path>", "Clean executor workspace root; must be outside the private repo").option("--private-repo-root <path>", "Private repository root to guard against").option("--timeout-minutes <minutes>", "Per-run timeout planned for future execution", "30").option("--codex-sandbox-backend <backend>", "Codex sandbox backend override: default|legacy-landlock", "default").option("--codex-sandbox-mode <mode>", "Codex sandbox mode: workspace-write|danger-full-access", "workspace-write").option("--codex-network-access", "Allow Codex workspace-write sandbox network access for public docs/npm probes").option("--gemini-sandbox-mode <mode>", "Gemini sandbox mode: required|disabled", "required").option("--skip-runner-probe", "Skip local runner binary/help probing").option("--dry-run", "Write the executor plan without launching the runner", true).option("--live", "Launch one controlled external-agent run after writing the guarded plan").option("--json", "Output as JSON").action(async (opts) => {
|
|
40700
|
+
external.command("execute").description("Create a guarded dry-run executor plan for programmable external-agent runners").requiredOption("--batch <path>", "Path to external_agent_batch_plan.v1 batch.json").option("--runner <runner>", "Runner implementation", "codex").option("--workspace-root <path>", "Clean executor workspace root; must be outside the private repo").option("--private-repo-root <path>", "Private repository root to guard against").option("--timeout-minutes <minutes>", "Per-run timeout planned for future execution", "30").option("--codex-model <model>", "Codex model override for controlled evals; recorded in executor-plan.json and run.json").option("--codex-sandbox-backend <backend>", "Codex sandbox backend override: default|legacy-landlock", "default").option("--codex-sandbox-mode <mode>", "Codex sandbox mode: workspace-write|danger-full-access", "workspace-write").option("--codex-network-access", "Allow Codex workspace-write sandbox network access for public docs/npm probes").option("--gemini-sandbox-mode <mode>", "Gemini sandbox mode: required|disabled", "required").option("--skip-runner-probe", "Skip local runner binary/help probing").option("--dry-run", "Write the executor plan without launching the runner", true).option("--live", "Launch one controlled external-agent run after writing the guarded plan").option("--json", "Output as JSON").action(async (opts) => {
|
|
40644
40701
|
try {
|
|
40645
40702
|
const plan = createExternalAgentExecutorPlan({
|
|
40646
40703
|
batchPath: String(opts.batch),
|
|
@@ -40648,6 +40705,7 @@ Exit the shell to finalize run.json.
|
|
|
40648
40705
|
workspaceRoot: opts.workspaceRoot ? String(opts.workspaceRoot) : void 0,
|
|
40649
40706
|
privateRepoRoot: opts.privateRepoRoot ? String(opts.privateRepoRoot) : void 0,
|
|
40650
40707
|
timeoutMinutes: Number(opts.timeoutMinutes || 30),
|
|
40708
|
+
codexModel: opts.codexModel ? String(opts.codexModel) : void 0,
|
|
40651
40709
|
codexSandboxBackend: String(opts.codexSandboxBackend || "default"),
|
|
40652
40710
|
codexSandboxMode: String(opts.codexSandboxMode || "workspace-write"),
|
|
40653
40711
|
codexNetworkAccess: Boolean(opts.codexNetworkAccess),
|
package/package.json
CHANGED
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
"failure_reason_code": { "type": ["string", "null"] },
|
|
26
26
|
"model_provider": { "type": "string", "minLength": 1 },
|
|
27
27
|
"model_name": { "type": "string", "minLength": 1 },
|
|
28
|
+
"runner_model": { "type": ["string", "null"] },
|
|
28
29
|
"agent_shell": { "type": "string" },
|
|
29
30
|
"workspace_type": { "type": "string" },
|
|
30
31
|
"prompt_version": { "type": "string", "minLength": 1 },
|