@f-o-h/cli 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +57 -57
  2. package/dist/foh.js +21 -10
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -4,7 +4,7 @@ AI-operator provisioning CLI for Front Of House.
4
4
 
5
5
  Public mirror: https://github.com/iiko38/front-of-house-cli
6
6
 
7
- Current published baseline: `@f-o-h/cli@0.1.12`
7
+ Current published baseline: `@f-o-h/cli@0.1.14`
8
8
 
9
9
  This mirror is a generated release artifact. The private product monorepo is not
10
10
  published here, and no open-source license is granted unless stated separately.
@@ -85,67 +85,67 @@ The CLI defaults to the production API at `https://api.frontofhouse.okii.uk`.
85
85
 
86
86
  ## External-Agent Eval Capture
87
87
 
88
- Use this when testing whether a clean coding agent can start from public docs
89
- and the public npm package without private repo context:
90
-
91
- ```bash
92
- foh eval external-agent batch \
93
- --models openai/codex,anthropic/claude,cursor/agent \
94
- --prompt-version blank-setup.v1 \
95
- --json
96
- ```
97
-
98
- Run each returned launch command in a clean agent terminal:
99
-
100
- ```bash
101
- foh eval external-agent run \
88
+ Use this when testing whether a clean coding agent can start from public docs
89
+ and the public npm package without private repo context:
90
+
91
+ ```bash
92
+ foh eval external-agent batch \
93
+ --models openai/codex,anthropic/claude,cursor/agent \
94
+ --prompt-version blank-setup.v1 \
95
+ --json
96
+ ```
97
+
98
+ Run each returned launch command in a clean agent terminal:
99
+
100
+ ```bash
101
+ foh eval external-agent run \
102
102
  --model-provider openai \
103
103
  --model-name codex \
104
104
  --prompt-version blank-setup.v1
105
105
  ```
106
106
 
107
- The command writes a versioned prompt, launches an instrumented shell, captures
108
- FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
109
- `external_agent_run.v1` artifact when the shell exits.
110
-
111
- For guarded programmable-runner planning:
112
-
113
- ```bash
114
- foh eval external-agent execute \
115
- --runner codex \
116
- --batch test-results/external-agent-runs/<batch>/batch.json \
117
- --dry-run \
118
- --json
119
- ```
120
-
121
- This writes `executor-plan.json`, creates intentionally empty clean workspaces
122
- outside the private repo, validates the local Codex binary/help flags, and
123
- prints exact `codex exec` commands without executing them.
124
-
125
- Before promoting run artifacts, scan and redact them:
126
-
127
- ```bash
128
- foh eval external-agent scan-artifacts \
129
- --run-dir test-results/external-agent-runs/<batch>/<run-id> \
130
- --private-repo-root <private-repo-root> \
131
- --write-redacted \
132
- --json
133
- ```
134
-
135
- After dry-run review, one controlled Codex run can be launched explicitly with
136
- `--live`. Live mode is intentionally limited to one run per batch and finalizes
137
- `run.json` even on timeout or non-zero exit:
138
-
139
- ```bash
140
- foh eval external-agent execute \
141
- --runner codex \
142
- --batch test-results/external-agent-runs/<one-model-batch>/batch.json \
143
- --timeout-minutes 30 \
144
- --live \
145
- --json
146
- ```
147
-
148
- ## Local Scenario Suites
107
+ The command writes a versioned prompt, launches an instrumented shell, captures
108
+ FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
109
+ `external_agent_run.v1` artifact when the shell exits.
110
+
111
+ For guarded programmable-runner planning:
112
+
113
+ ```bash
114
+ foh eval external-agent execute \
115
+ --runner codex \
116
+ --batch test-results/external-agent-runs/<batch>/batch.json \
117
+ --dry-run \
118
+ --json
119
+ ```
120
+
121
+ This writes `executor-plan.json`, creates intentionally empty clean workspaces
122
+ outside the private repo, validates the local Codex binary/help flags, and
123
+ prints exact `codex exec` commands without executing them.
124
+
125
+ Before promoting run artifacts, scan and redact them:
126
+
127
+ ```bash
128
+ foh eval external-agent scan-artifacts \
129
+ --run-dir test-results/external-agent-runs/<batch>/<run-id> \
130
+ --private-repo-root <private-repo-root> \
131
+ --write-redacted \
132
+ --json
133
+ ```
134
+
135
+ After dry-run review, one controlled Codex run can be launched explicitly with
136
+ `--live`. Live mode is intentionally limited to one run per batch and finalizes
137
+ `run.json` even on timeout or non-zero exit:
138
+
139
+ ```bash
140
+ foh eval external-agent execute \
141
+ --runner codex \
142
+ --batch test-results/external-agent-runs/<one-model-batch>/batch.json \
143
+ --timeout-minutes 30 \
144
+ --live \
145
+ --json
146
+ ```
147
+
148
+ ## Local Scenario Suites
149
149
 
150
150
  `foh test run --suite <file>` runs deterministic widget-runtime checks for a
151
151
  specific agent. The suite format supports reply text checks plus structured
package/dist/foh.js CHANGED
@@ -32640,7 +32640,7 @@ var StdioServerTransport = class {
32640
32640
  };
32641
32641
 
32642
32642
  // src/lib/cli-version.ts
32643
- var CLI_VERSION = "0.1.12";
32643
+ var CLI_VERSION = "0.1.14";
32644
32644
 
32645
32645
  // src/commands/mcp-serve.ts
32646
32646
  var DEFAULT_TIMEOUT_MS = 12e4;
@@ -36437,8 +36437,13 @@ function inferReasonCode(artifact) {
36437
36437
  }
36438
36438
  return nonEmpty2(getPath2(artifact, "status"));
36439
36439
  }
36440
- function inferPromotionDecision(sourceType) {
36441
- if (sourceType === "external_agent_run") return "fix_docs";
36440
+ function inferPromotionDecision(sourceType, reasonCode) {
36441
+ const reason = String(reasonCode || "").toLowerCase();
36442
+ if (sourceType === "external_agent_run") {
36443
+ if (reason.includes("exec_policy") || reason.includes("policy_blocked") || reason.includes("auth") || reason.includes("config")) return "fix_config";
36444
+ if (reason.includes("cli") || reason.includes("command") || reason.includes("flag")) return "fix_cli";
36445
+ return "fix_docs";
36446
+ }
36442
36447
  if (sourceType === "knowledge_miss") return "fix_docs";
36443
36448
  if (sourceType === "setup_failure" || sourceType === "proof_failure" || sourceType === "live_proof_failure") return "fix_config";
36444
36449
  if (sourceType === "replay_failure" || sourceType === "runtime_miss") return "add_test";
@@ -36526,7 +36531,6 @@ function readSourceArtifact(path2) {
36526
36531
  function buildImprovementPacket(input) {
36527
36532
  const artifact = input.sourceArtifact ?? null;
36528
36533
  const sourceType = parseEnum(input.sourceType, IMPROVEMENT_SOURCE_TYPES, "--source-type") ?? inferSourceType(artifact);
36529
- const promotionDecision = parseEnum(input.promotionDecision, IMPROVEMENT_DECISIONS, "--recommendation") ?? inferPromotionDecision(sourceType);
36530
36534
  const ids = collectIds(artifact, input.ids);
36531
36535
  assertOrgBoundary(artifact, input.ids?.org_id);
36532
36536
  const reasonCode = nonEmpty2(input.reasonCode) ?? inferReasonCode(artifact);
@@ -36538,6 +36542,7 @@ function buildImprovementPacket(input) {
36538
36542
  statusCode: 400
36539
36543
  });
36540
36544
  }
36545
+ const promotionDecision = parseEnum(input.promotionDecision, IMPROVEMENT_DECISIONS, "--recommendation") ?? inferPromotionDecision(sourceType, reasonCode);
36541
36546
  const evidenceSummary = redactString(
36542
36547
  nonEmpty2(input.evidenceSummary) ?? nonEmpty2(getPath2(artifact, "summary")) ?? `Improvement candidate generated from ${sourceType} with reason ${reasonCode}.`
36543
36548
  );
@@ -38739,6 +38744,9 @@ function proofArtifactPasses(runDir) {
38739
38744
  function readIfExists(path2) {
38740
38745
  return (0, import_fs14.existsSync)(path2) ? (0, import_fs14.readFileSync)(path2, "utf8") : "";
38741
38746
  }
38747
+ function relativeArtifactName(path2) {
38748
+ return (0, import_path12.basename)(path2);
38749
+ }
38742
38750
  function classifyRun(input) {
38743
38751
  if (input.timedOut) return { status: "hold", reasonCode: "codex_runner_timeout" };
38744
38752
  if (!input.artifactSafetyOk) return { status: "fail", reasonCode: "external_agent_artifact_safety_blocked" };
@@ -38749,6 +38757,9 @@ ${stderr}`;
38749
38757
  if (/need[^.\n]*(?:private|source)[^.\n]*repo|cannot[^.\n]*without[^.\n]*(?:private|source)[^.\n]*repo|clone[^.\n]*(?:private|source)[^.\n]*repo/i.test(combined)) {
38750
38758
  return { status: "fail", reasonCode: "private_repo_assumption_detected" };
38751
38759
  }
38760
+ if (/(?:blocked|rejected|declined) by policy|EXEC_POLICY_BLOCKED|command execution was rejected|shell commands were rejected/i.test(combined)) {
38761
+ return { status: "hold", reasonCode: "codex_exec_policy_blocked" };
38762
+ }
38752
38763
  if (/browser|approve|approval|login|auth|sign in/i.test(combined) && !proofArtifactPasses(input.run.run_dir)) {
38753
38764
  return { status: "hold", reasonCode: "auth_browser_approval_required" };
38754
38765
  }
@@ -38791,21 +38802,21 @@ function buildExecutedRunArtifact(input) {
38791
38802
  commands_run: commands.map((command) => command.command),
38792
38803
  docs_pages_used: [],
38793
38804
  artifacts: {
38794
- terminal_transcript: input.run.outputs.jsonl,
38805
+ terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
38795
38806
  command_log: (0, import_fs14.existsSync)((0, import_path12.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
38796
38807
  proof_bundle: (0, import_fs14.existsSync)((0, import_path12.join)(input.run.run_dir, "proof.json")) ? "proof.json" : null,
38797
38808
  replay_packet: (0, import_fs14.existsSync)((0, import_path12.join)(input.run.run_dir, "replay.json")) ? "replay.json" : null,
38798
38809
  knowledge_packet: (0, import_fs14.existsSync)((0, import_path12.join)(input.run.run_dir, "knowledge.json")) ? "knowledge.json" : null,
38799
38810
  improvement_packet: input.status === "pass" ? null : "improvement-packet.json",
38800
38811
  notes: (0, import_fs14.existsSync)((0, import_path12.join)(input.run.run_dir, "notes.md")) ? "notes.md" : null,
38801
- codex_last_message: input.run.outputs.last_message,
38802
- codex_stderr: input.run.outputs.stderr,
38803
- artifact_safety: input.run.outputs.artifact_safety
38812
+ codex_last_message: relativeArtifactName(input.run.outputs.last_message),
38813
+ codex_stderr: relativeArtifactName(input.run.outputs.stderr),
38814
+ artifact_safety: relativeArtifactName(input.run.outputs.artifact_safety)
38804
38815
  },
38805
38816
  summary: input.status === "pass" ? "Controlled Codex external-agent run produced passing proof evidence." : `Controlled Codex external-agent run ended as ${input.status} with reason ${input.reasonCode}.`,
38806
38817
  next_commands: input.status === "pass" ? ["corepack pnpm eval:external-agent:runs:summary"] : [
38807
- `foh eval external-agent scan-artifacts --run-dir ${input.run.run_dir} --private-repo-root <private_repo_root> --write-redacted --json`,
38808
- `foh bug improve --from external-agent-run --file ${input.run.outputs.run} --out ${(0, import_path12.join)(input.run.run_dir, "improvement-packet.json")} --json`,
38818
+ "foh eval external-agent scan-artifacts --run-dir <run_dir> --private-repo-root <private_repo_root> --write-redacted --json",
38819
+ "foh bug improve --from external-agent-run --file <run_dir>/run.json --out <run_dir>/improvement-packet.json --json",
38809
38820
  "corepack pnpm eval:external-agent:runs:summary"
38810
38821
  ]
38811
38822
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@f-o-h/cli",
3
- "version": "0.1.12",
3
+ "version": "0.1.14",
4
4
  "description": "FOH CLI - AI-operator provisioning tool for Front Of House",
5
5
  "license": "UNLICENSED",
6
6
  "bin": {