@f-o-h/cli 0.1.50 → 0.1.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +24 -10
  2. package/dist/foh.js +37 -7
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -118,11 +118,11 @@ Use this when testing whether a clean coding agent can start from public docs
118
118
  and the public npm package without private repo context:
119
119
 
120
120
  ```bash
121
- foh eval external-agent batch \
122
- --models openai/codex,anthropic/claude,cursor/agent \
123
- --prompt-version blank-setup.v1 \
124
- --json
125
- ```
121
+ foh eval external-agent batch \
122
+ --models openai/codex,anthropic/claude,cursor/agent \
123
+ --prompt-version blank-setup.v1 \
124
+ --json
125
+ ```
126
126
 
127
127
  Run each returned launch command in a clean agent terminal:
128
128
 
@@ -137,11 +137,25 @@ The command writes a versioned prompt, launches an instrumented shell, captures
137
137
  FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
138
138
  `external_agent_run.v1` artifact when the shell exits.
139
139
 
140
- Run artifacts include `eval_state` so repeated benchmark runs make reuse
141
- explicit: org, agent, and widget reuse are expected; fresh paid phone-number
142
- creation is not expected.
143
-
144
- For guarded programmable-runner planning:
140
+ Run artifacts include `eval_state` so repeated benchmark runs make reuse
141
+ explicit: org, agent, and widget reuse are expected; fresh paid phone-number
142
+ creation is not expected.
143
+
144
+ For a planted knowledge-miss benchmark:
145
+
146
+ ```bash
147
+ foh eval external-agent batch \
148
+ --models openai/codex \
149
+ --prompt-version knowledge-miss.v1 \
150
+ --knowledge-question "Does 12 Acacia Avenue allow Saturday viewings?" \
151
+ --expected-answer "Saturday viewings are available by appointment only." \
152
+ --json
153
+ ```
154
+
155
+ The prompt instructs the clean agent to run `foh knowledge query`, preserve the
156
+ failure packet, and convert it with `foh bug improve`.
157
+
158
+ For guarded programmable-runner planning:
145
159
 
146
160
  ```bash
147
161
  foh eval external-agent execute \
package/dist/foh.js CHANGED
@@ -32801,7 +32801,7 @@ var StdioServerTransport = class {
32801
32801
  };
32802
32802
 
32803
32803
  // src/lib/cli-version.ts
32804
- var CLI_VERSION = "0.1.50";
32804
+ var CLI_VERSION = "0.1.51";
32805
32805
 
32806
32806
  // src/commands/mcp-serve.ts
32807
32807
  var DEFAULT_TIMEOUT_MS = 12e4;
@@ -40075,8 +40075,26 @@ function replayPromptContext(replayFile) {
40075
40075
  "- If the replay packet exposes trace/test next commands, run the safest read-only command and convert any failure into `foh bug improve` evidence."
40076
40076
  ].join("\n");
40077
40077
  }
40078
+ function knowledgeMissPromptContext(knowledgeQuestion, expectedAnswer) {
40079
+ const question = String(knowledgeQuestion || "").trim();
40080
+ if (!question) return "";
40081
+ const expected = String(expectedAnswer || "").trim();
40082
+ return [
40083
+ "",
40084
+ "Planted knowledge-miss context:",
40085
+ `- Question to diagnose: ${question}`,
40086
+ ...expected ? [`- Expected answer or missing fact: ${expected}`] : [],
40087
+ `- Start by running: npx --yes @f-o-h/cli@latest knowledge query --agent <agent_id> --text ${quoteArg(question)} --explain --json`,
40088
+ "- If the query returns no match or low confidence, write the failure packet and convert it with `foh bug improve --source-type knowledge_miss --from-file <packet.json> --json`.",
40089
+ "- Do not patch around the miss manually; produce the smallest redacted artifact that explains whether the fix belongs to docs, ingestion, retrieval, config, or runtime."
40090
+ ].join("\n");
40091
+ }
40078
40092
  function writePrompt(runDir, promptVersion, context = {}) {
40079
- const prompt = `${PROMPTS[promptVersion] ?? PROMPTS[DEFAULT_PROMPT_VERSION]}${replayPromptContext(context.replayFile)}`;
40093
+ const prompt = [
40094
+ PROMPTS[promptVersion] ?? PROMPTS[DEFAULT_PROMPT_VERSION],
40095
+ replayPromptContext(context.replayFile),
40096
+ knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
40097
+ ].join("");
40080
40098
  const path2 = (0, import_path14.join)(runDir, "prompt.txt");
40081
40099
  (0, import_fs16.writeFileSync)(path2, `${prompt}
40082
40100
  `, "utf8");
@@ -40140,7 +40158,9 @@ function buildRunArtifact(input) {
40140
40158
  docs_pages_used: agentMetadata.docs_pages_used,
40141
40159
  eval_state: buildDefaultEvalState(),
40142
40160
  context: {
40143
- replay_file: input.session.replay_file ?? null
40161
+ replay_file: input.session.replay_file ?? null,
40162
+ knowledge_question: input.session.knowledge_question ?? null,
40163
+ expected_answer: input.session.expected_answer ?? null
40144
40164
  },
40145
40165
  artifacts: {
40146
40166
  terminal_transcript: null,
@@ -40162,17 +40182,19 @@ function buildRunArtifact(input) {
40162
40182
  function registerEval(program3) {
40163
40183
  const evalCommand = program3.command("eval").description("Run or summarize external-agent evaluation workflows");
40164
40184
  const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
40165
- external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
40185
+ external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
40166
40186
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40167
40187
  const batchDir = (0, import_path14.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
40168
40188
  const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
40189
+ const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40190
+ const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40169
40191
  const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
40170
40192
  (0, import_fs16.mkdirSync)(batchDir, { recursive: true });
40171
40193
  const runs = models.map((model, index) => {
40172
40194
  const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
40173
40195
  const runDir = (0, import_path14.join)(batchDir, runId);
40174
40196
  (0, import_fs16.mkdirSync)(runDir, { recursive: true });
40175
- const promptPath = writePrompt(runDir, promptVersion, { replayFile });
40197
+ const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40176
40198
  const commandArgs = [
40177
40199
  "eval",
40178
40200
  "external-agent",
@@ -40191,6 +40213,8 @@ function registerEval(program3) {
40191
40213
  runDir
40192
40214
  ];
40193
40215
  if (replayFile) commandArgs.push("--replay-file", replayFile);
40216
+ if (knowledgeQuestion) commandArgs.push("--knowledge-question", knowledgeQuestion);
40217
+ if (expectedAnswer) commandArgs.push("--expected-answer", expectedAnswer);
40194
40218
  return {
40195
40219
  run_id: runId,
40196
40220
  model_provider: model.provider,
@@ -40208,6 +40232,8 @@ function registerEval(program3) {
40208
40232
  batch_dir: batchDir,
40209
40233
  prompt_version: promptVersion,
40210
40234
  replay_file: replayFile ?? null,
40235
+ knowledge_question: knowledgeQuestion ?? null,
40236
+ expected_answer: expectedAnswer ?? null,
40211
40237
  workspace_type: String(opts.workspaceType || "clean-no-repo"),
40212
40238
  agent_shell: String(opts.agentShell || "vscode-terminal"),
40213
40239
  run_count: runs.length,
@@ -40232,14 +40258,16 @@ function registerEval(program3) {
40232
40258
  extra: { batch }
40233
40259
  }), { json: Boolean(opts.json) });
40234
40260
  });
40235
- external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
40261
+ external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
40236
40262
  const status = normalizeStatus(opts.status);
40237
40263
  const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
40238
40264
  const runDir = (0, import_path14.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
40239
40265
  const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
40266
+ const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
40267
+ const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
40240
40268
  (0, import_fs16.mkdirSync)(runDir, { recursive: true });
40241
40269
  const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
40242
- const promptPath = writePrompt(runDir, promptVersion, { replayFile });
40270
+ const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
40243
40271
  const shell = inferShell(opts.shell);
40244
40272
  const session = {
40245
40273
  schema_version: "external_agent_capture_session.v1",
@@ -40249,6 +40277,8 @@ function registerEval(program3) {
40249
40277
  model_name: String(opts.modelName || "unknown-model"),
40250
40278
  prompt_version: promptVersion,
40251
40279
  replay_file: replayFile ?? null,
40280
+ knowledge_question: knowledgeQuestion ?? null,
40281
+ expected_answer: expectedAnswer ?? null,
40252
40282
  workspace_type: String(opts.workspaceType || "clean-no-repo"),
40253
40283
  agent_shell: String(opts.agentShell || shell.label),
40254
40284
  manual_intervention_count: 0,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@f-o-h/cli",
3
- "version": "0.1.50",
3
+ "version": "0.1.51",
4
4
  "description": "FOH CLI - AI-operator provisioning tool for Front Of House",
5
5
  "license": "UNLICENSED",
6
6
  "bin": {