npm - @f-o-h/cli - Versions diffs - 0.1.50 → 0.1.51 - Mend

@f-o-h/cli 0.1.50 → 0.1.51

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -118,11 +118,11 @@ Use this when testing whether a clean coding agent can start from public docs
 and the public npm package without private repo context:
 ```bash
-foh eval external-agent batch \
-  --models openai/codex,anthropic/claude,cursor/agent \
-  --prompt-version blank-setup.v1 \
-  --json
-```
+foh eval external-agent batch \
+  --models openai/codex,anthropic/claude,cursor/agent \
+  --prompt-version blank-setup.v1 \
+  --json
+```
 Run each returned launch command in a clean agent terminal:
@@ -137,11 +137,25 @@ The command writes a versioned prompt, launches an instrumented shell, captures
 FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
 `external_agent_run.v1` artifact when the shell exits.
-Run artifacts include `eval_state` so repeated benchmark runs make reuse
-explicit: org, agent, and widget reuse are expected; fresh paid phone-number
-creation is not expected.
-For guarded programmable-runner planning:
+Run artifacts include `eval_state` so repeated benchmark runs make reuse
+explicit: org, agent, and widget reuse are expected; fresh paid phone-number
+creation is not expected.
+For a planted knowledge-miss benchmark:
+```bash
+foh eval external-agent batch \
+  --models openai/codex \
+  --prompt-version knowledge-miss.v1 \
+  --knowledge-question "Does 12 Acacia Avenue allow Saturday viewings?" \
+  --expected-answer "Saturday viewings are available by appointment only." \
+  --json
+```
+The prompt instructs the clean agent to run `foh knowledge query`, preserve the
+failure packet, and convert it with `foh bug improve`.
+For guarded programmable-runner planning:
 ```bash
 foh eval external-agent execute \

package/dist/foh.js CHANGED Viewed

@@ -32801,7 +32801,7 @@ var StdioServerTransport = class {
 };
 // src/lib/cli-version.ts
-var CLI_VERSION = "0.1.50";
+var CLI_VERSION = "0.1.51";
 // src/commands/mcp-serve.ts
 var DEFAULT_TIMEOUT_MS = 12e4;
@@ -40075,8 +40075,26 @@ function replayPromptContext(replayFile) {
     "- If the replay packet exposes trace/test next commands, run the safest read-only command and convert any failure into `foh bug improve` evidence."
   ].join("\n");
 }
+function knowledgeMissPromptContext(knowledgeQuestion, expectedAnswer) {
+  const question = String(knowledgeQuestion || "").trim();
+  if (!question) return "";
+  const expected = String(expectedAnswer || "").trim();
+  return [
+    "",
+    "Planted knowledge-miss context:",
+    `- Question to diagnose: ${question}`,
+    ...expected ? [`- Expected answer or missing fact: ${expected}`] : [],
+    `- Start by running: npx --yes @f-o-h/cli@latest knowledge query --agent <agent_id> --text ${quoteArg(question)} --explain --json`,
+    "- If the query returns no match or low confidence, write the failure packet and convert it with `foh bug improve --source-type knowledge_miss --from-file <packet.json> --json`.",
+    "- Do not patch around the miss manually; produce the smallest redacted artifact that explains whether the fix belongs to docs, ingestion, retrieval, config, or runtime."
+  ].join("\n");
+}
 function writePrompt(runDir, promptVersion, context = {}) {
-  const prompt = `${PROMPTS[promptVersion] ?? PROMPTS[DEFAULT_PROMPT_VERSION]}${replayPromptContext(context.replayFile)}`;
+  const prompt = [
+    PROMPTS[promptVersion] ?? PROMPTS[DEFAULT_PROMPT_VERSION],
+    replayPromptContext(context.replayFile),
+    knowledgeMissPromptContext(context.knowledgeQuestion, context.expectedAnswer)
+  ].join("");
   const path2 = (0, import_path14.join)(runDir, "prompt.txt");
   (0, import_fs16.writeFileSync)(path2, `${prompt}
 `, "utf8");
@@ -40140,7 +40158,9 @@ function buildRunArtifact(input) {
     docs_pages_used: agentMetadata.docs_pages_used,
     eval_state: buildDefaultEvalState(),
     context: {
-      replay_file: input.session.replay_file ?? null
+      replay_file: input.session.replay_file ?? null,
+      knowledge_question: input.session.knowledge_question ?? null,
+      expected_answer: input.session.expected_answer ?? null
     },
     artifacts: {
       terminal_transcript: null,
@@ -40162,17 +40182,19 @@ function buildRunArtifact(input) {
 function registerEval(program3) {
   const evalCommand = program3.command("eval").description("Run or summarize external-agent evaluation workflows");
   const external = evalCommand.command("external-agent").description("Capture clean external coding-agent setup attempts");
-  external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
+  external.command("batch").description("Create a deterministic multi-model external-agent batch plan").option("--models <list>", "Comma-separated provider/model list", DEFAULT_BATCH_MODELS).option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Batch output directory").option("--json", "Output as JSON").action(async (opts) => {
     const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
     const batchDir = (0, import_path14.resolve)(String(opts.outDir || defaultBatchDir(promptVersion)));
     const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
+    const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
+    const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
     const models = parseModelList(String(opts.models || DEFAULT_BATCH_MODELS));
     (0, import_fs16.mkdirSync)(batchDir, { recursive: true });
     const runs = models.map((model, index) => {
       const runId = `${String(index + 1).padStart(2, "0")}-${safeSlug(model.provider)}-${safeSlug(model.name)}`;
       const runDir = (0, import_path14.join)(batchDir, runId);
       (0, import_fs16.mkdirSync)(runDir, { recursive: true });
-      const promptPath = writePrompt(runDir, promptVersion, { replayFile });
+      const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
       const commandArgs = [
         "eval",
         "external-agent",
@@ -40191,6 +40213,8 @@ function registerEval(program3) {
         runDir
       ];
       if (replayFile) commandArgs.push("--replay-file", replayFile);
+      if (knowledgeQuestion) commandArgs.push("--knowledge-question", knowledgeQuestion);
+      if (expectedAnswer) commandArgs.push("--expected-answer", expectedAnswer);
       return {
         run_id: runId,
         model_provider: model.provider,
@@ -40208,6 +40232,8 @@ function registerEval(program3) {
       batch_dir: batchDir,
       prompt_version: promptVersion,
       replay_file: replayFile ?? null,
+      knowledge_question: knowledgeQuestion ?? null,
+      expected_answer: expectedAnswer ?? null,
       workspace_type: String(opts.workspaceType || "clean-no-repo"),
       agent_shell: String(opts.agentShell || "vscode-terminal"),
       run_count: runs.length,
@@ -40232,14 +40258,16 @@ function registerEval(program3) {
       extra: { batch }
     }), { json: Boolean(opts.json) });
   });
-  external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
+  external.command("run").description("Launch an instrumented shell and emit external_agent_run.v1 when it exits").option("--model-provider <name>", "Model provider label", "unknown").option("--model-name <name>", "Model name label", "unknown-model").option("--prompt-version <version>", "Prompt version", DEFAULT_PROMPT_VERSION).option("--replay-file <path>", "Local transcript/replay artifact to seed replay-failure prompts").option("--knowledge-question <text>", "Question to seed knowledge-miss prompts").option("--expected-answer <text>", "Expected answer or missing fact for planted knowledge-miss prompts").option("--workspace-type <type>", "Workspace type label", "clean-no-repo").option("--agent-shell <name>", "Agent shell label", "vscode-terminal").option("--out-dir <path>", "Run output directory").option("--status <status>", "Final status when not interactively classified: pass|hold|fail", "hold").option("--reason-code <code>", "Failure/hold reason code", "external_agent_run_needs_review").option("--shell <command>", "Shell command to launch for capture").option("--no-shell", "Do not launch a shell; create/finalize artifacts immediately").option("--json", "Output as JSON").action(async (opts) => {
     const status = normalizeStatus(opts.status);
     const promptVersion = String(opts.promptVersion || DEFAULT_PROMPT_VERSION);
     const runDir = (0, import_path14.resolve)(String(opts.outDir || defaultRunDir(opts.modelName, promptVersion)));
     const replayFile = opts.replayFile ? (0, import_path14.resolve)(String(opts.replayFile)) : void 0;
+    const knowledgeQuestion = opts.knowledgeQuestion ? String(opts.knowledgeQuestion) : void 0;
+    const expectedAnswer = opts.expectedAnswer ? String(opts.expectedAnswer) : void 0;
     (0, import_fs16.mkdirSync)(runDir, { recursive: true });
     const runId = runDir.split(/[\\/]/).filter(Boolean).slice(-1)[0];
-    const promptPath = writePrompt(runDir, promptVersion, { replayFile });
+    const promptPath = writePrompt(runDir, promptVersion, { replayFile, knowledgeQuestion, expectedAnswer });
     const shell = inferShell(opts.shell);
     const session = {
       schema_version: "external_agent_capture_session.v1",
@@ -40249,6 +40277,8 @@ function registerEval(program3) {
       model_name: String(opts.modelName || "unknown-model"),
       prompt_version: promptVersion,
       replay_file: replayFile ?? null,
+      knowledge_question: knowledgeQuestion ?? null,
+      expected_answer: expectedAnswer ?? null,
       workspace_type: String(opts.workspaceType || "clean-no-repo"),
       agent_shell: String(opts.agentShell || shell.label),
       manual_intervention_count: 0,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@f-o-h/cli",
-  "version": "0.1.50",
+  "version": "0.1.51",
   "description": "FOH CLI - AI-operator provisioning tool for Front Of House",
   "license": "UNLICENSED",
   "bin": {