npm - @f-o-h/cli - Versions diffs - 0.1.35 → 0.1.37 - Mend

@f-o-h/cli 0.1.35 → 0.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md +8 -4
package/dist/foh.js +27 -1
package/examples/external-agent-run.example.json +11 -0
package/package.json +1 -1
package/schemas/external-agent-run.schema.json +15 -0

package/README.md CHANGED Viewed

@@ -4,7 +4,7 @@ AI-operator provisioning CLI for Front Of House.
 Public mirror: https://github.com/iiko38/front-of-house-cli
-Current candidate baseline: `@f-o-h/cli@0.1.35`
+Current published baseline: `@f-o-h/cli@0.1.37`
 This mirror is a generated release artifact. The private product monorepo is not
 published here, and no open-source license is granted unless stated separately.
@@ -123,9 +123,13 @@ foh eval external-agent run \
   --prompt-version blank-setup.v1
 ```
-The command writes a versioned prompt, launches an instrumented shell, captures
-FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
-`external_agent_run.v1` artifact when the shell exits.
+The command writes a versioned prompt, launches an instrumented shell, captures
+FOH CLI commands into `commands.ndjson`, and finalizes `run.json` as an
+`external_agent_run.v1` artifact when the shell exits.
+Run artifacts include `eval_state` so repeated benchmark runs make reuse
+explicit: org, agent, and widget reuse are expected; fresh paid phone-number
+creation is not expected.
 For guarded programmable-runner planning:

package/dist/foh.js CHANGED Viewed

@@ -32755,7 +32755,7 @@ var StdioServerTransport = class {
 };
 // src/lib/cli-version.ts
-var CLI_VERSION = "0.1.35";
+var CLI_VERSION = "0.1.37";
 // src/commands/mcp-serve.ts
 var DEFAULT_TIMEOUT_MS = 12e4;
@@ -39292,6 +39292,17 @@ function buildExecutedRunArtifact(input) {
     ],
     commands_run: commands.map((command) => command.command),
     docs_pages_used: [],
+    eval_state: {
+      org_reuse_expected: true,
+      agent_reuse_expected: true,
+      widget_reuse_expected: true,
+      fresh_org_expected: false,
+      fresh_agent_expected: false,
+      phone_purchase_expected: false,
+      paid_resource_creation_expected: false,
+      spend_policy_expected: NO_SPEND_POLICY,
+      rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
+    },
     artifacts: {
       terminal_transcript: relativeArtifactName(input.run.outputs.jsonl),
       command_log: (0, import_fs14.existsSync)((0, import_path12.join)(input.run.run_dir, "commands.ndjson")) ? "commands.ndjson" : null,
@@ -39509,6 +39520,19 @@ function writeSession(runDir, session) {
 `, "utf8");
   return path2;
 }
+function buildDefaultEvalState() {
+  return {
+    org_reuse_expected: true,
+    agent_reuse_expected: true,
+    widget_reuse_expected: true,
+    fresh_org_expected: false,
+    fresh_agent_expected: false,
+    phone_purchase_expected: false,
+    paid_resource_creation_expected: false,
+    spend_policy_expected: "no_spend",
+    rationale: "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
+  };
+}
 function buildRunArtifact(input) {
   const commands = readCommandRecords(input.runDir);
   const startedAt = String(input.session.started_at);
@@ -39545,6 +39569,7 @@ function buildRunArtifact(input) {
     ],
     commands_run: commands.map((command) => command.command),
     docs_pages_used: [],
+    eval_state: buildDefaultEvalState(),
     artifacts: {
       terminal_transcript: null,
       command_log: "commands.ndjson",
@@ -39651,6 +39676,7 @@ function registerEval(program3) {
       manual_intervention_count: 0,
       run_dir: runDir,
       prompt_path: promptPath,
+      eval_state: buildDefaultEvalState(),
       capture_env: {
         [EXTERNAL_AGENT_RUN_DIR_ENV]: runDir,
         [EXTERNAL_AGENT_PROMPT_VERSION_ENV]: promptVersion

package/examples/external-agent-run.example.json CHANGED Viewed

@@ -38,6 +38,17 @@
     "https://frontofhouse.okii.uk/guides/cli-install-and-upgrade",
     "https://frontofhouse.okii.uk/guides/error-handling-and-debugging"
   ],
+  "eval_state": {
+    "org_reuse_expected": true,
+    "agent_reuse_expected": true,
+    "widget_reuse_expected": true,
+    "fresh_org_expected": false,
+    "fresh_agent_expected": false,
+    "phone_purchase_expected": false,
+    "paid_resource_creation_expected": false,
+    "spend_policy_expected": "no_spend",
+    "rationale": "Mass external-agent evals benchmark public docs/CLI/API clarity; reuse avoids paid phone and Twilio inventory churn."
+  },
   "artifacts": {
     "terminal_transcript": "terminal-transcript.txt",
     "proof_bundle": null,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@f-o-h/cli",
-  "version": "0.1.35",
+  "version": "0.1.37",
   "description": "FOH CLI - AI-operator provisioning tool for Front Of House",
   "license": "UNLICENSED",
   "bin": {

package/schemas/external-agent-run.schema.json CHANGED Viewed

@@ -67,6 +67,21 @@
       "type": "array",
       "items": { "type": "string" }
     },
+    "eval_state": {
+      "type": "object",
+      "properties": {
+        "org_reuse_expected": { "type": "boolean" },
+        "agent_reuse_expected": { "type": "boolean" },
+        "widget_reuse_expected": { "type": "boolean" },
+        "fresh_org_expected": { "type": "boolean" },
+        "fresh_agent_expected": { "type": "boolean" },
+        "phone_purchase_expected": { "type": "boolean" },
+        "paid_resource_creation_expected": { "type": "boolean" },
+        "spend_policy_expected": { "type": "string" },
+        "rationale": { "type": "string" }
+      },
+      "additionalProperties": true
+    },
     "artifacts": {
       "type": "object",
       "properties": {