npm - @fonoster/ctl - Versions diffs - 0.17.1 → 0.18.0 - Mend

@fonoster/ctl 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/commands/applications/eval.d.ts +2 -0
package/dist/commands/applications/eval.js +78 -4
package/dist/utils/printEval.d.ts +23 -18
package/dist/utils/printEval.js +127 -71
package/package.json +3 -3

package/dist/commands/applications/eval.d.ts CHANGED Viewed

@@ -4,6 +4,8 @@ export default class EvalIntelligence extends AuthenticatedCommand<typeof EvalIn
     static readonly examples: string[];
     static readonly flags: {
         file: import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
+        output: import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
+        "output-file": import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
     };
     run(): Promise<void>;
 }

package/dist/commands/applications/eval.js CHANGED Viewed

@@ -41,6 +41,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
+var __asyncValues = (this && this.__asyncValues) || function (o) { // helper with the shape of the standard TypeScript/tslib __asyncValues; yields an async iterator for `o`
+    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
+    var m = o[Symbol.asyncIterator], i; // m: the async-iterator factory of `o`, if it defines one
+    return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i); // no async iterator: wrap the sync iterator in an object `i` that is itself async-iterable
+    function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; } // exposes method n on the wrapper only when the sync iterator has it
+    function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); } // resolves a possibly-promise value before reporting { value, done }
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 /**
  * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
@@ -72,6 +79,8 @@ const printEval_1 = require("../../utils/printEval");
 class EvalIntelligence extends AuthenticatedCommand_1.AuthenticatedCommand {
     run() {
         return __awaiter(this, void 0, void 0, function* () {
+            var _a, e_1, _b, _c;
+            var _d;
             const { flags } = yield this.parse(EvalIntelligence);
             const client = yield this.createSdkClient();
             const applications = new SDK.Applications(client);
@@ -104,28 +113,93 @@ class EvalIntelligence extends AuthenticatedCommand_1.AuthenticatedCommand {
             rawAutopilotApplication.intelligence.config.testCases.scenarios =
                 mappedScenarios;
             const parsedAutopilotApplication = common_1.assistantSchema.parse(rawAutopilotApplication.intelligence.config);
-            // We only need the intelligence portion of the application
             const autopilotApplication = {
                 intelligence: {
                     productRef: rawAutopilotApplication.intelligence.productRef,
                     config: parsedAutopilotApplication
                 }
             };
-            const response = yield applications.evaluateIntelligence(autopilotApplication);
-            (0, printEval_1.printEval)(response.results);
+            const stream = applications.evaluateIntelligence(autopilotApplication);
+            const outputJson = flags.output === "json";
+            const writeOutputFile = Boolean(flags["output-file"]);
+            const collectEvents = outputJson || writeOutputFile;
+            const events = [];
+            let currentScenarioRef = null;
+            const stepIndexByScenario = new Map();
+            try {
+                for (var _e = true, stream_1 = __asyncValues(stream), stream_1_1; stream_1_1 = yield stream_1.next(), _a = stream_1_1.done, !_a; _e = true) {
+                    _c = stream_1_1.value;
+                    _e = false;
+                    const event = _c;
+                    if (collectEvents)
+                        events.push(event);
+                    if (outputJson)
+                        continue;
+                    if (event.type === "stepResult") {
+                        if (currentScenarioRef !== event.scenarioRef) {
+                            currentScenarioRef = event.scenarioRef;
+                            (0, printEval_1.printScenarioHeader)(event.scenarioRef);
+                            stepIndexByScenario.set(event.scenarioRef, 0);
+                        }
+                        const stepIndex = (_d = stepIndexByScenario.get(event.scenarioRef)) !== null && _d !== void 0 ? _d : 0;
+                        (0, printEval_1.printStepResult)(event.scenarioRef, stepIndex, event.stepResult);
+                        stepIndexByScenario.set(event.scenarioRef, stepIndex + 1);
+                    }
+                    else if (event.type === "scenarioSummary") {
+                        (0, printEval_1.printScenarioSummary)(event.scenarioRef, event.overallPassed);
+                    }
+                    else if (event.type === "evalError") {
+                        (0, printEval_1.printEvalError)(event.message);
+                    }
+                }
+            }
+            catch (e_1_1) { e_1 = { error: e_1_1 }; }
+            finally {
+                try {
+                    if (!_e && !_a && (_b = stream_1.return)) yield _b.call(stream_1);
+                }
+                finally { if (e_1) throw e_1.error; }
+            }
+            if (!collectEvents)
+                return;
+            const summary = (0, printEval_1.buildEvalSummary)(events);
+            const jsonString = JSON.stringify(summary, null, 2);
+            if (outputJson) {
+                if (writeOutputFile && flags["output-file"]) {
+                    fs.writeFileSync(flags["output-file"], jsonString, "utf8");
+                }
+                else {
+                    console.log(jsonString);
+                }
+            }
+            else if (writeOutputFile && flags["output-file"]) {
+                fs.writeFileSync(flags["output-file"], jsonString, "utf8");
+            }
         });
     }
 }
 EvalIntelligence.description = "experimental command to test an Autopilot application";
 EvalIntelligence.examples = [
     "<%= config.bin %> <%= command.id %> -f assistant.json",
-    "<%= config.bin %> <%= command.id %> -f assistant.yaml"
+    "<%= config.bin %> <%= command.id %> -f assistant.yaml",
+    "<%= config.bin %> <%= command.id %> -f assistant.yaml -o json",
+    "<%= config.bin %> <%= command.id %> -f assistant.yaml -o json --output-file results.json"
 ];
 EvalIntelligence.flags = {
     file: core_1.Flags.string({
         char: "f",
         description: "path to test cases file (json, yaml, or yml)",
         required: true
+    }),
+    output: core_1.Flags.string({
+        char: "o",
+        description: "output format",
+        options: ["pretty", "json"],
+        default: "pretty"
+    }),
+    "output-file": core_1.Flags.string({
+        description: "write JSON summary to this file (with pretty: also show streamed output)",
+        required: false
     })
 };
 exports.default = EvalIntelligence;

package/dist/utils/printEval.d.ts CHANGED Viewed

@@ -1,20 +1,25 @@
+import type { EvaluateIntelligenceEvent, ScenarioEvaluationReport, StepEvaluationReport } from "@fonoster/types";
+export type EvalSummary = {
+    scenarios: ScenarioEvaluationReport[]; // one report per scenarioRef that produced at least one "stepResult" event
+    errors: string[]; // messages taken from "evalError" events, in the order they were received
+};
 /**
- * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
- * http://github.com/fonoster/fonoster
- *
- * This file is part of Fonoster
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    https://opensource.org/licenses/MIT
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Builds a single JSON summary from streamed eval events.
+ *
+ * @param events Events to aggregate, in the order they were received.
+ * @returns Step results grouped per scenarioRef, plus the collected error messages.
  */
-import { ScenarioEvaluationReport } from "@fonoster/types";
-export declare function printEval(results: ScenarioEvaluationReport[]): void;
+export declare function buildEvalSummary(events: EvaluateIntelligenceEvent[]): EvalSummary;
+/**
+ * Prints a single step result in vertical layout (Step, Human, AI Expected, AI Actual, Tool, Passed).
+ *
+ * @param _scenarioRef Accepted for signature symmetry; the implementation does not read it.
+ * @param stepIndex Zero-based index of the step within its scenario (displayed as stepIndex + 1).
+ * @param step The step report to print.
+ */
+export declare function printStepResult(_scenarioRef: string, stepIndex: number, step: StepEvaluationReport): void;
+/**
+ * Prints scenario header (call once before first step of a scenario).
+ *
+ * @param scenarioRef Scenario identifier shown after the "Scenario: " label.
+ */
+export declare function printScenarioHeader(scenarioRef: string): void;
+/**
+ * Prints scenario completion summary.
+ *
+ * @param scenarioRef Not read by the implementation; only the pass/fail state is printed.
+ * @param overallPassed Selects the "✔ Passed" or "✘ Failed" marker.
+ */
+export declare function printScenarioSummary(scenarioRef: string, overallPassed: boolean): void;
+/**
+ * Prints an eval error event.
+ *
+ * @param message Error text, printed in red beneath an "— Eval error —" banner.
+ */
+export declare function printEvalError(message: string): void;

package/dist/utils/printEval.js CHANGED Viewed

@@ -3,78 +3,134 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.printEval = printEval;
+exports.buildEvalSummary = buildEvalSummary;
+exports.printStepResult = printStepResult;
+exports.printScenarioHeader = printScenarioHeader;
+exports.printScenarioSummary = printScenarioSummary;
+exports.printEvalError = printEvalError;
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 const ansis_1 = __importDefault(require("ansis"));
-const cli_table3_1 = __importDefault(require("cli-table3"));
-function printEval(results) {
-    results.forEach((result) => {
-        console.log(ansis_1.default.bold.blue(`\nScenario: ${result.scenarioRef}`));
-        console.log(ansis_1.default.bold(`Overall Passed: ${result.overallPassed ? ansis_1.default.green("✔") : ansis_1.default.red("✘")}`));
-        const table = new cli_table3_1.default({
-            head: [
-                "Step",
-                "Human Input",
-                "Expected",
-                "AI Response",
-                "Tool Calls",
-                "Passed"
-            ],
-            colWidths: [
-                6, // Step
-                28, // Human Input
-                28, // Expected
-                28, // AI Response
-                null, // Tool Calls - dynamic width
-                8 // Passed
-            ],
-            wordWrap: true
-        });
-        result.steps.forEach((step, index) => {
-            // Format tool evaluations if they exist
-            let toolEvalText = "";
-            if (step.toolEvaluations && step.toolEvaluations.length > 0) {
-                toolEvalText = step.toolEvaluations
-                    .map((toolEval) => {
-                    if (!Object.keys(toolEval.actualParameters || {}).length) {
-                        return `${toolEval.actualTool}()`;
-                    }
-                    const params = JSON.stringify(toolEval.actualParameters || {}, null, 1)
-                        .split("\n")
-                        .map((line, index, arr) => {
-                        if (index === 0)
-                            return "";
-                        if (index === arr.length - 1)
-                            return "";
-                        return " " + line.trim();
-                    })
-                        .join("\n");
-                    return `${toolEval.actualTool}({${params}})`;
-                })
-                    .join("\n\n"); // Add extra line between multiple tool calls
+/**
+ * Builds a single JSON summary from streamed eval events.
+ *
+ * Events are grouped by scenarioRef. Each "stepResult" appends its stepResult to the
+ * matching scenario (creating the scenario on first sight), each "scenarioSummary"
+ * sets overallPassed on an already-known scenario, and each "evalError" contributes
+ * its message to the errors list. Events of any other type are ignored.
+ *
+ * NOTE(review): a "scenarioSummary" whose scenario has produced no "stepResult" is
+ * dropped, so that scenario is absent from the result. Confirm this is intended.
+ *
+ * @param events Events to aggregate, in the order they were received.
+ * @returns An object with `scenarios` (in first-seen order) and `errors`.
+ */
+function buildEvalSummary(events) {
+    const scenariosByRef = new Map();
+    const errors = [];
+    for (const event of events) {
+        if (event.type === "stepResult") {
+            let scenario = scenariosByRef.get(event.scenarioRef);
+            if (!scenario) {
+                scenario = {
+                    scenarioRef: event.scenarioRef,
+                    overallPassed: false, // placeholder until a scenarioSummary event overwrites it
+                    steps: []
+                };
+                scenariosByRef.set(event.scenarioRef, scenario);
             }
-            table.push([
-                index + 1,
-                step.humanInput,
-                step.expectedResponse,
-                step.aiResponse,
-                toolEvalText,
-                step.passed ? ansis_1.default.green("✔") : ansis_1.default.red("✘")
-            ]);
-            // Print error message if step failed
-            if (!step.passed && step.errorMessage) {
-                console.log(ansis_1.default.red(`\nError in step ${index + 1}:`));
-                console.log(ansis_1.default.red(step.errorMessage));
-            }
-            // Print tool evaluation errors if any
-            if (step.toolEvaluations) {
-                step.toolEvaluations.forEach((toolEval) => {
-                    if (!toolEval.passed && toolEval.errorMessage) {
-                        console.log(ansis_1.default.red(`\nTool Error in step ${index + 1}:`));
-                        console.log(ansis_1.default.red(toolEval.errorMessage));
-                    }
-                });
+            scenario.steps.push(event.stepResult);
+        }
+        else if (event.type === "scenarioSummary") {
+            const scenario = scenariosByRef.get(event.scenarioRef);
+            if (scenario) // unknown scenarioRef: the summary is silently skipped
+                scenario.overallPassed = event.overallPassed;
+        }
+        else if (event.type === "evalError") {
+            errors.push(event.message);
+        }
+    }
+    return {
+        scenarios: Array.from(scenariosByRef.values()), // Map iteration keeps insertion (first-seen) order
+        errors
+    };
+}
+function formatToolCalls(step) { // renders the step's tool evaluations as call-like text for the "Tool" row
+    var _a;
+    if (!((_a = step.toolEvaluations) === null || _a === void 0 ? void 0 : _a.length))
+        return "—"; // missing or empty toolEvaluations
+    return step.toolEvaluations
+        .map((toolEval) => {
+        if (!Object.keys(toolEval.actualParameters || {}).length) { // no recorded parameters: render as name()
+            return `${toolEval.actualTool}()`;
+        }
+        const params = JSON.stringify(toolEval.actualParameters || {}, null, 1) // indented JSON, split into lines below
+            .split("\n")
+            .map((line, idx, arr) => {
+            if (idx === 0 || idx === arr.length - 1) // blank out the outer "{" and "}" lines; they are re-added in the template below
+                return "";
+            return " " + line.trim();
+        })
+            .join("\n");
+        return `${toolEval.actualTool}({${params}})`;
+    })
+        .join(" "); // multiple tool calls are separated by a single space
+}
+const LABEL_PAD = 14; // "AI Expected:  " etc.; formatLine pads "label:" to LABEL_PAD + 1 characters
+function formatLine(label, value) { // builds one indented "label: value" row; newlines in value are replaced by spaces so the row stays on one line
+    return `  ${(label + ":").padEnd(LABEL_PAD + 1)} ${value.replace(/\n/g, " ")}`;
+}
+/**
+ * Prints a single step result in vertical layout (Step, Human, AI Expected, AI Actual, Tool, Passed).
+ *
+ * Output goes to stdout via console.log. Any step after the first is preceded by a
+ * blank separator line. A failed step's errorMessage, and the errorMessage of each
+ * failed tool evaluation, are printed in red after the "Passed" row.
+ *
+ * @param _scenarioRef Not read by this function.
+ * @param stepIndex Zero-based index of the step within its scenario (displayed as stepIndex + 1).
+ * @param step Report to print. Falsy humanInput/expectedResponse render as "—" and a
+ *   falsy aiResponse as "(none)".
+ */
+function printStepResult(_scenarioRef, stepIndex, step) {
+    if (stepIndex > 0)
+        console.log("");
+    const toolText = formatToolCalls(step);
+    const passedStr = step.passed
+        ? ansis_1.default.green("✔ Passed")
+        : ansis_1.default.red("✘ Failed");
+    console.log(ansis_1.default.bold(`  Step: ${stepIndex + 1}`));
+    console.log(formatLine("Human", step.humanInput || "—"));
+    console.log(formatLine("AI Expected", step.expectedResponse || "—"));
+    console.log(formatLine("AI Actual", step.aiResponse || "(none)"));
+    console.log(formatLine("Tool", toolText));
+    console.log(formatLine("Passed", passedStr));
+    if (!step.passed && step.errorMessage) { // step-level failure detail
+        console.log(ansis_1.default.red(`    ${step.errorMessage}`));
+    }
+    if (step.toolEvaluations) {
+        for (const toolEval of step.toolEvaluations) {
+            if (!toolEval.passed && toolEval.errorMessage) { // per-tool failure detail, printed regardless of step.passed
+                console.log(ansis_1.default.red(`    Tool: ${toolEval.errorMessage}`));
             }
-        });
-        console.log(table.toString());
-    });
+        }
+    }
+}
+/**
+ * Prints scenario header (call once before first step of a scenario).
+ *
+ * Writes a blank line, a bold blue "Scenario: <ref>" title, and a dim rule of "—" characters.
+ *
+ * @param scenarioRef Scenario identifier shown in the title.
+ */
+function printScenarioHeader(scenarioRef) {
+    console.log("");
+    console.log(ansis_1.default.bold.blue(`Scenario: ${scenarioRef}`));
+    console.log(ansis_1.default.dim("—".repeat(Math.min(60, process.stdout.columns || 60)))); // rule width: terminal columns capped at 60; 60 when columns is unavailable
+}
+/**
+ * Prints scenario completion summary.
+ *
+ * @param scenarioRef Not read by this function; only the pass/fail state is printed.
+ * @param overallPassed Selects the green "✔ Passed" or red "✘ Failed" marker.
+ */
+function printScenarioSummary(scenarioRef, overallPassed) {
+    console.log(ansis_1.default.bold(`  Overall: ${overallPassed ? ansis_1.default.green("✔ Passed") : ansis_1.default.red("✘ Failed")}`));
+}
+/**
+ * Prints an eval error event.
+ *
+ * Writes a blank line, a red "— Eval error —" banner, and the indented message, all
+ * through console.log (stdout, not stderr).
+ *
+ * @param message Error text to display.
+ */
+function printEvalError(message) {
+    console.log("");
+    console.log(ansis_1.default.red("— Eval error —"));
+    console.log(ansis_1.default.red(`  ${message}`));
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/ctl",
-  "version": "0.17.1",
+  "version": "0.18.0",
   "description": "Fonoster Control Tool",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -30,7 +30,7 @@
   "bugs": {
     "url": "https://github.com/fonoster/fonoster/issues"
   },
-  "gitHead": "404c745620283a15fd16b96e160c5fcd755e545e",
+  "gitHead": "051f172b266db965cf1d1366f563da995a29a93d",
   "bin": {
     "fonoster": "./bin/run.js"
   },
@@ -51,7 +51,7 @@
     }
   },
   "dependencies": {
-    "@fonoster/sdk": "^0.17.1",
+    "@fonoster/sdk": "^0.18.0",
     "@inquirer/prompts": "^7.1.0",
     "@oclif/core": "^4.0.34",
     "@oclif/plugin-warn-if-update-available": "^3.1.28",