@fonoster/ctl 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,8 @@ export default class EvalIntelligence extends AuthenticatedCommand<typeof EvalIn
4
4
  static readonly examples: string[];
5
5
  static readonly flags: {
6
6
  file: import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
7
+ output: import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
8
+ "output-file": import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
7
9
  };
8
10
  run(): Promise<void>;
9
11
  }
@@ -41,6 +41,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
41
41
  step((generator = generator.apply(thisArg, _arguments || [])).next());
42
42
  });
43
43
  };
44
+ var __asyncValues = (this && this.__asyncValues) || function (o) {
45
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
46
+ var m = o[Symbol.asyncIterator], i;
47
+ return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
48
+ function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
49
+ function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
50
+ };
44
51
  Object.defineProperty(exports, "__esModule", { value: true });
45
52
  /**
46
53
  * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
@@ -72,6 +79,8 @@ const printEval_1 = require("../../utils/printEval");
72
79
  class EvalIntelligence extends AuthenticatedCommand_1.AuthenticatedCommand {
73
80
  run() {
74
81
  return __awaiter(this, void 0, void 0, function* () {
82
+ var _a, e_1, _b, _c;
83
+ var _d;
75
84
  const { flags } = yield this.parse(EvalIntelligence);
76
85
  const client = yield this.createSdkClient();
77
86
  const applications = new SDK.Applications(client);
@@ -104,28 +113,93 @@ class EvalIntelligence extends AuthenticatedCommand_1.AuthenticatedCommand {
104
113
  rawAutopilotApplication.intelligence.config.testCases.scenarios =
105
114
  mappedScenarios;
106
115
  const parsedAutopilotApplication = common_1.assistantSchema.parse(rawAutopilotApplication.intelligence.config);
107
- // We only need the intelligence portion of the application
108
116
  const autopilotApplication = {
109
117
  intelligence: {
110
118
  productRef: rawAutopilotApplication.intelligence.productRef,
111
119
  config: parsedAutopilotApplication
112
120
  }
113
121
  };
114
- const response = yield applications.evaluateIntelligence(autopilotApplication);
115
- (0, printEval_1.printEval)(response.results);
122
+ const stream = applications.evaluateIntelligence(autopilotApplication);
123
+ const outputJson = flags.output === "json";
124
+ const writeOutputFile = Boolean(flags["output-file"]);
125
+ const collectEvents = outputJson || writeOutputFile;
126
+ const events = [];
127
+ let currentScenarioRef = null;
128
+ const stepIndexByScenario = new Map();
129
+ try {
130
+ for (var _e = true, stream_1 = __asyncValues(stream), stream_1_1; stream_1_1 = yield stream_1.next(), _a = stream_1_1.done, !_a; _e = true) {
131
+ _c = stream_1_1.value;
132
+ _e = false;
133
+ const event = _c;
134
+ if (collectEvents)
135
+ events.push(event);
136
+ if (outputJson)
137
+ continue;
138
+ if (event.type === "stepResult") {
139
+ if (currentScenarioRef !== event.scenarioRef) {
140
+ currentScenarioRef = event.scenarioRef;
141
+ (0, printEval_1.printScenarioHeader)(event.scenarioRef);
142
+ stepIndexByScenario.set(event.scenarioRef, 0);
143
+ }
144
+ const stepIndex = (_d = stepIndexByScenario.get(event.scenarioRef)) !== null && _d !== void 0 ? _d : 0;
145
+ (0, printEval_1.printStepResult)(event.scenarioRef, stepIndex, event.stepResult);
146
+ stepIndexByScenario.set(event.scenarioRef, stepIndex + 1);
147
+ }
148
+ else if (event.type === "scenarioSummary") {
149
+ (0, printEval_1.printScenarioSummary)(event.scenarioRef, event.overallPassed);
150
+ }
151
+ else if (event.type === "evalError") {
152
+ (0, printEval_1.printEvalError)(event.message);
153
+ }
154
+ }
155
+ }
156
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
157
+ finally {
158
+ try {
159
+ if (!_e && !_a && (_b = stream_1.return)) yield _b.call(stream_1);
160
+ }
161
+ finally { if (e_1) throw e_1.error; }
162
+ }
163
+ if (!collectEvents)
164
+ return;
165
+ const summary = (0, printEval_1.buildEvalSummary)(events);
166
+ const jsonString = JSON.stringify(summary, null, 2);
167
+ if (outputJson) {
168
+ if (writeOutputFile && flags["output-file"]) {
169
+ fs.writeFileSync(flags["output-file"], jsonString, "utf8");
170
+ }
171
+ else {
172
+ console.log(jsonString);
173
+ }
174
+ }
175
+ else if (writeOutputFile && flags["output-file"]) {
176
+ fs.writeFileSync(flags["output-file"], jsonString, "utf8");
177
+ }
116
178
  });
117
179
  }
118
180
  }
119
181
  EvalIntelligence.description = "experimental command to test an Autopilot application";
120
182
  EvalIntelligence.examples = [
121
183
  "<%= config.bin %> <%= command.id %> -f assistant.json",
122
- "<%= config.bin %> <%= command.id %> -f assistant.yaml"
184
+ "<%= config.bin %> <%= command.id %> -f assistant.yaml",
185
+ "<%= config.bin %> <%= command.id %> -f assistant.yaml -o json",
186
+ "<%= config.bin %> <%= command.id %> -f assistant.yaml -o json --output-file results.json"
123
187
  ];
124
188
  EvalIntelligence.flags = {
125
189
  file: core_1.Flags.string({
126
190
  char: "f",
127
191
  description: "path to test cases file (json, yaml, or yml)",
128
192
  required: true
193
+ }),
194
+ output: core_1.Flags.string({
195
+ char: "o",
196
+ description: "output format",
197
+ options: ["pretty", "json"],
198
+ default: "pretty"
199
+ }),
200
+ "output-file": core_1.Flags.string({
201
+ description: "write JSON summary to this file (with pretty: also show streamed output)",
202
+ required: false
129
203
  })
130
204
  };
131
205
  exports.default = EvalIntelligence;
@@ -1,20 +1,25 @@
1
+ import type { EvaluateIntelligenceEvent, ScenarioEvaluationReport, StepEvaluationReport } from "@fonoster/types";
2
+ export type EvalSummary = {
3
+ scenarios: ScenarioEvaluationReport[];
4
+ errors: string[];
5
+ };
1
6
  /**
2
- * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
3
- * http://github.com/fonoster/fonoster
4
- *
5
- * This file is part of Fonoster
6
- *
7
- * Licensed under the MIT License (the "License");
8
- * you may not use this file except in compliance with
9
- * the License. You may obtain a copy of the License at
10
- *
11
- * https://opensource.org/licenses/MIT
12
- *
13
- * Unless required by applicable law or agreed to in writing, software
14
- * distributed under the License is distributed on an "AS IS" BASIS,
15
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
- * See the License for the specific language governing permissions and
17
- * limitations under the License.
7
+ * Builds a single JSON summary from streamed eval events.
18
8
  */
19
- import { ScenarioEvaluationReport } from "@fonoster/types";
20
- export declare function printEval(results: ScenarioEvaluationReport[]): void;
9
+ export declare function buildEvalSummary(events: EvaluateIntelligenceEvent[]): EvalSummary;
10
+ /**
11
+ * Prints a single step result in vertical layout (Step, Human, AI Expected, AI Actual, Tool, Passed).
12
+ */
13
+ export declare function printStepResult(_scenarioRef: string, stepIndex: number, step: StepEvaluationReport): void;
14
+ /**
15
+ * Prints scenario header (call once before first step of a scenario).
16
+ */
17
+ export declare function printScenarioHeader(scenarioRef: string): void;
18
+ /**
19
+ * Prints scenario completion summary.
20
+ */
21
+ export declare function printScenarioSummary(scenarioRef: string, overallPassed: boolean): void;
22
+ /**
23
+ * Prints an eval error event.
24
+ */
25
+ export declare function printEvalError(message: string): void;
@@ -3,78 +3,134 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.printEval = printEval;
6
+ exports.buildEvalSummary = buildEvalSummary;
7
+ exports.printStepResult = printStepResult;
8
+ exports.printScenarioHeader = printScenarioHeader;
9
+ exports.printScenarioSummary = printScenarioSummary;
10
+ exports.printEvalError = printEvalError;
11
+ /**
12
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
13
+ * http://github.com/fonoster/fonoster
14
+ *
15
+ * This file is part of Fonoster
16
+ *
17
+ * Licensed under the MIT License (the "License");
18
+ * you may not use this file except in compliance with
19
+ * the License. You may obtain a copy of the License at
20
+ *
21
+ * https://opensource.org/licenses/MIT
22
+ *
23
+ * Unless required by applicable law or agreed to in writing, software
24
+ * distributed under the License is distributed on an "AS IS" BASIS,
25
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26
+ * See the License for the specific language governing permissions and
27
+ * limitations under the License.
28
+ */
7
29
  const ansis_1 = __importDefault(require("ansis"));
8
- const cli_table3_1 = __importDefault(require("cli-table3"));
9
- function printEval(results) {
10
- results.forEach((result) => {
11
- console.log(ansis_1.default.bold.blue(`\nScenario: ${result.scenarioRef}`));
12
- console.log(ansis_1.default.bold(`Overall Passed: ${result.overallPassed ? ansis_1.default.green("✔") : ansis_1.default.red("✘")}`));
13
- const table = new cli_table3_1.default({
14
- head: [
15
- "Step",
16
- "Human Input",
17
- "Expected",
18
- "AI Response",
19
- "Tool Calls",
20
- "Passed"
21
- ],
22
- colWidths: [
23
- 6, // Step
24
- 28, // Human Input
25
- 28, // Expected
26
- 28, // AI Response
27
- null, // Tool Calls - dynamic width
28
- 8 // Passed
29
- ],
30
- wordWrap: true
31
- });
32
- result.steps.forEach((step, index) => {
33
- // Format tool evaluations if they exist
34
- let toolEvalText = "";
35
- if (step.toolEvaluations && step.toolEvaluations.length > 0) {
36
- toolEvalText = step.toolEvaluations
37
- .map((toolEval) => {
38
- if (!Object.keys(toolEval.actualParameters || {}).length) {
39
- return `${toolEval.actualTool}()`;
40
- }
41
- const params = JSON.stringify(toolEval.actualParameters || {}, null, 1)
42
- .split("\n")
43
- .map((line, index, arr) => {
44
- if (index === 0)
45
- return "";
46
- if (index === arr.length - 1)
47
- return "";
48
- return " " + line.trim();
49
- })
50
- .join("\n");
51
- return `${toolEval.actualTool}({${params}})`;
52
- })
53
- .join("\n\n"); // Add extra line between multiple tool calls
30
+ /**
31
+ * Builds a single JSON summary from streamed eval events.
32
+ */
33
+ function buildEvalSummary(events) {
34
+ const scenariosByRef = new Map();
35
+ const errors = [];
36
+ for (const event of events) {
37
+ if (event.type === "stepResult") {
38
+ let scenario = scenariosByRef.get(event.scenarioRef);
39
+ if (!scenario) {
40
+ scenario = {
41
+ scenarioRef: event.scenarioRef,
42
+ overallPassed: false,
43
+ steps: []
44
+ };
45
+ scenariosByRef.set(event.scenarioRef, scenario);
54
46
  }
55
- table.push([
56
- index + 1,
57
- step.humanInput,
58
- step.expectedResponse,
59
- step.aiResponse,
60
- toolEvalText,
61
- step.passed ? ansis_1.default.green("✔") : ansis_1.default.red("✘")
62
- ]);
63
- // Print error message if step failed
64
- if (!step.passed && step.errorMessage) {
65
- console.log(ansis_1.default.red(`\nError in step ${index + 1}:`));
66
- console.log(ansis_1.default.red(step.errorMessage));
67
- }
68
- // Print tool evaluation errors if any
69
- if (step.toolEvaluations) {
70
- step.toolEvaluations.forEach((toolEval) => {
71
- if (!toolEval.passed && toolEval.errorMessage) {
72
- console.log(ansis_1.default.red(`\nTool Error in step ${index + 1}:`));
73
- console.log(ansis_1.default.red(toolEval.errorMessage));
74
- }
75
- });
47
+ scenario.steps.push(event.stepResult);
48
+ }
49
+ else if (event.type === "scenarioSummary") {
50
+ const scenario = scenariosByRef.get(event.scenarioRef);
51
+ if (scenario)
52
+ scenario.overallPassed = event.overallPassed;
53
+ }
54
+ else if (event.type === "evalError") {
55
+ errors.push(event.message);
56
+ }
57
+ }
58
+ return {
59
+ scenarios: Array.from(scenariosByRef.values()),
60
+ errors
61
+ };
62
+ }
63
+ function formatToolCalls(step) {
64
+ var _a;
65
+ if (!((_a = step.toolEvaluations) === null || _a === void 0 ? void 0 : _a.length))
66
+ return "—";
67
+ return step.toolEvaluations
68
+ .map((toolEval) => {
69
+ if (!Object.keys(toolEval.actualParameters || {}).length) {
70
+ return `${toolEval.actualTool}()`;
71
+ }
72
+ const params = JSON.stringify(toolEval.actualParameters || {}, null, 1)
73
+ .split("\n")
74
+ .map((line, idx, arr) => {
75
+ if (idx === 0 || idx === arr.length - 1)
76
+ return "";
77
+ return " " + line.trim();
78
+ })
79
+ .join("\n");
80
+ return `${toolEval.actualTool}({${params}})`;
81
+ })
82
+ .join(" ");
83
+ }
84
+ const LABEL_PAD = 14; // "AI Expected: " etc.
85
+ function formatLine(label, value) {
86
+ return ` ${(label + ":").padEnd(LABEL_PAD + 1)} ${value.replace(/\n/g, " ")}`;
87
+ }
88
+ /**
89
+ * Prints a single step result in vertical layout (Step, Human, AI Expected, AI Actual, Tool, Passed).
90
+ */
91
+ function printStepResult(_scenarioRef, stepIndex, step) {
92
+ if (stepIndex > 0)
93
+ console.log("");
94
+ const toolText = formatToolCalls(step);
95
+ const passedStr = step.passed
96
+ ? ansis_1.default.green("✔ Passed")
97
+ : ansis_1.default.red("✘ Failed");
98
+ console.log(ansis_1.default.bold(` Step: ${stepIndex + 1}`));
99
+ console.log(formatLine("Human", step.humanInput || "—"));
100
+ console.log(formatLine("AI Expected", step.expectedResponse || "—"));
101
+ console.log(formatLine("AI Actual", step.aiResponse || "(none)"));
102
+ console.log(formatLine("Tool", toolText));
103
+ console.log(formatLine("Passed", passedStr));
104
+ if (!step.passed && step.errorMessage) {
105
+ console.log(ansis_1.default.red(` ${step.errorMessage}`));
106
+ }
107
+ if (step.toolEvaluations) {
108
+ for (const toolEval of step.toolEvaluations) {
109
+ if (!toolEval.passed && toolEval.errorMessage) {
110
+ console.log(ansis_1.default.red(` Tool: ${toolEval.errorMessage}`));
76
111
  }
77
- });
78
- console.log(table.toString());
79
- });
112
+ }
113
+ }
114
+ }
115
+ /**
116
+ * Prints scenario header (call once before first step of a scenario).
117
+ */
118
+ function printScenarioHeader(scenarioRef) {
119
+ console.log("");
120
+ console.log(ansis_1.default.bold.blue(`Scenario: ${scenarioRef}`));
121
+ console.log(ansis_1.default.dim("—".repeat(Math.min(60, process.stdout.columns || 60))));
122
+ }
123
+ /**
124
+ * Prints scenario completion summary.
125
+ */
126
+ function printScenarioSummary(scenarioRef, overallPassed) {
127
+ console.log(ansis_1.default.bold(` Overall: ${overallPassed ? ansis_1.default.green("✔ Passed") : ansis_1.default.red("✘ Failed")}`));
128
+ }
129
+ /**
130
+ * Prints an eval error event.
131
+ */
132
+ function printEvalError(message) {
133
+ console.log("");
134
+ console.log(ansis_1.default.red("— Eval error —"));
135
+ console.log(ansis_1.default.red(` ${message}`));
80
136
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/ctl",
3
- "version": "0.17.1",
3
+ "version": "0.18.0",
4
4
  "description": "Fonoster Control Tool",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -30,7 +30,7 @@
30
30
  "bugs": {
31
31
  "url": "https://github.com/fonoster/fonoster/issues"
32
32
  },
33
- "gitHead": "404c745620283a15fd16b96e160c5fcd755e545e",
33
+ "gitHead": "051f172b266db965cf1d1366f563da995a29a93d",
34
34
  "bin": {
35
35
  "fonoster": "./bin/run.js"
36
36
  },
@@ -51,7 +51,7 @@
51
51
  }
52
52
  },
53
53
  "dependencies": {
54
- "@fonoster/sdk": "^0.17.1",
54
+ "@fonoster/sdk": "^0.18.0",
55
55
  "@inquirer/prompts": "^7.1.0",
56
56
  "@oclif/core": "^4.0.34",
57
57
  "@oclif/plugin-warn-if-update-available": "^3.1.28",